From 10a9e70cb8eb4ea56a59210e192213e2236e670a Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Thu, 2 May 2019 20:54:20 -0500
Subject: [PATCH] deleting dedundant code

---
 workflow/process_scripts                   |   2 +-
 workflow/scripts/DEA_htseq.R               |   1 -
 workflow/scripts/build_ballgown.R          |  16 --
 workflow/scripts/concat_cts.pl             | 101 ---------
 workflow/scripts/concat_edgeR.pl           |  31 ---
 workflow/scripts/concat_fpkm.pl            |  63 ------
 workflow/scripts/dea.R                     | 242 ---------------------
 workflow/scripts/gencode_genename.pl       |  22 --
 workflow/scripts/uniform_integrated_vcf.pl |  63 ------
 9 files changed, 1 insertion(+), 540 deletions(-)
 delete mode 120000 workflow/scripts/DEA_htseq.R
 delete mode 100755 workflow/scripts/build_ballgown.R
 delete mode 100755 workflow/scripts/concat_cts.pl
 delete mode 100755 workflow/scripts/concat_edgeR.pl
 delete mode 100755 workflow/scripts/concat_fpkm.pl
 delete mode 100644 workflow/scripts/dea.R
 delete mode 100755 workflow/scripts/gencode_genename.pl
 delete mode 100755 workflow/scripts/uniform_integrated_vcf.pl

diff --git a/workflow/process_scripts b/workflow/process_scripts
index 93d52e9..56d8030 160000
--- a/workflow/process_scripts
+++ b/workflow/process_scripts
@@ -1 +1 @@
-Subproject commit 93d52e978cfe4f4bdc5f71e2e9a1af15ed60cf80
+Subproject commit 56d8030ee74ba8665eed40d74cc18caf359d7423
diff --git a/workflow/scripts/DEA_htseq.R b/workflow/scripts/DEA_htseq.R
deleted file mode 120000
index 03646f1..0000000
--- a/workflow/scripts/DEA_htseq.R
+++ /dev/null
@@ -1 +0,0 @@
-dea.R
\ No newline at end of file
diff --git a/workflow/scripts/build_ballgown.R b/workflow/scripts/build_ballgown.R
deleted file mode 100755
index b74bfa2..0000000
--- a/workflow/scripts/build_ballgown.R
+++ /dev/null
@@ -1,16 +0,0 @@
-library(ballgown)
-
-args<-commandArgs(TRUE)
-
-sample_dirs <- args
-samples <- gsub('_stringtie','',sample_dirs)
-design <- "design.txt"
-
-samtbl <- read.table(file=design,header=TRUE,sep='\t')
-
-bg <- ballgown(samples=sample_dirs, meas='all')
-samples <- gsub('_stringtie','',sampleNames(bg))
-mergetbl <- merge(as.data.frame(samples),samtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
-pData(bg) = data.frame(id=samples, group=as.character(mergetbl$SampleGroup))
-
-save(bg,file="bg.rda")
diff --git a/workflow/scripts/concat_cts.pl b/workflow/scripts/concat_cts.pl
deleted file mode 100755
index 7626a06..0000000
--- a/workflow/scripts/concat_cts.pl
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/perl -w
-#merge_tables.pl
-
-use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
-use File::Basename;
-
-my %opt = ();
-my $results = GetOptions (\%opt,'genenames|g=s','outdir|o=s','help|h');
-
-#$gnames = $opt{genenames};
-
-open SYM, "<genenames.txt" or die $!;
-my %symbs = ();
-while (my $line = <SYM>) {
-    chomp($line);
-    my ($chrom,$start,$end,$ensembl,$symbol,$type) = split(/\t/,$line);
-    $ensembl = (split(/\./,$ensembl))[0];
-    $names{$symbol} = {ensembl=>$ensembl,type=>$type};
-}
-
-my @files = @ARGV;
-foreach $file (@files) {
-    chomp($file);
-    open IN, "<$file" or die $!;
-    $fname = basename($file);
-    my $sample = (split(/\./,$fname))[0];
-    $samps{$sample} = 1;
-    my $command = <IN>;
-    my $head = <IN>;
-    chomp($head);
-    my @colnames = split(/\t/,$head);
-    while (my $line = <IN>) {
-	chomp($line);
-	my @row = split(/\t/,$line);
-	my $gene = $row[0];
-	my $ct = $row[-1];
-	my $type = $names{$gene}{'type'};
-	$type = $gene unless ($names{$gene}{'type'});
-	$types{$type} = 1;
-	$total{$sample} += $ct;
-	$readcts{$sample}{$type} += $ct;
-	next if($gene =~ m/^__/);
-	$cts{$gene}{$sample} = $ct;
-    }
-    close IN;
-}
-
-my @gtypes = sort {$a cmp $b} keys %types;
-open OUT, ">$opt{outdir}\/countTable.stats.txt" or die $!;
-print OUT join("\t","Sample","Type","ReadPerc","ReadCt","TotalReads"),"\n";
-
-foreach $sample (sort {$a cmp $b} keys %readcts) {
-    my @line;
-    my $total = 0;
-    foreach $type (@gtypes) {
-	$ct = 0;
-	$ct = sprintf("%.2e",$readcts{$sample}{$type}/$total{$sample}) if ($readcts{$sample}{$type});
-	print OUT join("\t",$sample,$type,$ct,$readcts{$sample}{$type},$total{$sample}),"\n";
-    }
-}
-
-my %exc = (HBB=>1,HBD=>1,HBBP1=>1,
-	   HBG1=>1,HBG2=>1,HBE1=>1,
-	   HBZ=>1,HBZP1=>1,HBA2=>1,HBA1=>1);
-
-open OUT, ">$opt{outdir}\/countTable.txt" or die $!;
-open CPM, ">$opt{outdir}\/countTable.logCPM.txt" or die $!;
-my @samples = sort {$a cmp $b} keys %samps;
-print OUT join("\t",'ENSEMBL','SYMBOL','TYPE',@samples),"\n";
-print CPM join("\t",'ENSEMBL','SYMBOL','TYPE',@samples),"\n";
-
-foreach $gene (keys %cts) {
-    my @line;
-    my @cpm;
-    foreach $sample (@samples) {
-	unless ($cts{$gene}{$sample}) {
-	    $cts{$gene}{$sample} = 0;
-	}
-	$cpm = ($cts{$gene}{$sample}/$total{$sample})*1e6;
-	push @cpm, sprintf("%.2f",log2($cpm));
-	push @line, sprintf("%0.f",$cts{$gene}{$sample});
-    }
-    my @sortct = sort {$b cmp $a} @cpm;
-    #next unless ($sortct[0] >= 1);
-    next if ($names{$gene}{type} eq 'rRNA');
-    next if ($exc{$gene});
-    print OUT join("\t",$names{$gene}{ensembl},$gene,
-		   $names{$gene}{type},@line),"\n";
-    print CPM join("\t",$names{$gene}{ensembl},$gene,
-		   $names{$gene}{type},@cpm),"\n";
-}
-close OUT;
-close CPM;
-sub log2 {
-    $n = shift @_;
-    if ($n < 1) {
-	return 0;
-    }else {
-	return(log($n)/log(2));
-    }
-}
diff --git a/workflow/scripts/concat_edgeR.pl b/workflow/scripts/concat_edgeR.pl
deleted file mode 100755
index e736e82..0000000
--- a/workflow/scripts/concat_edgeR.pl
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/perl -w
-#concat_edgeR.pl
-
-#symbol	chrom	start	end	ensembl	type	logFC	logCPM	PValue	monocytes	neutrophils	rawP	fdr	bonf
-
-my @files = @ARGV;
-open OUT, ">edgeR.results.txt" or die $!;
-print OUT join("\t",'G1','G2','ENSEMBL','SYMBOL','TYPE','logFC','logCPM',
-	       'pval','fdr','G1.Mean','G2.Mean'),"\n";
-foreach $f (@files) {
-    open IN, "<$f" or die $!;
-    my ($fname,$ext) = split(/\.edgeR\./,$f);
-    $fname =~ s/edgeR\///;
-    my ($g1,$g2) = split(/_/,$fname);
-    my $head = <IN>;
-    chomp($head);
-    my @colnames = split(/\t/,$head);
-    while (my $line = <IN>) {
-	chomp($line);
-	my @row = split(/\t/,$line);
-	my %hash;
-	foreach my $i (0..$#row) {
-	    $hash{$colnames[$i]} = $row[$i];
-	}
-	print OUT join("\t",$g1,$g2,$hash{ensembl},$hash{symbol},$hash{type},
-		      sprintf("%.2f",$hash{logFC}),
-		       sprintf("%.2f",$hash{logCPM}),
-		       sprintf("%.1e",$hash{rawP}),sprintf("%.1e",$hash{fdr}),
-		       sprintf("%.0f",$hash{$g1}),sprintf("%.0f",$hash{$g2})),"\n";
-    }
-}
diff --git a/workflow/scripts/concat_fpkm.pl b/workflow/scripts/concat_fpkm.pl
deleted file mode 100755
index a0eebb1..0000000
--- a/workflow/scripts/concat_fpkm.pl
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/perl -w
-#merge_tables.pl
-
-use Getopt::Long qw(:config no_ignore_case no_auto_abbrev);
-use File::Basename;
-
-my %opt = ();
-my $results = GetOptions (\%opt,'genenames|g=s','outdir|o=s','help|h');
-
-#$gnames = $opt{genenames};
-
-open SYM, "<genenames.txt" or die $!;
-my %symbs = ();
-while (my $line = <SYM>) {
-    chomp($line);
-    my ($chrom,$start,$end,$ensembl,$symbol,$type) = split(/\t/,$line);
-    $ensembl = (split(/\./,$ensembl))[0];
-    $names{$symbol} = {ensembl=>$ensembl,type=>$type};
-}
-
-my @files = @ARGV;
-foreach $file (@files) {
-    chomp($file);
-    open IN, "<$file" or die $!;
-    $fname = basename($file);
-    my $sample = (split(/\./,$fname))[0];
-    $samps{$sample} = 1;
-    my $command = <IN>;
-    my $head = <IN>;
-    chomp($head);
-    my @colnames = split(/\t/,$head);
-    while (my $line = <IN>) {
-	chomp($line);
-	my ($ensid,$gene,$chr,$strand,$start,$end,$cov,$fpkm,$tmp) = split(/\t/,$line);
-	$cts{$gene}{$sample} = $fpkm;
-    }
-    close IN;
-}
-
-my %exc = (HBB=>1,HBD=>1,HBBP1=>1,
-	   HBG1=>1,HBG2=>1,HBE1=>1,
-	   HBZ=>1,HBZP1=>1,HBA2=>1,HBA1=>1);
-
-open OUT, ">$opt{outdir}\/countTable.fpkm.txt" or die $!;
-my @samples = sort {$a cmp $b} keys %samps;
-print OUT join("\t",'ENSEMBL','SYMBOL','TYPE',@samples),"\n";
-
-foreach $gene (keys %cts) {
-    my @line;
-    foreach $sample (@samples) {
-	unless ($cts{$gene}{$sample}) {
-	    $cts{$gene}{$sample} = 0;
-	}
-	push @line, $cts{$gene}{$sample};
-    }
-    my @sortct = sort {$b cmp $a} @line;
-    #next unless ($sortct[0] >= 1);
-    next if ($names{$gene}{type} eq 'rRNA');
-    next if ($exc{$gene});
-    print OUT join("\t",$names{$gene}{ensembl},$gene,
-		   $names{$gene}{type},@line),"\n";
-}
-close OUT;
diff --git a/workflow/scripts/dea.R b/workflow/scripts/dea.R
deleted file mode 100644
index 4327db1..0000000
--- a/workflow/scripts/dea.R
+++ /dev/null
@@ -1,242 +0,0 @@
-#!/cm/shared/apps/R/intel/3.2.1/bin/Rscript
-library(edgeR)
-library(DESeq2)
-library("RColorBrewer")
-library("gplots")
-library(qusage)
-
-rowMax <- function(x) apply(x,1,max)
-col.grp <- function(n,b) {
-  colorvec <- vector(mode="character", length=length(n))
-  plotcols <- rainbow(length(b))
-  for (i in 1:length(n)) {
-    for (j in 1:length(b)) {
-      if ( n[i] == b[j] ) {
-        colorvec[i] = plotcols[j]
-      }
-    }
-  }
-  c(colorvec)
-}
-
-
-#################### Read in Data ################################
-genenames <- read.table(file="genenames.txt",header=TRUE,sep='\t')
-
-tbl <- read.table('countTable.txt',header=TRUE,sep="\t")
-tbl2 <- read.table('countTable.logCPM.txt',header=TRUE,sep="\t")
-ct <- tbl[,4:length(tbl)]
-row.names(ct) <- tbl$ENSEMBL
-
-samples<- names(ct)
-
-dtbl <- read.table('design.txt',header=TRUE,sep="\t")
-samtbl <- merge(as.data.frame(samples),dtbl,by.x="samples",by.y="SampleID",all.x=TRUE,sort=FALSE)
-grpnames <- levels(factor(as.character(samtbl$SampleGroup)))
-samtbl$SampleGroup <- factor(samtbl$SampleGroup, levels=grpnames)
-
-colData <- samtbl[c('SampleGroup','SubjectID')]
-row.names(colData) <- samtbl$samples
-
-dds <- DESeqDataSetFromMatrix(countData=ct,colData= colData,design= ~ SampleGroup)
-dds <- dds[ rowMax(counts(dds)) > 30, ]
-dds <- dds[ colSums(counts(dds)) > 1000000]
-
-if (nrow(counts(dds)) < 1) {
-  print(paste("Samples are filtered if there is < 1M reads.  There are less than no remaining sample(s) after this filter",sep=' '))
-  q()
-}
-
-countTable <- counts(dds)
-#grps <- as.character(levels(colData(dds)$SampleGroup))
-grps <- as.character(colData(dds)$SampleGroup)
-col.blocks <-col.grp(grps,levels(factor(grps)))
-libSizes <- as.vector(colSums(countTable))
-
-logcpm <- tbl2[,4:length(tbl2)]
-row.names(logcpm) <- tbl2$SYMBOL
-
-tmp.tab <- aggregate(t(logcpm),by=list(as.character(samtbl$SampleGroup)),FUN=mean)
-row.names(tmp.tab) <- tmp.tab$Group.1
-mean.by.group <- round(log2(t(tmp.tab[,c(2:ncol(tmp.tab))])+1), digits = 2)
-
-#################### Run DESEQ2 ################################
-dds <- DESeq(dds)
-rld <- rlogTransformation(dds, blind=TRUE)
-sampleDists <- dist(t(assay(rld)))
-
-png(file="samples_heatmap.png",bg ="white",height=768,width=1024)
-par(mar=c(7,4,4,2)+0.1) 
-heatmap.2(as.matrix(sampleDists), col = bluered(100),RowSideColors = col.blocks,srtRow=45,srtCol=45,trace="none", margins=c(8,8), cexRow = 1.5, cexCol = 1.5)
-dev.off()
-
-#Compare Samples using PCA
-png(file="pca.png",bg ="white",height=768,width=1024)
-print(plotPCA(rld, intgroup="SampleGroup"),col.hab=col.blocks)
-dev.off()
-
-#Do all pairwise comparisons
-contrast <- resultsNames(dds)
-cond<- levels(colData(dds)$SampleGroup)
-a <- length(cond)-1
-for (i in 1:a) {
-  for (j in 2:length(cond)) {
-    if (i == j) {
-      next
-    } else {
-      res <- as.data.frame(results(dds,contrast=c("SampleGroup",cond[i],cond[j])))
-      res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
-      output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
-      output$rawP <- output$pvalue
-      output$logFC <- output$log2FoldChange
-      output$fdr <- output$padj
-      output$bonf <- p.adjust(output$rawP, method ='bonferroni')
-      write.table(output,file=paste(cond[i],'_',cond[j],'.deseq2.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
-      filt.out <- na.omit(output[output$fdr < 0.05,])
-      if (nrow(filt.out) > 2) {
-      	 subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
-	 subset <- subset[!apply(subset, 1, function(x) {any(x == 0)}),]
-      	 gnames <- filt.out[c('ensembl','symbol')]
-      	 s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
-      	 STREE <- hclust(dist(t(subset)))
-      	 zscores <- scale(t(subset))
-      	 ngenes <- length(colnames(zscores))
-      	 textscale <- (1/(ngenes/30))
-      	 if (textscale > 1) {
-            textscale <-1
-      	 }
-      	 if (textscale < 0.1) {
-            textscale <- 0.1
-      	 }
-      	 png(file=paste(cond[i],'_',cond[j],'.heatmap.deseq2.png',sep=""),height=768,width=1024)
-      	 heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
-      	 legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
-      	 dev.off()
-      }
-    }
-  }
-}
-
-###################### Run EdgeR ########################
-###################### Run QuSage ######################
-
-MSIG.geneSets <- read.gmt('geneset.gmt')
-
-design <- model.matrix(~grps)
-d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
-d <- calcNormFactors(d)
-d <- estimateCommonDisp(d)
-png(file="mds.png",bg ="white",height=768,width=1024)
-plotMDS(d, labels=grps,col=col.blocks, cex.axis=1.5, cex.lab=1.5, cex=1.5)
-op <- par(cex = 1.5)
-legend("topleft",legend=grpnames,col=rainbow(length(grpnames)),pch=20)
-dev.off()
-cond <-levels(d$samples$group)
-colnames(design) <- levels(d$samples$group)
-a <- length(cond)-1
-for (i in 1:a) {
-  for (j in 2:length(cond)) {
-    if (i == j) {
-      next
-    } else {
-      c <- exactTest(d, pair=c(cond[j],cond[i]))
-      res <- c$table
-      res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
-      output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
-      output$rawP <- output$PValue
-      output$logFC <- output$logFC
-      output$fdr <- p.adjust(output$rawP, method ='fdr')
-      output$bonf <- p.adjust(output$rawP, method ='bonferroni')
-      write.table(output,file=paste(cond[i],'_',cond[j],'.edgeR.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
-      filt.out <- na.omit(output[output$fdr < 0.05,])
-      if (nrow(filt.out) > 2) {
-      subset <- logcpm[row.names(logcpm) %in% filt.out$symbol,]
-      subset <- subset[!apply(subset, 1, function(x) {any(x == 0)}),]
-      gnames <- filt.out[c('ensembl','symbol')]
-      s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
-      STREE <- hclust(dist(t(subset)))
-      zscores <- scale(t(subset))
-      ngenes <- length(colnames(zscores))
-      textscale <- (1/(ngenes/30))
-      if (textscale > 1) {
-        textscale <-1
-      }
-      if (textscale < 0.1) {
-        textscale <- 0.1
-      }
-      png(file=paste(cond[i],'_',cond[j],'.heatmap.edgeR.png',sep=""),height=768,width=1024)
-      heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
-      legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
-      dev.off()
-      gcont <- paste(cond[j],cond[i],sep='-')
-      qs.results = qusage(logcpm, grps,gcont,MSIG.geneSets)
-      save(qs.results,file=paste(cond[i],'_',cond[j],'.qusage.rda',sep=""))
-      }
-      }
-    }
-  }
-}
-
-###################### Run Limma VOOM ########################
-
-# design <- model.matrix(~0+grps)
-# colnames(design) <- grpnames
-# a <- length(cond)-1
-# design.pairs <- c()
-# k <- 0
-# for (i in 1:a) {
-#   for (j in 2:length(cond)) {
-#     if (i == j) {
-#       next
-#     } else {
-#     k <- k+1
-#     design.pairs[k] <- paste(cond[i],'-',cond[j],sep='')
-#     }
-#     }
-#     }
-
-#contrast.matrix <- makeContrasts(design.pairs,levels=design)
-
-#d <- DGEList(counts=countTable,group=grps,lib.size=libSizes)
-#d <- calcNormFactors(d)
-#d <- estimateCommonDisp(d)
-
-# v <- voom(d,design,plot=TRUE)
-# fit <- lmFit(v,design)
-# fit2 <- contrasts.fit(fit, contrast.matrix)
-# fit2 <- eBayes(fit2)
-
-# comps <- colnames(fit2$coefficients)
-# for (i in 1:length(comps)) {
-#   res <- topTable(fit2,coef=i,number=Inf,sort.by="P")
-#   res2 <- merge(genenames,res,by.x='ensembl',by.y='row.names',all.y=TRUE,all.x=FALSE)
-#   output <- merge(res2,mean.by.group,by.y="row.names",by.x='symbol')
-#   output$rawP <- output$P.Value
-#   output$logFC <- output$adj.P.Val
-#   output$fdr <- p.adjust(output$rawP, method ='fdr')
-#   output$bonf <- p.adjust(output$rawP, method ='bonferroni')
-#   write.table(output,file=paste(cond[i],'_',cond[j],'.voom.txt',sep=""),quote=FALSE,row.names=FALSE,sep='\t')
-#   filt.out <- na.omit(output[output$fdr < 0.05,])
-#   if (nrow(filt.out) > 2) {
-#   subset <- logcpm[row.names(logcpm) %in% filt.out$ensembl,]
-#   gnames <- filt.out[c('ensembl','symbol')]
-#   s <- merge(gnames,subset,by.x="ensembl",by.y="row.names",all.x=FALSE,all.y=TRUE,sort=FALSE)
-#   STREE <- hclust(dist(t(subset)))
-#   zscores <- scale(t(subset))
-#   ngenes <- length(colnames(zscores))
-#   textscale <- (1/(ngenes/30))
-#   if (textscale > 1) {
-#      textscale <-1
-#   }
-#   if (textscale < 0.1) {
-#     textscale <- 0.1
-#   }
-#   png(file=paste(cond[i],'_',cond[j],'.heatmap.voom.png',sep=""),height=768,width=1024)
-#   heatmap.2(zscores, col = bluered(100),Rowv = as.dendrogram(STREE), RowSideColors = col.blocks,dendrogram='row', cexCol=textscale,labCol=s$symbol,srtRow=45,srtCol=45,trace="none", margins=c(5, 5))
-#   legend("topright",legend=grpnames,col=rainbow(length(grpnames)),pch=20,cex=0.5)
-#   dev.off()
-#   }
-# }
-
-
-
diff --git a/workflow/scripts/gencode_genename.pl b/workflow/scripts/gencode_genename.pl
deleted file mode 100755
index f2a81f5..0000000
--- a/workflow/scripts/gencode_genename.pl
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/perl -w
-#parse_gencode.pl
-
-open OUT, ">genenames.txt" or die $!;
-print OUT join("\t",'chrom','start','end','ensembl','symbol','type'),"\n";
-my $gtf = shift @ARGV;
-open GCODE, "<$gtf" or die $!;
-while (my $line = <GCODE>) {
-    chomp($line);
-    next if ($line =~ m/^#/);
-    my ($chrom,$source,$feature,$start,$end,$filter,$phase,$frame,$info) = 
-	split(/\t/,$line);
-    next unless ($feature eq 'gene');
-    $info =~ s/\"//g;
-    my %hash;
-    foreach $a (split(/;\s*/,$info)) {
-	my ($key,$val) = split(/ /,$a);
-	$hash{$key} = $val;
-    }
-    $hash{gene_id} =~ s/\.\d+//;
-    print OUT join("\t",$chrom,$start,$end,$hash{gene_id},$hash{gene_name},$hash{gene_type}),"\n";
-}
diff --git a/workflow/scripts/uniform_integrated_vcf.pl b/workflow/scripts/uniform_integrated_vcf.pl
deleted file mode 100755
index e2b84dc..0000000
--- a/workflow/scripts/uniform_integrated_vcf.pl
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/perl 
-#migrate_db.pl
-
-my $vcf = shift @ARGV;
-my $outfile = $vcf;
-$outfile =~ s/int.vcf/uniform.vcf/;
-open VCF, "<$vcf" or die $!;
-open OUT, ">$outfile" or die $!;
-
-while (my $line = <VCF>) {
-  chomp($line);
-  if ($line =~ m/#/) {
-    print OUT $line,"\n";
-    if ($line =~ m/##INFO=<ID=[AO|AD|DP|RO]/) {
-	$line =~ s/INFO/FORMAT/;
-	print OUT $line,"\n";
-    }
-    next;
-  }
-  my ($chrom, $pos,$id,$ref,$alt,$score,
-      $filter,$annot,$format,$allele_info) = split(/\t/, $line);
-  my %hash = ();
-  foreach $a (split(/;/,$annot)) {
-      my ($key,$val) = split(/=/,$a);
-      $hash{$key} = $val;
-  }
-  my @deschead = split(/:/,$format);
-  my @gtinfo = split(/:/,$allele_info);
-  my %gtdata;
-  foreach my $i (0..$#deschead) {
-    $gtdata{$deschead[$i]} = $gtinfo[$i];
-  }
-  $newformat = 'GT:DP:AD:AO:RO';
-  $gtdata{DP} = $hash{DP} unless ($gtdata{DP});
-  unless ($gtdata{AO}) {
-      if ($gtdata{AD}){
-	  ($gtdata{RO},$gtdata{AO}) = split(/,/,$gtdata{AD});
-      }elsif ($hash{AD}){
-      ($gtdata{RO},$gtdata{AO}) = split(/,/,$hash{AD});
-      }elsif ($gtdata{NR} && $gtdata{NV}) {
-	  $gtdata{DP} = $gtdata{NR}; 	
-	  $gtdata{AO} = $gtdata{NV};
-	  $gtdata{RO} = $gtdata{DP} - $gtdata{AO};
-      }elsif ($hash{TR}) {
-	  $gtdata{AO} = $hash{TR};
-	  $gtdata{DP} = $hash{TC};
-	  $gtdata{RO} = $gtdata{DP} - $gtdata{AO};
-    }
-  }
-  if ($gtdata{DP} < $gtdata{AO}+$gtdata{RO}) {
-      warn "inconsistent\n";
-  }
-  unless ($gtdata{AD}) {
-      if (exists $gtdata{RO} && exists $gtdata{AO}) {
-      $gtdata{AD} = join(",",$gtdata{RO},$gtdata{AO});
-      }
-  }
-  unless (exists $gtdata{GT} && exists $gtdata{DP} && exists $gtdata{AD} &&  exists $gtdata{AO} && exists $gtdata{RO}) {
-      warn "Missing Information\n";
-  }
-  $newgt = join(":",$gtdata{GT},$gtdata{DP},$gtdata{AD},$gtdata{AO},$gtdata{RO});
-  print OUT join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,$newgt),"\n";
-}
-- 
GitLab