From 50154ca3d5801ae2913e598d31b5f4a34093a086 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Mon, 30 Mar 2020 14:35:19 -0500
Subject: [PATCH] update viral alignment and mutect filter bug

---
 alignment/combine_idxstats.pl | 61 +++++++++++++++++++++++++++++++++++
 alignment/viralalign.sh       |  2 +-
 variants/germline_vc.sh       |  4 +--
 variants/somatic_vc.sh        |  4 +--
 4 files changed, 66 insertions(+), 5 deletions(-)
 create mode 100644 alignment/combine_idxstats.pl

diff --git a/alignment/combine_idxstats.pl b/alignment/combine_idxstats.pl
new file mode 100644
index 0000000..e4bc42e
--- /dev/null
+++ b/alignment/combine_idxstats.pl
@@ -0,0 +1,61 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+use diagnostics;
+use Getopt::Long;
+#use Cwd;
+
+#my $cwd = getcwd();
+
+my @allFiles = @ARGV;
+chomp(@allFiles);
+my %data;
+my @samples;
+open OUT, ">viral.info" or die $!;
+
+my %virus;
+$virus{'NC_001538.1'}="BK polyomavirus";
+$virus{'NC_007605.1'}="Human gammaherpesvirus 4";
+$virus{'NC_009334.1'}="Human herpesvirus 4";
+$virus{'NC_001488.1'}="Human T-lymphotropic virus 2";
+$virus{'D90400.1'}="Human papillomavirus type 58";
+$virus{'M14119.1'}="Human papillomavirus type 11 (HPV-11)";
+$virus{'J04353.1'}="Human papillomavirus type 31 (HPV-31)";
+$virus{'M12732.1'}="Human papillomavirus type 33";
+$virus{'M74117.1'}="Human papillomavirus type 35";
+$virus{'M62849.1'}="Human papillomavirus ORFs";
+$virus{'X74481.1'}="Human papillomavirus type 52";
+$virus{'X77858.1'}="Human papilloma virus type 59";
+$virus{'NC_001355.1'}="Human papillomavirus type 6b";
+$virus{'NC_001357.1'}="Human papillomavirus - 18";
+$virus{'NC_001436.1'}="Human T-lymphotropic virus 1";
+$virus{'NC_001699.1'}="JC polyomavirus";
+$virus{'EF177177.1'}="Human papillomavirus type 56 clone Qv26342";
+$virus{'NC_009333.1'}="Human herpesvirus 8";
+$virus{'EF202167.1'}="Human papillomavirus type 45 isolate Qv31748";
+$virus{'NC_014407.1'}="Human polyomavirus 7";
+$virus{'HM355825.1'}="Merkel cell polyomavirus isolate MCVw156";
+$virus{'NC_001526.4'}="Human papillomavirus type 16";
+
+
+print OUT join("\t","SampleName","VirusAcc","VirusName","Mapped","Unmapped"),"\n";
+foreach my $file_idx(@allFiles){
+		print "File Included: ".$file_idx."\n";
+		#my @filePath = split("/", $file_idx);
+		my $fileName = $file_idx;
+		$fileName =~ s/\.idxstats\.txt//g;
+		push @samples, $fileName;
+
+		open INFILE, "<$file_idx" or die $!;
+
+		foreach my $line(<INFILE>){
+				chomp $line;
+				my @data_array = split("\t",$line);
+				if($data_array[2]!=0 and $data_array[0] ne '*'){
+					$data{$data_array[0]}{$fileName} = $data_array[2]."\t".$data_array[3];
+					print OUT join("\t",$fileName,$data_array[0],$virus{$data_array[0]},$data_array[2],$data_array[3]),"\n";
+				}
+		}
+		close INFILE;
+}
diff --git a/alignment/viralalign.sh b/alignment/viralalign.sh
index 4e72b31..d3608a5 100644
--- a/alignment/viralalign.sh
+++ b/alignment/viralalign.sh
@@ -33,7 +33,7 @@ then
 fi
 
 reffa=${ref}/idt_virus_reference.fa
-
+source /etc/profile.d/modules.sh
 module load bwa/intel/0.7.17 picard/2.10.3 samtools/1.6 
 
 samtools view -@ 8 -b -u -F 2 ${bam} |samtools sort -n - >unmapped.bam
diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh
index f8b2eaa..9f598d9 100755
--- a/variants/germline_vc.sh
+++ b/variants/germline_vc.sh
@@ -126,8 +126,8 @@ then
       bamlist+="-I ${i} "
   done
   gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} ${bamlist} --output ${pair_id}.mutect.vcf -RF AllowAllReadsReadFilter --independent-mates  --tmp-dir `pwd`
-  gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${pair_id}.mutect.vcf -O ${pair_id}.mutect.filt.vcf
-  vcf-sort ${pair_id}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
+  #gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${pair_id}.mutect.vcf -O ${pair_id}.mutect.filt.vcf
+  vcf-sort ${pair_id}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
 
 elif [[ $algo == 'strelka2' ]]
 then
diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh
index f441cdf..36a09b0 100755
--- a/variants/somatic_vc.sh
+++ b/variants/somatic_vc.sh
@@ -121,8 +121,8 @@ then
   module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14
   java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g  -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa}
   gatk --java-options "-Xmx20g" Mutect2 $ponopt  --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf
-  gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf
-  vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
+  #gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf
+  vcf-sort ${tid}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz
 elif [ $algo == 'varscan' ]
 then
   module load bcftools/gcc/1.8 samtools/gcc/1.8 VarScan/2.4.2 vcftools/0.1.14
-- 
GitLab