From 92e3a4265bed369bf8198a0e84e8dc3da5e0dc31 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Mon, 18 Mar 2019 09:28:04 -0500
Subject: [PATCH] Add OncoKB hotspot annotation and strandBias filter tagging

---
 variants/annotvcf.sh    |  6 ++++--
 variants/germline_vc.sh |  3 ++-
 variants/somatic_vc.sh  |  2 +-
 variants/unionvcf.pl    | 10 ++++++++++
 4 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/variants/annotvcf.sh b/variants/annotvcf.sh
index 22a4488..1d841ce 100755
--- a/variants/annotvcf.sh
+++ b/variants/annotvcf.sh
@@ -34,8 +34,10 @@ if  [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ]]
 then
     tabix -f ${unionvcf}
     bcftools annotate -Oz -a ${index_path}/gnomad.txt.gz -h ${index_path}/gnomad.header -c CHROM,POS,REF,ALT,GNOMAD_HOM,GNOMAD_AF,AF_POPMAX,GNOMAD_HG19_VARIANT -o ${pair_id}.gnomad.vcf.gz ${unionvcf}
-    tabix ${pair_id}.gnomad.vcf.gz 
-    bcftools annotate -Oz -a ${index_path}/repeat_regions.bed.gz -o ${pair_id}.repeat.vcf.gz --columns CHROM,FROM,TO,RepeatType -h /project/shared/bicf_workflow_ref/RepeatType.header ${pair_id}.gnomad.vcf.gz
+    tabix ${pair_id}.gnomad.vcf.gz
+    bcftools annotate -Oz -a ${index_path}/oncokb_hotspot.txt.gz -o ${pair_id}.oncohotspot.vcf.gz -h ${index_path}/oncokb_hotspot.header -c CHROM,FROM,TO,OncoKB_REF,OncoKB_ALT,Gene,OncoKB_ProteinChange,OncoKB_AF,OncoTree_Tissue,OncoTree_MainType,OncoTree_Code,OncoKBHotspot ${pair_id}.gnomad.vcf.gz
+    tabix ${pair_id}.oncohotspot.vcf.gz
+    bcftools annotate -Oz -a ${index_path}/repeat_regions.bed.gz -o ${pair_id}.repeat.vcf.gz --columns CHROM,FROM,TO,RepeatType -h /project/shared/bicf_workflow_ref/RepeatType.header ${pair_id}.oncohotspot.vcf.gz
     java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-downstream -no-upstream -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.repeat.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate -id ${index_path}/dbSnp.vcf.gz -  | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${index_path}/clinvar.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CNT ${index_path}/cosmic.vcf.gz - | java -Xmx10g -jar $SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | bgzip > ${pair_id}.annot.vcf.gz
     tabix ${pair_id}.annot.vcf.gz
 else 
diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh
index d54bd99..c6d47da 100755
--- a/variants/germline_vc.sh
+++ b/variants/germline_vc.sh
@@ -76,7 +76,8 @@ elif [[ $algo == 'hotspot' ]]
 then
     samtools mpileup -d 99999 -t 'AD,DP,INFO/AD' -uf ${reffa} *.bam > ${pair_id}.mpi
     bcftools filter -i "AD[1]/DP > 0.01" ${pair_id}.mpi | bcftools filter -i "DP > 50" | bcftools call -m -A |vcf-annotate -n --fill-type |  bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.lowfreq.vcf.gz -
-    java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "(CNT[*] >0)" - |bgzip > ${pair_id}.hotspot.vcf.gz
+    tabix ${pair_id}.lowfreq.vcf.gz
+    bcftools annotate -Ov -a ${index_path}/oncokb_hotspot.txt.gz -h ${index_path}/oncokb_hotspot.header -c CHROM,FROM,TO,OncoKB_REF,OncoKB_ALT,Gene,OncoKB_ProteinChange,OncoKB_AF,OncoTree_Tissue,OncoTree_MainType,OncoTree_Code,OncoKBHotspot ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz - | grep '#\|CNT\|OncoKBHotspot' | bgzip > ${pair_id}.hotspot.vcf.gz
 elif [[ $algo == 'speedseq' ]]
 then
     module load speedseq/gcc/0.1.2
diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh
index f96b9a4..4a5ef6e 100755
--- a/variants/somatic_vc.sh
+++ b/variants/somatic_vc.sh
@@ -120,7 +120,7 @@ then
   else
       awk '{print $1":"$2"-"$3}' ${tbed} | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
   fi	 
-  vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz
+  vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz
 fi
 
 if [ $algo == 'varscan' ]
diff --git a/variants/unionvcf.pl b/variants/unionvcf.pl
index 57c2046..5507c38 100755
--- a/variants/unionvcf.pl
+++ b/variants/unionvcf.pl
@@ -48,6 +48,7 @@ foreach $vcf (@vcffiles) {
     my $newformat = 'GT:DP:AD:AO:RO';
     my %newgts;
     my %afinfo;
+    my %gtfilt;
     my $missingGT = 0;
   FG:foreach my $i (0..$#gts) {
       my $allele_info = $gts[$i];
@@ -67,6 +68,9 @@ foreach $vcf (@vcffiles) {
 	$missingGT ++;
 	next FG;
       }
+      if ($gtdata{FT} && $gtdata{FT} =~ m/HighSNVSB/) {
+	  $gtfilt{'StrandBias'} = 1; # remember that at least one genotype's FT field contains HighSNVSB
+      }
       if ($gtdata{DP4}) { #varscan uses this
 	my ($ref_fwd,$ref_rev,$alt_fwd,$alt_rev) = split(',',$gtdata{DP4});
 	$gtdata{AO} = $alt_fwd+$alt_rev;
@@ -118,6 +122,12 @@ foreach $vcf (@vcffiles) {
       push @gtdesc, join(":",$id,$afinfo{$id});
       push @newgts, $newgts{$id};
     }
+    if ($gtfilt{'StrandBias'} # append once: a genotype was flagged, or FS > 60 / SAP > 20
+	|| ($hash{FS} && $hash{FS} > 60)
+	|| ($hash{SAP} && $hash{SAP} > 20)) {
+
+	$filter .= ";strandBias";
+    }
     $lines{$chrom}{$pos}{$alt}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc];
   }
   close VCF;
-- 
GitLab