From 92e3a4265bed369bf8198a0e84e8dc3da5e0dc31 Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Mon, 18 Mar 2019 09:28:04 -0500 Subject: [PATCH] adding oncokb hotspot --- variants/annotvcf.sh | 6 ++++-- variants/germline_vc.sh | 3 ++- variants/somatic_vc.sh | 2 +- variants/unionvcf.pl | 10 ++++++++++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/variants/annotvcf.sh b/variants/annotvcf.sh index 22a4488..1d841ce 100755 --- a/variants/annotvcf.sh +++ b/variants/annotvcf.sh @@ -34,8 +34,10 @@ if [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ]] then tabix -f ${unionvcf} bcftools annotate -Oz -a ${index_path}/gnomad.txt.gz -h ${index_path}/gnomad.header -c CHROM,POS,REF,ALT,GNOMAD_HOM,GNOMAD_AF,AF_POPMAX,GNOMAD_HG19_VARIANT -o ${pair_id}.gnomad.vcf.gz ${unionvcf} - tabix ${pair_id}.gnomad.vcf.gz - bcftools annotate -Oz -a ${index_path}/repeat_regions.bed.gz -o ${pair_id}.repeat.vcf.gz --columns CHROM,FROM,TO,RepeatType -h /project/shared/bicf_workflow_ref/RepeatType.header ${pair_id}.gnomad.vcf.gz + tabix ${pair_id}.gnomad.vcf.gz + bcftools annotate -Oz -a ${index_path}/oncokb_hotspot.txt.gz -o ${pair_id}.oncohotspot.vcf.gz -h ${index_path}/oncokb_hotspot.header -c CHROM,FROM,TO,OncoKB_REF,OncoKB_ALT,Gene,OncoKB_ProteinChange,OncoKB_AF,OncoTree_Tissue,OncoTree_MainType,OncoTree_Code,OncoKBHotspot ${pair_id}.gnomad.vcf.gz + tabix ${pair_id}.oncohotspot.vcf.gz + bcftools annotate -Oz -a ${index_path}/repeat_regions.bed.gz -o ${pair_id}.repeat.vcf.gz --columns CHROM,FROM,TO,RepeatType -h /project/shared/bicf_workflow_ref/RepeatType.header ${pair_id}.oncohotspot.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-downstream -no-upstream -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.repeat.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate -id ${index_path}/dbSnp.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${index_path}/clinvar.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CNT ${index_path}/cosmic.vcf.gz - | java -Xmx10g -jar $SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | bgzip > ${pair_id}.annot.vcf.gz tabix ${pair_id}.annot.vcf.gz else diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index d54bd99..c6d47da 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -76,7 +76,8 @@ elif [[ $algo == 'hotspot' ]] then samtools mpileup -d 99999 -t 'AD,DP,INFO/AD' -uf ${reffa} *.bam > ${pair_id}.mpi bcftools filter -i "AD[1]/DP > 0.01" ${pair_id}.mpi | bcftools filter -i "DP > 50" | bcftools call -m -A |vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.lowfreq.vcf.gz - - java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "(CNT[*] >0)" - |bgzip > ${pair_id}.hotspot.vcf.gz + tabix ${pair_id}.lowfreq.vcf.gz + bcftools annotate -Ov -a ${index_path}/oncokb_hotspot.txt.gz -h ${index_path}/oncokb_hotspot.header -c CHROM,FROM,TO,OncoKB_REF,OncoKB_ALT,Gene,OncoKB_ProteinChange,OncoKB_AF,OncoTree_Tissue,OncoTree_MainType,OncoTree_Code,OncoKBHotspot ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz - | grep '#\|CNT\|OncoKBHotspot' | bgzip > ${pair_id}.hotspot.vcf.gz elif [[ $algo == 'speedseq' ]] then module load speedseq/gcc/0.1.2 diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index f96b9a4..4a5ef6e 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -120,7 +120,7 @@ then else awk '{print $1":"$2"-"$3}' ${tbed} | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}" fi - vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz + vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz fi if [ $algo == 'varscan' ] diff --git a/variants/unionvcf.pl b/variants/unionvcf.pl index 57c2046..5507c38 100755 --- a/variants/unionvcf.pl +++ b/variants/unionvcf.pl @@ -48,6 +48,7 @@ foreach $vcf (@vcffiles) { my $newformat = 'GT:DP:AD:AO:RO'; my %newgts; my %afinfo; + my %gtfilt; my $missingGT = 0; FG:foreach my $i (0..$#gts) { my $allele_info = $gts[$i]; @@ -67,6 +68,9 @@ foreach $vcf (@vcffiles) { $missingGT ++; next FG; } + if ($gtdata{FT} && $gtdata{FT} =~ m/HighSNVSB/) { + $gtfilt{'StrandBias'} = 1; + } if ($gtdata{DP4}) { #varscan uses this my ($ref_fwd,$ref_rev,$alt_fwd,$alt_rev) = split(',',$gtdata{DP4}); $gtdata{AO} = $alt_fwd+$alt_rev; @@ -118,6 +122,12 @@ foreach $vcf (@vcffiles) { push @gtdesc, join(":",$id,$afinfo{$id}); push @newgts, $newgts{$id}; } + if ($gtfilt{'StrandBias'}) { + $filter = $filter.";strandBias"; + } elsif (($hash{FS} && $hash{FS} > 60) + || ($hash{SAP} && $hash{SAP} > 20)) { + $filter = $filter.";strandBias"; + } $lines{$chrom}{$pos}{$alt}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc]; } close VCF; -- GitLab