Skip to content
Snippets Groups Projects
Commit 92e3a426 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

adding oncokb hotspot

parent 6a875eb3
Branches
Tags
No related merge requests found
......@@ -34,8 +34,10 @@ if [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ]]
then
tabix -f ${unionvcf}
bcftools annotate -Oz -a ${index_path}/gnomad.txt.gz -h ${index_path}/gnomad.header -c CHROM,POS,REF,ALT,GNOMAD_HOM,GNOMAD_AF,AF_POPMAX,GNOMAD_HG19_VARIANT -o ${pair_id}.gnomad.vcf.gz ${unionvcf}
tabix ${pair_id}.gnomad.vcf.gz
bcftools annotate -Oz -a ${index_path}/repeat_regions.bed.gz -o ${pair_id}.repeat.vcf.gz --columns CHROM,FROM,TO,RepeatType -h /project/shared/bicf_workflow_ref/RepeatType.header ${pair_id}.gnomad.vcf.gz
tabix ${pair_id}.gnomad.vcf.gz
bcftools annotate -Oz -a ${index_path}/oncokb_hotspot.txt.gz -o ${pair_id}.oncohotspot.vcf.gz -h ${index_path}/oncokb_hotspot.header -c CHROM,FROM,TO,OncoKB_REF,OncoKB_ALT,Gene,OncoKB_ProteinChange,OncoKB_AF,OncoTree_Tissue,OncoTree_MainType,OncoTree_Code,OncoKBHotspot ${pair_id}.gnomad.vcf.gz
tabix ${pair_id}.oncohotspot.vcf.gz
bcftools annotate -Oz -a ${index_path}/repeat_regions.bed.gz -o ${pair_id}.repeat.vcf.gz --columns CHROM,FROM,TO,RepeatType -h /project/shared/bicf_workflow_ref/RepeatType.header ${pair_id}.oncohotspot.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-downstream -no-upstream -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.repeat.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate -id ${index_path}/dbSnp.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${index_path}/clinvar.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CNT ${index_path}/cosmic.vcf.gz - | java -Xmx10g -jar $SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | bgzip > ${pair_id}.annot.vcf.gz
tabix ${pair_id}.annot.vcf.gz
else
......
......@@ -76,7 +76,8 @@ elif [[ $algo == 'hotspot' ]]
then
samtools mpileup -d 99999 -t 'AD,DP,INFO/AD' -uf ${reffa} *.bam > ${pair_id}.mpi
bcftools filter -i "AD[1]/DP > 0.01" ${pair_id}.mpi | bcftools filter -i "DP > 50" | bcftools call -m -A |vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.lowfreq.vcf.gz -
java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "(CNT[*] >0)" - |bgzip > ${pair_id}.hotspot.vcf.gz
tabix ${pair_id}.lowfreq.vcf.gz
bcftools annotate -Ov -a ${index_path}/oncokb_hotspot.txt.gz -h ${index_path}/oncokb_hotspot.header -c CHROM,FROM,TO,OncoKB_REF,OncoKB_ALT,Gene,OncoKB_ProteinChange,OncoKB_AF,OncoTree_Tissue,OncoTree_MainType,OncoTree_Code,OncoKBHotspot ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz - | grep '#\|CNT\|OncoKBHotspot' | bgzip > ${pair_id}.hotspot.vcf.gz
elif [[ $algo == 'speedseq' ]]
then
module load speedseq/gcc/0.1.2
......
......@@ -120,7 +120,7 @@ then
else
awk '{print $1":"$2"-"$3}' ${tbed} | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
fi
vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz
vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz
fi
if [ $algo == 'varscan' ]
......
......@@ -48,6 +48,7 @@ foreach $vcf (@vcffiles) {
my $newformat = 'GT:DP:AD:AO:RO';
my %newgts;
my %afinfo;
my %gtfilt;
my $missingGT = 0;
FG:foreach my $i (0..$#gts) {
my $allele_info = $gts[$i];
......@@ -67,6 +68,9 @@ foreach $vcf (@vcffiles) {
$missingGT ++;
next FG;
}
if ($gtdata{FT} && $gtdata{FT} =~ m/HighSNVSB/) {
$gtfilt{'StrandBias'} = 1;
}
if ($gtdata{DP4}) { #varscan uses this
my ($ref_fwd,$ref_rev,$alt_fwd,$alt_rev) = split(',',$gtdata{DP4});
$gtdata{AO} = $alt_fwd+$alt_rev;
......@@ -118,6 +122,12 @@ foreach $vcf (@vcffiles) {
push @gtdesc, join(":",$id,$afinfo{$id});
push @newgts, $newgts{$id};
}
if ($gtfilt{'StrandBias'}) {
$filter = $filter.";strandBias";
} elsif (($hash{FS} && $hash{FS} > 60)
|| ($hash{SAP} && $hash{SAP} > 20)) {
$filter = $filter.";strandBias";
}
$lines{$chrom}{$pos}{$alt}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc];
}
close VCF;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment