Skip to content
Snippets Groups Projects
Commit a17ae372 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

add annotvcf.sh

parent c90d2d4e
No related merge requests found
#!/bin/bash
#annotvcf.sh
usage() {
echo "-h Help documentation for gatkrunner.sh"
echo "-r --Reference Genome: GRCh38 or GRCm38"
echo "-p --Prefix for output file name"
echo "-v --VCF File"
echo "Example: bash hisat.sh -p prefix -r /path/GRCh38 -v vcffile"
exit 1
}
OPTIND=1 # Reset OPTIND
while getopts :r:v:p:h opt
do
case $opt in
r) index_path=$OPTARG;;
p) pair_id=$OPTARG;;
v) unionvcf=$OPTARG;;
h) usage;;
esac
done
function join_by { local IFS="$1"; shift; echo "$*"; }
shift $(($OPTIND -1))
module load python/2.7.x-anaconda bedtools/2.26.0 snpeff/4.3q
if [[ $index_path == '/project/shared/bicf_workflow_ref/GRCh38' ]]
then
tabix ${unionvcf}
bcftools annotate -Oz -a ${index_path}/ExAC.vcf.gz -o ${pair_id}.exac.vcf.gz --columns CHROM,POS,AC_Het,AC_Hom,AC_Hemi,AC_Adj,AN_Adj,AC_POPMAX,AN_POPMAX,POPMAX ${unionvcf}
tabix ${pair_id}.exac.vcf.gz
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.exac.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate -id ${index_path}/dbSnp.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${index_path}/clinvar.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CNT ${index_path}/cosmic.vcf.gz - | java -Xmx10g -jar $SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - |bgzip > ${pair_id}.annot.vcf.gz
tabix ${pair_id}.annot.vcf.gz
else
if [[ $index_path == '/project/shared/bicf_workflow_ref/GRCm38' ]]
then
snpeffvers='GRCh38.86'
elif [[ $index_path == '/project/shared/bicf_workflow_ref/GRCh37' ]]
then
snpeffvers='GRCh37.75'
fi
java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffvers} ${unionvcf} |bgzip > ${pair_id}.annot.vcf.gz
tabix ${pair_id}.annot.vcf.gz
fi
......@@ -3,10 +3,10 @@
usage() {
echo "-h Help documentation for gatkrunner.sh"
echo "-r --Reference Genome: GRCh38 or GRCm38"
echo "-r --Path to Reference Genome with the file genome.fa"
echo "-p --Prefix for output file name"
echo "-a --Algorithm/Command"
echo "Example: bash hisat.sh -p prefix -r /path/GRCh38"
echo "-a --Algorithm/Command: gatk, mpileup, speedseq, platypus "
echo "Example: bash hisat.sh -p prefix -r /path/GRCh38 -a gatk"
exit 1
}
OPTIND=1 # Reset OPTIND
......@@ -75,10 +75,10 @@ then
elif [[ $algo == 'gatk' ]]
then
module load gatk/3.7
$gvcflist=''
gvcflist=''
for i in *.bam; do
java -Djava.io.tmpdir=./ -Xmx32g -jar $GATK_JAR -R ${reffa} -D ${dbsnp} -T HaplotypeCaller -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -variant_index_type LINEAR -variant_index_parameter 128000 --emitRefConfidence GVCF -I $i -o ${i}.gatk.g.vcf -nct 2 &
$gvcflist+="--variant ${i}.gatk.g.vcf "
gvcflist="$gvcflist --variant ${i}.gatk.g.vcf"
done
wait
......@@ -92,5 +92,5 @@ then
Platypus.py callVariants --minMapQual=10 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf
vcf-sort platypus.vcf |vcf-annotate -n --fill-type -n |bgzip > platypus.vcf.gz
tabix platypus.vcf.gz
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${i}.pl.vcf.gz platypus.vcf.gz
bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz
fi
......@@ -5,7 +5,6 @@ usage() {
echo "-h Help documentation for gatkrunner.sh"
echo "-r --Reference Genome: GRCh38 or GRCm38"
echo "-p --Prefix for output file name"
echo "-a --Algorithm/Command"
echo "Example: bash hisat.sh -p prefix -r /path/GRCh38"
exit 1
}
......@@ -20,7 +19,7 @@ do
done
function join_by { local IFS="$1"; shift; echo "$*"; }
shift $(($OPTIND -1))
baseDir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
module load gatk/3.7 python/2.7.x-anaconda bedtools/2.26.0 snpeff/4.3q samtools/1.6 vcftools/0.1.14
HS=*.hotspot.vcf.gz
......@@ -30,16 +29,16 @@ calllist=''
for i in *.vcf.gz; do
EXT="${i#*.}"
CALL="${EXT%%.*}"
calllist+=" $CALL"
calllist="$calllist $CALL"
tabix $i
if [[ $i eq $HS ]]
if [[ $i == $HS ]]
then
bedtools multiinter -i $list1 |cut -f 1,2,3 |bedtools intersect -header -v -a $i -b stdin |bgzip > hotspot.nooverlap.vcf.gz
tabix hotspot.nooverlap.vcf.gz
list2+=" hotspot.nooverlap.vcf.gz"
varlist+=" --variant:$CALL hotspot.nooverlap.vcf.gz"
list2="$list2 hotspot.nooverlap.vcf.gz"
varlist="$varlist --variant:$CALL hotspot.nooverlap.vcf.gz"
else
varlist+=" --variant:$CALL $i"
varlist="$varlist --variant:$CALL $i"
fi
done
......@@ -48,17 +47,17 @@ bedtools multiinter -i $list2 -names $calllist | cut -f 1,2,3,5 | bedtools sort
priority='ssvar'
if [[ *.platypus.vcf.gz ]]
then
priority+=',platypus'
priority="$priority,platypus"
fi
priority+=',sam,gatk'
priority="$priority,sam,gatk"
if [[ *.hotspot.vcf.gz ]]
then
priority+=',hotspot'
priority="$priority,hotspot"
fi
java -Xmx32g -jar $GATK_JAR -R ${index_path}/genome.fa -T CombineVariants --filteredrecordsmergetype KEEP_UNCONDITIONAL $varlist -genotypeMergeOptions PRIORITIZE -priority $priority -o ${pair_id}.int.vcf
perl $baseDir/scripts/uniform_integrated_vcf.pl ${pair_id}.int.vcf
perl $baseDir/uniform_integrated_vcf.pl ${pair_id}.int.vcf
bgzip ${pair_id}_integrate.bed
tabix ${pair_id}_integrate.bed.gz
bgzip ${pair_id}.uniform.vcf
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment