diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index 43daaea8983b2cf8f025a393c1c8221d23edc24b..f2094311585e6e1883157a342bece9a8f044cc4b 100644 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -1,26 +1,30 @@ +#!/bin/bash +#germline_vc.sh + usage() { echo "-h Help documentation for gatkrunner.sh" echo "-r --Reference Genome: GRCh38 or GRCm38" echo "-p --Prefix for output file name" echo "-a --Algorithm/Command" - echo "Example: bash hisat.sh -p prefix -r GRCh38" + echo "Example: bash hisat.sh -p prefix -r /path/GRCh38" exit 1 } OPTIND=1 # Reset OPTIND while getopts :r:a:b:p:h opt do case $opt in - r) refgeno=$OPTARG;; + r) index_path=$OPTARG;; p) pair_id=$OPTARG;; a) algo=$OPTARG;; h) usage;; esac done function join_by { local IFS="$1"; shift; echo "$*"; } + shift $(($OPTIND -1)) # Check for mandatory options -if [[ -z $pair_id ]] || [[ -z $bam ] || [[ -z $refgeno ]]]; then +if [[ -z $pair_id ]] || [[ -z $index_path ]]; then usage fi if [[ -z $SLURM_CPUS_ON_NODE ]] @@ -63,8 +67,8 @@ then samtools mpileup -d 99999 -t 'AD,DP,INFO/AD' -uf ${reffa} *.bam > ${pair_id}.mpi bcftools filter -i "AD[1]/DP > 0.01" ${pair_id}.mpi | bcftools filter -i "DP > 50" | bcftools call -m -A |vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.lowfreq.vcf.gz - java -jar $SNPEFF_HOME/SnpSift.jar annotate ${index_path}/cosmic.vcf.gz ${pair_id}.lowfreq.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter "(CNT[*] >0)" - |bgzip > ${pair_id}.hotspot.vcf.gz -elif -then [[ $algo == 'speedseq' ]] +elif [[ $algo == 'speedseq' ]] +then module load speedseq/20160506 speedseq var -t $SLURM_CPUS_ON_NODE -o ssvar ${reffa} *.bam vcf-annotate -n --fill-type ssvar.vcf.gz| bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.ssvar.vcf.gz - @@ -80,8 +84,8 @@ then java -Djava.io.tmpdir=./ -Xmx32g -jar $GATK_JAR -R ${reffa} -D ${dbsnp} -T GenotypeGVCFs -o gatk.vcf -nt $SLURM_CPUS_ON_NODE $gvcflist vcf-annotate -n --fill-type gatk.vcf | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.gatk.vcf.gz - tabix ${pair_id}.gatk.vcf.gz -elif -then [[ $algo == 'platypus' ]] +elif [[ $algo == 'platypus' ]] +then module load platypus/gcc/0.8.1 $bamlist = join_by , *.bam Platypus.py callVariants --minMapQual=10 --mergeClusteredVariants=1 --nCPU=$SLURM_CPUS_ON_NODE --bamFiles=${bamlist} --refFile=${reffa} --output=platypus.vcf diff --git a/variants/union.sh b/variants/union.sh new file mode 100644 index 0000000000000000000000000000000000000000..640ddfc8162e71e70e161ba0a8fd79b491d8d394 --- /dev/null +++ b/variants/union.sh @@ -0,0 +1,66 @@ +#!/bin/bash +#union.sh + +usage() { + echo "-h Help documentation for gatkrunner.sh" + echo "-r --Reference Genome: GRCh38 or GRCm38" + echo "-p --Prefix for output file name" + echo "-a --Algorithm/Command" + echo "Example: bash hisat.sh -p prefix -r /path/GRCh38" + exit 1 +} +OPTIND=1 # Reset OPTIND +while getopts :r:p:h opt +do + case $opt in + r) index_path=$OPTARG;; + p) pair_id=$OPTARG;; + h) usage;; + esac +done +function join_by { local IFS="$1"; shift; echo "$*"; } +shift $(($OPTIND -1)) + +module load gatk/3.7 python/2.7.x-anaconda bedtools/2.26.0 snpeff/4.3q samtools/1.6 vcftools/0.1.14 + +HS=*.hotspot.vcf.gz +list1=`ls *vcf.gz|grep -v hotspot` +list2=`ls *vcf.gz|grep -v hotspot` +calllist='' +for i in *.vcf.gz; do + EXT="${i#*.}" + CALL="${EXT%%.*}" + calllist+=" $CALL" + tabix $i + if [[ $i eq $HS ]] + then + bedtools multiinter -i $list1 |cut -f 1,2,3 |bedtools intersect -header -v -a $i -b stdin |bgzip > hotspot.nooverlap.vcf.gz + tabix hotspot.nooverlap.vcf.gz + list2+=" hotspot.nooverlap.vcf.gz" + varlist+=" --variant:$CALL hotspot.nooverlap.vcf.gz" + else + varlist+=" --variant:$CALL $i" + fi +done + +bedtools multiinter -i $list2 -names $calllist | cut -f 1,2,3,5 | bedtools sort -i stdin | bedtools merge -c 4 -o distinct > ${pair_id}_integrate.bed + +priority='ssvar' +if [[ *.platypus.vcf.gz ]] +then + priority+=',platypus' +fi +priority+=',sam,gatk' +if [[ *.hotspot.vcf.gz ]] +then + priority+=',hotspot' +fi + +java -Xmx32g -jar $GATK_JAR -R ${index_path}/genome.fa -T CombineVariants --filteredrecordsmergetype KEEP_UNCONDITIONAL $varlist -genotypeMergeOptions PRIORITIZE -priority $priority -o ${pair_id}.int.vcf + +perl $baseDir/scripts/uniform_integrated_vcf.pl ${pair_id}.int.vcf +bgzip ${pair_id}_integrate.bed +tabix ${pair_id}_integrate.bed.gz +bgzip ${pair_id}.uniform.vcf +tabix ${pair_id}.uniform.vcf.gz +bcftools annotate -a ${pair_id}_integrate.bed.gz --columns CHROM,FROM,TO,CallSet -h ${index_path}/CallSet.header ${pair_id}.uniform.vcf.gz | bgzip > ${pair_id}.union.vcf.gz