#!/bin/bash #run_somatic_caller.sh usage(){ echo "-h --Help documentation for run_somatic_caller.sh" echo "-a --Somatic Workflow Method: strelka2, virmid, speedseq, mutect2, varscan, shimmer, lancet" echo "-r --Path to Reference Genome with the file genome.fa" echo "-n --Normal" echo "-t --Tumor" echo "-p -- PairID" echo "-x --NormalID" echo "-y --TumorID" echo "-i --NormalBAM used for Mantra in the case of UMI consensus" echo "-j --TumorBAM used for Mantra in the case of UMI consensus" echo "-b --TargetBed" echo "Example: bash somatic_vc.sh -a strelka2 -p subj1 -y ORD1_N_panel1385 -y ORD1_T_panel138 -n ORD1_N_panel1385.final.bam -t ORD1_T_panel1385.final.bam" exit 1 } OPTIND=1 # Reset OPTIND while getopts :n:t:p:r:x:y:i:j:q:a:b:h opt do case $opt in r) index_path=$OPTARG;; x) tid=$OPTARG;; y) nid=$OPTARG;; p) pair_id=$OPTARG;; n) normal=$OPTARG;; t) tumor=$OPTARG;; a) algo=$OPTARG;; b) tbed=$OPTARG;; q) pon==$OPTARG;; h) usage;; esac done source /etc/profile.d/modules.sh module load htslib/gcc/1.8 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14 export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH shift $(($OPTIND -1)) #Check for mandatory options if [[ -z $normal ]] || [[ -z $tumor ]] || [[ -z $algo ]]; then echo $normal $tumor $algo usage fi NPROC=$SLURM_CPUS_ON_NODE if [[ -z $NPROC ]] then NPROC=`nproc` fi ponopt=''; if [[ -f $pon ]] then ponopt="--pon $pon" fi if [[ -a "${index_path}/genome.fa" ]] then reffa="${index_path}/genome.fa" dict="${index_path}/genome.dict" else echo "Missing Fasta File: ${index_path}/genome.fa" usage fi baseDir="`dirname \"$0\"`" interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'` if [[ -n $tbed ]] then interval=$tbed fi if [[ -z $tid ]] then tid=`samtools view -H ${tumor} |grep '^@RG' |perl -pi -e 's/\t/\n/g' |grep ID |cut -f 2 -d ':'` fi if [[ -z $nid ]] then nid=`samtools view -H ${normal} |grep '^@RG' |perl -pi -e 's/\t/\n/g' |grep ID |cut -f 2 -d ':'` fi if [ $algo == 'strelka2' ] then module load strelka/2.9.10 manta/1.3.1 opt='' if [[ -n $tbed ]] then if [[ -f "${tbed}.gz" ]] then opt="--callRegions ${tbed}.gz" else cp $tbed panel.bed bgzip panel.bed tabix panel.bed.gz opt="--callRegions panel.bed.gz" fi fi mkdir manta configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} $opt --runDir manta manta/runWorkflow.py -m local -j 8 mantaopt='' if [[ -f manta/results/variants/candidateSmallIndels.vcf.gz ]] then mantaopt="--indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz" fi configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --runDir strelka $mantaopt strelka/runWorkflow.py -m local -j 8 vcf-concat strelka/results/variants/*.vcf.gz | vcf-annotate -n --fill-type -n |vcf-sort |java -jar $SNPEFF_HOME/SnpSift.jar filter "(GEN[*].DP >= 10)" | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.strelka2.vcf.gz elif [ $algo == 'virmid' ] then module load virmid/1.2 cosmic=${index_path}/cosmic.vcf.gz if [[ ! -f "${index_path}/cosmic.vcf.gz" ]] then echo "Missing InDel File: ${index_path}/cosmic.vcf.gz" usage fi virmid -R ${reffa} -D ${tumor} -N ${normal} -s ${cosmic} -t $NPROC -M 2000 -c1 10 -c2 10 perl $baseDir/addgt_virmid.pl ${tumor}.virmid.som.passed.vcf perl $baseDir/addgt_virmid.pl ${tumor}.virmid.loh.passed.vcf module rm java/oracle/jdk1.7.0_51 module load snpeff/4.3q vcf-concat *gt.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((NDP >= 10) & (DDP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.virmid.vcf.gz elif [ $algo == 'mutect' ] then module load gatk/4.1.4.0 parallel/20150122 threads=`expr $NPROC / 2` gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf -L $interval vcf-concat ${tid}.mutect.*vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz elif [ $algo == 'varscan' ] then module load bcftools/gcc/1.8 VarScan/2.4.2 module rm java/oracle/jdk1.7.0_51 module load snpeff/4.3q samtools mpileup -C 50 -f ${reffa} $tumor > t.mpileup samtools mpileup -C 50 -f ${reffa} $normal > n.mpileup VarScan somatic n.mpileup t.mpileup vscan --output-vcf 1 VarScan copynumber n.mpileup t.mpileup vscancnv vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.varscan.vcf.gz elif [ $algo == 'shimmer' ] then module load R/3.6.1-gccmkl shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err perl $baseDir/add_readct_shimmer.pl module rm java/oracle/jdk1.7.0_51 module load snpeff/4.3q vcf-annotate -n --fill-type shimmer/somatic_diffs.readct.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.shimmer.vcf.gz fi