#!/bin/bash #run_somatic_caller.sh usage(){ echo "-h --Help documentation for run_somatic_caller.sh" echo "-a --Somatic Workflow Method: strelka2, virmid, speedseq, mutect2, varscan, shimmer, lancet" echo "-r --Path to Reference Genome with the file genome.fa" echo "-n --Normal" echo "-t --Tumor" echo "-p -- PairID" echo "-x --NormalID" echo "-y --TumorID" echo "-i --NormalBAM used for Mantra in the case of UMI consensus" echo "-j --TumorBAM used for Mantra in the case of UMI consensus" echo "-b --TargetBed" echo "Example: bash somatic_vc.sh -a strelka2 -p subj1 -y ORD1_N_panel1385 -y ORD1_T_panel138 -n ORD1_N_panel1385.final.bam -t ORD1_T_panel1385.final.bam" exit 1 } OPTIND=1 # Reset OPTIND while getopts :n:t:p:r:x:y:i:j:a:b:h opt do case $opt in r) index_path=$OPTARG;; x) tid=$OPTARG;; y) nid=$OPTARG;; p) pair_id=$OPTARG;; n) normal=$OPTARG;; t) tumor=$OPTARG;; i) mnormal=$OPTARG;; j) mtumor=$OPTARG;; a) algo=$OPTARG;; b) tbed=$OPTARG;; h) usage;; esac done shift $(($OPTIND -1)) #Check for mandatory options if [[ -z $normal ]] || [[ -z $tumor ]] || [[ -z $algo ]]; then echo $normal $tumor $algo usage fi if [[ -z $SLURM_CPUS_ON_NODE ]] then SLURM_CPUS_ON_NODE=1 fi #pair_id=${tid}_${nid} if [[ -z $mtumor ]] then mtumor=tumor mnormal=normal fi if [[ -a "${index_path}/genome.fa" ]] then reffa="${index_path}/genome.fa" dict="${index_path}/genome.dict" else echo "Missing Fasta File: ${index_path}/genome.fa" usage fi if [[ -a "${index_path}/dbSnp.vcf.gz" ]] then dbsnp="${index_path}/dbSnp.vcf.gz" else echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz" usage fi if [[ -a "${index_path}/cosmic.vcf.gz" ]] then cosmic=${index_path}/cosmic.vcf.gz else echo "Missing InDel File: ${index_path}/cosmic.vcf.gz" usage fi baseDir="`dirname \"$0\"`" source /etc/profile.d/modules.sh if [ $algo == 'strelka2' ] then module load strelka/2.8.3 samtools/1.6 manta/1.2.0 snpeff/4.3q vcftools/0.1.14 mkdir manta strelka configManta.py --normalBam ${mnormal} --tumorBam ${mtumor} --referenceFasta ${reffa} --runDir manta manta/runWorkflow.py -m local -j 8 configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka strelka/runWorkflow.py -m local -j 8 vcf-concat strelka/results/variants/*.vcf.gz | vcf-annotate -n --fill-type -n |vcf-sort |java -jar $SNPEFF_HOME/SnpSift.jar filter "(GEN[*].DP >= 10)" | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.strelka2.vcf.gz fi if [ $algo == 'virmid' ] then module load virmid/1.2 samtools/1.6 vcftools/0.1.14 virmid -R ${reffa} -D ${tumor} -N ${normal} -s ${cosmic} -t $SLURM_CPUS_ON_NODE -M 2000 -c1 10 -c2 10 perl $baseDir/addgt_virmid.pl ${tumor}.virmid.som.passed.vcf perl $baseDir/addgt_virmid.pl ${tumor}.virmid.loh.passed.vcf module load snpeff/4.3q vcf-concat *gt.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((NDP >= 10) & (DDP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.virmid.vcf.gz fi if [ $algo == 'speedseq' ] then module load snpeff/4.3q speedseq/20160506 samtools/1.6 vcftools/0.1.14 speedseq somatic -q 10 -t $SLURM_CPUS_ON_NODE -o sssom ${reffa} ${normal} ${tumor} vcf-annotate -H -n --fill-type sssom.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter '((QUAL >= 10) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.sssom.vcf.gz fi if [ $algo == 'mutect2' ] then module load parallel gatk/3.7 snpeff/4.3q samtools/1.6 vcftools/0.1.14 if [ -z ${tbed} ] then cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}" else awk '{print $1":"$2"-"$3}' ${tbed} | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}" fi vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz fi if [ $algo == 'varscan' ] then module load samtools/1.6 VarScan/2.4.2 speedseq/20160506 vcftools/0.1.14 sambamba mpileup --tmpdir=./ -t $SLURM_CPUS_ON_NODE ${tumor} --samtools "-C 50 -f ${reffa}" > t.mpileup sambamba mpileup --tmpdir=./ -t $SLURM_CPUS_ON_NODE ${normal} --samtools "-C 50 -f ${reffa}" > n.mpileup VarScan somatic n.mpileup t.mpileup vscan --output-vcf 1 VarScan copynumber n.mpileup t.mpileup vscancnv module load snpeff/4.3q vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.varscan.vcf.gz fi if [ $algo == 'shimmer' ] then module load snpeff/4.3q shimmer/0.1.1 samtools/1.6 vcftools/0.1.14 shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err perl $baseDir/add_readct_shimmer.pl vcf-annotate -n --fill-type shimmer/somatic_diffs.readct.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.shimmer.vcf.gz fi if [ $algo == 'lancet' ] then module load snpeff/4.3q lancet samtools/1.6 vcftools/0.1.14 lancet --tumor ${tumor} --normal ${normal} --ref $reffa -B $tbed --num-threads $SLURM_CPUS_ON_NODE > lancet.vcf vcf-sort lancet.vcf | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter "((FILTER = 'PASS') | (FILTER = 'LowVafTumor') | (FILTER = 'LowVafTumor;LowAltCntTumor')) & (GEN[*].DP >= 10)" |bgzip > ${pair_id}.lancet.vcf.gz fi