#!/bin/bash
#run_somatic_caller.sh


usage(){
  echo "-h --Help documentation for run_somatic_caller.sh"
  echo "-a --Somatic Workflow Method: strelka2, virmid, speedseq, mutect2, varscan, shimmer, lancet"
  echo "-r  --Path to Reference Genome with the file genome.fa"
  echo "-n --Normal"
  echo "-t --Tumor"
  echo "-p -- PairID"
  echo "-x --NormalID"
  echo "-y --TumorID"
  echo "-i --NormalBAM used for Mantra in the case of UMI consensus"
  echo "-j --TumorBAM used for Mantra in the case of UMI consensus"
  echo "-b --TargetBed"
  
  echo "Example: bash somatic_vc.sh -a strelka2 -p subj1 -y ORD1_N_panel1385 -y ORD1_T_panel138 -n ORD1_N_panel1385.final.bam -t ORD1_T_panel1385.final.bam"
  exit 1
}

OPTIND=1 # Reset OPTIND
while getopts :n:t:p:r:x:y:i:j:a:b:h opt
do
    case $opt in 
      r) index_path=$OPTARG;;
      x) tid=$OPTARG;;
      y) nid=$OPTARG;;
      p) pair_id=$OPTARG;;
      n) normal=$OPTARG;;
      t) tumor=$OPTARG;;
      i) mnormal=$OPTARG;;
      j) mtumor=$OPTARG;;
      a) algo=$OPTARG;;
      b) tbed=$OPTARG;; 
      h) usage;;
    esac
done

shift $(($OPTIND -1))

#Check for mandatory options
if [[ -z $normal ]] || [[ -z $tumor ]] || [[ -z $algo ]]; then
    echo $normal $tumor $algo
    usage
fi 
if [[ -z $SLURM_CPUS_ON_NODE ]] 
  then 
    SLURM_CPUS_ON_NODE=1
fi
#pair_id=${tid}_${nid}
if [[ -z $mtumor ]]
then
    mtumor=tumor
    mnormal=normal
fi

if [[ -a "${index_path}/genome.fa" ]]
then
    reffa="${index_path}/genome.fa"
    dict="${index_path}/genome.dict"
else 
    echo "Missing Fasta File: ${index_path}/genome.fa"
    usage
fi
if [[ -a "${index_path}/dbSnp.vcf.gz" ]]
then
    dbsnp="${index_path}/dbSnp.vcf.gz"
else 
    echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz"
    usage
fi

if [[ -a "${index_path}/cosmic.vcf.gz" ]]
then
    cosmic=${index_path}/cosmic.vcf.gz
else 
    echo "Missing InDel File: ${index_path}/cosmic.vcf.gz"
    usage
fi
baseDir="`dirname \"$0\"`"

source /etc/profile.d/modules.sh


if [ $algo == 'strelka2' ]
  then
    module load strelka/2.8.3 samtools/1.6 manta/1.2.0 snpeff/4.3q vcftools/0.1.14
    mkdir manta strelka
    configManta.py --normalBam ${mnormal} --tumorBam ${mtumor} --referenceFasta ${reffa} --runDir manta
    manta/runWorkflow.py -m local -j 8
    configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka
    strelka/runWorkflow.py -m local -j 8
    vcf-concat strelka/results/variants/*.vcf.gz | vcf-annotate -n --fill-type -n |vcf-sort |java -jar $SNPEFF_HOME/SnpSift.jar filter "(GEN[*].DP >= 10)" | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.strelka2.vcf.gz
fi
if [ $algo == 'virmid' ]
  then 
    module load virmid/1.2 samtools/1.6 vcftools/0.1.14
    virmid -R ${reffa} -D ${tumor} -N ${normal} -s ${cosmic} -t $SLURM_CPUS_ON_NODE -M 2000 -c1 10 -c2 10
    perl $baseDir/addgt_virmid.pl ${tumor}.virmid.som.passed.vcf
    perl $baseDir/addgt_virmid.pl ${tumor}.virmid.loh.passed.vcf
    module load snpeff/4.3q
    vcf-concat *gt.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((NDP >= 10) & (DDP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.virmid.vcf.gz
fi

if [ $algo == 'speedseq' ]
  then 
    module load snpeff/4.3q speedseq/20160506 samtools/1.6 vcftools/0.1.14
    speedseq somatic -q 10 -t $SLURM_CPUS_ON_NODE -o sssom ${reffa} ${normal} ${tumor}
    vcf-annotate -H -n --fill-type sssom.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter '((QUAL >= 10) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.sssom.vcf.gz
fi

if [ $algo == 'mutect2' ]
then
  module load parallel gatk/3.7 snpeff/4.3q samtools/1.6 vcftools/0.1.14
  if [ -z ${tbed} ]
  then
      cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
  else
      awk '{print $1":"$2"-"$3}' ${tbed} | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}"
  fi	 
  vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz
fi

if [ $algo == 'varscan' ]
then
  module load samtools/1.6 VarScan/2.4.2 speedseq/20160506 vcftools/0.1.14
  sambamba mpileup --tmpdir=./ -t $SLURM_CPUS_ON_NODE ${tumor} --samtools "-C 50 -f ${reffa}"  > t.mpileup
  sambamba mpileup --tmpdir=./ -t $SLURM_CPUS_ON_NODE ${normal} --samtools "-C 50 -f ${reffa}"  > n.mpileup
  VarScan somatic n.mpileup t.mpileup vscan --output-vcf 1
  VarScan copynumber n.mpileup t.mpileup vscancnv
  module load snpeff/4.3q 
  vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip >  ${pair_id}.varscan.vcf.gz
fi

if [ $algo == 'shimmer' ]
then
    module load snpeff/4.3q shimmer/0.1.1 samtools/1.6  vcftools/0.1.14
    shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err
    perl $baseDir/add_readct_shimmer.pl
    vcf-annotate -n --fill-type shimmer/somatic_diffs.readct.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.shimmer.vcf.gz
fi

if [ $algo == 'lancet' ]
then
    module load snpeff/4.3q lancet samtools/1.6 vcftools/0.1.14
    lancet --tumor ${tumor} --normal ${normal} --ref $reffa -B $tbed --num-threads $SLURM_CPUS_ON_NODE > lancet.vcf
    vcf-sort lancet.vcf | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter "((FILTER = 'PASS') | (FILTER = 'LowVafTumor') | (FILTER = 'LowVafTumor;LowAltCntTumor')) & (GEN[*].DP >= 10)" |bgzip > ${pair_id}.lancet.vcf.gz
fi