diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index 09e14265bc3d3c5b5464afdcd7152c1f4e1f19bb..d3d57d2968e3806f3f697c3b40abef11518fe844 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -69,8 +69,11 @@ fi if [[ -n $tbed ]] then interval=$tbed + awk '{print $1":"$2"-"$3}' $tbed > fbsplit.genomefile.txt + fbsplit=fbsplit.genomefile.txt else interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'` + fbsplit="${index_path}/genomefile.5M.txt" fi source /etc/profile.d/modules.sh @@ -97,7 +100,7 @@ then for i in *.bam; do bamlist="$bamlist --bam ${PWD}/${i}" done - cut -f 1 ${index_path}/genomefile.5M.txt | parallel --delay 2 -j $NPROC "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" + cut -f 1 $fbsplit | parallel --delay 1 --jobs 0 "freebayes -f ${index_path}/genome.fa --min-mapping-quality 0 --min-base-quality 20 --min-coverage 10 --min-alternate-fraction 0.01 -C 3 --use-best-n-alleles 3 -r {} ${bamlist} > fb.{}.vcf" vcf-concat fb.*.vcf | vcf-sort | vcf-annotate -n --fill-type | bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.fb.vcf.gz - elif [[ $algo == 'platypus' ]] then diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index 40843d0d50000b417506e757acaa7dd7dd4fd18f..a083bc01096a456ae7b5a6161c53b29dbb0ee0f8 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -1,40 +1,36 @@ #!/bin/bash #run_somatic_caller.sh - usage(){ - echo "-h --Help documentation for run_somatic_caller.sh" - echo "-a --Somatic Workflow Method: strelka2, virmid, speedseq, mutect2, varscan, shimmer, lancet" - echo "-r --Path to Reference Genome with the file genome.fa" - echo "-n --Normal" - echo "-t --Tumor" - echo "-p -- PairID" - echo "-x --NormalID" - echo "-y --TumorID" - echo "-i --NormalBAM used for Mantra in the case of UMI consensus" - echo "-j --TumorBAM used for Mantra in the case of UMI consensus" - echo "-b --TargetBed" - - echo "Example: bash somatic_vc.sh -a strelka2 -p subj1 -y ORD1_N_panel1385 -y ORD1_T_panel138 -n ORD1_N_panel1385.final.bam -t ORD1_T_panel1385.final.bam" - exit 1 + echo "-h --Help documentation for run_somatic_caller.sh" + echo "-a --Somatic Workflow Method: strelka2, virmid, speedseq, mutect2, varscan, shimmer, lancet" + echo "-r --Path to Reference Genome with the file genome.fa" + echo "-n --Normal" + echo "-t --Tumor" + echo "-p -- PairID" + echo "-x --NormalID" + echo "-y --TumorID" + echo "-i --NormalBAM used for Mantra in the case of UMI consensus" + echo "-j --TumorBAM used for Mantra in the case of UMI consensus" + echo "-b --TargetBed" + echo "Example: bash somatic_vc.sh -a strelka2 -p subj1 -y ORD1_N_panel1385 -y ORD1_T_panel138 -n ORD1_N_panel1385.final.bam -t ORD1_T_panel1385.final.bam" + exit 1 } OPTIND=1 # Reset OPTIND while getopts :n:t:p:r:x:y:i:j:q:a:b:h opt do case $opt in - r) index_path=$OPTARG;; - x) tid=$OPTARG;; - y) nid=$OPTARG;; - p) pair_id=$OPTARG;; - n) normal=$OPTARG;; - t) tumor=$OPTARG;; - i) mnormal=$OPTARG;; - j) mtumor=$OPTARG;; - a) algo=$OPTARG;; - b) tbed=$OPTARG;; - q) pon==$OPTARG;; - h) usage;; + r) index_path=$OPTARG;; + x) tid=$OPTARG;; + y) nid=$OPTARG;; + p) pair_id=$OPTARG;; + n) normal=$OPTARG;; + t) tumor=$OPTARG;; + a) algo=$OPTARG;; + b) tbed=$OPTARG;; + q) pon==$OPTARG;; + h) usage;; esac done @@ -47,20 +43,14 @@ if [[ -z $normal ]] || [[ -z $tumor ]] || [[ -z $algo ]]; then fi NPROC=$SLURM_CPUS_ON_NODE if [[ -z $NPROC ]] - then +then NPROC=`nproc` fi -#pair_id=${tid}_${nid} -if [[ -z $mtumor ]] -then - mtumor=tumor - mnormal=normal -fi + +ponopt=''; if [[ -f $pon ]] then ponopt="--pon $pon" -else - ponopt=''; fi if [[ -a "${index_path}/genome.fa" ]] @@ -105,32 +95,40 @@ fi source /etc/profile.d/modules.sh -module load htslib/gcc/1.8 +module load htslib/gcc/1.8 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14 export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH if [ $algo == 'strelka2' ] then - module load strelka/2.9.10 manta/1.3.1 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14 + module load strelka/2.9.10 manta/1.3.1 opt='' if [[ -n $tbed ]] then - opt="--callRegions ${tbed}.gz" + if [[ -f $tbed ]] + then + opt="--callRegions ${tbed}.gz" + else + cp $tbed targetpanel.bed + bgzip targetpanel.bed + tabix targetpanel.bed.gz + opt="--callRegions targetpanel.bed.gz" + fi fi mkdir manta configManta.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} $opt --runDir manta manta/runWorkflow.py -m local -j 8 + mantaopt='' if [[ -f manta/results/variants/candidateSmallIndels.vcf.gz ]] then - configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz --runDir strelka - else - configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --indelCandidates --runDir strelka + mantaopt="--indelCandidates manta/results/variants/candidateSmallIndels.vcf.gz" fi + configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --runDir strelka $mantaopt strelka/runWorkflow.py -m local -j 8 vcf-concat strelka/results/variants/*.vcf.gz | vcf-annotate -n --fill-type -n |vcf-sort |java -jar $SNPEFF_HOME/SnpSift.jar filter "(GEN[*].DP >= 10)" | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.strelka2.vcf.gz fi -if [ $algo == 'virmid' ] - then - module load virmid/1.2 samtools/gcc/1.8 vcftools/0.1.14 +elif [ $algo == 'virmid' ] +then + module load virmid/1.2 virmid -R ${reffa} -D ${tumor} -N ${normal} -s ${cosmic} -t $NPROC -M 2000 -c1 10 -c2 10 perl $baseDir/addgt_virmid.pl ${tumor}.virmid.som.passed.vcf perl $baseDir/addgt_virmid.pl ${tumor}.virmid.loh.passed.vcf @@ -139,24 +137,23 @@ if [ $algo == 'virmid' ] vcf-concat *gt.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((NDP >= 10) & (DDP >= 10))' | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.virmid.vcf.gz elif [ $algo == 'mutect' ] then - gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz - module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 - java -XX:ParallelGCThreads=$NPROC -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} - gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf -L $interval - vcf-sort ${tid}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz + gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz + module load gatk/4.1.4.0 picard/2.10.3 + gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf -L $interval + vcf-sort ${tid}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz elif [ $algo == 'varscan' ] then - module load bcftools/gcc/1.8 samtools/gcc/1.8 VarScan/2.4.2 vcftools/0.1.14 - module rm java/oracle/jdk1.7.0_51 - module load snpeff/4.3q - samtools mpileup -C 50 -f ${reffa} $tumor > t.mpileup - samtools mpileup -C 50 -f ${reffa} $normal > n.mpileup - VarScan somatic n.mpileup t.mpileup vscan --output-vcf 1 - VarScan copynumber n.mpileup t.mpileup vscancnv - vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.varscan.vcf.gz + module load bcftools/gcc/1.8 VarScan/2.4.2 + module rm java/oracle/jdk1.7.0_51 + module load snpeff/4.3q + samtools mpileup -C 50 -f ${reffa} $tumor > t.mpileup + samtools mpileup -C 50 -f ${reffa} $normal > n.mpileup + VarScan somatic n.mpileup t.mpileup vscan --output-vcf 1 + VarScan copynumber n.mpileup t.mpileup vscancnv + vcf-concat vscan*.vcf | vcf-sort | vcf-annotate -n --fill-type -n | java -jar $SNPEFF_HOME/SnpSift.jar filter '((exists SOMATIC) & (GEN[*].DP >= 10))' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.varscan.vcf.gz elif [ $algo == 'shimmer' ] then - module load samtools/gcc/1.8 R/3.6.1-gccmkl vcftools/0.1.14 + module load R/3.6.1-gccmkl shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err perl $baseDir/add_readct_shimmer.pl module rm java/oracle/jdk1.7.0_51 diff --git a/variants/svcalling.sh b/variants/svcalling.sh index be79e49f437762ffd939f4b7a5b89d8dcbd6b150..a14ff60fb957b42ee016da4cebbe574844da5080 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -11,19 +11,20 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:p:b:i:x:y:n:l:a:hf opt +while getopts :r:p:b:t:x:c:y:n:l:a:hf opt do case $opt in r) index_path=$OPTARG;; p) pair_id=$OPTARG;; - b) sbam=$OPTARG;; - i) tumor=$OPTARG;; + t) tumor=$OPTARG;; n) normal=$OPTARG;; a) method=$OPTARG;; x) tid=$OPTARG;; y) nid=$OPTARG;; f) filter=1;; - l) bed=$OPTARG;; + b) sbam=$OPTARG;; + c) tbed=$OPTARG;; + l) itdbed=$OPTARG;; h) usage;; esac done @@ -152,14 +153,19 @@ then echo -e "${i}\t400\t${sname}" >> ${pair_id}.pindel.config samtools index -@ $NPROC $i done - pindel -T $NPROC -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP + bedopt='' + if [[ -f $tbed ]] + then + bedopt="-j $tbed" + fi + pindel -T $NPROC -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --report_inversions false $bedopt pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz tabix pindel.vcf.gz bash $baseDir/norm_annot.sh -r ${index_path} -p pindel -v pindel.vcf.gz perl $baseDir/parse_pindel.pl ${pair_id} pindel.norm.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.dup.vcf | bedtools intersect -header -b ${bed} -a stdin | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.dup.vcf | bedtools intersect -header -b ${itdbed} -a stdin | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.sv.vcf | bgzip > ${pair_id}.pindel.sv.vcf.gz if [[ $filter == 1 ]] then @@ -178,7 +184,7 @@ then elif [[ $method == 'itdseek' ]] then stexe=`which samtools` - samtools view -@ $NPROC -L ${bed} ${sbam} | itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${bed} -a stdin | java -Xmx30g -jar $SNPEFF_HOME/SnpSift.jar filter "( LEN < 10000 )" | bgzip > ${pair_id}.itdseek.vcf.gz + samtools view -@ $NPROC -L ${itdbed} ${sbam} | itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${itdbed} -a stdin | java -Xmx30g -jar $SNPEFF_HOME/SnpSift.jar filter "( LEN < 10000 )" | bgzip > ${pair_id}.itdseek.vcf.gz tabix ${pair_id}.itdseek.vcf.gz