diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index d3d57d2968e3806f3f697c3b40abef11518fe844..1bc6a073ecd5049cf78e9385881e3cffae6ec561 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -66,14 +66,14 @@ else ponopt=''; fi +fbsplit="${index_path}/genomefile.5M.txt" +cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' > intervals.txt +interval=`cat intervals.txt | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'` + if [[ -n $tbed ]] then + awk '{print $1":"$2"-"$3}' $tbed > intervals.txt interval=$tbed - awk '{print $1":"$2"-"$3}' $tbed > fbsplit.genomefile.txt - fbsplit=fbsplit.genomefile.txt -else - interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'` - fbsplit="${index_path}/genomefile.5M.txt" fi source /etc/profile.d/modules.sh @@ -130,12 +130,13 @@ then elif [ $algo == 'mutect' ] then gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz - module load gatk/4.1.4.0 + module load gatk/4.1.4.0 parallel/20150122 + threads=`expr $NPROC / 2` bamlist='' for i in *.bam; do bamlist+="-I ${i} " done - gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} ${bamlist} --output ${pair_id}.mutect.vcf -RF AllowAllReadsReadFilter --independent-mates --tmp-dir `pwd` -L $interval + cut -f 1 intervals.txt | parallel --delay 1 --jobs $threads "gatk --java-options \"-Xmx20g\" Mutect2 $ponopt -R ${reffa} ${bamlist} -RF AllowAllReadsReadFilter --independent-mates --tmp-dir `pwd` --output ${pair_id}.mutect.{}.vcf -L {}" vcf-sort ${pair_id}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz elif [[ $algo == 'strelka2' ]] then diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index a083bc01096a456ae7b5a6161c53b29dbb0ee0f8..be5a185b4f7d836b462b1e6112cf6934936300b1 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -34,6 +34,10 @@ do esac done +source /etc/profile.d/modules.sh +module load htslib/gcc/1.8 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14 +export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH + shift $(($OPTIND -1)) #Check for mandatory options @@ -78,11 +82,10 @@ else fi baseDir="`dirname \"$0\"`" +cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' > intervals.txt if [[ -n $tbed ]] then - interval=$tbed -else - interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'` + awk '{print $1":"$2"-"$3}' $tbed > intervals.txt fi if [[ -z $tid ]] then @@ -94,10 +97,6 @@ then fi -source /etc/profile.d/modules.sh -module load htslib/gcc/1.8 samtools/gcc/1.8 snpeff/4.3q vcftools/0.1.14 -export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH - if [ $algo == 'strelka2' ] then module load strelka/2.9.10 manta/1.3.1 @@ -125,7 +124,6 @@ then configureStrelkaSomaticWorkflow.py --normalBam ${normal} --tumorBam ${tumor} --referenceFasta ${reffa} --targeted --runDir strelka $mantaopt strelka/runWorkflow.py -m local -j 8 vcf-concat strelka/results/variants/*.vcf.gz | vcf-annotate -n --fill-type -n |vcf-sort |java -jar $SNPEFF_HOME/SnpSift.jar filter "(GEN[*].DP >= 10)" | perl -pe "s/TUMOR/${tid}/g" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.strelka2.vcf.gz -fi elif [ $algo == 'virmid' ] then module load virmid/1.2 @@ -138,9 +136,10 @@ then elif [ $algo == 'mutect' ] then gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz - module load gatk/4.1.4.0 picard/2.10.3 - gatk --java-options "-Xmx20g" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf -L $interval - vcf-sort ${tid}.mutect.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz + module load gatk/4.1.4.0 parallel/20150122 + threads=`expr $NPROC / 2` + cut -f 1 intervals.txt | parallel --delay 1 --jobs $threads "gatk --java-options \"-Xmx20g\" Mutect2 $ponopt --independent-mates -RF AllowAllReadsReadFilter -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.{}.vcf -L {}" + vcf-concat ${tid}.mutect.*vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz elif [ $algo == 'varscan' ] then module load bcftools/gcc/1.8 VarScan/2.4.2