diff --git a/alignment/filter_genefusions.pl b/alignment/filter_genefusions.pl index c7097db2ec82487ef53d66b3e82647673b1ee592..e110246e2270565037e2d2ba4000e16c91c1bd1f 100755 --- a/alignment/filter_genefusions.pl +++ b/alignment/filter_genefusions.pl @@ -15,14 +15,14 @@ while (my $line = <ENT>) { } } -open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/utswv2_known_genefusions.txt" or die $!; +open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/known_genefusions.txt" or die $!; while (my $line = <OM>) { chomp($line); $known{$line} = 1; } close OM; -open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/panel1410.genelist.txt" or die $!; +open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/panelgenes.txt" or die $!; while (my $line = <OM>) { chomp($line); $keep{$line} = 1; diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index cc0d8dfda4ef497503d6853906da14eef945f845..fdde560708ff4bde1d8eb2776fed1eea624f3fba 100644 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -51,7 +51,6 @@ then export TMP_HOME=$tmphome index_path=${refgeno}/CTAT_lib_trinity1.6 trinity /usr/local/src/STAR-Fusion/STAR-Fusion --min_sum_frags 3 --CPU $SLURM_CPUS_ON_NODE --genome_lib_dir ${index_path} --left_fq ${fq1} --right_fq ${fq2} --examine_coding_effect --output_dir ${pair_id}_star_fusion - #cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.tsv ${pair_id}.starfusion.txt cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.coding_effect.tsv ${pair_id}.starfusion.txt else module add star/2.5.2b diff --git a/variants/filter_cnvkit.pl b/variants/filter_cnvkit.pl index e86ee768a88b5633cddc4ee05653e490b030df12..7bd4bbf73f26c15ed570dea7dfb8d0baef83df74 100755 --- a/variants/filter_cnvkit.pl +++ b/variants/filter_cnvkit.pl @@ -2,7 +2,7 @@ #parse_cnvkit_table.pl my $refdir = '/project/shared/bicf_workflow_ref/human/GRCh38/'; -open OM, "<$refdir\/clinseq_prj/panel1410.genelist.txt" or die $!; +open OM, "<$refdir\/clinseq_prj/panelgenes.txt" or die $!; while (my $line = <OM>) { chomp($line); $keep{$line} = 1; diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index 62006d27ea31845de401c8be6195adba62412a0b..4550ad90a63dcfd6f67f020bd48744418a8356a8 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -56,6 +56,12 @@ else echo "Missing Fasta File: ${index_path}/genome.fa" usage fi +if [[ -z $pon ]] +then + ponopt=''; +else + ponopt="--pon $pon" +fi source /etc/profile.d/modules.sh module load python/2.7.x-anaconda picard/2.10.3 samtools/gcc/1.8 bcftools/gcc/1.8 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 parallel @@ -118,8 +124,8 @@ then prefix="${i%.bam}" echo ${prefix} java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${i} O=artifact_metrics.txt R=${reffa} - gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" Mutect2 $ponopt -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample --enable_strand_artifact_filter -I ${i} --output ${prefix}.mutect.vcf - gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" FilterMutectCalls -V ${prefix}.mutect.vcf -O ${prefix}.mutect.filt.vcf + gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} --enable-all-annotations -I ${i} --output ${prefix}.mutect.vcf + gatk --java-options "-Xmx20g" FilterMutectCalls -V ${prefix}.mutect.vcf -O ${prefix}.mutect.filt.vcf vcf-sort ${prefix}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${prefix}.mutect.vcf.gz done elif [[ $algo == 'strelka2' ]] diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index e6d6c9007e78448440267f1b90b702a26063f9ea..0d336ef121fe0d04b5fddff7d7e4eb5fad86062b 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -114,8 +114,8 @@ then gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} - gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" Mutect2 $ponopt -R ${reffa} -A FisherStrand -A QualByDepth -A StrandArtifact -A DepthPerAlleleBySample --enable_strand_artifact_filter -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf - gatk --java-options "-Xmx20g -Djava.io.tmpdir=./" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf + gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} --enable-all-annotations -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf + gatk --java-options "-Xmx20g" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz elif [ $algo == 'varscan' ] then diff --git a/variants/svcalling.sh b/variants/svcalling.sh index 881eefde49010df1ee7c2c0a9ec911628e7977cc..bbd1eaad0f279b0c9fe19d08fd686329944fcea8 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -11,7 +11,7 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:p:b:i:n:a:h opt +while getopts :r:p:b:i:x:y:n:l:a:h opt do case $opt in r) index_path=$OPTARG;; @@ -20,6 +20,9 @@ do i) tumor=$OPTARG;; n) normal=$OPTARG;; a) method=$OPTARG;; + x) tid=$OPTARG;; + y) nid=$OPTARG;; + l) bed=$OPTARG;; h) usage;; esac done @@ -49,17 +52,17 @@ else fi source /etc/profile.d/modules.sh -module load samtools/1.6 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 -mkdir temp +module load htslib/gcc/1.8 samtools/gcc/1.8 bcftools/gcc/1.8 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 if [[ $method == 'delly' ]] then - module load delly2/v0.7.7-multi samtools/1.6 snpeff/4.3q + mkdir temp + module load delly2/v0.7.7-multi if [[ -n ${normal} ]] then #RUN DELLY - echo -e "${normal}\tcontrol"> samples.tsv - echo -e "${tumor}\ttumor" >> samples.tsv + echo -e "${nid}\tcontrol"> samples.tsv + echo -e "${tid}\ttumor" >> samples.tsv delly2 call -t BND -o delly_translocations.bcf -q 30 -g ${reffa} ${sbam} ${normal} delly2 call -t DUP -o delly_duplications.bcf -q 30 -g ${reffa} ${sbam} ${normal} delly2 call -t INV -o delly_inversions.bcf -q 30 -g ${reffa} ${sbam} ${normal} @@ -86,7 +89,17 @@ then #MERGE DELLY AND MAKE BED bcftools concat -a -O v delly_dup.bcf delly_inv.bcf delly_tra.bcf delly_del.bcf delly_ins.bcf| vcf-sort -t temp > delly.vcf bgzip delly.vcf - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 delly.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].AD[1] >= 20 )" | bgzip > ${pair_id}.sv.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 delly.vcf | bgzip > ${pair_id}.delly.vcf.gz +fi +if [[ $method == 'svaba' ]] +then + if [[ -n ${normal} ]] + then + /project/shared/bicf_workflow_ref/seqprg/svaba/bin/svaba run -p $SLURM_CPUS_ON_NODE -G ${reffa} -t ${sbam} -n ${normal} -a ${pair_id} + else + /project/shared/bicf_workflow_ref/seqprg/svaba/bin/svaba run -p $SLURM_CPUS_ON_NODE -G ${reffa} -t ${sbam} -a ${pair_id} + fi + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.svaba.unfiltered.somatic.sv.vcf | bgzip > ${pair_id}.svaba.vcf.gz fi if [[ $method == 'lumpy' ]] @@ -107,5 +120,33 @@ then else speedseq sv -t $SLURM_CPUS_ON_NODE -o lumpy -R ${reffa} -B ${sbam} -D discordants.bam -S splitters.bam -x ${index_path}/exclude_alt.bed fi - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 lumpy.sv.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].DV >= 20 )" | bgzip > ${pair_id}.sv.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 lumpy.sv.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].DV >= 20 )" | bgzip > ${pair_id}.lumpy.vcf.gz +fi +if [[ $method == 'pindel' ]] +then + module load pindel/0.2.5-intel + genomefiledate=`find ${reffa} -maxdepth 0 -printf "%TY%Tm%Td\n"` + touch ${pair_id}.pindel.config + for i in *.bam; do + sname=`echo "$i" |cut -f 1 -d '.'` + echo -e "${i}\t400\t${sname}" >> ${pair_id}.pindel.config + samtools index -@ $SLURM_CPUS_ON_NODE $i + done + pindel -T $SLURM_CPUS_ON_NODE -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP + pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf + cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz + tabix pindel.vcf.gz + bash $baseDir/norm_annot.sh -r ${index_path} -p pindel -v pindel.vcf.gz + perl $baseDir/parse_pindel.pl ${pair_id} pindel.norm.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.dup.vcf | bedtools intersect -header -b ${bed} -a stdin | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.sv.vcf | bgzip > ${pair_id}.pindel_sv.vcf.gz +fi +if [[ $method == 'itdseek' ]] +then + stexe=`which samtools` + samtools view -@ $SLURM_CPUS_ON_NODE -L ${bed} ${sbam} | /project/shared/bicf_workflow_ref/seqprg/itdseek-1.2/itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${bed} -a stdin | bgzip > ${pair_id}.itdseek.vcf.gz + + tabix ${pair_id}.itdseek.vcf.gz + bcftools norm --fasta-ref $reffa -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz fi