diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index 1656e5bc6f7eb7b320eb8f72f24d4f24699bdc88..bfc8c54e77a13893ebcc8b2440bafac09ae821ac 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -35,7 +35,7 @@ shift $(($OPTIND -1)) #fi source /etc/profile.d/modules.sh -module load samtools/1.6 fastqc/0.11.5 +module load samtools/gcc/1.8 fastqc/0.11.5 samtools flagstat ${sbam} > ${pair_id}.flagstat.txt fastqc -f bam ${sbam} baseDir="`dirname \"$0\"`" diff --git a/alignment/dnaseqalign.sh b/alignment/dnaseqalign.sh index af4d9a839d06e54ad10ed9c0608d292d8d89b281..cac762d61e7117fc2070f796d0e8adc4b85eadcb 100644 --- a/alignment/dnaseqalign.sh +++ b/alignment/dnaseqalign.sh @@ -63,10 +63,10 @@ fi bwa mem -M -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${read_group}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${read_group}" ${index_path}/genome.fa $file_opt > out.sam -if [[ $umi == 'umi' ]] && [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ]] +if [[ $umi == 'umi' ]] && [[ -f "${index_path}/genome.fa.alt" ]] then k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | python ${baseDir}/add_umi_sam.py -s - -o output.unsort.bam -elif [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ]] +elif [[ "${index_path}/genome.fa.alt" ]] then k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam| samtools view -1 - > output.unsort.bam elif [[ $umi == 'umi' ]] diff --git a/alignment/filter_genefusions.pl b/alignment/filter_genefusions.pl index e110246e2270565037e2d2ba4000e16c91c1bd1f..aba4e7247d541f7d8e2287ac9c9913652ae7af6f 100755 --- a/alignment/filter_genefusions.pl +++ b/alignment/filter_genefusions.pl @@ -4,25 +4,16 @@ use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); my %opt = (); -my $results = GetOptions (\%opt,'fusion|f=s','prefix|p=s','help|h'); -my %entrez; -open ENT, "</project/shared/bicf_workflow_ref/human/gene_info.human.txt" or die $!; -my $headline = <ENT>; -while (my $line = <ENT>) { - chomp($line); - my @row = split(/\t/,$line);{ - $entrez{$row[2]} = $row[1]; - } -} +my $results = GetOptions (\%opt,'fusion|f=s','prefix|p=s','help|h','datadir|r=s'); -open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/known_genefusions.txt" or die $!; +open OM, "<$opt{datadir}/known_genefusions.txt" or die $!; while (my $line = <OM>) { chomp($line); $known{$line} = 1; } close OM; -open OM, "</project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/panelgenes.txt" or die $!; +open OM, "<$opt{datadir}/panelgenes.txt" or die $!; while (my $line = <OM>) { chomp($line); $keep{$line} = 1; @@ -58,15 +49,11 @@ foreach $efile (@exonfiles) { } open OAS, ">$opt{prefix}\.translocations.answer.txt" or die $!; -open OUTIR, ">$opt{prefix}\.cbioportal.genefusions.txt" or die $!; print OAS join("\t","FusionName","LeftGene","LefttBreakpoint","LeftGeneExons","LeftStrand", "RightGene","RightBreakpoint","RightGeneExons","RightStrand", "RNAReads","DNAReads","FusionType","Annot",'Filter','ChrType','ChrDistance'),"\n"; -print OUTIR join("\t","Hugo_Symbol","Entrez_Gene_Id","Center","Tumor_Sample_Barcode", - "Fusion","DNA_support","RNA_support","Method","Frame"),"\n"; - my $sname = $opt{prefix}; open FUSION, "<$opt{fusion}" or die $!; @@ -132,12 +119,7 @@ while (my $line = <FUSION>) { print OAS join("\t",$fname,$hash{LeftGene},$hash{LeftBreakpoint},$leftexon,$hash{LeftStrand}, $hash{RightGene},$hash{RightBreakpoint},$rightexon,$hash{RightStrand}, $hash{SumRNAReads},0,lc($hash{PROT_FUSION_TYPE}),$fusion_annot,$qc,$chrtype,$chrdist),"\n"; - print OUTIR join("\t",$hash{LeftGene},$entrez{$hash{LeftGene}},"UTSW",$sname,$fname." fusion", - $dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n"; - print OUTIR join("\t",$hash{RightGene},$entrez{$hash{RightGene}},"UTSW",$sname,$fname." fusion", - $dna_support,$rna_support,"STAR Fusion",lc($hash{PROT_FUSION_TYPE})),"\n"; } close OAS; -close OUTIR; diff --git a/alignment/markdups.sh b/alignment/markdups.sh index 4b897a26be38f1f93717c3f540509014eb81b2c7..91bcc0a0693d36175d27cde1020742c788fb93e7 100644 --- a/alignment/markdups.sh +++ b/alignment/markdups.sh @@ -10,12 +10,13 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :a:b:p:h opt +while getopts :a:b:r:p:h opt do case $opt in a) algo=$OPTARG;; b) sbam=$OPTARG;; p) pair_id=$OPTARG;; + r) index_path=$OPTARG;; h) usage;; esac done @@ -31,11 +32,16 @@ if [[ -z $SLURM_CPUS_ON_NODE ]] then SLURM_CPUS_ON_NODE=1 fi +if [[ -z $index_path ]] +then + index_path="/project/shared/bicf_workflow_ref/human/grch38_cloud/dnaref" +fi + baseDir="`dirname \"$0\"`" testexe='/project/shared/bicf_workflow_ref/seqprg/bin' source /etc/profile.d/modules.sh -module load picard/2.10.3 samtools/gcc/1.8 +module load picard/2.10.3 if [ $algo == 'sambamba' ] then @@ -44,6 +50,7 @@ then touch ${pair_id}.dedup.stat.txt elif [ $algo == 'samtools' ] then + module load samtools/gcc/1.8 samtools markdup -s --output-fmt BAM -@ $SLURM_CPUS_ON_NODE sort.bam ${pair_id}.dedup.bam touch ${pair_id}.dedup.stat.txt elif [ $algo == 'picard' ] @@ -54,7 +61,7 @@ then java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar MarkDuplicates BARCODE_TAG=RX I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt elif [ $algo == 'fgbio_umi' ] then - module load fgbio bwa/intel/0.7.15 + module load fgbio bwakit/0.7.15 bwa/intel/0.7.17 samtools/gcc/1.8 samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} fgbio --tmp-dir ./ GroupReadsByUmi -s identity -i ${sbam} -o ${pair_id}.group.bam --family-size-histogram ${pair_id}.umihist.txt -e 0 -m 0 fgbio --tmp-dir ./ CallMolecularConsensusReads -i ${pair_id}.group.bam -p consensus -M 1 -o ${pair_id}.consensus.bam -S ':none:' @@ -62,8 +69,8 @@ then samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam gzip ${pair_id}.consensus.R1.fastq gzip ${pair_id}.consensus.R2.fastq - bwa mem -M -C -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" /project/shared/bicf_workflow_ref/human/GRCh38/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz > out.sam - if [ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ] + bwa mem -M -C -t $SLURM_CPUS_ON_NODE -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" ${index_path}/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz > out.sam + if [[ ${index_path}/genome.fa.alt ]] then k8 ${testexe}/bwa-postalt.js -p tmphla ${index_path}/genome.fa.alt out.sam | samtools view -1 - > ${pair_id}.consensus.bam else @@ -73,4 +80,5 @@ then else cp ${sbam} ${pair_id}.dedup.bam fi +module load samtools/gcc/1.8 samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.dedup.bam diff --git a/alignment/rnaseqalign.sh b/alignment/rnaseqalign.sh index 963143fa2682ddb77e1d326148d36f906a9323f2..d044bda3ea5646968bb06609ec54eb3296816f61 100644 --- a/alignment/rnaseqalign.sh +++ b/alignment/rnaseqalign.sh @@ -59,9 +59,9 @@ else module load hisat2/2.1.0-intel if [ -s difffile ] then - hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/hisat_index/genome -1 $fq1 -2 $fq2 -S out.sam --summary-file ${pair_id}.alignerout.txt + hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/genome -1 $fq1 -2 $fq2 -S out.sam --summary-file ${pair_id}.alignerout.txt else - hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/hisat_index/genome -U $fq1 -S out.sam --summary-file ${pair_id}.alignerout.txt + hisat2 -p $SLURM_CPUS_ON_NODE --rg-id ${pair_id} --rg LB:tx --rg PL:illumina --rg PU:barcode --rg SM:${pair_id} --no-unal --dta -x ${index_path}/genome -U $fq1 -S out.sam --summary-file ${pair_id}.alignerout.txt fi if [[ $umi == 1 ]] then diff --git a/alignment/starfusion.sh b/alignment/starfusion.sh index fdde560708ff4bde1d8eb2776fed1eea624f3fba..81a58775d29d860f623557b5eb1c9321f423cf9e 100644 --- a/alignment/starfusion.sh +++ b/alignment/starfusion.sh @@ -14,7 +14,7 @@ OPTIND=1 # Reset OPTIND while getopts :r:a:b:p:m:fh opt do case $opt in - r) refgeno=$OPTARG;; + r) index_path=$OPTARG;; a) fq1=$OPTARG;; b) fq2=$OPTARG;; p) pair_id=$OPTARG;; @@ -49,13 +49,13 @@ then mkdir $tmphome fi export TMP_HOME=$tmphome - index_path=${refgeno}/CTAT_lib_trinity1.6 - trinity /usr/local/src/STAR-Fusion/STAR-Fusion --min_sum_frags 3 --CPU $SLURM_CPUS_ON_NODE --genome_lib_dir ${index_path} --left_fq ${fq1} --right_fq ${fq2} --examine_coding_effect --output_dir ${pair_id}_star_fusion + refgeno=${index_path}/CTAT_lib_trinity1.6 + trinity /usr/local/src/STAR-Fusion/STAR-Fusion --min_sum_frags 3 --CPU $SLURM_CPUS_ON_NODE --genome_lib_dir ${refgeno} --left_fq ${fq1} --right_fq ${fq2} --examine_coding_effect --output_dir ${pair_id}_star_fusion cp ${pair_id}_star_fusion/star-fusion.fusion_predictions.abridged.coding_effect.tsv ${pair_id}.starfusion.txt else module add star/2.5.2b - index_path=${refgeno}/CTAT_lib/ - STAR-Fusion --genome_lib_dir ${index_path} --min_sum_frags 3 --left_fq ${fq1} --right_fq ${fq2} --output_dir ${pair_id}_star_fusion &> star_fusion.err + refgeno=${index_path}/CTAT_lib/ + STAR-Fusion --genome_lib_dir ${refgeno} --min_sum_frags 3 --left_fq ${fq1} --right_fq ${fq2} --output_dir ${pair_id}_star_fusion &> star_fusion.err cp ${pair_id}_star_fusion/star-fusion.fusion_candidates.final.abridged ${pair_id}.starfusion.txt fi @@ -66,8 +66,8 @@ cut -f 5-8 ${pair_id}.starfusion.txt |perl -pe 's/\^|:/\t/g' | awk '{print "sing if [[ $filter == 1 ]] then cut -f 6,8 ${pair_id}.starfusion.txt |grep -v Breakpoint |perl -pe 's/\t/\n/g' |awk -F ':' '{print $1"\t"$2-1"\t"$2}' > temp.bed - bedtools intersect -wao -a temp.bed -b /project/shared/bicf_workflow_ref/human/GRCh38/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt - perl $baseDir/filter_genefusions.pl -p ${pair_id} -f ${pair_id}.starfusion.txt + bedtools intersect -wao -a temp.bed -b ${index_path}/cytoBand.txt |cut -f 1,2,7 > cytoband_pos.txt + perl $baseDir/filter_genefusions.pl -p ${pair_id} -r ${index_path} -f ${pair_id}.starfusion.txt fi diff --git a/variants/annot_sv.pl b/obsolete/annot_sv.pl similarity index 100% rename from variants/annot_sv.pl rename to obsolete/annot_sv.pl diff --git a/genect_rnaseq/cBioPortal_documents.pl b/obsolete/cBioPortal_documents.pl similarity index 89% rename from genect_rnaseq/cBioPortal_documents.pl rename to obsolete/cBioPortal_documents.pl index 31f6803d2e6dd0bde08ced0313be1b4f62910801..79b4f0a4ab2c08ddd9d7a57c38e9402755e8032b 100644 --- a/genect_rnaseq/cBioPortal_documents.pl +++ b/obsolete/cBioPortal_documents.pl @@ -4,9 +4,9 @@ use Getopt::Long qw(:config no_ignore_case no_auto_abbrev); use File::Basename; -my $results= GetOptions (\%opt,'fpkm|f=s','logcpm|l=s','cnv|c=s','prefix|p=s','help|h'); +my $results= GetOptions (\%opt,'fpkm|f=s','logcpm|l=s','cnv|c=s','prefix|p=s','help|h','datadir|d=s'); -open ENT_GENE, "</project/shared/bicf_workflow_ref/human/gene_info.human.txt" or die $!; +open ENT_GENE, "<$datadir\/gene_info.human.txt" or die $!; my %entrez; my %entgene; my $ent_header = <ENT_GENE>; @@ -17,7 +17,7 @@ while (my $line = <ENT_GENE>){ #$entrez{$row[2]}=$row[1]; } close ENT_GENE; -open ENT_ENS, "</project/shared/bicf_workflow_ref/human/GRCh38/genenames.txt" or die $!; +open ENT_ENS, "<$datadir\/genenames.txt" or die $!; my $gn_header = <ENT_ENS>; my %ensym; while (my $line = <ENT_ENS>){ @@ -26,7 +26,7 @@ while (my $line = <ENT_ENS>){ $entrez{$row[3]}=$entrez{$row[4]}; } close ENT_ENS; -open ENT_ENS, "</project/shared/bicf_workflow_ref/human/gene2ensembl.human.txt" or die $!; +open ENT_ENS, "<$datadir\/gene2ensembl.human.txt" or die $!; my $ens_header = <ENT_ENS>; while (my $line = <ENT_ENS>){ chomp $line; diff --git a/variants/parse_svresults.pl b/obsolete/parse_svresults.pl similarity index 100% rename from variants/parse_svresults.pl rename to obsolete/parse_svresults.pl diff --git a/variants/somatic_callers.sh b/obsolete/somatic_callers.sh similarity index 100% rename from variants/somatic_callers.sh rename to obsolete/somatic_callers.sh diff --git a/variants/svannot.pl b/obsolete/svannot.pl similarity index 100% rename from variants/svannot.pl rename to obsolete/svannot.pl diff --git a/variants/uniform_integrated_vcf.pl b/obsolete/uniform_integrated_vcf.pl similarity index 100% rename from variants/uniform_integrated_vcf.pl rename to obsolete/uniform_integrated_vcf.pl diff --git a/variants/annotvcf.sh b/variants/annotvcf.sh index c493fc17e600be9985b0b6971b3335bf7eb3e1b5..c538256f148a8b31c92b8513dd670caeb27ad33c 100755 --- a/variants/annotvcf.sh +++ b/variants/annotvcf.sh @@ -10,12 +10,13 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:v:p:h opt +while getopts :r:v:g:p:h opt do case $opt in r) index_path=$OPTARG;; p) pair_id=$OPTARG;; v) unionvcf=$OPTARG;; + g) snpeffgeno=$OPTARG;; h) usage;; esac done @@ -23,35 +24,57 @@ done shift $(($OPTIND -1)) source /etc/profile.d/modules.sh -module load bedtools/2.26.0 samtools/1.6 snpeff/4.3q +module load bedtools/2.26.0 snpeff/4.3q -if [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38/hisat_index' ]] +if [[ $index_path == '/project/shared/bicf_workflow_ref/human/grch38_cloud/rnaref' ]] then - index_path='/project/shared/bicf_workflow_ref/human/GRCh38' -elif [[ $index_path == '/project/shared/bicf_workflow_ref/GRCh38' ]] + index_path='/project/shared/bicf_workflow_ref/human/grch38_cloud/dnaref' +fi +if [[ -z $snpeffgeno ]] then - index_path='/project/shared/bicf_workflow_ref/human/GRCh38' + snpeffgeno='GRCh38.86' fi - -if [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh38' ]] +if [[ -f "${index_path}/gnomad.txt.gz" ]] then tabix -f ${unionvcf} bcftools annotate -Oz -a ${index_path}/gnomad.txt.gz -h ${index_path}/gnomad.header -c CHROM,POS,REF,ALT,GNOMAD_HOM,GNOMAD_AF,AF_POPMAX,GNOMAD_HG19_VARIANT -o ${pair_id}.gnomad.vcf.gz ${unionvcf} - tabix ${pair_id}.gnomad.vcf.gz - bcftools annotate -Oz -a ${index_path}/oncokb_hotspot.txt.gz -o ${pair_id}.oncohotspot.vcf.gz -h ${index_path}/oncokb_hotspot.header -c CHROM,FROM,TO,OncoKB_REF,OncoKB_ALT,Gene,OncoKB_ProteinChange,OncoKB_AF,OncoTree_Tissue,OncoTree_MainType,OncoTree_Code,OncoKBHotspot ${pair_id}.gnomad.vcf.gz - tabix ${pair_id}.oncohotspot.vcf.gz - bcftools annotate -Oz -a ${index_path}/repeat_regions.bed.gz -o ${pair_id}.repeat.vcf.gz --columns CHROM,FROM,TO,RepeatType -h /project/shared/bicf_workflow_ref/RepeatType.header ${pair_id}.oncohotspot.vcf.gz - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-downstream -no-upstream -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.repeat.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar annotate -id ${index_path}/dbSnp.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${index_path}/clinvar.vcf.gz - | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info LEGACY_ID,CNT ${index_path}/cosmic.vcf.gz - | java -Xmx10g -jar $SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz - | bgzip > ${pair_id}.annot.vcf.gz - tabix ${pair_id}.annot.vcf.gz -else - if [[ $index_path == '/project/shared/bicf_workflow_ref/mouse/GRCm38' ]] - then - snpeffvers='GRCm38.86' - elif [[ $index_path == '/project/shared/bicf_workflow_ref/human/GRCh37' ]] - then - snpeffvers='GRCh37.75' - fi - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffvers} ${unionvcf} |bgzip > ${pair_id}.annot.vcf.gz - tabix ${pair_id}.annot.vcf.gz + tabix -f ${pair_id}.gnomad.vcf.gz + unionvcf=${pair_id}.gnomad.vcf.gz +fi +if [[ -f "${index_path}/oncokb_hotspot.txt.gz" ]] +then + bcftools annotate -Oz -a ${index_path}/oncokb_hotspot.txt.gz -o ${pair_id}.oncohotspot.vcf.gz -h ${index_path}/oncokb_hotspot.header -c CHROM,FROM,TO,OncoKB_REF,OncoKB_ALT,Gene,OncoKB_ProteinChange,OncoKB_AF,OncoTree_Tissue,OncoTree_MainType,OncoTree_Code,OncoKBHotspot $unionvcf + tabix -f ${pair_id}.oncohotspot.vcf.gz + unionvcf=${pair_id}.oncohotspot.vcf.gz +fi +if [[ -f "${index_path}/repeat_regions.bed.gz" ]] +then + bcftools annotate -Oz -a ${index_path}/repeat_regions.bed.gz -o ${pair_id}.repeat.vcf.gz --columns CHROM,FROM,TO,RepeatType -h ${index_path}/RepeatType.header ${unionvcf} + unionvcf=${pair_id}.repeat.vcf.gz +fi + +acom="java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-downstream -no-upstream -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config $snpeffgeno ${unionvcf}" + +if [[ -f ${index_path}/dbSnp.vcf.gz ]] +then + acom+=" | java -jar $SNPEFF_HOME/SnpSift.jar annotate -id ${index_path}/dbSnp.vcf.gz -" fi +if [[ -f ${index_path}/clinvar.vcf.gz ]] +then + acom+=" | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info CLNSIG,CLNDSDB,CLNDSDBID,CLNDBN,CLNREVSTAT,CLNACC ${index_path}/clinvar.vcf.gz -" +fi +if [[ -f ${index_path}/cosmic.vcf.gz ]] +then + acom+=" | java -jar $SNPEFF_HOME/SnpSift.jar annotate -info LEGACY_ID,CNT ${index_path}/cosmic.vcf.gz -" +fi +if [[ -f ${index_path}/dbNSFP.txt.gz ]] +then + acom+=" | java -Xmx10g -jar $SNPEFF_HOME/SnpSift.jar dbnsfp -v -db ${index_path}/dbNSFP.txt.gz -" +fi + +acom+=" | bgzip > ${pair_id}.annot.vcf.gz " + +eval $acom + +tabix ${pair_id}.annot.vcf.gz diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index b8f1da3c19735fdd308edd95e74182c17b383abf..78365090c9676f5ae3d357aed73be54e8f30a259 100755 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -84,5 +84,5 @@ else cnvkit.py scatter ${pair_id}.cnr -s ${pair_id}.call.cns -t --segment-color "blue" -o ${pair_id}.cnv.scatter.pdf fi -cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b /project/shared/bicf_workflow_ref/human/GRCh38/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed +cut -f 1,2,3 ${pair_id}.call.cns | grep -v chrom | bedtools intersect -wao -b ${index_path}/cytoBand.txt -a stdin |cut -f 1,2,3,7 > ${pair_id}.cytoband.bed perl $baseDir/filter_cnvkit.pl -s ${pair_id}.call.cns diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index a8f6ee49c08c4a742943411c683292aeb4f1dbcc..018f765406f4ba2e7fe5653f805807794452a7df 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -37,6 +37,7 @@ fi if [[ -s "${index_path}/dbSnp.vcf.gz" ]] then dbsnp="${index_path}/dbSnp.vcf.gz" + gatk4_dbsnp="${index_path}/dbSnp.gatk4.vcf.gz" else echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz" usage @@ -56,11 +57,11 @@ else echo "Missing Fasta File: ${index_path}/genome.fa" usage fi -if [[ -z $pon ]] +if [[ -f $pon ]] then - ponopt=''; -else ponopt="--pon $pon" +else + ponopt=''; fi source /etc/profile.d/modules.sh @@ -99,7 +100,6 @@ then bcftools norm -c s -f ${reffa} -w 10 -O z -o ${pair_id}.platypus.vcf.gz platypus.vcf.gz elif [[ $algo == 'gatk' ]] then - gatk4_dbsnp=/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/dbSnp.gatk4.vcf.gz user=$USER module load gatk/4.1.4.0 gvcflist='' @@ -120,14 +120,14 @@ elif [ $algo == 'mutect2' ] then gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz module load gatk/4.1.4.0 - for i in *.bam; do - prefix="${i%.bam}" - echo ${prefix} - java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${i} O=artifact_metrics.txt R=${reffa} - gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} --enable-all-annotations -I ${i} --output ${prefix}.mutect.vcf - gatk --java-options "-Xmx20g" FilterMutectCalls -V ${prefix}.mutect.vcf -O ${prefix}.mutect.filt.vcf - vcf-sort ${prefix}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${prefix}.mutect.vcf.gz - done + bamlist='' + for i in *.bam; do + bamlist+="-I ${i} " + done + gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} ${bamlist} --output ${pair_id}.mutect.vcf -RF AllowAllReadsReadFilter --independent-mates + gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${pair_id}.mutect.vcf -O ${pair_id}.mutect.filt.vcf + vcf-sort ${pair_id}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz + elif [[ $algo == 'strelka2' ]] then if [[ $rna == 1 ]] diff --git a/variants/itdseek.sh b/variants/itdseek.sh index d867bb70d5f0c340fd152192ccd4f56d6243d373..f40a33053f79f732a8f89585d0d19a4a85360a81 100755 --- a/variants/itdseek.sh +++ b/variants/itdseek.sh @@ -16,6 +16,7 @@ do b) sbam=$OPTARG;; p) pair_id=$OPTARG;; l) itdbed=$OPTARG;; + g) snpeffgeno=$OPTARG;; h) usage;; esac done @@ -33,6 +34,10 @@ if [[ -z $SLURM_CPUS_ON_NODE ]] then SLURM_CPUS_ON_NODE=1 fi +if [[ -z $snpeffgeno ]] +then + snpeffgeno='GRCh38.86' +fi if [[ -a "${index_path}/genome.fa" ]] then @@ -52,4 +57,4 @@ stexe=`which samtools` samtools view -@ $SLURM_CPUS_ON_NODE -L ${itdbed} ${sbam} | /project/shared/bicf_workflow_ref/seqprg/itdseek-1.2/itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${itdbed} -a stdin | bgzip > ${pair_id}.itdseek.vcf.gz tabix ${pair_id}.itdseek.vcf.gz -bcftools norm --fasta-ref $reffa -c w -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz +bcftools norm --fasta-ref $reffa -c w -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config $snpeffgeno - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz diff --git a/variants/pindel.sh b/variants/pindel.sh index 9eba0c20b45b2f48c5c9747250ca18330e04d581..294d7cd57175d1792a23be3bdc6eabd7bb524275 100755 --- a/variants/pindel.sh +++ b/variants/pindel.sh @@ -15,6 +15,7 @@ do r) index_path=$OPTARG;; p) pair_id=$OPTARG;; l) idtbed=$OPTARG;; + g) snpeffgeno=$OPTARG;; h) usage;; esac done @@ -46,8 +47,12 @@ fi source /etc/profile.d/modules.sh genomefiledate=`find ${reffa} -maxdepth 0 -printf "%TY%Tm%Td\n"` +if [[ -z $snpeffgeno ]] +then + snpeffgeno='GRCh38.86' +fi -module load samtools/1.6 pindel/0.2.5-intel snpeff/4.3q bedtools/2.26.0 +module load samtools/gcc/1.8 bcftools/gcc/1.8 htslib/gcc/1.8 pindel/0.2.5-intel snpeff/4.3q bedtools/2.26.0 touch ${pair_id}.pindel.config for i in *.bam; do sname=`echo "$i" |cut -f 1 -d '.'` @@ -60,13 +65,13 @@ cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 10 tabix pindel.vcf.gz bash $baseDir/norm_annot.sh -r ${index_path} -p pindel -v pindel.vcf.gz perl $baseDir/parse_pindel.pl ${pair_id} pindel.norm.vcf.gz -java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz +java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz if [[ -a $idtbed ]] then - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.dup.vcf | bedtools intersect -header -b ${idtbed} -a stdin | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.dup.vcf | bedtools intersect -header -b ${idtbed} -a stdin | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz else - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.dup.vcf | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.dup.vcf | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz fi -java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.sv.vcf | bgzip > ${pair_id}.pindel_sv.vcf.gz +java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.sv.vcf | bgzip > ${pair_id}.pindel_sv.vcf.gz diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index e11da3fb777692204d2a96c13a41da204aaa311d..d21321a0852fe3b9d0535fdc41273ea58aa67b85 100755 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -45,6 +45,7 @@ if [[ -z $normal ]] || [[ -z $tumor ]] || [[ -z $algo ]]; then echo $normal $tumor $algo usage fi + if [[ -z $SLURM_CPUS_ON_NODE ]] then SLURM_CPUS_ON_NODE=1 @@ -55,11 +56,11 @@ then mtumor=tumor mnormal=normal fi -if [[ -z $pon ]] +if [[ -f $pon ]] then - ponopt=''; -else ponopt="--pon $pon" +else + ponopt=''; fi if [[ -a "${index_path}/genome.fa" ]] @@ -114,8 +115,8 @@ then gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz module load gatk/4.1.4.0 picard/2.10.3 snpeff/4.3q samtools/gcc/1.8 vcftools/0.1.14 java -XX:ParallelGCThreads=$SLURM_CPUS_ON_NODE -Djava.io.tmpdir=./ -Xmx16g -jar $PICARD/picard.jar CollectSequencingArtifactMetrics I=${tumor} O=artifact_metrics.txt R=${reffa} - gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} --enable-all-annotations -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf - gatk --java-options "-Xmx20g" FilterMutectCalls -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf + gatk --java-options "-Xmx20g" Mutect2 $ponopt -R ${reffa} -I ${tumor} -tumor ${tid} -I ${normal} -normal ${nid} --output ${tid}.mutect.vcf + gatk --java-options "-Xmx20g" FilterMutectCalls -R ${reffa} -V ${tid}.mutect.vcf -O ${tid}.mutect.filt.vcf vcf-sort ${tid}.mutect.filt.vcf | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '(GEN[*].DP >= 10)' | bgzip > ${pair_id}.mutect.vcf.gz elif [ $algo == 'varscan' ] then diff --git a/variants/svcalling.sh b/variants/svcalling.sh index 7579fa4cd4b03a8a9adff123c154a01a2ddc9eee..eedc0c9b8f13289abe96c7c3dac3a9f3495024f9 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -50,6 +50,10 @@ else usage fi +if [[ -z $snpeffgeno ]] +then + snpeffgeno='GRCh38.86' +fi source /etc/profile.d/modules.sh module load htslib/gcc/1.8 samtools/gcc/1.8 bcftools/gcc/1.8 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 @@ -89,7 +93,7 @@ then #MERGE DELLY AND MAKE BED bcftools concat -a -O v delly_dup.bcf delly_inv.bcf delly_tra.bcf delly_del.bcf delly_ins.bcf| vcf-sort -t temp > delly.vcf bgzip delly.vcf - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 delly.vcf | bgzip > ${pair_id}.delly.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} delly.vcf | bgzip > ${pair_id}.delly.vcf.gz fi if [[ $method == 'svaba' ]] then @@ -99,7 +103,7 @@ then else /project/shared/bicf_workflow_ref/seqprg/svaba/bin/svaba run -p $SLURM_CPUS_ON_NODE -G ${reffa} -t ${sbam} -a ${pair_id} fi - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.svaba.unfiltered.somatic.sv.vcf | bgzip > ${pair_id}.svaba.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.svaba.unfiltered.somatic.sv.vcf | bgzip > ${pair_id}.svaba.vcf.gz fi if [[ $method == 'lumpy' ]] @@ -120,7 +124,7 @@ then else speedseq sv -t $SLURM_CPUS_ON_NODE -o lumpy -R ${reffa} -B ${sbam} -D discordants.bam -S splitters.bam -x ${index_path}/exclude_alt.bed fi - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 lumpy.sv.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].DV >= 20 )" | bgzip > ${pair_id}.lumpy.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} lumpy.sv.vcf.gz | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].DV >= 20 )" | bgzip > ${pair_id}.lumpy.vcf.gz fi if [[ $method == 'pindel' ]] then @@ -136,11 +140,11 @@ then pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz tabix pindel.vcf.gz - bash $baseDir/norm_annot.sh -p pindel -v pindel.vcf.gz + bash $baseDir/norm_annot.sh -r ${index_path} -p pindel -v pindel.vcf.gz perl $baseDir/parse_pindel.pl ${pair_id} pindel.norm.vcf.gz - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.dup.vcf | bedtools intersect -header -b ${bed} -a stdin | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.sv.vcf | bgzip > ${pair_id}.pindel_sv.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.dup.vcf | bedtools intersect -header -b ${bed} -a stdin | bgzip > ${pair_id}.pindel_tandemdup.vcf.gz + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} ${pair_id}.sv.vcf | bgzip > ${pair_id}.pindel_sv.vcf.gz fi if [[ $method == 'itdseek' ]] then @@ -148,5 +152,5 @@ then samtools view -@ $SLURM_CPUS_ON_NODE -L ${bed} ${sbam} | /project/shared/bicf_workflow_ref/seqprg/itdseek-1.2/itdseek.pl --refseq ${reffa} --samtools ${stexe} --bam ${sbam} | vcf-sort | bedtools intersect -header -b ${bed} -a stdin | bgzip > ${pair_id}.itdseek.vcf.gz tabix ${pair_id}.itdseek.vcf.gz - bcftools norm --fasta-ref $reffa -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz + bcftools norm --fasta-ref $reffa -m - -Ov ${pair_id}.itdseek.vcf.gz | java -Xmx30g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} - |bgzip > ${pair_id}.itdseek_tandemdup.vcf.gz fi diff --git a/variants/uni_norm_annot.sh b/variants/uni_norm_annot.sh index 6a09f64e64933ac2ce2a6e98a0944dabbce6eab9..2fa40ac622be0154274a97b1fa889134e96fb3dd 100755 --- a/variants/uni_norm_annot.sh +++ b/variants/uni_norm_annot.sh @@ -10,12 +10,13 @@ usage() { exit 1 } OPTIND=1 # Reset OPTIND -while getopts :r:p:v:h opt +while getopts :r:p:g:v:h opt do case $opt in r) index_path=$OPTARG;; p) pair_id=$OPTARG;; v) vcf=$OPTARG;; + g) snpeffgeno=$OPTARG;; h) usage;; esac done @@ -42,6 +43,6 @@ bgzip -f ${pair_id}.uniform.vcf j=${pair_id}.uniform.vcf.gz tabix -f $j bcftools norm --fasta-ref $reffa -m - -Oz $j -o ${pair_id}.norm.vcf.gz -bash $baseDir/annotvcf.sh -p ${pair_id} -r $index_path -v ${pair_id}.norm.vcf.gz +bash $baseDir/annotvcf.sh -p ${pair_id} -r $index_path -v ${pair_id}.norm.vcf.gz -g $snpeffgeno /project/shared/bicf_workflow_ref/seqprg/vt/vt decompose_blocksub ${pair_id}.annot.vcf.gz -p -a -o ${pair_id}.vcf bgzip -f ${pair_id}.vcf