diff --git a/alignment/dnaseqalign.sh b/alignment/dnaseqalign.sh index 7b9fca4142c1c7125508f1299db82bcfdc36293b..666ba1e53078ed0d9a838ea28ebb3503959dc7af 100644 --- a/alignment/dnaseqalign.sh +++ b/alignment/dnaseqalign.sh @@ -46,7 +46,7 @@ fi testexe='/project/shared/bicf_workflow_ref/seqprg/bin' source /etc/profile.d/modules.sh -module load bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3 +module load python/2.7.x-anaconda bwakit/0.7.15 samtools/gcc/1.8 picard/2.10.3 baseDir="`dirname \"$0\"`" diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index 4d23d7c5067c8607b9a8a75b9974d129f076dad9..8322057a653400904eaf4bf2c8eeb1ecd0e3e7c0 100755 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -26,7 +26,7 @@ done shift $(($OPTIND -1)) -index_path='/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/' +index_path='/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj' # Check for mandatory options if [[ -z $pair_id ]] || [[ -z $sbam ]]; then @@ -38,7 +38,7 @@ then fi baseDir="`dirname \"$0\"`" -if [[ $capture == '${index_path}/UTSWV2.bed' ]] +if [[ $capture == "${index_path}/UTSWV2.bed" ]] then normals="${index_path}/UTSWV2.normals.cnn" targets="${index_path}/UTSWV2.cnvkit_" @@ -46,11 +46,11 @@ then then normals="${index_path}/UTSWV2.uminormals.cnn" fi -elif [[ $capture == '${index_path}/UTSWV2_2.panelplus.bed' ]] +elif [[ $capture == "${index_path}/UTSWV2_2.panelplus.bed" ]] then normals="${index_path}/panelofnormals.panel1385V2_2.cnn" targets="${index_path}/panel1385V2-2.cnvkit_" -elif [[ $capture == '${index_path}/hemepanelV3.bed' ]] +elif [[ $capture == "${index_path}/hemepanelV3.bed" ]] then normals="${index_path}/hemepanelV3.flatreference.cnn" targets="${index_path}/hemepanelV3.cnvkit_" diff --git a/variants/gatkrunner.sh b/variants/gatkrunner.sh index f8ba7b6fa732a699e2f126a8f21111bd79e7fce0..bd0e964fd4a65d2ebb9edb0231574495c579cc48 100755 --- a/variants/gatkrunner.sh +++ b/variants/gatkrunner.sh @@ -57,11 +57,7 @@ else fi source /etc/profile.d/modules.sh -user=$USER -module load gatk/4.x singularity/2.6.1 samtools/gcc/1.8 -mkdir /tmp/${user} -export TMP_HOME=/tmp/${user} - +module load gatk/4.1.2.0 samtools/gcc/1.8 samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} if [[ $algo == 'gatkbam_rna' ]] @@ -71,14 +67,14 @@ then java -Xmx4g -jar $PICARD/picard.jar ReorderSam I=${pair_id}.clean.bam O=${pair_id}.sort.bam R=${reffa} CREATE_INDEX=TRUE java -Xmx4g -jar $PICARD/picard.jar AddOrReplaceReadGroups INPUT=${pair_id}.clean.bam O=${pair_id}.rg_added_sorted.bam SO=coordinate RGID=${pair_id} RGLB=tx RGPL=illumina RGPU=barcode RGSM=${pair_id} samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.rg_added_sorted.bam - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk SplitNCigarReads -R ${reffa} -I ${pair_id}.rg_added_sorted.bam -O ${pair_id}.split.bam - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${pair_id}.split.bam --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${pair_id}.split.bam -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table + gatk SplitNCigarReads -R ${reffa} -I ${pair_id}.rg_added_sorted.bam -O ${pair_id}.split.bam + gatk --java-options "-Xmx32g" BaseRecalibrator -I ${pair_id}.split.bam --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities + gatk --java-options "-Xmx32g" ApplyBQSR -I ${pair_id}.split.bam -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam elif [[ $algo == 'gatkbam' ]] then - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${sbam} --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${sbam} -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table + gatk --java-options "-Xmx32g" BaseRecalibrator -I ${sbam} --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities + gatk --java-options "-Xmx32g" ApplyBQSR -I ${sbam} -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam elif [[ $algo == 'abra2' ]] diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index c24e3c37f993696082ac7f20de434da0639328a8..6386e1b83c9ff26baf6d87935408e37e57482ab1 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -33,21 +33,21 @@ if [[ -z $SLURM_CPUS_ON_NODE ]] then SLURM_CPUS_ON_NODE=1 fi -if [[ -a "${index_path}/dbSnp.vcf.gz" ]] +if [[ -s "${index_path}/dbSnp.vcf.gz" ]] then dbsnp="${index_path}/dbSnp.vcf.gz" else echo "Missing dbSNP File: ${index_path}/dbSnp.vcf.gz" usage fi -if [[ -a "${index_path}/GoldIndels.vcf.gz" ]] +if [[ -s "${index_path}/GoldIndels.vcf.gz" ]] then knownindel="${index_path}/GoldIndels.vcf.gz" else echo "Missing InDel File: ${index_path}/GoldIndels.vcf.gz" usage fi -if [[ -a "${index_path}/genome.fa" ]] +if [[ -s "${index_path}/genome.fa" ]] then reffa="${index_path}/genome.fa" dict="${index_path}/genome.dict" diff --git a/variants/norm_annot.sh b/variants/norm_annot.sh index 4f89845c93e6df8258ad7229ac3fc4d7a5b6cc04..4ccd4c2360bbcbfc1ceede436805c81dd59cd2d5 100755 --- a/variants/norm_annot.sh +++ b/variants/norm_annot.sh @@ -30,4 +30,4 @@ perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf bgzip -f ${pair_id}.uniform.vcf j=${pair_id}.uniform.vcf.gz tabix -f $j -bcftools norm -m - -O z -o ${pair_id}.norm.vcf.gz $j +bcftools norm -m - -Oz $j -o ${pair_id}.norm.vcf.gz diff --git a/variants/pindel.sh b/variants/pindel.sh index 936caff1e619883b6aaafac9cf164934629d6580..b980dc11aa801372eea331c9b063c8b5d8e081e0 100644 --- a/variants/pindel.sh +++ b/variants/pindel.sh @@ -56,8 +56,9 @@ done pindel -T $SLURM_CPUS_ON_NODE -f ${reffa} -i ${pair_id}.pindel.config -o ${pair_id}.pindel_out --RP pindel2vcf -P ${pair_id}.pindel_out -r ${reffa} -R HG38 -d ${genomefiledate} -v pindel.vcf cat pindel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter " ( GEN[*].AD[1] >= 10 )" | bgzip > pindel.vcf.gz -perl $baseDir/norm_annot.sh -r ${index_path} -p pindel -v pindel.vcf.gz -perl $baseDir/parse_pindel.pl ${pair_id} pindel.norm.vcf.gz -java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.indel.vcf |bgzip > ${pair_id}.pindel_indel.vcf.gz +perl $baseDir/parse_pindel.pl ${pair_id} pindel.vcf.gz +java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.indel.vcf |bgzip > indel.vcf.gz +perl $baseDir/norm_annot.sh -r ${index_path} -p pindel_indel -v indel.vcf.gz +mv pindel_indel.norm.vcf.gz ${pair_id}.pindel_indel.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.dup.vcf |bgzip > ${pair_id}.pindel_tandemdup.vcf.gz java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config GRCh38.86 ${pair_id}.sv.vcf |bgzip > ${pair_id}.pindel_sv.vcf.gz diff --git a/variants/uni_norm_annot.sh b/variants/uni_norm_annot.sh new file mode 100644 index 0000000000000000000000000000000000000000..dba02f29f8ffdd1717d49e7f33f8b78caccb15be --- /dev/null +++ b/variants/uni_norm_annot.sh @@ -0,0 +1,38 @@ +#!/bin/bash +#union.sh + +usage() { + echo "-h Help documentation for gatkrunner.sh" + echo "-r --Reference Genome: GRCh38 or GRCm38" + echo "-p --Prefix for output file name" + echo "-v --VCF File" + echo "Example: bash union.sh -p prefix -r /path/GRCh38" + exit 1 +} +OPTIND=1 # Reset OPTIND +while getopts :r:p:v:h opt +do + case $opt in + r) index_path=$OPTARG;; + p) pair_id=$OPTARG;; + + v) vcf=$OPTARG;; + h) usage;; + esac +done +function join_by { local IFS="$1"; shift; echo "$*"; } +shift $(($OPTIND -1)) +baseDir="`dirname \"$0\"`" + +source /etc/profile.d/modules.sh +module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q + +perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf +mv ${vcf} ${pair_id}.ori.vcf.gz +bgzip -f ${pair_id}.uniform.vcf +j=${pair_id}.uniform.vcf.gz +tabix -f $j +bcftools norm -m - -Oz $j -o ${pair_id}.norm.vcf.gz +bash $baseDir/annotvcf.sh -p ${pair_id} -r $index_path -v ${pair_id}.norm.vcf.gz +/project/shared/bicf_workflow_ref/seqprg/vt/vt decompose_blocksub ${pair_id}.annot.vcf.gz -p -a -o ${pair_id}.vcf +bgzip -f ${pair_id}.vcf diff --git a/variants/unionvcf.pl b/variants/unionvcf.pl index 37452dc15b6e80403590c96f9e73995eaa509c14..c63fbae307a92537c0eca3500c617f1fdba5034f 100755 --- a/variants/unionvcf.pl +++ b/variants/unionvcf.pl @@ -3,6 +3,8 @@ my $headerfile = shift @ARGV; my @vcffiles = @ARGV; +my @callers = ('fb','mutect','gatk','platypus','strelka2','shimmer','virmid'); +%algos = map {$_=>1} @callers; open HEADER, "<$headerfile" or die $!; open OUT, ">int.vcf" or die $!; @@ -15,7 +17,11 @@ my @sampleorder; my %headerlines; foreach $vcf (@vcffiles) { - $caller = (split(/\./,$vcf))[-3]; + my @filename = (split(/\./,$vcf)); + my $caller; + foreach $fio (@filename) { + $caller = $fio if ($algos{$fio}); + } open VCF, "gunzip -c $vcf|" or die $!; my @sampleids; while (my $line = <VCF>) { @@ -125,19 +131,18 @@ foreach $vcf (@vcffiles) { my @filts = split(";",$filter); my %filterqc = map {$_ => 1} @filts; delete $filterqc{'.'}; - if ($gtfilt{'StrandBias'} || ($hash{FS} && $hash{FS} > 60) || - ($hash{SAP} && $hash{SAP} > 20)) { + if ($gtfilt{'StrandBias'} || ($hash{FS} && $hash{FS} > 60)) { # ||($hash{SAP} && $hash{SAP} > 20)) { $filterqc{strandBias} = 1; - }if (scalar(keys %filterqc) > 1) { + $annot .= ';strandBias=1'; + }if (scalar(keys %filterqc) > 0) { $filter = join(";",keys %filterqc); }else { $filter = '.'; } - $lines{$chrom}{$pos}{$ref}{$alt}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc]; + $lines{$chrom}{$pos}{$ref}{$alt}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc] unless $lines{$chrom}{$pos}{$ref}{$alt}{$caller}; } close VCF; } -my @callers = ('fb','mutect','gatk','platypus','strelka2','shimmer','virmid'); F1:foreach $chr (sort {$a cmp $b} keys %lines) { F2:foreach $pos (sort {$a <=> $b} keys %{$lines{$chr}}) {