diff --git a/variants/cnvkit.sh b/variants/cnvkit.sh index 4d23d7c5067c8607b9a8a75b9974d129f076dad9..8322057a653400904eaf4bf2c8eeb1ecd0e3e7c0 100755 --- a/variants/cnvkit.sh +++ b/variants/cnvkit.sh @@ -26,7 +26,7 @@ done shift $(($OPTIND -1)) -index_path='/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj/' +index_path='/project/shared/bicf_workflow_ref/human/GRCh38/clinseq_prj' # Check for mandatory options if [[ -z $pair_id ]] || [[ -z $sbam ]]; then @@ -38,7 +38,7 @@ then fi baseDir="`dirname \"$0\"`" -if [[ $capture == '${index_path}/UTSWV2.bed' ]] +if [[ $capture == "${index_path}/UTSWV2.bed" ]] then normals="${index_path}/UTSWV2.normals.cnn" targets="${index_path}/UTSWV2.cnvkit_" @@ -46,11 +46,11 @@ then then normals="${index_path}/UTSWV2.uminormals.cnn" fi -elif [[ $capture == '${index_path}/UTSWV2_2.panelplus.bed' ]] +elif [[ $capture == "${index_path}/UTSWV2_2.panelplus.bed" ]] then normals="${index_path}/panelofnormals.panel1385V2_2.cnn" targets="${index_path}/panel1385V2-2.cnvkit_" -elif [[ $capture == '${index_path}/hemepanelV3.bed' ]] +elif [[ $capture == "${index_path}/hemepanelV3.bed" ]] then normals="${index_path}/hemepanelV3.flatreference.cnn" targets="${index_path}/hemepanelV3.cnvkit_" diff --git a/variants/gatkrunner.sh b/variants/gatkrunner.sh index f8ba7b6fa732a699e2f126a8f21111bd79e7fce0..c1b936144c291fd8a7e54dfa6483289e95565f77 100755 --- a/variants/gatkrunner.sh +++ b/variants/gatkrunner.sh @@ -57,11 +57,7 @@ else fi source /etc/profile.d/modules.sh -user=$USER -module load gatk/4.x singularity/2.6.1 samtools/gcc/1.8 -mkdir /tmp/${user} -export TMP_HOME=/tmp/${user} - +module load gatk/4.2.1.0 samtools/gcc/1.8 samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} if [[ $algo == 'gatkbam_rna' ]] @@ -71,14 +67,14 @@ then java -Xmx4g -jar $PICARD/picard.jar ReorderSam I=${pair_id}.clean.bam O=${pair_id}.sort.bam R=${reffa} CREATE_INDEX=TRUE java -Xmx4g -jar $PICARD/picard.jar AddOrReplaceReadGroups INPUT=${pair_id}.clean.bam O=${pair_id}.rg_added_sorted.bam SO=coordinate RGID=${pair_id} RGLB=tx RGPL=illumina RGPU=barcode RGSM=${pair_id} samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.rg_added_sorted.bam - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk SplitNCigarReads -R ${reffa} -I ${pair_id}.rg_added_sorted.bam -O ${pair_id}.split.bam - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${pair_id}.split.bam --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${pair_id}.split.bam -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table + gatk SplitNCigarReads -R ${reffa} -I ${pair_id}.rg_added_sorted.bam -O ${pair_id}.split.bam + gatk --java-options "-Xmx32g" BaseRecalibrator -I ${pair_id}.split.bam --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities + gatk --java-options "-Xmx32g" ApplyBQSR -I ${pair_id}.split.bam -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam elif [[ $algo == 'gatkbam' ]] then - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" BaseRecalibrator -I ${sbam} --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" ApplyBQSR -I ${sbam} -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table + gatk --java-options "-Xmx32g" BaseRecalibrator -I ${sbam} --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities + gatk --java-options "-Xmx32g" ApplyBQSR -I ${sbam} -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table samtools index -@ $SLURM_CPUS_ON_NODE ${pair_id}.final.bam elif [[ $algo == 'abra2' ]] diff --git a/variants/germline_vc.sh b/variants/germline_vc.sh index bdb08492a37f7463155f90d43fd295f14e1c5f28..59aeb3fd0db5d8c3887fe586f1f3fe9b17848335 100755 --- a/variants/germline_vc.sh +++ b/variants/germline_vc.sh @@ -86,24 +86,19 @@ elif [[ $algo == 'gatk' ]] then gatk4_dbsnp=${index_path}/clinseq_prj/dbSnp.gatk4.vcf.gz user=$USER - module load gatk/4.x singularity/2.6.1 - if [[ ! -e "/tmp/${user}" ]] - then - mkdir /tmp/${user} - fi - export TMP_HOME=/tmp/${user} + module load gatk/4.1.2.0 gvcflist='' for i in *.bam; do prefix="${i%.bam}" echo ${prefix} - singularity exec -H /tmp/${user} /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" HaplotypeCaller -R ${reffa} -I ${i} -A FisherStrand -A QualByDepth -A DepthPerAlleleBySample -A TandemRepeat --emit-ref-confidence GVCF -O haplotypecaller.vcf.gz + gatk --java-options "-Xmx32g" HaplotypeCaller -R ${reffa} -I ${i} -A FisherStrand -A QualByDepth -A DepthPerAlleleBySample -A TandemRepeat --emit-ref-confidence GVCF -O haplotypecaller.vcf.gz java -jar $PICARD/picard.jar SortVcf I=haplotypecaller.vcf.gz O=${prefix}.gatk.g.vcf R=${reffa} CREATE_INDEX=TRUE - gvcflist="$gvcflist --variant ${prefix}.gatk.g.vcf" + gvcflist="$gvcflist -V ${prefix}.gatk.g.vcf" done - interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' |paste -sd "," -` - #singularity exec -H /tmp/$user /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" GenomicsDBImport $gvcflist --genomicsdb-workspace-path gendb --intervals $interval - singularity exec -H /tmp/$user /project/apps/singularity-images/gatk4/gatk-4.x.simg /gatk/gatk --java-options "-Xmx32g" GenotypeGVCFs $gvcflist -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf + interval=`cat ${reffa}.fai |cut -f 1 |grep -v decoy |grep -v 'HLA' |grep -v alt |grep -v 'chrUn' |grep -v 'random' | perl -pe 's/\n/ -L /g' |perl -pe 's/-L $//'` + gatk --java-options "-Xmx32g" GenomicsDBImport $gvcflist --genomicsdb-workspace-path gendb -L $interval + gatk --java-options "-Xmx32g" GenotypeGVCFs -V gendb://gendb -R ${reffa} -D ${gatk4_dbsnp} -O gatk.vcf bcftools norm -c s -f ${reffa} -w 10 -O v gatk.vcf | vcf-annotate -n --fill-type gatk.vcf | bgzip > ${pair_id}.gatk.vcf.gz tabix ${pair_id}.gatk.vcf.gz elif [[ $algo == 'platypus' ]] diff --git a/variants/norm_annot.sh b/variants/norm_annot.sh index 7aafd268c158b009428b102bddc71eacb04b3290..4ccd4c2360bbcbfc1ceede436805c81dd59cd2d5 100755 --- a/variants/norm_annot.sh +++ b/variants/norm_annot.sh @@ -30,5 +30,4 @@ perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf bgzip -f ${pair_id}.uniform.vcf j=${pair_id}.uniform.vcf.gz tabix -f $j -bcftools norm -m - -O v $j | /project/shared/bicf_workflow_ref/seqprg/vt/vt decompose_blocksub - -a -o ${pair_id}.norm.vcf -bgzip -f ${pair_id}.norm.vcf +bcftools norm -m - -Oz $j -o ${pair_id}.norm.vcf.gz diff --git a/variants/uni_norm_annot.sh b/variants/uni_norm_annot.sh new file mode 100644 index 0000000000000000000000000000000000000000..b2007aacec26549c0b890855b05d8f372dafde72 --- /dev/null +++ b/variants/uni_norm_annot.sh @@ -0,0 +1,37 @@ +#!/bin/bash +#union.sh + +usage() { + echo "-h Help documentation for gatkrunner.sh" + echo "-r --Reference Genome: GRCh38 or GRCm38" + echo "-p --Prefix for output file name" + echo "-v --VCF File" + echo "Example: bash union.sh -p prefix -r /path/GRCh38" + exit 1 +} +OPTIND=1 # Reset OPTIND +while getopts :r:p:v:h opt +do + case $opt in + r) index_path=$OPTARG;; + p) pair_id=$OPTARG;; + v) vcf=$OPTARG;; + h) usage;; + esac +done +function join_by { local IFS="$1"; shift; echo "$*"; } +shift $(($OPTIND -1)) +baseDir="`dirname \"$0\"`" + +source /etc/profile.d/modules.sh +module load bedtools/2.26.0 samtools/1.6 bcftools/1.6 snpeff/4.3q + +perl $baseDir\/uniform_vcf_gt.pl $pair_id $vcf +mv ${vcf} ${pair_id}.ori.vcf.gz +bgzip -f ${pair_id}.uniform.vcf +j=${pair_id}.uniform.vcf.gz +tabix -f $j +bcftools norm -m - -Oz $j -o ${pair_id}.norm.vcf.gz +bash $baseDir/annotvcf.sh -p ${pair_id} -r $index_path -v ${pair_id}.norm.vcf.gz +/project/shared/bicf_workflow_ref/seqprg/vt/vt decompose_blocksub ${pair_id}.annot.vcf.gz -p -a -o ${pair_id}.vcf +bgzip -f ${pair_id}.vcf diff --git a/variants/unionvcf.pl b/variants/unionvcf.pl index a4db69e48a2a79fbebcc02c4ff4419506ab3abe8..c63fbae307a92537c0eca3500c617f1fdba5034f 100755 --- a/variants/unionvcf.pl +++ b/variants/unionvcf.pl @@ -3,6 +3,8 @@ my $headerfile = shift @ARGV; my @vcffiles = @ARGV; +my @callers = ('fb','mutect','gatk','platypus','strelka2','shimmer','virmid'); +%algos = map {$_=>1} @callers; open HEADER, "<$headerfile" or die $!; open OUT, ">int.vcf" or die $!; @@ -15,7 +17,11 @@ my @sampleorder; my %headerlines; foreach $vcf (@vcffiles) { - $caller = (split(/\./,$vcf))[-3]; + my @filename = (split(/\./,$vcf)); + my $caller; + foreach $fio (@filename) { + $caller = $fio if ($algos{$fio}); + } open VCF, "gunzip -c $vcf|" or die $!; my @sampleids; while (my $line = <VCF>) { @@ -133,11 +139,10 @@ foreach $vcf (@vcffiles) { }else { $filter = '.'; } - $lines{$chrom}{$pos}{$ref}{$alt}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc]; + $lines{$chrom}{$pos}{$ref}{$alt}{$caller} = [$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,\@newgts,\@gtdesc] unless $lines{$chrom}{$pos}{$ref}{$alt}{$caller}; } close VCF; } -my @callers = ('fb','mutect','gatk','platypus','strelka2','shimmer','virmid'); F1:foreach $chr (sort {$a cmp $b} keys %lines) { F2:foreach $pos (sort {$a <=> $b} keys %{$lines{$chr}}) {