diff --git a/alignment/bamqc.sh b/alignment/bamqc.sh index a464836b41ae1ee6eb5ac56531dde11627594523..cee7b80555bb0b6aa724dda97bcc33b13f0151f7 100644 --- a/alignment/bamqc.sh +++ b/alignment/bamqc.sh @@ -59,12 +59,12 @@ if [[ $nuctype == 'dna' ]]; then samtools view -@ $NPROC -b -L ${bed} -o ${pair_id}.ontarget.bam ${sbam} samtools index -@ $NPROC ${pair_id}.ontarget.bam samtools flagstat ${pair_id}.ontarget.bam > ${pair_id}.ontarget.flagstat.txt - java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt --TMP_DIR=${tmpdir} - java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt --TMP_DIR=${tmpdir} + java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectAlignmentSummaryMetrics R=${index_path}/genome.fa I=${pair_id}.ontarget.bam OUTPUT=${pair_id}.alignmentsummarymetrics.txt TMP_DIR=${tmpdir} + java -Xmx64g -Djava.io.tmpdir=${tmpdir} -XX:ParallelGCThreads=$NPROC -jar $PICARD/picard.jar EstimateLibraryComplexity I=${sbam} OUTPUT=${pair_id}.libcomplex.txt TMP_DIR=${tmpdir} samtools view -@ $NPROC -b -q 1 ${sbam} | bedtools coverage -sorted -hist -g ${index_path}/genomefile.txt -b stdin -a ${bed} > ${pair_id}.mapqualcov.txt samtools view -@ $NPROC ${sbam} | awk '{sum+=$5} END { print "Mean MAPQ =",sum/NR}' > ${pair_id}.meanmap.txt fi - java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt --TMP_DIR=${tmpdir} + java -Xmx64g -Djava.io.tmpdir=${tmpdir} -jar $PICARD/picard.jar CollectInsertSizeMetrics INPUT=${sbam} HISTOGRAM_FILE=${pair_id}.hist.ps REFERENCE_SEQUENCE=${index_path}/genome.fa OUTPUT=${pair_id}.hist.txt TMP_DIR=${tmpdir} bedtools coverage -sorted -g ${index_path}/genomefile.txt -a ${bed} -b ${sbam} -hist > ${pair_id}.covhist.txt grep ^all ${pair_id}.covhist.txt > ${pair_id}.genomecov.txt perl $baseDir/calculate_depthcov.pl ${pair_id}.covhist.txt diff --git a/alignment/markdups.sh b/alignment/markdups.sh index 306343b3edd20cd6b96d85702dc3a7b319803b66..dbe1b8ef0c81e4088621d8007a6513f306771943 100644 --- a/alignment/markdups.sh +++ b/alignment/markdups.sh @@ -71,9 +71,9 @@ elif [ $algo == 'fgbio_umi' ] then module load fgbio bwakit/0.7.15 bwa/intel/0.7.17 samtools/gcc/1.8 samtools index -@ $NPROC ${sbam} - fgbio --tmp-dir ./ GroupReadsByUmi -s identity -i ${sbam} -o ${pair_id}.group.bam --family-size-histogram ${pair_id}.umihist.txt -e 0 -m 0 - fgbio --tmp-dir ./ CallMolecularConsensusReads -i ${pair_id}.group.bam -p consensus -M 1 -o ${pair_id}.consensus.bam -S ':none:' - samtools index ${pair_id}.consensus.bam + fgbio --tmp-dir ./ GroupReadsByUmi -s identity -i ${sbam} -o group.bam --family-size-histogram ${pair_id}.umihist.txt -e 0 -m 0 + fgbio --tmp-dir ./ CallMolecularConsensusReads -i group.bam -p consensus -M 1 -o ${pair_id}.consensus.bam -S ':none:' + samtools index -@ $NPROC ${pair_id}.consensus.bam samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam gzip ${pair_id}.consensus.R1.fastq gzip ${pair_id}.consensus.R2.fastq @@ -85,6 +85,8 @@ then samtools view -1 out.sam > ${pair_id}.consensus.bam fi samtools sort --threads $NPROC -o ${pair_id}.dedup.bam ${pair_id}.consensus.bam + samtools sort --threads $NPROC -o ${pair_id}.group.bam group.bam + samtools index -@ $NPROC ${pair_id}.group.bam else cp ${sbam} ${pair_id}.dedup.bam fi diff --git a/variants/gatkrunner.sh b/variants/gatkrunner.sh index 2bb2273c5422d8941801ff4ce74ea5997c381cbf..df24cc7a0c197c0f8570e1d7537a56e3505887ed 100755 --- a/variants/gatkrunner.sh +++ b/variants/gatkrunner.sh @@ -60,7 +60,7 @@ fi source /etc/profile.d/modules.sh module load gatk/4.1.4.0 samtools/gcc/1.8 which samtools -/cm/shared/apps/samtools/gcc/1.8/bin/samtools index -@ $NPROC ${sbam} +samtools index -@ $NPROC ${sbam} if [[ $algo == 'gatkbam_rna' ]] then @@ -72,12 +72,12 @@ then gatk SplitNCigarReads -R ${reffa} -I ${pair_id}.rg_added_sorted.bam -O ${pair_id}.split.bam gatk --java-options "-Xmx32g" BaseRecalibrator -I ${pair_id}.split.bam --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities gatk --java-options "-Xmx32g" ApplyBQSR -I ${pair_id}.split.bam -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table - /cm/shared/apps/samtools/gcc/1.8/bin/samtools index -@ $NPROC ${pair_id}.final.bam + samtools index -@ $NPROC ${pair_id}.final.bam elif [[ $algo == 'gatkbam' ]] then gatk --java-options "-Xmx32g" BaseRecalibrator -I ${sbam} --known-sites ${index_path}/dbSnp.gatk4.vcf.gz -R ${reffa} -O ${pair_id}.recal_data.table --use-original-qualities gatk --java-options "-Xmx32g" ApplyBQSR -I ${sbam} -R ${reffa} -O ${pair_id}.final.bam --use-original-qualities -bqsr ${pair_id}.recal_data.table - /cm/shared/apps/samtools/gcc/1.8/bin/samtools index -@ $NPROC ${pair_id}.final.bam + samtools index -@ $NPROC ${pair_id}.final.bam elif [[ $algo == 'abra2' ]] then diff --git a/variants/parse_pindel.pl b/variants/parse_pindel.pl index 3f0031680c793be8789f088ad3ff3e4af661f09c..ecc47d2dc058e7ad0f640607464f87f9590f5bc1 100755 --- a/variants/parse_pindel.pl +++ b/variants/parse_pindel.pl @@ -79,12 +79,15 @@ while (my $line = <VCF>) { if ($hash{SVTYPE} eq 'DUP:TANDEM') { print DUP join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n"; }elsif ($hash{SVTYPE} eq 'DEL' || $hash{SVTYPE} eq 'INS') { - if (abs($hash{SVLEN}) < 20) { + if (abs($hash{SVLEN}) < 100) { print SI join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n"; }else { $newalt = "<".$hash{SVTYPE}.">"; print SV join("\t",$chrom,$pos,$id,'N',$newalt,$score,$filter,$annot,$newformat,@newgts),"\n"; } + }else { + $newalt = "<".$hash{SVTYPE}.">"; + print SV join("\t",$chrom,$pos,$id,'N',$newalt,$score,$filter,$annot,$newformat,@newgts),"\n"; } } close VCF; diff --git a/variants/svcalling.sh b/variants/svcalling.sh index 75a062d9854fbf9a953f8eed94de35359dbe5f19..17830e7b4a84c303bffbf63a041495cd58588287 100755 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -59,10 +59,10 @@ fi source /etc/profile.d/modules.sh module load htslib/gcc/1.8 samtools/gcc/1.8 bcftools/gcc/1.8 bedtools/2.26.0 snpeff/4.3q vcftools/0.1.14 export PATH=/project/shared/bicf_workflow_ref/seqprg/bin:$PATH +mkdir temp if [[ $method == 'delly' ]] then - mkdir temp module load delly2/v0.7.7-multi if [[ -n ${normal} ]] then @@ -105,10 +105,17 @@ then else svaba run -p $NPROC -G ${reffa} -t ${sbam} -a ${pair_id} fi - vcf-concat ${pair_id}.svaba.unfiltered*sv.vcf | vcf-sort -t temp > svaba.unfiltered.vcf - bash $baseDir/norm_annot.sh -r ${index_path} -p svaba -v svaba.unfiltered.vcf -s - java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} svaba.norm.vcf | bgzip > svaba.vcf.gz - + vcf-concat ${pair_id}.svaba.unfiltered*sv.vcf | vcf-sort -t temp > svaba.sv.vcf + cat svaba.sv.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[0] >= 20 )" | bgzip > svaba.vcf.gz + tabix svaba.sv.vcf.gz + bash $baseDir/norm_annot.sh -r ${index_path} -p svaba.sv -v svaba.sv.vcf.gz -s + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} svaba.sv.norm.vcf | bgzip > ${pair_id}.svaba.sv.vcf.gz + vcf-concat ${pair_id}.svaba.unfiltered*indel.vcf | vcf-sort -t temp > svaba.indel.vcf + cat svaba.indel.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter "( GEN[*].AD[0] >= 20 )" | bgzip > svaba.indel.vcf.gz + tabix svaba.indel.vcf.gz + bash $baseDir/norm_annot.sh -r ${index_path} -p svaba.indel -v svaba.indel.vcf.gz -s + java -Xmx10g -jar $SNPEFF_HOME/snpEff.jar -no-intergenic -lof -c $SNPEFF_HOME/snpEff.config ${snpeffgeno} svaba.indel.norm.vcf | bgzip > ${pair_id}.svaba.vcf.gz + fi if [[ $method == 'lumpy' ]] diff --git a/variants/unionvcf.pl b/variants/unionvcf.pl index c63fbae307a92537c0eca3500c617f1fdba5034f..84eedd53a0f63723b3207e7d430a2b0002d700b2 100755 --- a/variants/unionvcf.pl +++ b/variants/unionvcf.pl @@ -3,7 +3,7 @@ my $headerfile = shift @ARGV; my @vcffiles = @ARGV; -my @callers = ('fb','mutect','gatk','platypus','strelka2','shimmer','virmid'); +my @callers = ('fb','mutect','gatk','platypus','strelka2','shimmer','pindel','svaba'); %algos = map {$_=>1} @callers; open HEADER, "<$headerfile" or die $!;