From 91f715b4ae2ecb62afacec91c5ed00c545c357d7 Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Wed, 6 Dec 2017 16:58:29 -0600 Subject: [PATCH] update svcalling for somatic --- alignment/markdups.sh | 3 +-- variants/somatic_vc.sh | 7 +++++-- variants/svcalling.sh | 12 +++++++----- variants/unionvcf.pl | 2 +- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/alignment/markdups.sh b/alignment/markdups.sh index f040fc0..ad1b8c7 100644 --- a/alignment/markdups.sh +++ b/alignment/markdups.sh @@ -54,11 +54,10 @@ then java -Djava.io.tmpdir=./ -Xmx4g -jar $PICARD/picard.jar MarkDuplicates BARCODE_TAG=RX I=${sbam} O=${pair_id}.dedup.bam M=${pair_id}.dedup.stat.txt elif [ $algo == 'fgbio_umi' ] then + module load fgbio samtools index -@ $SLURM_CPUS_ON_NODE ${sbam} - source activate fgbiotools fgbio GroupReadsByUmi -s identity -i ${sbam} -o ${pair_id}.group.bam -m 0 fgbio CallMolecularConsensusReads -i ${pair_id}.group.bam -p consensus -M 1 -o ${pair_id}.consensus.bam -S ':none:' - source deactivate module load bwa/intel/0.7.15 samtools index ${pair_id}.consensus.bam samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index 2c1dccb..37bc444 100644 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -12,12 +12,14 @@ usage(){ echo "-y --TumorID" echo "-i --NormalBAM used for Mantra in the case of UMI consensus" echo "-j --TumorBAM used for Mantra in the case of UMI consensus" + echo "-b --TargetBed" + echo "Example: bash somatic_vc.sh -a strelka2 -y ORD1_N_panel1385 -y ORD1_T_panel138 -n ORD1_N_panel1385.final.bam -t ORD1_T_panel1385.final.bam" exit 1 } OPTIND=1 # Reset OPTIND -while getopts :n:t:r:x:y:i:j:a:h opt +while getopts :n:t:r:x:y:i:j:a:b:h opt do case $opt in r) index_path=$OPTARG;; @@ -28,6 +30,7 @@ do i) mnormal=$OPTARG;; j) mtumor=$OPTARG;; a) algo=$OPTARG;; + b) tbed=$OPTARG;; h) usage;; esac done @@ -130,6 +133,6 @@ fi if [ $algo == 'lancet' ] then module load snpeff/4.3q lancet vcftools/0.1.14 - lancet --tumor ${tumor} --normal ${normal} --ref $reffa --bed $target_panel --num-threads 16 > lancet.vcf + lancet --tumor ${tumor} --normal ${normal} --ref $reffa -B $tbed --num-threads 16 > lancet.vcf vcf-sort lancet.vcf | vcf-annotate -n --fill-type -n | perl -pe 's/TUMOR/${tid}/' | perl -pe 's/NORMAL/${nid}/g' |bgzip > ${pair_id}.lancet.vcf.gz fi diff --git a/variants/svcalling.sh b/variants/svcalling.sh index 504ff88..ea22332 100644 --- a/variants/svcalling.sh +++ b/variants/svcalling.sh @@ -86,12 +86,12 @@ fi #MERGE DELLY AND MAKE BED bcftools concat -a -O v delly_dup.bcf delly_inv.bcf delly_tra.bcf delly_del.bcf delly_ins.bcf| vcf-sort -t temp > delly.vcf -perl $baseDir/vcf2bed.sv.pl delly.vcf > delly.bed +perl $baseDir/vcf2bed.sv.pl delly.vcf | sort -V -k 1,1 -k 2,2n > delly.bed bgzip delly.vcf if [[ -n ${normal} ]] then tabix delly.vcf.gz - bcftools view -O z -o delly.vcf.gz -s ${keepid} ${pair_id}.delly.vcf.gz + bcftools view -O z -o ${pair_id}.delly.vcf.gz -s ${keepid} delly.vcf.gz else mv delly.vcf.gz ${pair_id}.delly.vcf.gz fi @@ -110,19 +110,21 @@ then gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" splitters.sam | samtools view -S -b - | samtools sort -o normal.splitters.bam - gawk '{ if ($0~"^@") { print; next } else { $10="*"; $11="*"; print } }' OFS="\t" discordants.sam | samtools view -S -b - | samtools sort -o normal.discordants.bam - speedseq sv -t $SLURM_CPUS_ON_NODE -o sssv -R ${reffa} -B ${normal},${sbam} -D normal.discordants.bam,discordants.bam -S normal.splitters.bam,splitters.bam -x ${index_path}/exclude_alt.bed - bcftools view -O z -o sssv.vcf.gz -s ${keepid} ${pair_id}.sssv.sv.vcf.gz + java -jar $SNPEFF_HOME/SnpSift.jar filter -n "GEN[0].SU > 1" sssv.sv.vcf.gz |bgzip > tumor.sssv.sv.vcf.gz + tabix tumor.sssv.sv.vcf.gz + bcftools view -O z -o ${pair_id}.sssv.sv.vcf.gz -s ${keepid} tumor.sssv.sv.vcf.gz else speedseq sv -t $SLURM_CPUS_ON_NODE -o ${pair_id}.sssv -R ${reffa} -B ${sbam} -D discordants.bam -S splitters.bam -x ${index_path}/exclude_alt.bed fi -java -jar $SNPEFF_HOME/SnpSift.jar filter "GEN[0].SU > 2" ${pair_id}.sssv.sv.vcf.gz > lumpy.vcf +java -jar $SNPEFF_HOME/SnpSift.jar filter "GEN[0].SU > 10" ${pair_id}.sssv.sv.vcf.gz > lumpy.vcf perl $baseDir/vcf2bed.sv.pl lumpy.vcf > lumpy.bed #COMPARE DELLY & LUMPY if [[ -n ${normal} ]] then bedtools multiinter -cluster -header -names novobreak delly lumpy -i novobreak.bed delly.bed lumpy.bed > sv.intersect.bed - grep novobreak sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v start | bedtools intersect -header -b stdin -a ${tid}_${nid}.novobreak.vcf.gz | perl -p -e 's/SPIKEIN/${tid}/' |bgzip > svt1.vcf.gz + grep novobreak sv.intersect.bed |cut -f 1,2,3 |sort -V -k 1,1 -k 2,2n |grep -v start | bedtools intersect -header -b stdin -a ${pair_id}.novobreak.vcf.gz | perl -p -e 's/SPIKEIN/${tid}/' |bgzip > svt1.vcf.gz else bedtools multiinter -cluster -header -names delly lumpy -i delly.bed lumpy.bed > sv.intersect.bed fi diff --git a/variants/unionvcf.pl b/variants/unionvcf.pl index 699a7bb..f03c96d 100755 --- a/variants/unionvcf.pl +++ b/variants/unionvcf.pl @@ -15,7 +15,7 @@ my @sampleorder; my %headerlines; foreach $vcf (@vcffiles) { - $caller = (split(/\./,$vcf))[1]; + $caller = (split(/\./,$vcf))[-3]; open VCF, "gunzip -c $vcf|" or die $!; my @sampleids; while (my $line = <VCF>) { -- GitLab