From d8c03da1a85873c611913164637493b27436e810 Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Wed, 21 Feb 2018 15:03:33 -0600 Subject: [PATCH] modify errors in somatic_vc.sh --- variants/add_readct_shimmer.pl | 44 +++++++++++++++++++++++++++++ variants/somatic_vc.sh | 4 +-- variants/vcf2bed.pl | 51 ++++++++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 2 deletions(-) create mode 100755 variants/add_readct_shimmer.pl create mode 100755 variants/vcf2bed.pl diff --git a/variants/add_readct_shimmer.pl b/variants/add_readct_shimmer.pl new file mode 100755 index 0000000..51d3574 --- /dev/null +++ b/variants/add_readct_shimmer.pl @@ -0,0 +1,44 @@ +#!/usr/bin/perl -w +#add_readct_shimmer.pl + +open CT, "<shimmer/som_counts.bh.txt" or die $!; +while (my $line = <CT>) { + chomp($line); + my ($chrom,$pos,$refnorm,$reftum,$refnormct, + $reftumct,$alt,$altnormct,$alttumct, + $pval,$qval) = split(/\t/,$line); + $stats{$chrom}{$pos}{'tumor'} = {ao=>$alttumct,ro=>$reftumct, + ad=>join(",",$reftumct,$alttumct)}; + $stats{$chrom}{$pos}{'normal'} = {ao=>$altnormct,ro=>$refnormct, + ad=>join(",",$refnormct,$altnormct)}; +} + +open VCF, "<shimmer/somatic_diffs.vcf" or die $!; +open OUT, ">shimmer/somatic_diffs.readct.vcf" or die $!; +while (my $line = <VCF>) { + chomp($line); + if ($line =~ m/#/) { + print OUT $line,"\n"; + next; + } + my ($chrom, $pos,$id,$ref,$alt,$score, + $filter,$annot,$format,@gts) = split(/\t/, $line); + my %hash = (); + foreach $a (split(/;/,$annot)) { + my ($key,$val) = split(/=/,$a); + $hash{$key} = $val; + } + my @deschead = split(/:/,$format); + my @samples = ('normal','tumor'); + my @newgts; + foreach my $i (0..1) { + %gtinfo = %{$stats{$chrom}{$pos}{$samples[$i]}}; + my @gtinfo = split(/:/,$gts[$i]); + foreach my $i (0..$#deschead) { + $gtinfo{$deschead[$i]} = $gtinfo[$i]; + } + push @newgts, join(":",$gtinfo{GT},$gtinfo{DP},$gtinfo{ad},$gtinfo{ao},$gtinfo{ro}); + } + $newformat = 'GT:DP:AD:AO:RO'; + print OUT join("\t",$chrom,$pos,$id,$ref,$alt,$score,$filter,$annot,$newformat,@newgts),"\n"; +} diff --git a/variants/somatic_vc.sh b/variants/somatic_vc.sh index a130aab..fe274ef 100644 --- a/variants/somatic_vc.sh +++ b/variants/somatic_vc.sh @@ -116,7 +116,7 @@ then else awk '{print $1":"$2"-"$3}' ${tbed} | parallel --delay 2 -j 10 "java -Xmx20g -jar \$GATK_JAR -R ${reffa} -D ${dbsnp} -T MuTect2 -stand_call_conf 10 -A FisherStrand -A QualByDepth -A VariantType -A DepthPerAlleleBySample -A HaplotypeScore -A AlleleBalance -I:tumor ${tumor} -I:normal ${normal} --cosmic ${cosmic} -o ${tid}.{}.mutect.vcf -L {}" fi - vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar \$SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz + vcf-concat ${tid}*mutect.vcf | vcf-sort | vcf-annotate -n --fill-type | java -jar $SNPEFF_HOME/SnpSift.jar filter -p '((FS <= 60) & GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" |bgzip > ${pair_id}.mutect.vcf.gz fi if [ $algo == 'varscan' ] @@ -133,7 +133,7 @@ if [ $algo == 'shimmer' ] then module load snpeff/4.3q shimmer/0.1.1 samtools/1.6 vcftools/0.1.14 shimmer.pl --minqual 25 --ref ${reffa} ${normal} ${tumor} --outdir shimmer 2> shimmer.err - perl /project/PHG/PHG_Clinical/clinseq_workflows/scripts/add_readct_shimmer.pl + perl $baseDir/add_readct_shimmer.pl vcf-annotate -n --fill-type shimmer/somatic_diffs.readct.vcf | java -jar $SNPEFF_HOME/SnpSift.jar filter '(GEN[*].DP >= 10)' | perl -pe "s/TUMOR/${tid}/" | perl -pe "s/NORMAL/${nid}/g" | bgzip > ${pair_id}.shimmer.vcf.gz fi diff --git a/variants/vcf2bed.pl b/variants/vcf2bed.pl new file mode 100755 index 0000000..235635d --- /dev/null +++ b/variants/vcf2bed.pl @@ -0,0 +1,51 @@ +#!/usr/bin/perl + +use strict; + +if (scalar @ARGV != 1) { + print "vcf2bed.pl <vcf_file>\n"; + exit 1; +} + +my $vcfFile = $ARGV[0]; + +open VCF, $vcfFile + or die "Cannot open $vcfFile"; + +while (<VCF>) { + + if (index($_, '#') == 0) { + next; + } + + my @fields = split /\t/, $_; + + my $chrom = $fields[0]; + my $startPos = $fields[1]; + my $ref = $fields[3]; + my @alts = split (/,/, $fields[4]); + my $endPos = $startPos; + my $indelCall = ""; + my $annot = $fields[7]; + foreach my $alt (@alts) { + next if (length($ref) > 50 || length($alt) > 50); + if (length($ref) > length($alt)) { + # deletion event + my $diff = substr($ref, length($alt)); + + $endPos = $startPos + length($diff); + $indelCall = '-' . $diff; + } + elsif (length($alt) > length($ref)) { + # insertion event + my $diff = substr($alt, length($ref)); + + $indelCall = '+' . $diff; + } + + # print out the line + print "$chrom\t$startPos\t$endPos\t$indelCall\n"; + } +} + +close VCF; -- GitLab