From 679cd228273f24f037a43ba881b9d54b167d6495 Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Fri, 17 Nov 2017 07:35:02 -0600 Subject: [PATCH] umi RG bug --- alignment/calculate_depthcov.pl | 106 +++++++++++++++----------------- alignment/markdups.sh | 2 +- 2 files changed, 49 insertions(+), 59 deletions(-) diff --git a/alignment/calculate_depthcov.pl b/alignment/calculate_depthcov.pl index 8ce512c..de109e7 100755 --- a/alignment/calculate_depthcov.pl +++ b/alignment/calculate_depthcov.pl @@ -1,72 +1,62 @@ #!/usr/bin/perl -w #get_regions_coverage.pl -open OUT, ">utswv2_cds.lackscov.txt" or die $!; -open OUT2, ">utswv2_cds.covstats.txt" or die $!; -open MISS, ">utswv2_cds.missingcov.txt" or die $!; -print MISS join("\t",'Sample','Chromosome','Position','ExonName', +my $cfile = shift @ARGV; +my $prefix = (split(/\./,$cfile))[0]; + +open OUT2, ">$prefix\_exoncoverage.txt" or die $!; +open MISS, ">$prefix\_lowcoverage.txt" or die $!; +print MISS join("\t",'Sample','Chromosome','Start','End','ExonName', 'MinDepth','MaxDepth','MedianDepth','AvgDepth', - 'Fraction100X+','Fraction10X-','BP100X+','BP10X-','TotalBP'),"\n"; -print OUT2 join("\t",'Sample','Chromosome','Position','ExonName', + 'Fraction100X+','BP100X+','TotalBP'),"\n"; +print OUT2 join("\t",'Sample','Chromosome','Start','End','ExonName', 'MinDepth','MaxDepth','MedianDepth','AvgDepth', - 'Fraction100X+','Fraction10X-','BP100X+','BP10X-','TotalBP'),"\n"; -print OUT join("\t",'Chromosome','Position','ExonName','NumberSamples10X-_HalfExon'),"\n"; - -my %totalbp; -my %numsamp; -my @covfiles = @ARGV; + 'Fraction100X+','BP100X+','TotalBP'),"\n"; my %cov; my %covstats; -foreach $cfile (@covfiles) { - my %good; - my $prefix = (split(/\./,$cfile))[0]; - open COV, "<$cfile" or die $!; - while (my $line = <COV>) { - chomp($line); - next if ($line =~ m/^all/); - my ($chrom,$pos,$end,$name,$depth,$bp,$total,$percent) = split(/\t/,$line); - my $region = join(":",$chrom,$pos,$end); - $cov{$region}{$depth} = $percent; - $covstats{$region}{sumdepth} += $depth*$bp; - $exons{$region} = $name; - $totalbp{$region} = $total; - if ($depth <= 10) { - $poor{$region} += $bp; - } - if ($depth >= 100) { - $good{$region} += $bp; - } +my %exons; +my %totalbp; +my %good; + +open COV, "<$cfile" or die $!; +while (my $line = <COV>) { + chomp($line); + next if ($line =~ m/^all/); + my ($chrom,$pos,$end,$name,$depth,$bp,$total,$percent) = split(/\t/,$line); + my $region = join(":",$chrom,$pos,$end); + $cov{$region}{$depth} = $percent; + $covstats{$region}{sumdepth} += $depth*$bp; + $exons{$region} = $name; + $totalbp{$region} = $total; + if ($depth >= 100) { + $good{$region} += $bp; } - close COV; - foreach $reg (keys %totalbp) { - $good{$reg} = 0 unless $good{$reg}; - $poor{$reg} = 0 unless $poor{$reg}; - if ($totalbp{$reg} < 1) { - next; - } - my $avgdepth = sprintf("%.0f",$covstats{$reg}{sumdepth}/$totalbp{$reg}); - my @depths = sort {$a <=> $b} keys %{$cov{$reg}}; - %covhash = %{$cov{$reg}}; - my @perc = @covhash{@depths}; - my @cum_sum = cumsum(@perc); - my $mediandep = 0; - foreach my $i (0..$#cum_sum) { - if ($cum_sum[$i] < 0.5) { - $mediandep = $depths[$i]; - } - } - $good_fract = sprintf("%.4f",$good{$reg}/$totalbp{$reg}); - $poor_fract = sprintf("%.4f",$poor{$reg}/$totalbp{$reg}); - print OUT2 join("\t",$prefix,split(/:/,$reg),$exons{$reg},$depths[0],$depths[-1],$mediandep,$avgdepth,$good_fract,$poor_fract,$good{$reg},$poor{$reg},$totalbp{$reg}),"\n"; - if ($poor_fract> 0.5) { - $numsamp{$reg} ++; - print MISS join("\t",$prefix,split(/:/,$reg),$exons{$reg},$depths[0],$depths[-1],$mediandep,$avgdepth,$good_fract,$poor_fract,$good{$reg},$poor{$reg},$totalbp{$reg}),"\n"; +} +close COV; +foreach $reg (keys %totalbp) { + $good{$reg} = 0 unless $good{$reg}; + if ($totalbp{$reg} < 1) { + next; + } + my $avgdepth = sprintf("%.0f",$covstats{$reg}{sumdepth}/$totalbp{$reg}); + my @depths = sort {$a <=> $b} keys %{$cov{$reg}}; + my %covhash = %{$cov{$reg}}; + my @perc = @covhash{@depths}; + my @cum_sum = cumsum(@perc); + my $mediandep = 0; + foreach my $i (0..$#cum_sum) { + if ($cum_sum[$i] < 0.5) { + $mediandep = $depths[$i]; } } + $good_fract = sprintf("%.4f",$good{$reg}/$totalbp{$reg}); + print OUT2 join("\t",$prefix,split(/:/,$reg),$exons{$reg},$depths[0],$depths[-1],$mediandep,$avgdepth,$good_fract,$good{$reg},$totalbp{$reg}),"\n"; + if ($good_fract < 1) { + print MISS join("\t",$prefix,split(/:/,$reg),$exons{$reg},$depths[0],$depths[-1],$mediandep,$avgdepth,$good_fract,$good{$reg},$totalbp{$reg}),"\n"; + } } -foreach $reg (sort {$a cmp $b} keys %numsamp) { - print OUT join("\t",split(/:/,$reg),$exons{$reg},$numsamp{$reg}),"\n"; -} + + sub cumsum { my @nums = @_; my @cumsum = (); diff --git a/alignment/markdups.sh b/alignment/markdups.sh index 793dc96..f040fc0 100644 --- a/alignment/markdups.sh +++ b/alignment/markdups.sh @@ -64,7 +64,7 @@ then samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam gzip ${pair_id}.consensus.R1.fastq gzip ${pair_id}.consensus.R2.fastq - bwa mem -M -C -t 2 -R '@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}' /project/shared/bicf_workflow_ref/GRCh38/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz | samtools view -1 - > ${pair_id}.consensus.bam + bwa mem -M -C -t 2 -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" /project/shared/bicf_workflow_ref/GRCh38/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz | samtools view -1 - > ${pair_id}.consensus.bam samtools sort --threads 10 -o ${pair_id}.dedup.bam ${pair_id}.consensus.bam else cp ${sbam} ${pair_id}.dedup.bam -- GitLab