Skip to content
Snippets Groups Projects
Commit 679cd228 authored by Brandi Cantarel's avatar Brandi Cantarel
Browse files

umi RG bug

parent 2c1dee61
Branches
Tags publish_0.0.5
No related merge requests found
#!/usr/bin/perl -w #!/usr/bin/perl -w
#get_regions_coverage.pl #get_regions_coverage.pl
open OUT, ">utswv2_cds.lackscov.txt" or die $!; my $cfile = shift @ARGV;
open OUT2, ">utswv2_cds.covstats.txt" or die $!; my $prefix = (split(/\./,$cfile))[0];
open MISS, ">utswv2_cds.missingcov.txt" or die $!;
print MISS join("\t",'Sample','Chromosome','Position','ExonName', open OUT2, ">$prefix\_exoncoverage.txt" or die $!;
open MISS, ">$prefix\_lowcoverage.txt" or die $!;
print MISS join("\t",'Sample','Chromosome','Start','End','ExonName',
'MinDepth','MaxDepth','MedianDepth','AvgDepth', 'MinDepth','MaxDepth','MedianDepth','AvgDepth',
'Fraction100X+','Fraction10X-','BP100X+','BP10X-','TotalBP'),"\n"; 'Fraction100X+','BP100X+','TotalBP'),"\n";
print OUT2 join("\t",'Sample','Chromosome','Position','ExonName', print OUT2 join("\t",'Sample','Chromosome','Start','End','ExonName',
'MinDepth','MaxDepth','MedianDepth','AvgDepth', 'MinDepth','MaxDepth','MedianDepth','AvgDepth',
'Fraction100X+','Fraction10X-','BP100X+','BP10X-','TotalBP'),"\n"; 'Fraction100X+','BP100X+','TotalBP'),"\n";
print OUT join("\t",'Chromosome','Position','ExonName','NumberSamples10X-_HalfExon'),"\n";
my %totalbp;
my %numsamp;
my @covfiles = @ARGV;
my %cov; my %cov;
my %covstats; my %covstats;
foreach $cfile (@covfiles) { my %exons;
my %good; my %totalbp;
my $prefix = (split(/\./,$cfile))[0]; my %good;
open COV, "<$cfile" or die $!;
while (my $line = <COV>) { open COV, "<$cfile" or die $!;
chomp($line); while (my $line = <COV>) {
next if ($line =~ m/^all/); chomp($line);
my ($chrom,$pos,$end,$name,$depth,$bp,$total,$percent) = split(/\t/,$line); next if ($line =~ m/^all/);
my $region = join(":",$chrom,$pos,$end); my ($chrom,$pos,$end,$name,$depth,$bp,$total,$percent) = split(/\t/,$line);
$cov{$region}{$depth} = $percent; my $region = join(":",$chrom,$pos,$end);
$covstats{$region}{sumdepth} += $depth*$bp; $cov{$region}{$depth} = $percent;
$exons{$region} = $name; $covstats{$region}{sumdepth} += $depth*$bp;
$totalbp{$region} = $total; $exons{$region} = $name;
if ($depth <= 10) { $totalbp{$region} = $total;
$poor{$region} += $bp; if ($depth >= 100) {
} $good{$region} += $bp;
if ($depth >= 100) {
$good{$region} += $bp;
}
} }
close COV; }
foreach $reg (keys %totalbp) { close COV;
$good{$reg} = 0 unless $good{$reg}; foreach $reg (keys %totalbp) {
$poor{$reg} = 0 unless $poor{$reg}; $good{$reg} = 0 unless $good{$reg};
if ($totalbp{$reg} < 1) { if ($totalbp{$reg} < 1) {
next; next;
} }
my $avgdepth = sprintf("%.0f",$covstats{$reg}{sumdepth}/$totalbp{$reg}); my $avgdepth = sprintf("%.0f",$covstats{$reg}{sumdepth}/$totalbp{$reg});
my @depths = sort {$a <=> $b} keys %{$cov{$reg}}; my @depths = sort {$a <=> $b} keys %{$cov{$reg}};
%covhash = %{$cov{$reg}}; my %covhash = %{$cov{$reg}};
my @perc = @covhash{@depths}; my @perc = @covhash{@depths};
my @cum_sum = cumsum(@perc); my @cum_sum = cumsum(@perc);
my $mediandep = 0; my $mediandep = 0;
foreach my $i (0..$#cum_sum) { foreach my $i (0..$#cum_sum) {
if ($cum_sum[$i] < 0.5) { if ($cum_sum[$i] < 0.5) {
$mediandep = $depths[$i]; $mediandep = $depths[$i];
}
}
$good_fract = sprintf("%.4f",$good{$reg}/$totalbp{$reg});
$poor_fract = sprintf("%.4f",$poor{$reg}/$totalbp{$reg});
print OUT2 join("\t",$prefix,split(/:/,$reg),$exons{$reg},$depths[0],$depths[-1],$mediandep,$avgdepth,$good_fract,$poor_fract,$good{$reg},$poor{$reg},$totalbp{$reg}),"\n";
if ($poor_fract> 0.5) {
$numsamp{$reg} ++;
print MISS join("\t",$prefix,split(/:/,$reg),$exons{$reg},$depths[0],$depths[-1],$mediandep,$avgdepth,$good_fract,$poor_fract,$good{$reg},$poor{$reg},$totalbp{$reg}),"\n";
} }
} }
$good_fract = sprintf("%.4f",$good{$reg}/$totalbp{$reg});
print OUT2 join("\t",$prefix,split(/:/,$reg),$exons{$reg},$depths[0],$depths[-1],$mediandep,$avgdepth,$good_fract,$good{$reg},$totalbp{$reg}),"\n";
if ($good_fract < 1) {
print MISS join("\t",$prefix,split(/:/,$reg),$exons{$reg},$depths[0],$depths[-1],$mediandep,$avgdepth,$good_fract,$good{$reg},$totalbp{$reg}),"\n";
}
} }
foreach $reg (sort {$a cmp $b} keys %numsamp) {
print OUT join("\t",split(/:/,$reg),$exons{$reg},$numsamp{$reg}),"\n";
}
sub cumsum { sub cumsum {
my @nums = @_; my @nums = @_;
my @cumsum = (); my @cumsum = ();
......
...@@ -64,7 +64,7 @@ then ...@@ -64,7 +64,7 @@ then
samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam
gzip ${pair_id}.consensus.R1.fastq gzip ${pair_id}.consensus.R1.fastq
gzip ${pair_id}.consensus.R2.fastq gzip ${pair_id}.consensus.R2.fastq
bwa mem -M -C -t 2 -R '@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}' /project/shared/bicf_workflow_ref/GRCh38/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz | samtools view -1 - > ${pair_id}.consensus.bam bwa mem -M -C -t 2 -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" /project/shared/bicf_workflow_ref/GRCh38/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz | samtools view -1 - > ${pair_id}.consensus.bam
samtools sort --threads 10 -o ${pair_id}.dedup.bam ${pair_id}.consensus.bam samtools sort --threads 10 -o ${pair_id}.dedup.bam ${pair_id}.consensus.bam
else else
cp ${sbam} ${pair_id}.dedup.bam cp ${sbam} ${pair_id}.dedup.bam
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment