From 679cd228273f24f037a43ba881b9d54b167d6495 Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Fri, 17 Nov 2017 07:35:02 -0600
Subject: [PATCH] umi RG bug

---
 alignment/calculate_depthcov.pl | 106 +++++++++++++++-----------------
 alignment/markdups.sh           |   2 +-
 2 files changed, 49 insertions(+), 59 deletions(-)

diff --git a/alignment/calculate_depthcov.pl b/alignment/calculate_depthcov.pl
index 8ce512c..de109e7 100755
--- a/alignment/calculate_depthcov.pl
+++ b/alignment/calculate_depthcov.pl
@@ -1,72 +1,62 @@
 #!/usr/bin/perl -w
 #get_regions_coverage.pl
 
-open OUT, ">utswv2_cds.lackscov.txt" or die $!;
-open OUT2, ">utswv2_cds.covstats.txt" or die $!;
-open MISS, ">utswv2_cds.missingcov.txt" or die $!;
-print MISS join("\t",'Sample','Chromosome','Position','ExonName',
+my $cfile = shift @ARGV;
+my $prefix = (split(/\./,$cfile))[0];
+
+open OUT2, ">$prefix\_exoncoverage.txt" or die $!;
+open MISS, ">$prefix\_lowcoverage.txt" or die $!;
+print MISS join("\t",'Sample','Chromosome','Start','End','ExonName',
 		'MinDepth','MaxDepth','MedianDepth','AvgDepth',
-		'Fraction100X+','Fraction10X-','BP100X+','BP10X-','TotalBP'),"\n";
-print OUT2 join("\t",'Sample','Chromosome','Position','ExonName',
+		'Fraction100X+','BP100X+','TotalBP'),"\n";
+print OUT2 join("\t",'Sample','Chromosome','Start','End','ExonName',
 		'MinDepth','MaxDepth','MedianDepth','AvgDepth',
-		'Fraction100X+','Fraction10X-','BP100X+','BP10X-','TotalBP'),"\n";
-print OUT join("\t",'Chromosome','Position','ExonName','NumberSamples10X-_HalfExon'),"\n";
-
-my %totalbp;
-my %numsamp;
-my @covfiles = @ARGV;
+		'Fraction100X+','BP100X+','TotalBP'),"\n";
 my %cov;
 my %covstats;
-foreach $cfile (@covfiles) {
-  my %good;
-  my $prefix = (split(/\./,$cfile))[0];
-  open COV, "<$cfile" or die $!;
-  while (my $line = <COV>) {
-    chomp($line);
-    next if ($line =~ m/^all/);
-    my ($chrom,$pos,$end,$name,$depth,$bp,$total,$percent) = split(/\t/,$line);
-    my $region = join(":",$chrom,$pos,$end);
-    $cov{$region}{$depth} = $percent;
-    $covstats{$region}{sumdepth} += $depth*$bp;
-    $exons{$region} = $name;
-    $totalbp{$region} = $total;
-    if ($depth <= 10) {
-      $poor{$region} += $bp;
-    }
-    if ($depth >= 100) {
-      $good{$region} += $bp;
-    }
+my %exons;
+my %totalbp;
+my %good;
+
+open COV, "<$cfile" or die $!;
+while (my $line = <COV>) {
+  chomp($line);
+  next if ($line =~ m/^all/);
+  my ($chrom,$pos,$end,$name,$depth,$bp,$total,$percent) = split(/\t/,$line);
+  my $region = join(":",$chrom,$pos,$end);
+  $cov{$region}{$depth} = $percent;
+  $covstats{$region}{sumdepth} += $depth*$bp;
+  $exons{$region} = $name;
+  $totalbp{$region} = $total;
+  if ($depth >= 100) {
+    $good{$region} += $bp;
   }
-  close COV;
-  foreach $reg (keys %totalbp) {
-    $good{$reg} = 0 unless $good{$reg};
-    $poor{$reg} = 0 unless $poor{$reg};
-    if ($totalbp{$reg} < 1) {
-      next;
-    }
-    my $avgdepth = sprintf("%.0f",$covstats{$reg}{sumdepth}/$totalbp{$reg});
-    my @depths = sort {$a <=> $b} keys %{$cov{$reg}};
-    %covhash = %{$cov{$reg}};
-    my @perc = @covhash{@depths};
-    my @cum_sum = cumsum(@perc);
-    my $mediandep = 0;
-    foreach my $i (0..$#cum_sum) {
-      if ($cum_sum[$i] < 0.5) {
-	$mediandep = $depths[$i];
-      }
-    }
-    $good_fract = sprintf("%.4f",$good{$reg}/$totalbp{$reg});
-    $poor_fract = sprintf("%.4f",$poor{$reg}/$totalbp{$reg});
-    print OUT2 join("\t",$prefix,split(/:/,$reg),$exons{$reg},$depths[0],$depths[-1],$mediandep,$avgdepth,$good_fract,$poor_fract,$good{$reg},$poor{$reg},$totalbp{$reg}),"\n";
-    if ($poor_fract> 0.5) {
-      $numsamp{$reg} ++;
-      print MISS join("\t",$prefix,split(/:/,$reg),$exons{$reg},$depths[0],$depths[-1],$mediandep,$avgdepth,$good_fract,$poor_fract,$good{$reg},$poor{$reg},$totalbp{$reg}),"\n";
+}
+close COV;
+foreach $reg (keys %totalbp) {
+  $good{$reg} = 0 unless $good{$reg};
+  if ($totalbp{$reg} < 1) {
+    next;
+  }
+  my $avgdepth = sprintf("%.0f",$covstats{$reg}{sumdepth}/$totalbp{$reg});
+  my @depths = sort {$a <=> $b} keys %{$cov{$reg}};
+  my %covhash = %{$cov{$reg}};
+  my @perc = @covhash{@depths};
+  my @cum_sum = cumsum(@perc);
+  my $mediandep = 0;
+  foreach my $i (0..$#cum_sum) {
+    if ($cum_sum[$i] < 0.5) {
+      $mediandep = $depths[$i];
     }
   }
+  $good_fract = sprintf("%.4f",$good{$reg}/$totalbp{$reg});
+  print OUT2 join("\t",$prefix,split(/:/,$reg),$exons{$reg},$depths[0],$depths[-1],$mediandep,$avgdepth,$good_fract,$good{$reg},$totalbp{$reg}),"\n";
+  if ($good_fract < 1) {
+    print MISS join("\t",$prefix,split(/:/,$reg),$exons{$reg},$depths[0],$depths[-1],$mediandep,$avgdepth,$good_fract,$good{$reg},$totalbp{$reg}),"\n";
+  }
 }
-foreach $reg (sort {$a cmp $b} keys %numsamp) {
-  print OUT join("\t",split(/:/,$reg),$exons{$reg},$numsamp{$reg}),"\n";
-}
+
+
 sub cumsum {
   my @nums = @_;
   my @cumsum = ();
diff --git a/alignment/markdups.sh b/alignment/markdups.sh
index 793dc96..f040fc0 100644
--- a/alignment/markdups.sh
+++ b/alignment/markdups.sh
@@ -64,7 +64,7 @@ then
     samtools fastq -1 ${pair_id}.consensus.R1.fastq -2 ${pair_id}.consensus.R2.fastq ${pair_id}.consensus.bam
     gzip ${pair_id}.consensus.R1.fastq
     gzip ${pair_id}.consensus.R2.fastq
-    bwa mem -M -C -t 2 -R '@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}' /project/shared/bicf_workflow_ref/GRCh38/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz | samtools view -1 - > ${pair_id}.consensus.bam
+    bwa mem -M -C -t 2 -R "@RG\tID:${pair_id}\tLB:tx\tPL:illumina\tPU:barcode\tSM:${pair_id}" /project/shared/bicf_workflow_ref/GRCh38/genome.fa ${pair_id}.consensus.R1.fastq.gz ${pair_id}.consensus.R2.fastq.gz | samtools view -1 - > ${pair_id}.consensus.bam
     samtools sort --threads 10 -o ${pair_id}.dedup.bam ${pair_id}.consensus.bam
 else
     cp ${sbam} ${pair_id}.dedup.bam    
-- 
GitLab