#!/usr/bin/perl -w
#parse_alignment_stats.pl


my @files = @ARGV;
open OUT, ">alignment.summary.txt" or die $!;
print OUT join("\t",'Sample','Total','R1.map','R2.map','Mapping.Rate','Concordant.Rate'),"\n";
chomp(@files);
foreach my $file (@files) {
  chomp($file);
  my @directory = split(/\//, $file);
  $prefix = $directory[-1];
  my $sample = (split(/\./,$prefix))[0];
  $hisatfile = $file;
  $hisatfile =~ s/flagstat/alignerout/;
  open HISAT, "<$hisatfile";
  my ($total, $pairs,$read1ct,$read2ct,$maprate,$concorrate);
  while (my $line = <HISAT>) {
    chomp($line);
    if ($line =~ m/(\d+) reads; of these:/) {
      $total = $1;
    }elsif ($line =~ m/Number of input reads\s+\|\s+(\d+)/) {
	$total = $1;
    }elsif ($line =~ m/(\d+) \S+ were paired; of these:/) {
      $pairs = $1;
      $total = $pairs*2 if ($total == $pairs);
    }elsif ($line =~ m/(\d+) pairs aligned concordantly 0 times; of these:/) {
      $concorrate = sprintf("%.4f",1-($1/$pairs));
    }elsif ($line =~ m/(\S+)% overall alignment rate/) {
      $maprate = $1/100;
    }
  }
  close HISAT;
  open IN, "<$file" or die $!;
  while ($line = <IN>) {
    chomp($line);
    if ($line =~ m/(\d+) \+ \d+ in total/) {
      $total = $1 unless $total;
    }elsif ($line =~ m/(\d+) \+ \d+ read1/) {
      $pairs = $1;
    }elsif ($line =~ m/(\d+) \+ \d+ read2/) {
      $read2ct = $1;
    }elsif ($line =~ m/(\d+) \+ \d+ mapped\s+\((\S+)%.+\)/) {
      $maprate = sprintf("%.2f",$2/100) unless ($maprate);
    }elsif ($line =~ m/(\d+) \+ \d+ properly paired\s+\((\S+)%.+\)/) {
      $concorrate = sprintf("%.2f",$2/100) unless ($concorrate);
    }
  }
  close IN;
  $total = $pairs*2 if ($total == $pairs);
  print OUT join("\t",$sample,$total, $pairs,$read2ct,$maprate,$concorrate),"\n";
}