Commit c2fde8cf authored by Jiwoong Kim's avatar Jiwoong Kim
Browse files

Initial commit

parents
# SpliceFisher
Multiple Fisher's exact tests for differential alternative splicing detection using RNA-seq data
## Method
![](SpliceFisher.method.png)
1. Counting reads from BAM files
* Exon skipping
* *a*, *b* exon-junction reads
* *c* exon-skipping reads
* *d* exon-mapping reads
* *e* gene-mapping reads
* Intron retention
* *a*, *b* exon-intron reads
* *c* exon-exon reads
* *d* intron-mapping reads
* *e* gene-mapping reads
2. Multiple Fisher's exact tests
* Head: (control *a* / control *c*) / (test *a* / test *c*)
* Tail: (control *b* / control *c*) / (test *b* / test *c*)
* Body: (control *d* / control *e*) / (test *d* / test *e*)
3. Adjustment of p-values by false discovery rate (FDR) method
4. Alternative splice sites (A5SS and A3SS) and Mutually exclusive exons (MXE)
![](SpliceFisher.method.ASS.png)
![](SpliceFisher.method.MXE.png)
## Requirements
1. Perl - https://www.perl.org
2. Perl module "Bio::DB::Sam" - http://search.cpan.org/~lds/Bio-SamTools-1.43/lib/Bio/DB/Sam.pm
3. R (Rscript) - https://www.r-project.org
4. Common linux commands: bash, cut, sort, uniq, ...
## Install
If you already have Git (https://git-scm.com) installed, you can get the latest development version using Git.
```
git clone https://github.com/jiwoongbio/SpliceFisher.git
```
## Usages
1. Preparation of exon/intron regions
```
./prepare.sh <gene.gtf>
```
* Example: Before running the following command, download "Homo_sapiens_UCSC_hg19.tar.gz" from http://support.illumina.com/sequencing/sequencing_software/igenome.html and unzip the file.
```
./prepare.sh Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf
```
* The repository already contains prebuilt exon/intron regions in hg19 (exon.unique.txt, intron.unique.txt).
2. Detection of differential alternative splicing
```
./SpliceFisher.sh <outputPrefix> <alpha> <control.sorted.bam> <test.sorted.bam>
```
* Example: It uses SRA data of https://www.ncbi.nlm.nih.gov/pubmed/24840202. The reads mapped to 11p13 were extracted to generate the input BAM files.
```
./SpliceFisher.sh hnRNPM_KD 0.05 Control_rep1.bam,Control_rep2.bam hnRNPM_KD_rep1.bam,hnRNPM_KD_rep2.bam
```
* Output: exon skipping and intron retention of CD44 (hnRNPM_KD.exon.filtered.txt, hnRNPM_KD.intron.filtered.txt)
![hnRNPM_KD.CD44.png](hnRNPM_KD.CD44.png)
## Citation
Han T, Goralski M, Gaskill N, Capota E, Kim J, Ting TC, Xie Y, Williams NS, Nijhawan D.
"Anticancer sulfonamides target splicing by inducing RBM39 degradation via recruitment to DCAF15"
Science. 2017 Mar 16. pii: eaal3755.
PMID: [28302793](https://www.ncbi.nlm.nih.gov/pubmed/28302793)
args <- commandArgs(TRUE)
table <- read.table(args[1], header = TRUE, sep = "\t", quote = "", comment.char = "", stringsAsFactors = FALSE)
parts <- c("Head", "Tail", "Body")
for(part in parts) {
count.table <- table[, grep(paste("^count", part, sep = ""), colnames(table))]
table[, paste("kruskal_pvalue", part, sep = "")] <- apply(count.table, 1, function(x) {
ratiosList <- lapply(as.list(data.frame(matrix(as.character(unlist(x)), nrow = 2), stringsAsFactors = FALSE)), function(y) {
counts <- as.numeric(unlist(strsplit(y[2], ',')))
ratios <- counts / (counts + as.numeric(unlist(strsplit(y[1], ','))))
ratios[!is.na(ratios)]
})
lengths <- unlist(lapply(ratiosList, length))
if(all(lengths > 0)) {
kruskal.test(unlist(ratiosList), rep(1:length(lengths), lengths))$p.value
} else {
NA
}
})
count.table[, ] <- unlist(lapply(as.character(unlist(count.table)), function(x) {sum(as.numeric(unlist(strsplit(x, ','))))}))
tests <- apply(count.table, 1, function(x) {fisher.test(matrix(x, nrow = 2))})
table[, paste("pvalue", part, sep = "")] <- unlist(lapply(tests, function(x) {x$p.value}))
table[, paste("oddsratio", part, sep = "")] <- unlist(lapply(tests, function(x) {x$estimate}))
table[apply(count.table, 1, function(x) {any(c(rowSums(matrix(x, nrow = 2)), colSums(matrix(x, nrow = 2))) == 0)}), paste(c("pvalue", "oddsratio"), part, sep = "")] <- NA
}
table[, paste("padjust", c("Head", "Tail"), sep = "")] <- p.adjust(data.matrix(table[, paste("pvalue", c("Head", "Tail"), sep = "")]), method = "fdr")
table[, paste("padjust", "Body", sep = "")] <- p.adjust(table[, paste("pvalue", "Body", sep = "")], method = "fdr")
table[, paste("kruskal_padjust", c("Head", "Tail"), sep = "")] <- p.adjust(data.matrix(table[, paste("kruskal_pvalue", c("Head", "Tail"), sep = "")]), method = "fdr")
table[, paste("kruskal_padjust", "Body", sep = "")] <- p.adjust(table[, paste("kruskal_pvalue", "Body", sep = "")], method = "fdr")
table[, "change"] <- 0
table[apply(table[, paste("oddsratio", parts, sep = "")], 1, function(x) {all(!is.na(x) & x > 1)}), "change"] <- -1
table[apply(table[, paste("oddsratio", parts, sep = "")], 1, function(x) {all(!is.na(x) & x < 1)}), "change"] <- 1
colnames <- c("change")
colnames <- c(colnames, apply(expand.grid(c("pvalue", "padjust", "oddsratio", "kruskal_pvalue", "kruskal_padjust"), parts), 1, function(x) {paste(x, collapse = "")}))
colnames <- c(colnames, colnames(table)[grep("^count", colnames(table))])
if("pairType" %in% colnames(table)) {
colnames <- c("chromosome", "start1", "end1", "start2", "end2", "strand", "pairType", "gene", colnames)
} else {
colnames <- c("chromosome", "start", "end", "strand", "gene", colnames)
}
table <- table[, colnames]
write.table(table, file = args[2], quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
# Author: Jiwoong Kim (jiwoongbio@gmail.com)
#!/bin/bash
directory=`dirname $0`
outputPrefix=$1
alpha=$2
bamFiles=${@:3}
if [ -z "$bamFiles" ]; then
echo 'Usage: ./SpliceFisher.sh <outputPrefix> <alpha> <control.sorted.bam> <test.sorted.bam>' 1>&2
exit 1
fi
perl -MBio::DB::Sam -e '' || exit 1
for bamFile in `echo "$bamFiles" | sed 's/,/ /g'`; do
find $bamFile.bai -newer $bamFile > /dev/null || samtools index $bamFile
done
perl $directory/SpliceFisher_gene.pl $directory/exon.unique.txt $bamFiles > $outputPrefix.gene.count.txt
type=exon
perl $directory/SpliceFisher_$type.pl $directory/$type.unique.txt $outputPrefix.gene.count.txt $bamFiles > $outputPrefix.$type.count.txt
Rscript $directory/SpliceFisher.R $outputPrefix.$type.count.txt $outputPrefix.$type.txt
perl $directory/SpliceFisher_filter.pl $outputPrefix.$type.txt $alpha > $outputPrefix.$type.filtered.txt
type=intron
perl $directory/SpliceFisher_$type.pl $directory/$type.unique.txt $outputPrefix.gene.count.txt $bamFiles > $outputPrefix.$type.count.txt
Rscript $directory/SpliceFisher.R $outputPrefix.$type.count.txt $outputPrefix.$type.txt
perl $directory/SpliceFisher_filter.pl $outputPrefix.$type.txt $alpha > $outputPrefix.$type.filtered.txt
type=exon_pair
perl $directory/SpliceFisher_$type.pl $directory/$type.txt $bamFiles > $outputPrefix.$type.count.txt
Rscript $directory/SpliceFisher.R $outputPrefix.$type.count.txt $outputPrefix.$type.txt
perl $directory/SpliceFisher_filter.pl $outputPrefix.$type.txt $alpha > $outputPrefix.$type.filtered.txt
# Author: Jiwoong Kim (jiwoongbio@gmail.com)
use strict;
use warnings;
local $SIG{__WARN__} = sub { die $_[0] };
use Bio::DB::Sam;
use Getopt::Long qw(:config no_ignore_case);
GetOptions(
'h' => \(my $help = ''),
'q=i' => \(my $minimumMappingQuality = 0),
's=s' => \(my $stranded = ''),
);
if($help || scalar(@ARGV) == 0) {
die <<EOF;
Usage: perl SpliceFisher_exon.pl [options] exon.txt gene.count.txt group1.bam,[...] group2.bam,[...] > exon.count.txt
Options: -h display this help message
-q INT minimum mapping quality [$minimumMappingQuality]
-s stranded, "f" or "r"
EOF
}
my ($exonFile, $geneReadCountFile, @bamFilesList) = @ARGV;
my @samListList = ();
foreach my $bamFiles (@bamFilesList) {
my @samList = map {Bio::DB::Sam->new(-bam => $_)} split(/,/, $bamFiles);
push(@samListList, \@samList);
}
my %geneReadCountListListHash = ();
{
open(my $reader, $geneReadCountFile);
while(my $line = <$reader>) {
chomp($line);
my @tokenList = split(/\t/, $line, -1);
my ($gene, @readCountsList) = @tokenList;
$geneReadCountListListHash{$gene} = [map {[split(/,/, $_)]} @readCountsList];
}
close($reader);
}
my @columnList = ('chromosome', 'start', 'end', 'strand', 'gene');
push(@columnList, map {"count$_->[1]$_->[2]_$_->[0]"} getCombinationList([map {$_ + 1} 0 .. $#samListList], ['Head', 'Tail', 'Body'], [1, 2]));
print join("\t", @columnList), "\n";
open(my $reader, $exonFile);
while(my $line = <$reader>) {
chomp($line);
my @tokenList = split(/\t/, $line, -1);
my ($chromosome, $start, $end, $strand, $gene) = @tokenList;
my @readCountListListList = map {[getExonReadCountListList($chromosome, $start, $end, $strand, @$_)]} @samListList;
my @geneReadCountListList = @{$geneReadCountListListHash{$gene}};
foreach my $index (0 .. $#samListList) {
$readCountListListList[$index]->[5]->[$_] = $geneReadCountListList[$index]->[$_] - $readCountListListList[$index]->[4]->[$_] foreach(0 .. $#{$samListList[$index]});
}
print join("\t", $chromosome, $start, $end, $strand, $gene, (map {join(',', @$_)} map {@$_} @readCountListListList)), "\n";
}
close($reader);
sub getExonReadCountListList {
my ($chromosome, $start, $end, $strand, @samList) = @_;
my @readCountListList = ();
foreach my $index (0 .. $#samList) {
$readCountListList[$_]->[$index] = 0 foreach(0 .. 3);
my %readCountHash = ();
foreach my $alignment ($samList[$index]->get_features_by_location(-seq_id => $chromosome, -start => $start, -end => $end)) {
next if($alignment->qual < $minimumMappingQuality);
next if($stranded ne '' && getReadStrand($alignment->flag) ne $strand);
my @junctionStartEndList = ($alignment->cigar_str =~ /[0-9]+N/) ? getJunctionStartEndList($alignment->start, $alignment->cigar_str) : ();
if(grep {$_->[0] <= $start && $end <= $_->[1]} @junctionStartEndList) {
$readCountListList[1]->[$index] += 1;
$readCountListList[3]->[$index] += 1;
} else {
$readCountListList[0]->[$index] += 1 if(grep {$_->[1] == $start - 1} @junctionStartEndList);
$readCountListList[2]->[$index] += 1 if(grep {$_->[0] == $end + 1} @junctionStartEndList);
}
$readCountHash{$alignment->qname} += 1 unless(grep {$_->[0] <= $start && $end <= $_->[1]} @junctionStartEndList);
}
$readCountListList[4]->[$index] = scalar(grep {$readCountHash{$_} > 0} keys %readCountHash);
}
@readCountListList[0, 1, 2, 3] = @readCountListList[2, 3, 0, 1] if($strand eq '-');
return @readCountListList;
}
sub getJunctionStartEndList {
my ($position, $cigar) = @_;
my @junctionStartEndList = ();
while($cigar =~ s/^([0-9]+)([MIDNSHP=X])//) {
my ($length, $operation) = ($1, $2);
$position += $length if($operation eq 'M');
$position += $length if($operation eq 'D');
if($operation eq 'N') {
push(@junctionStartEndList, [$position, $position + $length - 1]);
$position += $length;
}
}
return @junctionStartEndList;
}
sub getReadStrand {
my ($flag) = @_;
if($stranded eq 'f') {
if($flag & 1) {
return '+' if(grep {$_ == $flag} (99, 147));
return '-' if(grep {$_ == $flag} (83, 163));
} else {
return '+' if($flag == 0);
return '-' if($flag == 16);
}
}
if($stranded eq 'r') {
if($flag & 1) {
return '+' if(grep {$_ == $flag} (83, 163));
return '-' if(grep {$_ == $flag} (99, 147));
} else {
return '+' if($flag == 16);
return '-' if($flag == 0);
}
}
return '';
}
sub getCombinationList {
return if(scalar(@_) == 0);
return map {[$_]} @{$_[0]} if(scalar(@_) == 1);
my @combinationList = ();
foreach my $element (@{$_[0]}) {
push(@combinationList, map {[$element, @$_]} getCombinationList(@_[1 .. $#_]));
}
return @combinationList;
}
# Author: Jiwoong Kim (jiwoongbio@gmail.com)
use strict;
use warnings;
local $SIG{__WARN__} = sub { die $_[0] };
use Bio::DB::Sam;
use Getopt::Long qw(:config no_ignore_case);
GetOptions(
'h' => \(my $help = ''),
'q=i' => \(my $minimumMappingQuality = 0),
's=s' => \(my $stranded = ''),
);
if($help || scalar(@ARGV) == 0) {
die <<EOF;
Usage: perl SpliceFisher_exon_pair.pl [options] exon_pair.txt group1.bam,[...] group2.bam,[...] > exon_pair.count.txt
Options: -h display this help message
-q INT minimum mapping quality [$minimumMappingQuality]
-s stranded, "f" or "r"
EOF
}
my ($exonPairFile, @bamFilesList) = @ARGV;
my @samListList = ();
foreach my $bamFiles (@bamFilesList) {
my @samList = map {Bio::DB::Sam->new(-bam => $_)} split(/,/, $bamFiles);
push(@samListList, \@samList);
}
my @columnList = ('chromosome', 'start1', 'end1', 'start2', 'end2', 'strand', 'pairType', 'gene');
push(@columnList, map {"count$_->[1]$_->[2]_$_->[0]"} getCombinationList([map {$_ + 1} 0 .. $#samListList], ['Head', 'Tail', 'Body'], [1, 2]));
print join("\t", @columnList), "\n";
open(my $reader, $exonPairFile);
while(my $line = <$reader>) {
chomp($line);
my @tokenList = split(/\t/, $line, -1);
my ($chromosome, $start1, $end1, $start2, $end2, $strand, $pairType, $gene) = @tokenList;
my @readCountListListList = map {[getExonPairReadCountListList($chromosome, $start1, $end1, $start2, $end2, $strand, $pairType, @$_)]} @samListList;
print join("\t", $chromosome, $start1, $end1, $start2, $end2, $strand, $pairType, $gene, (map {join(',', @$_)} map {@$_} @readCountListListList)), "\n";
}
close($reader);
sub getExonPairReadCountListList {
my ($chromosome, $start1, $end1, $start2, $end2, $strand, $pairType, @samList) = @_;
if($pairType eq 'MXE') { # alternative exons
($start1, $end1, $start2, $end2) = ($start2, $end2, $start1, $end1) if($strand eq '-');
} elsif(($pairType eq 'A5SS' && $strand eq '+') || ($pairType eq 'A3SS' && $strand eq '-')) { # alternative ends
$start2 = $end1 + 1;
} elsif(($pairType eq 'A5SS' && $strand eq '-') || ($pairType eq 'A3SS' && $strand eq '+')) { # alternative starts
$end1 = $start2 - 1;
}
my @readCountListList = ();
foreach my $index (0 .. $#samList) {
$readCountListList[$_]->[$index] = 0 foreach(0 .. 3);
{
my %readCountHash = ();
foreach my $alignment ($samList[$index]->get_features_by_location(-seq_id => $chromosome, -start => $start1, -end => $end1)) {
next if($alignment->qual < $minimumMappingQuality);
next if($stranded ne '' && getReadStrand($alignment->flag) ne $strand);
my @junctionStartEndList = ($alignment->cigar_str =~ /[0-9]+N/) ? getJunctionStartEndList($alignment->start, $alignment->cigar_str) : ();
if($pairType eq 'MXE') { # alternative exons
$readCountListList[0]->[$index] += 1 if(grep {$_->[1] == $start1 - 1} @junctionStartEndList); # upstream junction
$readCountListList[2]->[$index] += 1 if(grep {$_->[0] == $end1 + 1} @junctionStartEndList); # downstream junction
} elsif(($pairType eq 'A5SS' && $strand eq '+') || ($pairType eq 'A3SS' && $strand eq '-')) { # alternative ends
$readCountListList[1]->[$index] += 1 if(grep {$_->[0] == $end1 + 1} @junctionStartEndList);
$readCountListList[2]->[$index] += 1 if(scalar(@junctionStartEndList) == 0 && $alignment->end > $end1);
} elsif(($pairType eq 'A5SS' && $strand eq '-') || ($pairType eq 'A3SS' && $strand eq '+')) { # alternative starts
$readCountListList[0]->[$index] += 1 if(grep {$_->[1] == $start1 - 1} @junctionStartEndList);
}
$readCountHash{$alignment->qname} += 1 unless(grep {$_->[0] <= $start1 && $end1 <= $_->[1]} @junctionStartEndList);
}
if($pairType eq 'MXE') { # alternative exons
$readCountListList[4]->[$index] = scalar(grep {$readCountHash{$_} > 0} keys %readCountHash);
} elsif(($pairType eq 'A5SS' && $strand eq '+') || ($pairType eq 'A3SS' && $strand eq '-')) { # alternative ends
$readCountListList[3]->[$index] = $readCountListList[1]->[$index];
$readCountListList[5]->[$index] = scalar(grep {$readCountHash{$_} > 0} keys %readCountHash);
} elsif(($pairType eq 'A5SS' && $strand eq '-') || ($pairType eq 'A3SS' && $strand eq '+')) { # alternative starts
$readCountListList[4]->[$index] = scalar(grep {$readCountHash{$_} > 0} keys %readCountHash);
}
}
{
my %readCountHash = ();
foreach my $alignment ($samList[$index]->get_features_by_location(-seq_id => $chromosome, -start => $start2, -end => $end2)) {
next if($alignment->qual < $minimumMappingQuality);
next if($stranded ne '' && getReadStrand($alignment->flag) ne $strand);
my @junctionStartEndList = ($alignment->cigar_str =~ /[0-9]+N/) ? getJunctionStartEndList($alignment->start, $alignment->cigar_str) : ();
if($pairType eq 'MXE') { # alternative exons
$readCountListList[1]->[$index] += 1 if(grep {$_->[1] == $start2 - 1} @junctionStartEndList); # upstream junction
$readCountListList[3]->[$index] += 1 if(grep {$_->[0] == $end2 + 1} @junctionStartEndList); # downstream junction
} elsif(($pairType eq 'A5SS' && $strand eq '+') || ($pairType eq 'A3SS' && $strand eq '-')) { # alternative ends
$readCountListList[0]->[$index] += 1 if(grep {$_->[0] == $end2 + 1} @junctionStartEndList);
} elsif(($pairType eq 'A5SS' && $strand eq '-') || ($pairType eq 'A3SS' && $strand eq '+')) { # alternative starts
$readCountListList[1]->[$index] += 1 if(grep {$_->[1] == $start2 - 1} @junctionStartEndList);
$readCountListList[2]->[$index] += 1 if(scalar(@junctionStartEndList) == 0 && $alignment->start < $start2);
}
$readCountHash{$alignment->qname} += 1 unless(grep {$_->[0] <= $start2 && $end2 <= $_->[1]} @junctionStartEndList);
}
if($pairType eq 'MXE') { # alternative exons
$readCountListList[5]->[$index] = scalar(grep {$readCountHash{$_} > 0} keys %readCountHash);
} elsif(($pairType eq 'A5SS' && $strand eq '+') || ($pairType eq 'A3SS' && $strand eq '-')) { # alternative ends
$readCountListList[4]->[$index] = scalar(grep {$readCountHash{$_} > 0} keys %readCountHash);
} elsif(($pairType eq 'A5SS' && $strand eq '-') || ($pairType eq 'A3SS' && $strand eq '+')) { # alternative starts
$readCountListList[3]->[$index] = $readCountListList[1]->[$index];
$readCountListList[5]->[$index] = scalar(grep {$readCountHash{$_} > 0} keys %readCountHash);
}
}
}
if($pairType eq 'MXE') { # alternative exons
@readCountListList[0, 1, 2, 3] = @readCountListList[2, 3, 0, 1] if($strand eq '-');
}
return @readCountListList;
}
sub getJunctionStartEndList {
my ($position, $cigar) = @_;
my @junctionStartEndList = ();
while($cigar =~ s/^([0-9]+)([MIDNSHP=X])//) {
my ($length, $operation) = ($1, $2);
$position += $length if($operation eq 'M');
$position += $length if($operation eq 'D');
if($operation eq 'N') {
push(@junctionStartEndList, [$position, $position + $length - 1]);
$position += $length;
}
}
return @junctionStartEndList;
}
sub getReadStrand {
my ($flag) = @_;
if($stranded eq 'f') {
if($flag & 1) {
return '+' if(grep {$_ == $flag} (99, 147));
return '-' if(grep {$_ == $flag} (83, 163));
} else {
return '+' if($flag == 0);
return '-' if($flag == 16);
}
}
if($stranded eq 'r') {
if($flag & 1) {
return '+' if(grep {$_ == $flag} (83, 163));
return '-' if(grep {$_ == $flag} (99, 147));
} else {
return '+' if($flag == 16);
return '-' if($flag == 0);
}
}
return '';
}
sub getCombinationList {
return if(scalar(@_) == 0);
return map {[$_]} @{$_[0]} if(scalar(@_) == 1);
my @combinationList = ();
foreach my $element (@{$_[0]}) {
push(@combinationList, map {[$element, @$_]} getCombinationList(@_[1 .. $#_]));
}
return @combinationList;
}
# Author: Jiwoong Kim (jiwoongbio@gmail.com)
use strict;
use warnings;
local $SIG{__WARN__} = sub { die $_[0] };
use List::Util qw(all);
use Getopt::Long qw(:config no_ignore_case);
GetOptions(
'h' => \(my $help = ''),
'r' => \(my $raw = ''),
'k' => \(my $kruskal = ''),
'H' => \(my $headOnly = ''),
'B' => \(my $bodyOnly = ''),
'T' => \(my $tailOnly = ''),
);
my $prefix = $raw ? 'pvalue' : 'padjust';
$prefix = "kruskal_$prefix" if($kruskal);
my @targetList = ();
push(@targetList, "${prefix}Head") if($headOnly);
push(@targetList, "${prefix}Body") if($bodyOnly);
push(@targetList, "${prefix}Tail") if($tailOnly);
if(all {$_ eq ''} ($headOnly, $bodyOnly, $tailOnly)) {
push(@targetList, "${prefix}Head");
push(@targetList, "${prefix}Body");
push(@targetList, "${prefix}Tail");
}
if($help || scalar(@ARGV) == 0) {
die <<EOF;
Usage: perl SpliceFisher_filter.pl [options] output.txt alpha > output.filtered.txt
Options: -h display this help message
-r use raw p-values instead of FDR-adjusted p-values
-k use Kruskal-Wallis rank sum test p-values
-H use head p-values only
-B use body p-values only
-T use tail p-values only
EOF
}
my ($inputFile, $alpha) = @ARGV;
open(my $reader, $inputFile);
chomp(my $line = <$reader>);
my @columnList = split(/\t/, $line);
print join("\t", @columnList), "\n";
while(my $line = <$reader>) {
chomp($line);
my %tokenHash = ();
@tokenHash{@columnList} = split(/\t/, $line);
if(all {$_ ne "NA" && $_ < $alpha} @tokenHash{@targetList}) {
print join("\t", @tokenHash{@columnList}), "\n";
}
}
close($reader);
# Author: Jiwoong Kim (jiwoongbio@gmail.com)
use strict;
use warnings;
local $SIG{__WARN__} = sub { die $_[0] };
use Bio::DB::Sam;
use Getopt::Long qw(:config no_ignore_case);
GetOptions(
'h' => \(my $help = ''),
'q=i' => \(my $minimumMappingQuality = 0),
's=s' => \(my $stranded = ''),
);
if($help || scalar(@ARGV) == 0) {
die <<EOF;
Usage: perl SpliceFisher_gene.pl [options] exon.txt group1.bam,[...] group2.bam,[...] > gene.count.txt
Options: -h display this help message
-q INT minimum mapping quality [$minimumMappingQuality]
-s stranded, "f" or "r"
EOF
}
my ($exonFile, @bamFilesList) = @ARGV;
my @samListList = ();
foreach my $bamFiles (@bamFilesList) {
my @samList = map {Bio::DB::Sam->new(-bam => $_)} split(/,/, $bamFiles);
push(@samListList, \@samList);
}
{
my ($previousGene, @chromosomeStartEndStrandList) = ('');
open(my $reader, "sort --field-separator='\t' -k5 $exonFile |");
while(my $line = <$reader>) {
chomp($line);
my @tokenList = split(/\t/, $line, -1);
my ($chromosome, $start, $end, $strand, $gene) = @tokenList;
if($gene ne $previousGene) {
print join("\t", $previousGene, getReadCountsList(@chromosomeStartEndStrandList)), "\n" if($previousGene ne '');
($previousGene, @chromosomeStartEndStrandList) = ($gene);
}
push(@chromosomeStartEndStrandList, [$chromosome, $start, $end, $strand]);
}
close($reader);
print join("\t", $previousGene, getReadCountsList(@chromosomeStartEndStrandList)), "\n" if($previousGene ne '');
}
<