Update pysam.idxstats calls to account for pysam's new syntax (split_lines=True).

00dd6e2e · Venkat Malladi · cd3784ba · 00dd6e2e · 00dd6e2e
Commit 00dd6e2e authored 8 years ago by Venkat Malladi
--- a/rpkm.py
+++ b/rpkm.py
@@ -47,7 +47,7 @@ def rpkm(peak_file,aln_file,exp_name,columns):
    columns.append(exp_name)
    ## RPKM  =   numReads / (geneLength/1000 * totalNumReads/1,000,000 )
    peak_counts = peak_file.multi_bam_coverage(bams=[aln_file])
-    total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file)])
+    total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file,split_lines=True)])
    rpkm = peak_counts.each(normalized_to_length, 3, float(math.pow(10,9))/total_counts).saveas("test.bed")
    rpkm_df = rpkm.to_dataframe()
    #os.remove('test.bed')
@@ -61,7 +61,7 @@ def rpkm_strand(peak_file,aln_file,exp_name,columns):
    columns.append(exp_name)
    ## RPKM  =   numReads / (geneLength/1000 * totalNumReads/1,000,000 )
    peak_counts = peak_file.multi_bam_coverage(bams=[aln_file],s=True)
-    total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file)])
+    total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file,split_lines=True)])
    rpkm = peak_counts.each(normalized_to_length, 6, float(math.pow(10,9))/float(total_counts)).saveas("test.bed")
    rpkm_df = rpkm.to_dataframe()
    #os.remove('test.bed')

--- a/rpkm_gro.py
+++ b/rpkm_gro.py
@@ -47,7 +47,8 @@ def rpkm(peak_file,aln_file,exp_name,columns):
    columns.append(exp_name)
    ## RPKM  =   numReads / (geneLength/1000 * totalNumReads/1,000,000 )
    peak_counts = peak_file.multi_bam_coverage(bams=[aln_file])
-    total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file)])
+    total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file,split_lines=True)])
+    print total_counts
    rpkm = peak_counts.each(normalized_to_length, 3, float(math.pow(10,9))/total_counts).saveas("test.bed")
    rpkm_df = rpkm.to_dataframe()
    #os.remove('test.bed')
@@ -61,7 +62,7 @@ def rpkm_strand(peak_file,aln_file,exp_name,columns):
    columns.append(exp_name)
    ## RPKM  =   numReads / (geneLength/1000 * totalNumReads/1,000,000 )
    peak_counts = peak_file.multi_bam_coverage(bams=[aln_file],s=True)
-    total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file)])
+    total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file,split_lines=True)])
    rpkm = peak_counts.each(normalized_to_length, 6, float(math.pow(10,9))/float(total_counts)).saveas("test.bed")
    rpkm_df = rpkm.to_dataframe()
    #os.remove('test.bed')
@@ -112,7 +113,6 @@ def main():
    # Write out RPKM matrix
    filtered_peaks = filtered_rpkm[columns]
    filtered_rpkm.to_csv(args.factor + '_filtered_peaks.tsv', header=True, index=None, sep='\t')
-    peak_rpkm_only_sum.to_csv(args.factor + '_sum.tsv', header=True, index=True, sep='\t')
    pybedtools.BedTool.from_dataframe(filtered_peaks).saveas(args.factor + '_filtered_peaks.bed')