From 00dd6e2e4971ebc49243be2b3216c18ed2293d6d Mon Sep 17 00:00:00 2001 From: Venkat Malladi <Venkat.Malladi@utsouthwestern.edu> Date: Thu, 2 Mar 2017 11:33:44 -0600 Subject: [PATCH] Update pysam to account for new syntax. --- rpkm.py | 4 ++-- rpkm_gro.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/rpkm.py b/rpkm.py index bcc842c..77e195a 100755 --- a/rpkm.py +++ b/rpkm.py @@ -47,7 +47,7 @@ def rpkm(peak_file,aln_file,exp_name,columns): columns.append(exp_name) ## RPKM = numReads / (geneLength/1000 * totalNumReads/1,000,000 ) peak_counts = peak_file.multi_bam_coverage(bams=[aln_file]) - total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file)]) + total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file,split_lines=True)]) rpkm = peak_counts.each(normalized_to_length, 3, float(math.pow(10,9))/total_counts).saveas("test.bed") rpkm_df = rpkm.to_dataframe() #os.remove('test.bed') @@ -61,7 +61,7 @@ def rpkm_strand(peak_file,aln_file,exp_name,columns): columns.append(exp_name) ## RPKM = numReads / (geneLength/1000 * totalNumReads/1,000,000 ) peak_counts = peak_file.multi_bam_coverage(bams=[aln_file],s=True) - total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file)]) + total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file,split_lines=True)]) rpkm = peak_counts.each(normalized_to_length, 6, float(math.pow(10,9))/float(total_counts)).saveas("test.bed") rpkm_df = rpkm.to_dataframe() #os.remove('test.bed') diff --git a/rpkm_gro.py b/rpkm_gro.py index 8f817eb..ee3c56a 100755 --- a/rpkm_gro.py +++ b/rpkm_gro.py @@ -47,7 +47,8 @@ def rpkm(peak_file,aln_file,exp_name,columns): columns.append(exp_name) ## RPKM = numReads / (geneLength/1000 * totalNumReads/1,000,000 ) peak_counts = peak_file.multi_bam_coverage(bams=[aln_file]) - 
total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file)]) + total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file,split_lines=True)]) + print total_counts rpkm = peak_counts.each(normalized_to_length, 3, float(math.pow(10,9))/total_counts).saveas("test.bed") rpkm_df = rpkm.to_dataframe() #os.remove('test.bed') @@ -61,7 +62,7 @@ def rpkm_strand(peak_file,aln_file,exp_name,columns): columns.append(exp_name) ## RPKM = numReads / (geneLength/1000 * totalNumReads/1,000,000 ) peak_counts = peak_file.multi_bam_coverage(bams=[aln_file],s=True) - total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file)]) + total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file,split_lines=True)]) rpkm = peak_counts.each(normalized_to_length, 6, float(math.pow(10,9))/float(total_counts)).saveas("test.bed") rpkm_df = rpkm.to_dataframe() #os.remove('test.bed') @@ -112,7 +113,6 @@ def main(): # Write out RPKM matrix filtered_peaks = filtered_rpkm[columns] filtered_rpkm.to_csv(args.factor + '_filtered_peaks.tsv', header=True, index=None, sep='\t') - peak_rpkm_only_sum.to_csv(args.factor + '_sum.tsv', header=True, index=True, sep='\t') pybedtools.BedTool.from_dataframe(filtered_peaks).saveas(args.factor + '_filtered_peaks.bed') -- GitLab