diff --git a/rpkm.py b/rpkm.py index bcc842c39541c84eebd4eed99716f3cf8d68e553..77e195a0b189a145900230b1458d966aae3ea374 100755 --- a/rpkm.py +++ b/rpkm.py @@ -47,7 +47,7 @@ def rpkm(peak_file,aln_file,exp_name,columns): columns.append(exp_name) ## RPKM = numReads / (geneLength/1000 * totalNumReads/1,000,000 ) peak_counts = peak_file.multi_bam_coverage(bams=[aln_file]) - total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file)]) + total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file,split_lines=True)]) rpkm = peak_counts.each(normalized_to_length, 3, float(math.pow(10,9))/total_counts).saveas("test.bed") rpkm_df = rpkm.to_dataframe() #os.remove('test.bed') @@ -61,7 +61,7 @@ def rpkm_strand(peak_file,aln_file,exp_name,columns): columns.append(exp_name) ## RPKM = numReads / (geneLength/1000 * totalNumReads/1,000,000 ) peak_counts = peak_file.multi_bam_coverage(bams=[aln_file],s=True) - total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file)]) + total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file,split_lines=True)]) rpkm = peak_counts.each(normalized_to_length, 6, float(math.pow(10,9))/float(total_counts)).saveas("test.bed") rpkm_df = rpkm.to_dataframe() #os.remove('test.bed') diff --git a/rpkm_gro.py b/rpkm_gro.py index 8f817eb801b187995385d3afcbf9548ace19f385..ee3c56aefe43d95716765a94707514f9097b028f 100755 --- a/rpkm_gro.py +++ b/rpkm_gro.py @@ -47,7 +47,8 @@ def rpkm(peak_file,aln_file,exp_name,columns): columns.append(exp_name) ## RPKM = numReads / (geneLength/1000 * totalNumReads/1,000,000 ) peak_counts = peak_file.multi_bam_coverage(bams=[aln_file]) - total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file)]) + total_counts = reduce(lambda x, y: x + y, [ 
int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file,split_lines=True)]) + print total_counts rpkm = peak_counts.each(normalized_to_length, 3, float(math.pow(10,9))/total_counts).saveas("test.bed") rpkm_df = rpkm.to_dataframe() #os.remove('test.bed') @@ -61,7 +62,7 @@ def rpkm_strand(peak_file,aln_file,exp_name,columns): columns.append(exp_name) ## RPKM = numReads / (geneLength/1000 * totalNumReads/1,000,000 ) peak_counts = peak_file.multi_bam_coverage(bams=[aln_file],s=True) - total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file)]) + total_counts = reduce(lambda x, y: x + y, [ int(l.rstrip('\n').split('\t')[2]) for l in pysam.idxstats(aln_file,split_lines=True)]) rpkm = peak_counts.each(normalized_to_length, 6, float(math.pow(10,9))/float(total_counts)).saveas("test.bed") rpkm_df = rpkm.to_dataframe() #os.remove('test.bed') @@ -112,7 +113,6 @@ def main(): # Write out RPKM matrix filtered_peaks = filtered_rpkm[columns] filtered_rpkm.to_csv(args.factor + '_filtered_peaks.tsv', header=True, index=None, sep='\t') - peak_rpkm_only_sum.to_csv(args.factor + '_sum.tsv', header=True, index=True, sep='\t') pybedtools.BedTool.from_dataframe(filtered_peaks).saveas(args.factor + '_filtered_peaks.bed')