An error occurred while loading the file. Please try again.
-
Gervaise Henry authoredc3053816
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
tin_hist.py 1.98 KiB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#!/usr/bin/env python3
import argparse
import pandas as pd
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument(
'-r', '--repRID', help="The replicate RID.", required=True)
args = parser.parse_args()
return args
def main():
args = get_args()
tin = pd.read_csv(args.repRID + '_sorted.deduped.tin.xls',
sep="\t", header=0)
hist = pd.cut(tin['TIN'], bins=pd.interval_range(
start=0, freq=10, end=100, closed='right')).value_counts(sort=False)
labels = ["{0} - {1}".format(i, i + 9) for i in range(1, 100, 10)]
#labels[0] = '0 - 10'
binned = tin.assign(Bins=lambda x: pd.cut(tin['TIN'], range(
0, 105, 10), labels=labels, include_lowest=False, right=True))
binned['chrom'] = binned['chrom'] = binned['chrom'].replace(
'chr1', 'chr01')
binned['chrom'] = binned['chrom'].replace('chr2', 'chr02')
binned['chrom'] = binned['chrom'].replace('chr3', 'chr03')
binned['chrom'] = binned['chrom'].replace('chr4', 'chr04')
binned['chrom'] = binned['chrom'].replace('chr5', 'chr05')
binned['chrom'] = binned['chrom'].replace('chr6', 'chr06')
binned['chrom'] = binned['chrom'].replace('chr7', 'chr07')
binned['chrom'] = binned['chrom'].replace('chr8', 'chr08')
binned['chrom'] = binned['chrom'].replace('chr9', 'chr09')
hist = pd.pivot_table(binned, values='geneID',
index='Bins', columns='chrom', aggfunc=np.size)
hist['TOTAL'] = hist.sum(axis=1)
hist = hist[['TOTAL'] + [i for i in hist.columns if i != 'TOTAL']]
hist = hist.T.fillna(0.0).astype(int)
#hist = hist.apply(lambda x: x/x.sum()*100, axis=1)
hist.to_csv(args.repRID + '_tin.hist.tsv', sep='\t')
medFile = open(args.repRID + '_tin.med.csv', "w")
medFile.write(str(round(tin['TIN'][(tin['TIN'] != 0)].median(), 2)))
medFile.close()
if __name__ == '__main__':
main()