You need to sign in or sign up before continuing.
Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""
Author: John Lafin
Email: lafinj@gmail.com
This script is built to accept a CSV file
with three columns with headers (in order): Target, Sample, and Cq.
It expects that the extraction efficiency target is named 'cel-miR-39'
and that the loading control target is named 'miR-30b'.
It will output one file containing the final dataframe with mean
raw Cq, normalized Cq, dCq, ddCq, and Rq values.
"""
import pandas as pd
import sys
# Target names the normalisation depends on (see the module docstring).
EXTRACTION_TARGET = 'cel-miR-39'
LOADING_TARGET = 'miR-30b'
# Cq value substituted for wells that have no reading (NaN in the CSV).
MISSING_CQ = 40


def derive_out_path(in_path):
    """Return the output path for *in_path*: its '.csv' suffix is replaced
    by '_out.csv'.

    Only a trailing '.csv' (any letter case) is removed.  ``str.strip`` must
    not be used for this: it strips any of the characters '.', 'c', 's', 'v'
    from both ends, which turns 'scores.csv' into 'ore'.  A path without a
    '.csv' suffix simply gets '_out.csv' appended.
    """
    suffix = '.csv'
    if in_path.lower().endswith(suffix):
        in_path = in_path[:-len(suffix)]
    return in_path + '_out.csv'


def compute_relative_quantities(data, control):
    """Return per-(Target, Sample) mean Cq, Cq_norm, dCq, ddCq and Rq.

    data    -- DataFrame indexed by a ('Target', 'Sample') MultiIndex with a
               'Cq' column; replicate wells appear as repeated index entries.
    control -- name of the sample every ddCq is calculated relative to.

    Raises KeyError if the extraction target, the loading-control target or
    the control sample is absent from the data.  A target that was not
    measured on the control sample gets NaN for ddCq and Rq.
    """
    data = data.copy()  # leave the caller's frame untouched
    # ensure that levels are read in as string instead of int
    data.index = data.index.set_levels(
        data.index.levels[0].astype(str), level=0)
    data.index = data.index.set_levels(
        data.index.levels[1].astype(str), level=1)
    # convert NANs to Ct = 40
    data = data.fillna(MISSING_CQ)
    # mean of the replicates of each (Target, Sample) pair, in input order
    means = data.groupby(level=['Target', 'Sample'], sort=False).mean()

    targets = means.index.get_level_values('Target')
    for required in (EXTRACTION_TARGET, LOADING_TARGET):
        if required not in targets:
            raise KeyError('required target %r not found in data' % required)
    if control not in means.index.get_level_values('Sample'):
        raise KeyError('control sample %r not found in data' % control)

    # Step 1: determine the sample with the lowest cel-miR-39 Cq
    # and subtract this from all mean cel-miR-39 Cqs
    extraction_means = means.loc[EXTRACTION_TARGET]
    extraction_correction = extraction_means - extraction_means.min()
    # Step 2: subtract extraction correction (calculated above)
    # from all mean miR-30b Cqs and add as a column to data;
    # the join matches on the shared 'Sample' index level
    loading_norm = means.loc[LOADING_TARGET] - extraction_correction
    means = means.join(loading_norm, rsuffix='_norm')
    # Step 3: normalize every target to the corrected miR-30b Cq
    means['dCq'] = means['Cq'] - means['Cq_norm']
    # Step 4: calculate ddCq and Rq relative to the control sample
    control_dcq = means['dCq'].xs(control, level='Sample')  # one per Target
    row_targets = means.index.get_level_values('Target')
    means['ddCq'] = means['dCq'] - control_dcq.reindex(row_targets).to_numpy()
    means['Rq'] = 2 ** (-means['ddCq'])
    return means


def main(argv=None):
    """Read the CSV named in argv[1], prompt for the control sample name and
    write the results table next to the input as '<name>_out.csv'.

    argv defaults to sys.argv.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit('usage: %s INPUT.csv' % argv[0] if argv else 'usage: INPUT.csv')
    in_path = argv[1]
    out_path = derive_out_path(in_path)
    relative = input('What is the name of your control?')
    # read in miRNA panel data
    # first two columns construct multiindex
    data = pd.read_csv(in_path, index_col=['Target', 'Sample'])
    means = compute_relative_quantities(data, relative)
    # export to file
    means.sort_index().to_csv(out_path)


if __name__ == '__main__':
    main()