"""Compute relative miRNA quantities (Rq) from raw qPCR Cq values.

Author: John Lafin
Email: lafinj@gmail.com

Usage: pass the path of a CSV file as the first command-line argument.
The file must have three columns with headers (in order): Target,
Sample, and Cq.  The extraction-efficiency target must be named
'cel-miR-39' and the loading-control target must be named 'miR-30b'.

The script asks for the name of the control sample (an empty answer
falls back to 'normal') and writes one file, '<input>_out.csv',
containing the mean raw Cq, normalized Cq, dCq, ddCq, and Rq values.
"""

import sys

import pandas as pd

EXTRACTION_TARGET = 'cel-miR-39'
LOADING_TARGET = 'miR-30b'
DEFAULT_CONTROL_SAMPLE = 'normal'
# Cq assigned to wells without a reading (NaN in the input).
UNDETERMINED_CQ = 40
CSV_SUFFIX = '.csv'


def build_output_path(in_path):
    """Return the output path: *in_path* minus a trailing '.csv', plus '_out.csv'.

    Only the literal suffix is removed.  ``str.strip('.csv')`` must not be
    used here: it strips the *characters* '.', 'c', 's', 'v' from both ends
    and would mangle names such as 'samples.csv'.
    """
    stem = in_path
    if stem.endswith(CSV_SUFFIX):
        stem = stem[:-len(CSV_SUFFIX)]
    return stem + '_out.csv'


def load_cq_table(in_path):
    """Read the Cq CSV with a (Target, Sample) MultiIndex of strings."""
    data = pd.read_csv(in_path, index_col=['Target', 'Sample'])
    # Ensure that levels are read in as string instead of int, so that
    # purely numeric target/sample names can be looked up by name.
    for level in (0, 1):
        data.index = data.index.set_levels(
            data.index.levels[level].astype(str), level=level)
    return data


def compute_relative_quantities(data, control=DEFAULT_CONTROL_SAMPLE):
    """Return mean Cq, Cq_norm, dCq, ddCq and Rq per (Target, Sample).

    Parameters:
        data: DataFrame indexed by a (Target, Sample) MultiIndex with a
            'Cq' column; replicate rows share the same index entry.
        control: name of the sample every ddCq is calculated against.

    Returns:
        A new DataFrame (the input is not modified) with one row per
        (Target, Sample) pair and the columns Cq, Cq_norm, dCq, ddCq, Rq.

    Raises:
        KeyError: if the extraction or loading-control target is absent,
            or if *control* is missing for any target.
    """
    # Missing readings (NaN) count as Cq = 40.
    filled = data.fillna(UNDETERMINED_CQ)
    means = filled.groupby(level=['Target', 'Sample'], sort=False).mean()

    # Step 1: determine the sample with the lowest cel-miR-39 Cq and
    # subtract this from all mean cel-miR-39 Cqs.
    extraction = means.xs(EXTRACTION_TARGET, level='Target')
    extraction_shift = extraction - extraction.min()

    # Step 2: subtract the extraction correction from all mean miR-30b Cqs
    # and attach the result per sample as the 'Cq_norm' column.
    loading = means.xs(LOADING_TARGET, level='Target') - extraction_shift
    means = means.join(loading, rsuffix='_norm')

    # Step 3: normalize every target to the corrected miR-30b value.
    means['dCq'] = means['Cq'] - means['Cq_norm']

    # Step 4: calculate ddCq against the control sample of the same target,
    # then Rq.
    samples = means.index.get_level_values('Sample')
    if control not in samples:
        raise KeyError(
            'control sample {!r} not found in the Sample column'.format(
                control))
    control_dcq = means['dCq'].xs(control, level='Sample')
    targets = means.index.get_level_values('Target')
    missing = targets[~targets.isin(control_dcq.index)].unique()
    if len(missing) > 0:
        raise KeyError(
            'control sample {!r} has no value for target(s): {}'.format(
                control, ', '.join(missing)))
    # Line the control dCq up with every row by target, then subtract
    # positionally.
    means['ddCq'] = means['dCq'] - control_dcq.reindex(targets).to_numpy()
    means['Rq'] = 2 ** (-means['ddCq'])
    return means


def main():
    """Run the analysis for the CSV named on the command line."""
    if len(sys.argv) < 2:
        sys.exit('usage: {} INPUT.csv'.format(sys.argv[0]))
    in_path = sys.argv[1]
    out_path = build_output_path(in_path)
    # An empty answer keeps the historical control sample name.
    answer = input('What is the name of your control?').strip()
    relative = answer or DEFAULT_CONTROL_SAMPLE

    data = load_cq_table(in_path)
    results = compute_relative_quantities(data, relative)
    results.sort_index().to_csv(out_path)


if __name__ == '__main__':
    main()