Skip to content
Snippets Groups Projects
Commit 6f140af3 authored by Vishruth Mullapudi's avatar Vishruth Mullapudi
Browse files

added fragment modification analysis and output

added test for multiple abundance columns
removed printing for debugging
parent 6ba13dac
1 merge request!1Fix unlocalized peptide mods not adding
......@@ -13,10 +13,9 @@ title="Abundance Parser Configuration"
#input_files=[ "/input/file1",
# path to file2
# ]
input_files="/input/test.csv"
input_files=[]
#fasta file(s) containing the protein sequence(s) to align against
prot_seq_fasta=["/data/1N4RP301STau.fasta","/data/2N4R_wt_tau.fasta"]
prot_seq_fasta=[]
[output]
......@@ -24,33 +23,31 @@ title="Abundance Parser Configuration"
output_directory="output/"
#the name of the input file is prepended to this stub
output_name_stub="residueModificationAnalysis"
residue_output_name_stub="residueModificationAnalysis"
peptide_output_name_stub='peptideModificationAnalysis'
#Configuration of parser settings[regex]
regex_file="data/parser_regex.toml"
regex_to_use="phosphoregex"
#Configuration of parser settings
[parser_config]
sequence_column_title="Annotated Sequence"
#If false, files will be specified via abundance column
using_fileID_column=false
#Title of column containing abundance. If using_fileID_column is set to
#true, only the first Abundance column specified below will be used
abundance_col_titles=["Abundance","Abundance: F1"]
#true, only the first entry will be used
abundance_col_titles=[]
calculate_peptide_modifications=true
[parser_config.regex]
regex_file="data/parser_regex.toml"
mod_parsing_regex="phosphoregex"
pos_master_regex="pos_master_regex"
[parser_config.master]
#Configs for use of pre-localized modification in master protein in
#parsing
use=false
#the name in the data of the master protein
master_protein_name= 'P10636-8'
#The id of the master protein as it appears in the fasta data
master_protein_fasta_ID='sp|P10636-8|TAU_HUMAN'
#Title of the column containing the
#localized modifications
modification_header="Modifications in Master Proteins"
[regex]
regex_file="data/parser_regex.toml"
regex_to_use="phosphoregex"
modification_header="Modifications in Master Proteins"
\ No newline at end of file
......@@ -43,10 +43,10 @@ def main():
# todo implement fileID splitting (pre-processing step?)
# if the file_id column is used we expect to see only one column of abundances
using_file_id_column = configuration['parser_config']['using_fileID_column']
# todo implement
should_calculate_peptide_modifications = True # configuration['parser_config']['calculate_peptide_modifications']
if should_calculate_peptide_modifications:
peptide_outpput_name_stub = configuration['output']['peptide_output_name_stub']
peptide_output_name_stub = configuration['output']['peptide_output_name_stub']
# End of configuration reading--------------------------------------------------------------------------------------
......@@ -74,7 +74,7 @@ def main():
# provided in the configuration
if use_mod_in_master_prot: # localize using the master protein positions and modifications
file_headers_tuple: Tuple[FileTuple, Dict[str, str], Dict[str, str]] = \
parsemasterlocalizations(ftuple, master_protein_fasta_id, mod_regex, master_regex)
parse_masterlocalizations(ftuple, master_protein_fasta_id, mod_regex, master_regex)
else: # localize by aligning the fragment against a protein and looking at the modification index within the
# fragment
file_headers_tuple: Tuple[FileTuple, Dict[str, str], Dict[str, str]] = \
......@@ -90,24 +90,32 @@ def main():
# Calculate the residue and modification abundances of each input against each desired protein----------------------
residue_analysis_all_prot: List[Dict[Dict]] = []
peptide_analysis_all_prot: List[Dict[Dict]] = []
for ftuple in localized_data:
# for residue modification analysis, calculate the amount each residue is modified
residue_analysis_all_prot.append(calc_residue_mod_abundances(ftuple, modification_localization_col_titles,
frag_localization_col_titles, abundance_col_titles,
protein_seq_records))
# for peptide modification analysis, calculate the amount each peptide fragment is modified
if should_calculate_peptide_modifications:
calc_peptide_mod_abundances(ftuple, modification_localization_col_titles, frag_localization_col_titles,
abundance_col_titles)
peptide_analysis_all_prot.append(calc_peptide_mod_abundances(ftuple, modification_localization_col_titles,
frag_localization_col_titles,
abundance_col_titles, protein_seq_records))
# End of abundance calculations-------------------------------------------------------------------------------------
# output the abundance data-----------------------------------------------------------------------------------------
# TODO peptide modification output
for prot_residue_analysis in residue_analysis_all_prot:
for protID, sample_analysis in prot_residue_analysis.items():
for fileID, abundance_array in sample_analysis.items():
output_residue_analysis_data(protID, fileID, abundance_array, output_directory,
for prot_id, sample_analysis in prot_residue_analysis.items():
for file_id, abundance_array in sample_analysis.items():
output_residue_analysis_data(prot_id, file_id, abundance_array, output_directory,
residue_output_name_stub)
for prot_peptide_analysis in peptide_analysis_all_prot:
for file_id, prot_analysis in prot_peptide_analysis.items():
for prot_id, fragment_list in prot_analysis.items():
output_peptide_analysis_data(prot_id, file_id, fragment_list, output_directory,
peptide_output_name_stub)
# End of data output------------------------------------------------------------------------------------------------
# End of program
return
......@@ -116,8 +124,8 @@ def main():
def gen_raw_sequences(ftuple: FileTuple) -> FileTuple:
"""
Adds a column to the DataFrame containing a stripped down peptide without the cleavage annotations
:param ftuple: The FileTuple to containing the filedata DataFrame to generate the raw sequence of each fragment
:return: a FileTuple containing the fileID and a filedata DataFrame containing the raw sequence in the
:param ftuple: The FileTuple to containing the FileData DataFrame to generate the raw sequence of each fragment
:return: a FileTuple containing the fileID and a FileData DataFrame containing the raw sequence in the
'stripped_sequence' column
"""
file_data: pd.DataFrame = ftuple.FileData # Extract the DataFrame
......@@ -128,7 +136,7 @@ def gen_raw_sequences(ftuple: FileTuple) -> FileTuple:
return FileTuple(ftuple.FileName, file_data)
def parsemasterlocalizations(ftuple: FileTuple, master_prot_fasta_id, mod_regex, pos_master_regex) -> Tuple[
def parse_masterlocalizations(ftuple: FileTuple, master_prot_fasta_id, mod_regex, pos_master_regex) -> Tuple[
FileTuple, Dict[str, str], Dict[str, str]]:
"""
Parses the modification and fragment localizations from a DataFrame using the positions in master and modifications
......@@ -293,43 +301,53 @@ def calc_residue_mod_abundances(ftuple, localization_col_titles: Dict, frag_loca
for mod in mod_localization:
# add the abundance to each modified residue contained in the fragment
res_abundances[mod][0] += frag_abundance
assert (res_abundances[i][0] <= res_abundances[i][1] for i in range(0, len(res_abundances)))
all_prot_abundances[res_abundance_col_title][abundance_col_title] = res_abundances
return all_prot_abundances
# todo
def calc_peptide_mod_abundances(ftuple, mod_localization_col_titles, frag_localization_col_titles,
                                abundance_col_titles, protein_seqrecords):
    """
    Calculates, for every abundance column and every protein, how much of each distinct peptide fragment's abundance
    belongs to rows of that fragment that carry a modification
    :param ftuple: The FileTuple whose FileData DataFrame holds the fragments; the DataFrame must contain a
    'stripped_sequence' column
    :param mod_localization_col_titles: maps each protein id to the title of the column holding the modification
    localizations for that protein
    :param frag_localization_col_titles: maps each protein id to the title of the column holding the fragment
    localizations for that protein
    :param abundance_col_titles: the titles of the abundance columns to analyze
    :param protein_seqrecords: iterating this must yield the protein ids used as keys of the two column title
    mappings
    :return: a dict of the form {sanitized abundance column title: {protein id: [(fragment sequence, start, end,
    modified abundance, total abundance), ...]}}
    """
    # Missing cells become 0, so empty modification cells are falsy and empty abundances add nothing to the sums
    filled_data = ftuple.FileData.fillna(0)
    # One group per distinct fragment sequence
    fragments_by_sequence = filled_data.groupby('stripped_sequence')

    analysis_by_sample = {}
    for raw_col_title in abundance_col_titles:
        abundance_col = sanitize_str_for_dataframe_index(raw_col_title)
        analysis_by_protein = {}
        for prot_id in protein_seqrecords:
            mod_col = mod_localization_col_titles[prot_id]
            frag_loc_col = frag_localization_col_titles[prot_id]
            fragment_rows = []
            for sequence, group in fragments_by_sequence:
                # total abundance of every row sharing this sequence
                total_abundance = group[abundance_col].sum()
                # a row counts as modified when its modification cell is non-empty and does not contain -1
                # (presumably the marker for a modification that could not be localized - confirm)
                is_modified = [bool(mods) and -1 not in mods for mods in group[mod_col]]
                modified_abundance = group.loc[is_modified, abundance_col].sum()
                assert (modified_abundance <= total_abundance)
                # the position is taken from the first row of the group only
                localization = group[frag_loc_col].iloc[0][0]
                if -1 in localization:
                    start, end = -1, -1
                else:
                    start, end = localization[0], localization[1]
                fragment_rows.append((sequence, start, end, modified_abundance, total_abundance))
            analysis_by_protein[prot_id] = fragment_rows
        analysis_by_sample[abundance_col] = analysis_by_protein
    return analysis_by_sample
def ingest_file_data(files: List[str]) -> List[FileTuple]:
......@@ -385,18 +403,29 @@ def output_residue_analysis_data(prot_id, fileid, abundance_array, output_direct
writer = csv.writer(outfile)
writer.writerow(["Residue #:", "Modification Abundance", "Residue Abundance", "Modification Proportion"])
for i, arr in enumerate(abundance_array):
# will throw an Runtime warning-invalid value warning when dividing by 0 and print nan
# this is fine, although you could check if arr[1] is 0 and do something if this is not desired
if arr[0] is not 0 or float('NaN') and arr[1] is not 0 or float('NaN'):
writer.writerow([i, arr[0], arr[1], (arr[0] / arr[1])])
if (arr[0] != float('NaN')) and (arr[1] != 0) and (arr[1] != float('NaN')):
mod_prop = arr[0] / arr[1]
writer.writerow([i, arr[0], arr[1], mod_prop])
else:
writer.writerow([i, arr[0], arr[1], float("nan")])
return
# todo implement
def output_peptide_analysis_data():
pass
def output_peptide_analysis_data(prot_id, file_id, fragment_list, output_directory, peptide_output_name_stub):
    """
    Writes the peptide modification analysis of one protein in one sample to a csv file named
    output_directory + file_id + prot_id + peptide_output_name_stub + '.csv'
    :param prot_id: id of the protein the fragments belong to; used in the output file name
    :param file_id: id of the sample the abundances belong to; used in the output file name
    :param fragment_list: iterable of (fragment sequence, start position, end position, modification abundance,
    fragment abundance) entries
    :param output_directory: prefix of the output path; it is concatenated as-is, so it has to end with a path
    separator
    :param peptide_output_name_stub: text placed at the end of the file name, before the '.csv' extension
    :return: None
    """
    import math  # imported locally so that the function does not rely on the import list of the module

    filename = output_directory + file_id + prot_id + peptide_output_name_stub + ".csv"
    # the csv module requires newline='' for files it writes to; otherwise platforms that translate '\n' end up
    # with an empty line after every row
    with open(filename, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(["Fragment", "Start Position", "End Position", "Length", "Phosphorylation Abundance",
                         "Fragment Abundance", "Modification Proportion"])
        for fragment in fragment_list:
            sequence = fragment[0]
            phos_abundance = fragment[3]
            frag_abundance = fragment[4]
            # NaN never compares equal to anything, so it has to be detected with math.isnan instead of '!='
            has_proportion = (frag_abundance != 0
                              and not math.isnan(phos_abundance)
                              and not math.isnan(frag_abundance))
            mod_prop = phos_abundance / frag_abundance if has_proportion else float("nan")
            # the length of the sequence fills the "Length" column so that every row has as many fields as the
            # header and the abundances end up underneath their own titles
            writer.writerow([sequence, fragment[1], fragment[2], len(sequence), phos_abundance, frag_abundance,
                             mod_prop])
    return
if __name__ == '__main__':
......
Confidence,Annotated Sequence ,Modifications ,Modifications in Master Proteins,# Protein Groups,# PSMs,Master Protein Accessions,Positions in Master Proteins ,# Missed Cleavages,Abundance :F1,Abundance:F2
High ,[R].HLSNVSSTGSIDMVDSPQLATLADEVSASLAK.[Q] ,2xPhospho [S6; S] ,P10636-8 2xPhospho [S383; S] ,1 ,2 ,P10636-8 ,P10636-8 [378-409] ,0 ,3770206.25 ,84602312
High ,[K].TDHGAEIVYKSPVVSGDTSPR.[H] ,2xPhospho [S11; S15] ,P10636-8 2xPhospho [S367; S371] ,1 ,2 ,P10636-8 ,P10636-8 [357-377] ,1 ,490543.4063 ,85879545
High ,[K].AKTDHGAEIVYKSPVVSGDTSPR.[H] ,2xPhospho [S13; S17] ,P10636-8 2xPhospho [S367; S371] ,1 ,1 ,P10636-8 ,P10636-8 [355-377] ,2 , ,92741293
High ,[R].SGYSSPGSPGTPGSR.[S] ,2xPhospho [S] ,P10636-8 2xPhospho [S] ,1 ,5 ,P10636-8 ,P10636-8 [166-180] ,0 ,1425691.75 ,98087441
High ,[K].VAVVRTPPKSPSSAK.[S] ,1xPhospho [T6] ,P10636-8 1xPhospho [T202] ,1 ,4 ,P10636-8 ,P10636-8 [197-211] ,2 ,35290570.5 ,36243518
High ,[K].KVAVVRTPPKSPSSAK.[S] ,1xPhospho [T7] ,P10636-8 1xPhospho [T202] ,1 ,1 ,P10636-8 ,P10636-8 [196-211] ,3 ,97352696 ,43770115
High ,[K].KVAVVRTPPK.[S] ,1xPhospho [T7] ,P10636-8 1xPhospho [T202] ,1 ,1 ,P10636-8 ,P10636-8 [196-205] ,2 ,1866753.375 ,84341486
High ,[R].TPSLPTPPTR.[E] ,1xPhospho [T6] ,P10636-8 1xPhospho [T188] ,1 ,1 ,P10636-8 ,P10636-8 [183-192] ,0 ,5528542 ,93835341
High ,[R].TPSLPTPPTREPK.[K] ,1xPhospho [T6] ,P10636-8 1xPhospho [T188] ,1 ,2 ,P10636-8 ,P10636-8 [183-195] ,1 ,22805599.5 ,67613272
High ,[K].TPPAPKTPPSSGEPPKSGDR.[S] ,1xPhospho [T7] ,P10636-8 1xPhospho [T152] ,1 ,4 ,P10636-8 ,P10636-8 [146-165] ,2 ,35066952 ,12591257
High ,[K].TPPAPKTPPSSGEPPK.[S] ,1xPhospho [T7] ,P10636-8 1xPhospho [T152] ,1 ,4 ,P10636-8 ,P10636-8 [146-161] ,1 ,147009880 ,94815625
High ,[K].TPPSSGEPPK.[S] ,1xPhospho [T1] ,P10636-8 1xPhospho [T152] ,1 ,1 ,P10636-8 ,P10636-8 [152-161] ,0 ,1141578.25 ,72979440
High ,[K].SPVVSGDTSPR.[H] ,1xPhospho [T/S] ,P10636-8 1xPhospho [T/S] ,1 ,5 ,P10636-8 ,P10636-8 [367-377] ,0 ,291275882 ,76313867
High ,[K].TDHGAEIVYKSPVVSGDTSPR.[H] ,1xPhospho [S11] ,P10636-8 1xPhospho [S367] ,1 ,1 ,P10636-8 ,P10636-8 [357-377] ,1 ,3844751.25 ,90047135
High ,[K].IGSLDNITHVPGGGNK.[K] ,1xPhospho [S3] ,P10636-8 1xPhospho [S327] ,1 ,1 ,P10636-8 ,P10636-8 [325-340] ,0 ,3098245 ,5332778
High ,[R].SRTPSLPTPPTR.[E] ,1xPhospho [S5] ,P10636-8 1xPhospho [S185] ,1 ,1 ,P10636-8 ,P10636-8 [181-192] ,1 ,6525094.5 ,17852668
High ,[R].SGYSSPGSPGTPGSR.[S] ,1xPhospho [S/T] ,P10636-8 1xPhospho [S] ,1 ,7 ,P10636-8 ,P10636-8 [166-180] ,0 ,149733789.5 ,22124243
High ,[K].STPTAEAEEAGIGDTPSLEDEAAGHVTQAR.[M] ,1xPhospho [T/S] ,P10636-8 1xPhospho [S/T] ,1 ,3 ,P10636-8 ,P10636-8 [68-97] ,0 ,7920354.125 ,9185267
High ,[R].TPPKSPSSAK.[S] ,1xPhospho [S/T] ,P10636-8 1xPhospho [S/T] ,1 ,3 ,P10636-8 ,P10636-8 [202-211] ,1 ,2525454.594 ,58090514
High ,[R].HLSNVSSTGSIDMVDSPQLATLADEVSASLAK.[Q] ,1xPhospho [T/S]; 1xOxidation [M13],P10636-8 1xPhospho [S/T] ,1 ,4 ,P10636-8 ,P10636-8 [378-409] ,0 ,37349012 ,66648919
High ,[R].HLSNVSSTGSIDMVDSPQLATLADEVSASLAK.[Q] ,1xPhospho [S/T] ,P10636-8 1xPhospho [S/T] ,1 ,5 ,P10636-8 ,P10636-8 [378-409] ,0 ,42549668 ,41174096
High ,[R].KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK.[S],1xPhospho [T/S]; 1xOxidation [M8] ,P10636-8 1xPhospho [S/T] ,1 ,6 ,P10636-8 ,P10636-8 [24-67] ,2 ,2833359 ,90435088
High ,[R].KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK.[S],1xPhospho [S/T] ,P10636-8 1xPhospho [S/T] ,1 ,5 ,P10636-8 ,P10636-8 [24-67] ,2 ,381969 ,33380708
High ,[R].LQTAPVPMPDLKNVK.[S] ,1xOxidation [M8] , ,1 ,2 ,P10636-8 ,P10636-8 [214-228] ,1 ,11043190 ,59925563
High ,[R].LQTAPVPMPDLKNVK.[S] , , ,1 ,2 ,P10636-8 ,P10636-8 [214-228] ,1 ,3375361.5 ,66934687
High ,[R].LQTAPVPMPDLK.[N] ,1xOxidation [M8] , ,1 ,26 ,P10636-8 ,P10636-8 [214-225] ,0 ,2938043353 ,55115903
High ,[R].LQTAPVPMPDLK.[N] , , ,1 ,10 ,P10636-8 ,P10636-8 [214-225] ,0 ,2345507657 ,18349423
High ,[K].LDLSNVQSK.[C] , , ,1 ,9 ,P10636-8 ,P10636-8 [253-261] ,0 ,1056765568 ,24227906
High ,[K].KLDLSNVQSK.[C] , , ,1 ,5 ,P10636-8 ,P10636-8 [252-261] ,1 ,1525892608 ,69084694
High ,[K].IGSLDNITHVPGGGNKK.[I] , , ,1 ,5 ,P10636-8 ,P10636-8 [325-341] ,1 ,963006714.5 ,41478772
High ,[K].IGSLDNITHVPGGGNK.[K] , , ,1 ,16 ,P10636-8 ,P10636-8 [325-340] ,0 ,4624086515 ,19351259
High ,[K].IGSTENLKHQPGGGK.[V] , , ,1 ,1 ,P10636-8 ,P10636-8 [231-245] ,1 ,148045.0938 ,9514705
High ,[R].SRTPSLPTPPTREPK.[K] , , ,1 ,3 ,P10636-8 ,P10636-8 [181-195] ,2 ,698763648 ,36101344
High ,[K].STPTAEAEEAGIGDTPSLEDEAAGHVTQAR.[M] , , ,1 ,12 ,P10636-8 ,P10636-8 [68-97] ,0 ,725873185.9 ,5036078
High ,[K].SPVVSGDTSPR.[H] , , ,1 ,5 ,P10636-8 ,P10636-8 [367-377] ,0 ,1303747568 ,71492316
High ,[R].TPPKSPSSAK.[S] , , ,1 ,5 ,P10636-8 ,P10636-8 [202-211] ,1 ,66745472 ,41408951
High ,[K].TPPSSGEPPK.[S] , , ,1 ,5 ,P10636-8 ,P10636-8 [152-161] ,0 ,257468414.4 ,61879916
High ,[K].TPPSSGEPPKSGDR.[S] , , ,1 ,6 ,P10636-8 ,P10636-8 [152-165] ,1 ,103032414.3 ,38342837
High ,[R].TPSLPTPPTREPK.[K] , , ,1 ,8 ,P10636-8 ,P10636-8 [183-195] ,1 ,1604505144 ,80068010
High ,[K].TDHGAEIVYK.[S] , , ,1 ,5 ,P10636-8 ,P10636-8 [357-366] ,0 ,529712534 ,96010574
High ,[K].SKDGTGSDDKK.[A] , , ,1 ,1 ,P10636-8 ,P10636-8 [102-112] ,2 ,1939050.375 ,32759389
High ,[R].QEFEVMEDHAGTYGLGDRK.[D] ,1xOxidation [M6] , ,1 ,16 ,P10636-8 ,P10636-8 [6-24] ,1 ,234584178.3 ,33165088
High ,[R].QEFEVMEDHAGTYGLGDRK.[D] , , ,1 ,8 ,P10636-8 ,P10636-8 [6-24] ,1 ,352983975.5 ,95560066
High ,[R].QEFEVMEDHAGTYGLGDR.[K] ,1xOxidation [M6] , ,1 ,24 ,P10636-8 ,P10636-8 [6-23] ,0 ,541478609 ,5099972
High ,[R].QEFEVMEDHAGTYGLGDR.[K] , , ,1 ,8 ,P10636-8 ,P10636-8 [6-23] ,0 ,663580316 ,57096692
High ,[R].SGYSSPGSPGTPGSR.[S] , , ,1 ,7 ,P10636-8 ,P10636-8 [166-180] ,0 ,1886451761 ,72762532
High ,[K].DQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK.[S] ,1xOxidation [M7] , ,1 ,8 ,P10636-8 ,P10636-8 [25-67] ,1 ,242811709.3 ,34474881
High ,[K].DQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK.[S] , , ,1 ,4 ,P10636-8 ,P10636-8 [25-67] ,1 ,263117809.9 ,68130546
High ,[K].DQGGYTMHQDQEGDTDAGLK.[E] ,1xOxidation [M7] , ,1 ,15 ,P10636-8 ,P10636-8 [25-44] ,0 ,148222051.8 ,32337478
High ,[K].DQGGYTMHQDQEGDTDAGLK.[E] , , ,1 ,5 ,P10636-8 ,P10636-8 [25-44] ,0 ,169405957.9 ,92807240
High ,[K].DNIKHVSGGGSVQIVYKPVDLSK.[V] , , ,1 ,1 ,P10636-8 ,P10636-8 [266-288] ,1 , ,38907207
High ,[K].ESPLQTPTEDGSEEPGSETSDAK.[S] , , ,1 ,9 ,P10636-8 ,P10636-8 [45-67] ,0 ,740036840 ,99535953
High ,[K].AKTDHGAEIVYK.[S] , , ,1 ,3 ,P10636-8 ,P10636-8 [355-366] ,1 ,652942514.6 ,84744142
High ,[R].HLSNVSSTGSIDMVDSPQLATLADEVSASLAK.[Q] ,1xOxidation [M13] , ,1 ,17 ,P10636-8 ,P10636-8 [378-409] ,0 ,465597371.5 ,29112938
High ,[K].HVSGGGSVQIVYKPVDLSK.[V] , , ,1 ,56 ,P10636-8 ,P10636-8 [270-288] ,0 ,617176649.5 ,33872557
High ,[R].HLSNVSSTGSIDMVDSPQLATLADEVSASLAK.[Q] , , ,1 ,13 ,P10636-8 ,P10636-8 [378-409] ,0 ,549771859.3 ,44853427
High ,[R].KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK.[S],1xOxidation [M8] , ,1 ,2 ,P10636-8 ,P10636-8 [24-67] ,2 ,132546086.5 ,98766340
High ,[R].KDQGGYTMHQDQEGDTDAGLK.[E] ,1xOxidation [M8] , ,1 ,12 ,P10636-8 ,P10636-8 [24-44] ,1 ,68147217.5 ,6462453
High ,[R].KDQGGYTMHQDQEGDTDAGLK.[E] , , ,1 ,2 ,P10636-8 ,P10636-8 [24-44] ,1 ,47451437.75 ,34182396
High ,[R].KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK.[S], , ,1 ,3 ,P10636-8 ,P10636-8 [24-67] ,2 ,25293893.63 ,63120561
High ,[K].SRLQTAPVPMPDLK.[N] , , ,1 ,1 ,P10636-8 ,P10636-8 [212-225] ,1 ,5722389 ,33652639
High ,[R].IPAKTPPAPK.[T] , , ,1 ,1 ,P10636-8 ,P10636-8 [142-151] ,1 ,2015586.75 ,38656450
High ,[K].CGSLGNIHHKPGGGQVEVK.[S] ,1xCarbamidomethyl [C1] , ,1 ,3 ,P10636-8 ,P10636-8 [293-311] ,0 ,218379137.5 ,63191884
High ,[K].IGSTENLK.[H] , , ,1 ,5 ,P10636-8 ,P10636-8 [231-238] ,0 ,887492361 ,95150178
High ,[R].TPSLPTPPTR.[E] , , ,1 ,4 ,P10636-8 ,P10636-8 [183-192] ,0 ,1085564561 ,84114105
High ,[R].SRTPSLPTPPTR.[E] , , ,1 ,1 ,P10636-8 ,P10636-8 [181-192] ,1 ,308283264 ,78590822
High ,[K].SKIGSTENLKHQPGGGK.[V] , , ,1 ,1 ,P10636-8 ,P10636-8 [229-245] ,2 ,551345.375 ,38888975
High ,[K].GADGKTKIATPR.[G] , , ,1 ,1 ,P10636-8 ,P10636-8 [115-126] ,2 ,593317.6875 ,45036840
High ,[K].SEKLDFKDR.[V] , , ,1 ,2 ,P10636-8 ,P10636-8 [312-320] ,2 ,32312068 ,53312265
High ,[KR].CGSKDNIK.[H] ,1xCarbamidomethyl [C1] , ,2 ,1 ,P10636-8; P27546 ,P10636-8 [262-269]; P27546 [981-988],1 ,4449589 ,82881130
......@@ -13,18 +13,18 @@ title="Abundance Parser Configuration"
#input_files=[ "/input/file1",
# path to file2
# ]
input_files=["tests/sampleInput.csv"]
input_files=["tests/sample2.csv"]
#fasta file(s) containing the protein sequence(s) to align against
prot_seq_fasta=["data/2N4R_wt_tau.fasta","data/1N4RP301STau.fasta"]
[output]
#Relative or absolute path to desired output directory
output_directory="output/"
output_directory="output/testOutput/"
#the name of the input file is prepended to this stub
residue_output_name_stub="residueModificationAnalysis"
peptide_output_name_stub='peptideModificationAnalusis'
peptide_output_name_stub='peptideModificationAnalysis'
#Configuration of parser settings
[parser_config]
......@@ -33,8 +33,8 @@ title="Abundance Parser Configuration"
using_fileID_column=false
#Title of column containing abundance. If using_fileID_column is set to
#true, only the first entry will be used
abundance_col_titles=["Abundance :F1"]
calculate_peptide_modifications=false
abundance_col_titles=["Abundance :F1", "Abundance:F2"]
calculate_peptide_modifications=true
[parser_config.regex]
regex_file="data/parser_regex.toml"
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment