added fragment modification analysis and output

added test for multiple abundance columns; removed printing for debugging

added fragment modification analysis and output
added test for multiple abundance columns; removed printing for debugging
6f140af3 · Vishruth Mullapudi · 6ba13dac · 6f140af3 · 6f140af3 · 6f140af3
Commit 6f140af3 authored 5 years ago by Vishruth Mullapudi
--- a/config.toml
+++ b/config.toml
@@ -13,10 +13,9 @@ title="Abundance Parser Configuration"
    #input_files=[ "/input/file1",
    #              path to file2
    #             ]
-    input_files="/input/test.csv"
-
+    input_files=[]
    #fasta file(s) containing the protein sequence(s) to align against
-    prot_seq_fasta=["/data/1N4RP301STau.fasta","/data/2N4R_wt_tau.fasta"]
+    prot_seq_fasta=[]


 [output]
@@ -24,33 +23,31 @@ title="Abundance Parser Configuration"
    output_directory="output/"

    #this stub is prepended by the file name of the input file
-    output_name_stub="residueModificationAnalysis"
-
+    residue_output_name_stub="residueModificationAnalysis"
+    peptide_output_name_stub='peptideModificationAnalysis'

-#Configuration of parser settings[regex]
-        regex_file="data/parser_regex.toml"
-        regex_to_use="phosphoregex"
+#Configuration of parser settings
 [parser_config]
+    sequence_column_title="Annotated Sequence"
    #If false, files will be specified via abundance column
    using_fileID_column=false
    #Title of column containing abundance. If using_fileID_column is set to
-    #true, only the first Abundance column specified below will be used
-    abundance_col_titles=["Abundance","Abundance: F1"]
+    #true, only the first entry will be used
+    abundance_col_titles=[]
+    calculate_peptide_modifications=true
+
+    [parser_config.regex]
+        regex_file="data/parser_regex.toml"
+        mod_parsing_regex="phosphoregex"
+        pos_master_regex="pos_master_regex"
+
    [parser_config.master]
        #Configs for use of pre-localized modification in master protein in
        #parsing
        use=false
        #the name in the data of the master protein
        master_protein_name= 'P10636-8'
-        #The id of the master protein as it appears in the fasta data
        master_protein_fasta_ID='sp|P10636-8|TAU_HUMAN'
        #Column header containing the title of the column containing the
        #localized modifications
-        modification_header="Modifications in Master Proteins"
-    [regex]
-        regex_file="data/parser_regex.toml"
-        regex_to_use="phosphoregex"
-
-
-
-
+        modification_header="Modifications in Master Proteins"
\ No newline at end of file
--- a/main.py
+++ b/main.py
@@ -43,10 +43,10 @@ def main():
    # todo implement fileID splitting (pre-processing step?)
    # if the file_id column is used we expect to see only one column of abundances
    using_file_id_column = configuration['parser_config']['using_fileID_column']
-    # todo implement
+
    should_calculate_peptide_modifications = True  # configuration['parser_config']['calculate_peptide_modifications']
    if should_calculate_peptide_modifications:
-        peptide_outpput_name_stub = configuration['output']['peptide_output_name_stub']
+        peptide_output_name_stub = configuration['output']['peptide_output_name_stub']

    # End of configuration reading--------------------------------------------------------------------------------------

@@ -74,7 +74,7 @@ def main():
        # provided in the configuration
        if use_mod_in_master_prot:  # localize using the master protein positions and modifications
            file_headers_tuple: Tuple[FileTuple, Dict[str, str], Dict[str, str]] = \
-                parsemasterlocalizations(ftuple, master_protein_fasta_id, mod_regex, master_regex)
+                parse_masterlocalizations(ftuple, master_protein_fasta_id, mod_regex, master_regex)
        else:  # localize by aligning the fragment against a protein and looking at the modification index within the
            # fragment
            file_headers_tuple: Tuple[FileTuple, Dict[str, str], Dict[str, str]] = \
@@ -90,24 +90,32 @@ def main():

    # Calculate the residue and modification abundances of each input against each desired protein----------------------
    residue_analysis_all_prot: List[Dict[Dict]] = []
+    peptide_analysis_all_prot: List[Dict[Dict]] = []
    for ftuple in localized_data:
        # for residue modification analysis, calculate the amount each residue is modified
        residue_analysis_all_prot.append(calc_residue_mod_abundances(ftuple, modification_localization_col_titles,
                                                                     frag_localization_col_titles, abundance_col_titles,
                                                                     protein_seq_records))
-
+        # for peptide modification analysis, calculate the amount each peptide fragment is modified
        if should_calculate_peptide_modifications:
-            calc_peptide_mod_abundances(ftuple, modification_localization_col_titles, frag_localization_col_titles,
-                                        abundance_col_titles)
+            peptide_analysis_all_prot.append(calc_peptide_mod_abundances(ftuple, modification_localization_col_titles,
+                                                                         frag_localization_col_titles,
+                                                                         abundance_col_titles, protein_seq_records))
    # End of abundance calculations-------------------------------------------------------------------------------------

    # output the abundance data-----------------------------------------------------------------------------------------
    # TODO peptide modification output
    for prot_residue_analysis in residue_analysis_all_prot:
-        for protID, sample_analysis in prot_residue_analysis.items():
-            for fileID, abundance_array in sample_analysis.items():
-                output_residue_analysis_data(protID, fileID, abundance_array, output_directory,
+        for prot_id, sample_analysis in prot_residue_analysis.items():
+            for file_id, abundance_array in sample_analysis.items():
+                output_residue_analysis_data(prot_id, file_id, abundance_array, output_directory,
                                             residue_output_name_stub)
+
+    for prot_peptide_analysis in peptide_analysis_all_prot:
+        for file_id, prot_analysis in prot_peptide_analysis.items():
+            for prot_id, fragment_list in prot_analysis.items():
+                output_peptide_analysis_data(prot_id, file_id, fragment_list, output_directory,
+                                             peptide_output_name_stub)
    # End of data output------------------------------------------------------------------------------------------------
    # End of program
    return
@@ -116,8 +124,8 @@ def main():
 def gen_raw_sequences(ftuple: FileTuple) -> FileTuple:
    """
    Adds a column to the DataFrame containing a stripped down peptide without the cleavage annotations
-    :param ftuple: The FileTuple to containing the filedata DataFrame to generate the raw sequence of each fragment
-    :return: a FileTuple containing the fileID and a filedata DataFrame containing the raw sequence in the
+    :param ftuple: The FileTuple containing the FileData DataFrame from which to generate the raw sequence of each fragment
+    :return: a FileTuple containing the fileID and a FileData DataFrame containing the raw sequence in the
    'stripped_sequence' column
    """
    file_data: pd.DataFrame = ftuple.FileData  # Extract the DataFrame
@@ -128,7 +136,7 @@ def gen_raw_sequences(ftuple: FileTuple) -> FileTuple:
    return FileTuple(ftuple.FileName, file_data)


-def parsemasterlocalizations(ftuple: FileTuple, master_prot_fasta_id, mod_regex, pos_master_regex) -> Tuple[
+def parse_masterlocalizations(ftuple: FileTuple, master_prot_fasta_id, mod_regex, pos_master_regex) -> Tuple[
    FileTuple, Dict[str, str], Dict[str, str]]:
    """
    Parses the modification and fragment localizations from a DataFrame using the positions in master and modifications
@@ -293,43 +301,53 @@ def calc_residue_mod_abundances(ftuple, localization_col_titles: Dict, frag_loca
                    for mod in mod_localization:
                        # add the abundance to each modified residue contained in the fragment
                        res_abundances[mod][0] += frag_abundance
-
+            assert (res_abundances[i][0] <= res_abundances[i][1] for i in range(0, len(res_abundances)))
            all_prot_abundances[res_abundance_col_title][abundance_col_title] = res_abundances

    return all_prot_abundances


-# todo
 def calc_peptide_mod_abundances(ftuple, mod_localization_col_titles, frag_localization_col_titles,
-                                abundance_col_titles):
+                                abundance_col_titles, protein_seqrecords):
    df = ftuple.FileData
    fdata = df.fillna(0)
-    # Group all fragments of the same sequence into their own groups.
+
+    # group each peptide fragment by the sequence
    grouped = fdata.groupby('stripped_sequence')
-    frag_mods = {}
-    # Calc the abundances for each protein
-    for mod_col_title_tuple, frag_col_title_tuple, abundance_col_title in \
-            zip(mod_localization_col_titles.items(), frag_localization_col_titles.items(), abundance_col_titles):
-        prot_id = mod_col_title_tuple[0]
-        mod_col_title = mod_col_title_tuple[1]
-        abundance_col_title = sanitize_str_for_dataframe_index(abundance_col_title)
-        frag_abundances = np.zeros((len(grouped), 2),
-                                   dtype=float)
-        frag_mods_per_prot = []
-        i = 0
-        for frag_sequence, fragment_group in grouped:
-            frag_abundances[i][1] = fragment_group[abundance_col_title].sum()
-            for fragment in fragment_group.iterrows():
-                fragment = fragment[1]
-                if fragment[mod_col_title]:
-                    frag_abundances[i][0] += fragment[abundance_col_title]
-            prot_frag_mod_tuple = (prot_id, abundance_col_title, frag_sequence, fragment_group[frag_col_title_tuple[1]] \
-                                   .iloc[0], frag_abundances[0], frag_abundances[1])
-            print(prot_frag_mod_tuple)
-
-            i += 1
-
-    return frag_mods
+    sample_frag_abundances = {}
+    for abund_col_title in abundance_col_titles:
+        abund_col_title = sanitize_str_for_dataframe_index(abund_col_title)
+        sample_prot_frag_abundances = {}
+        for prot_id in protein_seqrecords:
+            prot_frag_abundances = []
+            mod_col_title = mod_localization_col_titles[prot_id]
+            frag_loc_col_title = frag_localization_col_titles[prot_id]
+            for frags_same_seq in grouped:
+                # add all the abundances for fragments with the same sequence
+                frag_abundance = frags_same_seq[1][abund_col_title].sum()
+                # ID all the fragments with modifications
+                bools = []
+                for mods in frags_same_seq[1][mod_col_title]:
+                    if mods:
+                        if -1 not in mods:
+                            bools.append(True)
+                        else:
+                            bools.append(False)
+                    else:
+                        bools.append(False)
+                # add the abundances for all fragments with modifications
+                mod_abundance = frags_same_seq[1].loc[bools, abund_col_title].sum()
+                assert (mod_abundance <= frag_abundance)
+                frag_localization_tuple = frags_same_seq[1][frag_loc_col_title].iloc[0][0]
+                if -1 in frag_localization_tuple:
+                    prot_frag_abundances.append((frags_same_seq[0], -1, -1, mod_abundance,
+                                                 frag_abundance))
+                else:
+                    prot_frag_abundances.append((frags_same_seq[0], frag_localization_tuple[0],
+                                                 frag_localization_tuple[1], mod_abundance, frag_abundance))
+            sample_prot_frag_abundances.update({prot_id: prot_frag_abundances})
+        sample_frag_abundances.update({abund_col_title: sample_prot_frag_abundances})
+    return sample_frag_abundances


 def ingest_file_data(files: List[str]) -> List[FileTuple]:
@@ -385,18 +403,29 @@ def output_residue_analysis_data(prot_id, fileid, abundance_array, output_direct
        writer = csv.writer(outfile)
        writer.writerow(["Residue #:", "Modification Abundance", "Residue Abundance", "Modification Proportion"])
        for i, arr in enumerate(abundance_array):
-            # will throw an Runtime warning-invalid value warning when dividing by 0 and print nan
-            # this is fine, although you could check if arr[1] is 0 and do something if this is not desired
-            if arr[0] is not 0 or float('NaN') and arr[1] is not 0 or float('NaN'):
-                writer.writerow([i, arr[0], arr[1], (arr[0] / arr[1])])
+            if (arr[0] != float('NaN')) and (arr[1] != 0) and (arr[1] != float('NaN')):
+                mod_prop = arr[0] / arr[1]
+                writer.writerow([i, arr[0], arr[1], mod_prop])
            else:
                writer.writerow([i, arr[0], arr[1], float("nan")])
    return


-# todo implement
-def output_peptide_analysis_data():
-    pass
+def output_peptide_analysis_data(prot_id, file_id, fragment_list, output_directory, peptide_output_name_stub):
+    filename = output_directory + file_id + prot_id + peptide_output_name_stub + ".csv"
+    with open(filename, 'w') as outfile:
+        writer = csv.writer(outfile)
+        writer.writerow(["Fragment", "Start Position", "End Position", "Length", "Phosphorylation Abundance",
+                         "Fragment Abundance", "Modification Proportion"])
+        for i in fragment_list:
+            phos_abundance = i[3]
+            frag_abundance = i[4]
+            if (phos_abundance != float('NaN')) and (frag_abundance != 0) and (frag_abundance != float('NaN')):
+                mod_prop = phos_abundance / frag_abundance
+                writer.writerow([i[0], i[1], i[2], i[3], i[4], mod_prop])
+            else:
+                writer.writerow([i[0], i[1], i[2], i[3], i[4], float("nan")])
+    return


 if __name__ == '__main__':

--- a/tests/sample2.csv
+++ b/tests/sample2.csv
+Confidence,Annotated Sequence                                  ,Modifications                     ,Modifications in Master Proteins,# Protein Groups,# PSMs,Master Protein Accessions,Positions in Master Proteins        ,# Missed Cleavages,Abundance :F1,Abundance:F2
+High      ,[R].HLSNVSSTGSIDMVDSPQLATLADEVSASLAK.[Q]            ,2xPhospho [S6; S]                 ,P10636-8 2xPhospho [S383; S]    ,1               ,2     ,P10636-8                 ,P10636-8 [378-409]                  ,0                 ,3770206.25   ,84602312
+High      ,[K].TDHGAEIVYKSPVVSGDTSPR.[H]                       ,2xPhospho [S11; S15]              ,P10636-8 2xPhospho [S367; S371] ,1               ,2     ,P10636-8                 ,P10636-8 [357-377]                  ,1                 ,490543.4063  ,85879545
+High      ,[K].AKTDHGAEIVYKSPVVSGDTSPR.[H]                     ,2xPhospho [S13; S17]              ,P10636-8 2xPhospho [S367; S371] ,1               ,1     ,P10636-8                 ,P10636-8 [355-377]                  ,2                 ,             ,92741293
+High      ,[R].SGYSSPGSPGTPGSR.[S]                             ,2xPhospho [S]                     ,P10636-8 2xPhospho [S]          ,1               ,5     ,P10636-8                 ,P10636-8 [166-180]                  ,0                 ,1425691.75   ,98087441
+High      ,[K].VAVVRTPPKSPSSAK.[S]                             ,1xPhospho [T6]                    ,P10636-8 1xPhospho [T202]       ,1               ,4     ,P10636-8                 ,P10636-8 [197-211]                  ,2                 ,35290570.5   ,36243518
+High      ,[K].KVAVVRTPPKSPSSAK.[S]                            ,1xPhospho [T7]                    ,P10636-8 1xPhospho [T202]       ,1               ,1     ,P10636-8                 ,P10636-8 [196-211]                  ,3                 ,97352696     ,43770115
+High      ,[K].KVAVVRTPPK.[S]                                  ,1xPhospho [T7]                    ,P10636-8 1xPhospho [T202]       ,1               ,1     ,P10636-8                 ,P10636-8 [196-205]                  ,2                 ,1866753.375  ,84341486
+High      ,[R].TPSLPTPPTR.[E]                                  ,1xPhospho [T6]                    ,P10636-8 1xPhospho [T188]       ,1               ,1     ,P10636-8                 ,P10636-8 [183-192]                  ,0                 ,5528542      ,93835341
+High      ,[R].TPSLPTPPTREPK.[K]                               ,1xPhospho [T6]                    ,P10636-8 1xPhospho [T188]       ,1               ,2     ,P10636-8                 ,P10636-8 [183-195]                  ,1                 ,22805599.5   ,67613272
+High      ,[K].TPPAPKTPPSSGEPPKSGDR.[S]                        ,1xPhospho [T7]                    ,P10636-8 1xPhospho [T152]       ,1               ,4     ,P10636-8                 ,P10636-8 [146-165]                  ,2                 ,35066952     ,12591257
+High      ,[K].TPPAPKTPPSSGEPPK.[S]                            ,1xPhospho [T7]                    ,P10636-8 1xPhospho [T152]       ,1               ,4     ,P10636-8                 ,P10636-8 [146-161]                  ,1                 ,147009880    ,94815625
+High      ,[K].TPPSSGEPPK.[S]                                  ,1xPhospho [T1]                    ,P10636-8 1xPhospho [T152]       ,1               ,1     ,P10636-8                 ,P10636-8 [152-161]                  ,0                 ,1141578.25   ,72979440
+High      ,[K].SPVVSGDTSPR.[H]                                 ,1xPhospho [T/S]                   ,P10636-8 1xPhospho [T/S]        ,1               ,5     ,P10636-8                 ,P10636-8 [367-377]                  ,0                 ,291275882    ,76313867
+High      ,[K].TDHGAEIVYKSPVVSGDTSPR.[H]                       ,1xPhospho [S11]                   ,P10636-8 1xPhospho [S367]       ,1               ,1     ,P10636-8                 ,P10636-8 [357-377]                  ,1                 ,3844751.25   ,90047135
+High      ,[K].IGSLDNITHVPGGGNK.[K]                            ,1xPhospho [S3]                    ,P10636-8 1xPhospho [S327]       ,1               ,1     ,P10636-8                 ,P10636-8 [325-340]                  ,0                 ,3098245      ,5332778
+High      ,[R].SRTPSLPTPPTR.[E]                                ,1xPhospho [S5]                    ,P10636-8 1xPhospho [S185]       ,1               ,1     ,P10636-8                 ,P10636-8 [181-192]                  ,1                 ,6525094.5    ,17852668
+High      ,[R].SGYSSPGSPGTPGSR.[S]                             ,1xPhospho [S/T]                   ,P10636-8 1xPhospho [S]          ,1               ,7     ,P10636-8                 ,P10636-8 [166-180]                  ,0                 ,149733789.5  ,22124243
+High      ,[K].STPTAEAEEAGIGDTPSLEDEAAGHVTQAR.[M]              ,1xPhospho [T/S]                   ,P10636-8 1xPhospho [S/T]        ,1               ,3     ,P10636-8                 ,P10636-8 [68-97]                    ,0                 ,7920354.125  ,9185267
+High      ,[R].TPPKSPSSAK.[S]                                  ,1xPhospho [S/T]                   ,P10636-8 1xPhospho [S/T]        ,1               ,3     ,P10636-8                 ,P10636-8 [202-211]                  ,1                 ,2525454.594  ,58090514
+High      ,[R].HLSNVSSTGSIDMVDSPQLATLADEVSASLAK.[Q]            ,1xPhospho [T/S]; 1xOxidation [M13],P10636-8 1xPhospho [S/T]        ,1               ,4     ,P10636-8                 ,P10636-8 [378-409]                  ,0                 ,37349012     ,66648919
+High      ,[R].HLSNVSSTGSIDMVDSPQLATLADEVSASLAK.[Q]            ,1xPhospho [S/T]                   ,P10636-8 1xPhospho [S/T]        ,1               ,5     ,P10636-8                 ,P10636-8 [378-409]                  ,0                 ,42549668     ,41174096
+High      ,[R].KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK.[S],1xPhospho [T/S]; 1xOxidation [M8] ,P10636-8 1xPhospho [S/T]        ,1               ,6     ,P10636-8                 ,P10636-8 [24-67]                    ,2                 ,2833359      ,90435088
+High      ,[R].KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK.[S],1xPhospho [S/T]                   ,P10636-8 1xPhospho [S/T]        ,1               ,5     ,P10636-8                 ,P10636-8 [24-67]                    ,2                 ,381969       ,33380708
+High      ,[R].LQTAPVPMPDLKNVK.[S]                             ,1xOxidation [M8]                  ,                                ,1               ,2     ,P10636-8                 ,P10636-8 [214-228]                  ,1                 ,11043190     ,59925563
+High      ,[R].LQTAPVPMPDLKNVK.[S]                             ,                                  ,                                ,1               ,2     ,P10636-8                 ,P10636-8 [214-228]                  ,1                 ,3375361.5    ,66934687
+High      ,[R].LQTAPVPMPDLK.[N]                                ,1xOxidation [M8]                  ,                                ,1               ,26    ,P10636-8                 ,P10636-8 [214-225]                  ,0                 ,2938043353   ,55115903
+High      ,[R].LQTAPVPMPDLK.[N]                                ,                                  ,                                ,1               ,10    ,P10636-8                 ,P10636-8 [214-225]                  ,0                 ,2345507657   ,18349423
+High      ,[K].LDLSNVQSK.[C]                                   ,                                  ,                                ,1               ,9     ,P10636-8                 ,P10636-8 [253-261]                  ,0                 ,1056765568   ,24227906
+High      ,[K].KLDLSNVQSK.[C]                                  ,                                  ,                                ,1               ,5     ,P10636-8                 ,P10636-8 [252-261]                  ,1                 ,1525892608   ,69084694
+High      ,[K].IGSLDNITHVPGGGNKK.[I]                           ,                                  ,                                ,1               ,5     ,P10636-8                 ,P10636-8 [325-341]                  ,1                 ,963006714.5  ,41478772
+High      ,[K].IGSLDNITHVPGGGNK.[K]                            ,                                  ,                                ,1               ,16    ,P10636-8                 ,P10636-8 [325-340]                  ,0                 ,4624086515   ,19351259
+High      ,[K].IGSTENLKHQPGGGK.[V]                             ,                                  ,                                ,1               ,1     ,P10636-8                 ,P10636-8 [231-245]                  ,1                 ,148045.0938  ,9514705
+High      ,[R].SRTPSLPTPPTREPK.[K]                             ,                                  ,                                ,1               ,3     ,P10636-8                 ,P10636-8 [181-195]                  ,2                 ,698763648    ,36101344
+High      ,[K].STPTAEAEEAGIGDTPSLEDEAAGHVTQAR.[M]              ,                                  ,                                ,1               ,12    ,P10636-8                 ,P10636-8 [68-97]                    ,0                 ,725873185.9  ,5036078
+High      ,[K].SPVVSGDTSPR.[H]                                 ,                                  ,                                ,1               ,5     ,P10636-8                 ,P10636-8 [367-377]                  ,0                 ,1303747568   ,71492316
+High      ,[R].TPPKSPSSAK.[S]                                  ,                                  ,                                ,1               ,5     ,P10636-8                 ,P10636-8 [202-211]                  ,1                 ,66745472     ,41408951
+High      ,[K].TPPSSGEPPK.[S]                                  ,                                  ,                                ,1               ,5     ,P10636-8                 ,P10636-8 [152-161]                  ,0                 ,257468414.4  ,61879916
+High      ,[K].TPPSSGEPPKSGDR.[S]                              ,                                  ,                                ,1               ,6     ,P10636-8                 ,P10636-8 [152-165]                  ,1                 ,103032414.3  ,38342837
+High      ,[R].TPSLPTPPTREPK.[K]                               ,                                  ,                                ,1               ,8     ,P10636-8                 ,P10636-8 [183-195]                  ,1                 ,1604505144   ,80068010
+High      ,[K].TDHGAEIVYK.[S]                                  ,                                  ,                                ,1               ,5     ,P10636-8                 ,P10636-8 [357-366]                  ,0                 ,529712534    ,96010574
+High      ,[K].SKDGTGSDDKK.[A]                                 ,                                  ,                                ,1               ,1     ,P10636-8                 ,P10636-8 [102-112]                  ,2                 ,1939050.375  ,32759389
+High      ,[R].QEFEVMEDHAGTYGLGDRK.[D]                         ,1xOxidation [M6]                  ,                                ,1               ,16    ,P10636-8                 ,P10636-8 [6-24]                     ,1                 ,234584178.3  ,33165088
+High      ,[R].QEFEVMEDHAGTYGLGDRK.[D]                         ,                                  ,                                ,1               ,8     ,P10636-8                 ,P10636-8 [6-24]                     ,1                 ,352983975.5  ,95560066
+High      ,[R].QEFEVMEDHAGTYGLGDR.[K]                          ,1xOxidation [M6]                  ,                                ,1               ,24    ,P10636-8                 ,P10636-8 [6-23]                     ,0                 ,541478609    ,5099972
+High      ,[R].QEFEVMEDHAGTYGLGDR.[K]                          ,                                  ,                                ,1               ,8     ,P10636-8                 ,P10636-8 [6-23]                     ,0                 ,663580316    ,57096692
+High      ,[R].SGYSSPGSPGTPGSR.[S]                             ,                                  ,                                ,1               ,7     ,P10636-8                 ,P10636-8 [166-180]                  ,0                 ,1886451761   ,72762532
+High      ,[K].DQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK.[S] ,1xOxidation [M7]                  ,                                ,1               ,8     ,P10636-8                 ,P10636-8 [25-67]                    ,1                 ,242811709.3  ,34474881
+High      ,[K].DQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK.[S] ,                                  ,                                ,1               ,4     ,P10636-8                 ,P10636-8 [25-67]                    ,1                 ,263117809.9  ,68130546
+High      ,[K].DQGGYTMHQDQEGDTDAGLK.[E]                        ,1xOxidation [M7]                  ,                                ,1               ,15    ,P10636-8                 ,P10636-8 [25-44]                    ,0                 ,148222051.8  ,32337478
+High      ,[K].DQGGYTMHQDQEGDTDAGLK.[E]                        ,                                  ,                                ,1               ,5     ,P10636-8                 ,P10636-8 [25-44]                    ,0                 ,169405957.9  ,92807240
+High      ,[K].DNIKHVSGGGSVQIVYKPVDLSK.[V]                     ,                                  ,                                ,1               ,1     ,P10636-8                 ,P10636-8 [266-288]                  ,1                 ,             ,38907207
+High      ,[K].ESPLQTPTEDGSEEPGSETSDAK.[S]                     ,                                  ,                                ,1               ,9     ,P10636-8                 ,P10636-8 [45-67]                    ,0                 ,740036840    ,99535953
+High      ,[K].AKTDHGAEIVYK.[S]                                ,                                  ,                                ,1               ,3     ,P10636-8                 ,P10636-8 [355-366]                  ,1                 ,652942514.6  ,84744142
+High      ,[R].HLSNVSSTGSIDMVDSPQLATLADEVSASLAK.[Q]            ,1xOxidation [M13]                 ,                                ,1               ,17    ,P10636-8                 ,P10636-8 [378-409]                  ,0                 ,465597371.5  ,29112938
+High      ,[K].HVSGGGSVQIVYKPVDLSK.[V]                         ,                                  ,                                ,1               ,56    ,P10636-8                 ,P10636-8 [270-288]                  ,0                 ,617176649.5  ,33872557
+High      ,[R].HLSNVSSTGSIDMVDSPQLATLADEVSASLAK.[Q]            ,                                  ,                                ,1               ,13    ,P10636-8                 ,P10636-8 [378-409]                  ,0                 ,549771859.3  ,44853427
+High      ,[R].KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK.[S],1xOxidation [M8]                  ,                                ,1               ,2     ,P10636-8                 ,P10636-8 [24-67]                    ,2                 ,132546086.5  ,98766340
+High      ,[R].KDQGGYTMHQDQEGDTDAGLK.[E]                       ,1xOxidation [M8]                  ,                                ,1               ,12    ,P10636-8                 ,P10636-8 [24-44]                    ,1                 ,68147217.5   ,6462453
+High      ,[R].KDQGGYTMHQDQEGDTDAGLK.[E]                       ,                                  ,                                ,1               ,2     ,P10636-8                 ,P10636-8 [24-44]                    ,1                 ,47451437.75  ,34182396
+High      ,[R].KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK.[S],                                  ,                                ,1               ,3     ,P10636-8                 ,P10636-8 [24-67]                    ,2                 ,25293893.63  ,63120561
+High      ,[K].SRLQTAPVPMPDLK.[N]                              ,                                  ,                                ,1               ,1     ,P10636-8                 ,P10636-8 [212-225]                  ,1                 ,5722389      ,33652639
+High      ,[R].IPAKTPPAPK.[T]                                  ,                                  ,                                ,1               ,1     ,P10636-8                 ,P10636-8 [142-151]                  ,1                 ,2015586.75   ,38656450
+High      ,[K].CGSLGNIHHKPGGGQVEVK.[S]                         ,1xCarbamidomethyl [C1]            ,                                ,1               ,3     ,P10636-8                 ,P10636-8 [293-311]                  ,0                 ,218379137.5  ,63191884
+High      ,[K].IGSTENLK.[H]                                    ,                                  ,                                ,1               ,5     ,P10636-8                 ,P10636-8 [231-238]                  ,0                 ,887492361    ,95150178
+High      ,[R].TPSLPTPPTR.[E]                                  ,                                  ,                                ,1               ,4     ,P10636-8                 ,P10636-8 [183-192]                  ,0                 ,1085564561   ,84114105
+High      ,[R].SRTPSLPTPPTR.[E]                                ,                                  ,                                ,1               ,1     ,P10636-8                 ,P10636-8 [181-192]                  ,1                 ,308283264    ,78590822
+High      ,[K].SKIGSTENLKHQPGGGK.[V]                           ,                                  ,                                ,1               ,1     ,P10636-8                 ,P10636-8 [229-245]                  ,2                 ,551345.375   ,38888975
+High      ,[K].GADGKTKIATPR.[G]                                ,                                  ,                                ,1               ,1     ,P10636-8                 ,P10636-8 [115-126]                  ,2                 ,593317.6875  ,45036840
+High      ,[K].SEKLDFKDR.[V]                                   ,                                  ,                                ,1               ,2     ,P10636-8                 ,P10636-8 [312-320]                  ,2                 ,32312068     ,53312265
+High      ,[KR].CGSKDNIK.[H]                                   ,1xCarbamidomethyl [C1]            ,                                ,2               ,1     ,P10636-8; P27546         ,P10636-8 [262-269]; P27546 [981-988],1                 ,4449589      ,82881130
--- a/tests/testConfig.toml
+++ b/tests/testConfig.toml
@@ -13,18 +13,18 @@ title="Abundance Parser Configuration"
    #input_files=[ "/input/file1",
    #              path to file2
    #             ]
-    input_files=["tests/sampleInput.csv"]
+    input_files=["tests/sample2.csv"]
    #fasta file(s) containing the protein sequence(s) to align against
    prot_seq_fasta=["data/2N4R_wt_tau.fasta","data/1N4RP301STau.fasta"]


 [output]
    #Relative or absolute path to desired output directory
-    output_directory="output/"
+    output_directory="output/testOutput/"

    #this stub is prepended by the file name of the input file
    residue_output_name_stub="residueModificationAnalysis"
-    peptide_output_name_stub='peptideModificationAnalusis'
+    peptide_output_name_stub='peptideModificationAnalysis'

 #Configuration of parser settings
 [parser_config]
@@ -33,8 +33,8 @@ title="Abundance Parser Configuration"
    using_fileID_column=false
    #Title of column containing abundance. If using_fileID_column is set to
    #true, only the first entry will be used
-    abundance_col_titles=["Abundance :F1"]
-    calculate_peptide_modifications=false
+    abundance_col_titles=["Abundance :F1", "Abundance:F2"]
+    calculate_peptide_modifications=true

    [parser_config.regex]
        regex_file="data/parser_regex.toml"