Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
A
abundanceparser
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Requirements
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Container Registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
vish_joachimiak_lab
abundanceparser
Commits
64821ef1
Commit
64821ef1
authored
5 years ago
by
Vishruth Mullapudi
Browse files
Options
Downloads
Patches
Plain Diff
outlined non-master localization parsing
parent
68c663db
1 merge request
!1
Fix unlocalized peptide mods not adding
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
main.py
+18
-6
18 additions, 6 deletions
main.py
with
18 additions
and
6 deletions
main.py
+
18
−
6
View file @
64821ef1
import
re
from
collections
import
namedtuple
from
typing
import
List
from
typing
import
List
,
Tuple
import
pandas
as
pd
import
toml
...
...
@@ -23,12 +23,18 @@ def main():
input_data
:
List
[
FileTuple
]
=
ingestfiledata
(
files
=
input_files
)
protein_seqrecords
:
List
[
SeqRecord
]
=
getproteinsequences
(
protein_fasta_files
)
data
=
[
genrawsequences
(
ftuple
)
for
ftuple
in
input_data
]
localization_col_titles
=
[]
localized_data
=
[]
if
use_mod_in_master_prot
:
localized_data
=
(
parsemasterlocalizations
(
ftuple
)
for
ftuple
in
data
)
localized_data
+=
[
parsemasterlocalizations
(
ftuple
)
for
ftuple
in
data
]
localization_col_titles
+=
[
'
master_localized_mods
'
]
print
(
"
Localized:
"
)
print
(
list
(
localized_data
))
else
:
localized_data
=
(
parseprotlocalizations
(
ftuple
,
protein_seqrecords
)
for
ftuple
in
data
)
print
(
"
Localized:
"
)
print
(
list
(
localized_data
))
for
ftuple
in
data
:
file_headers_tuple
=
parseprotlocalizations
(
ftuple
,
protein_seqrecords
)
localized_data
+=
file_headers_tuple
[
0
]
# add the filetuple to the list of localized filetuples
localization_col_titles
+=
file_headers_tuple
[
1
]
# add the list of column titles of the localizations
def
ingestfiledata
(
files
:
List
[
str
])
->
List
[
FileTuple
]:
...
...
@@ -69,8 +75,10 @@ def genrawsequences(ftuple: FileTuple) -> FileTuple:
def
parsemasterlocalizations
(
ftuple
:
FileTuple
)
->
FileTuple
:
# todo: other PTMs
# todo: file specified regex string
# matches a serine, threonine, or tyrosine residue (S/T/Y) followed by 0 or more digits
# this provides support for unlocalized PTMs, where only the amino acid is present and not the localization
# regex = eval(r"r'[STY]([\d]{0,})'")  <- this syntax can be used to read the regex in from a string
regex
=
r
'
[STY]([\d]{0,})
'
file_data
=
ftuple
.
FileData
localizations
=
[]
...
...
@@ -94,8 +102,12 @@ def parsemasterlocalizations(ftuple: FileTuple) -> FileTuple:
return
FileTuple
(
ftuple
.
FileName
,
file_data
)
def
parseprotlocalizations
(
ftuple
:
FileTuple
,
protein_seqrecords
:
list
)
->
FileTuple
:
def
parseprotlocalizations
(
ftuple
:
FileTuple
,
protein_seqrecords
:
list
)
->
Tuple
[
FileTuple
,
List
[
str
]]
:
# TODO
# parses out the localizations of the PTM by aligning each modified fragment to the protein sequences given in
# protein_seqrecords, parsing out the modification index in each protein fragment, and using the fragment's index in
# the full protein as an offset to calculate the localization's index in the full protein.
# returns a tuple of (filetuple, list of localization column titles)
pass
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment