Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
BICF
KCE
kce_etl
Commits
f4c3ba50
Commit
f4c3ba50
authored
Oct 19, 2020
by
Venkat Malladi
Browse files
Merge branch '30-genomics_ihc' into 'master'
Resolve "Update cancer gene" Closes
#30
See merge request
!12
parents
2d66e0c7
55155ee6
Pipeline
#8204
failed with stage
Changes
4
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
scripts/transform_germline_mutations.py
View file @
f4c3ba50
...
...
@@ -64,6 +64,19 @@ def convert_number(merged, significance):
return
merged
def reformat_gene(merged):
    '''Label every gene name with its assay type for the database.

    Each value in the 'Gene' column is cast to a string and suffixed with
    '(Sequencing)'. Values that start with 'IHC' followed by word
    characters are then rewritten as '<name>(IHC)', dropping the 'IHC'
    prefix.

    The 'Gene' column of merged is overwritten in place and the same
    DataFrame is returned.
    '''

    def _as_ihc(match):
        # Keep only the gene name captured after the 'IHC' prefix.
        return match.group('one') + '(IHC)'

    ihc_pattern = r"^IHC(?P<one>\w+)\((Sequencing\))"

    # Default every gene to the sequencing label first ...
    labelled = merged['Gene'].astype(str) + '(Sequencing)'
    # ... then switch the IHC-prefixed ones over to the IHC label.
    merged['Gene'] = labelled.str.replace(ihc_pattern, _as_ihc, regex=True)
    return merged
def
calculate_shift
(
cancer_gene
,
date_shift
):
'''Shift Date for start date'''
...
...
@@ -117,8 +130,11 @@ def main():
# Convert missing Number
fix_number
=
convert_number
(
cancer_gene_reformat
,
significance_map
)
# Convert gene names
fix_gene
=
reformat_gene
(
fix_number
)
# Calculate Date Shift
shifted_df
=
calculate_shift
(
fix_
number
,
date_shift
)
shifted_df
=
calculate_shift
(
fix_
gene
,
date_shift
)
# Write out cancer gene table
shifted_df
.
to_csv
(
cancer_gene_table
,
index
=
False
)
...
...
scripts/transform_patients.py
View file @
f4c3ba50
...
...
@@ -109,7 +109,10 @@ def calculate_shift(merged):
merged
[
'BIRTH_DATE'
]
=
pd
.
to_datetime
(
merged
[
'BIRTH_DATE'
])
merged
[
'DEATH_DATE'
]
=
pd
.
to_datetime
(
merged
[
'DEATH_DATE'
])
merged
[
'Date of Last Contact-Date'
]
=
pd
.
to_datetime
(
merged
[
'Date of Last Contact-Date'
])
merged
[
'Date of Diagnosis'
]
=
pd
.
to_datetime
(
merged
[
'Date of Diagnosis'
])
if
'Date of Diagnosis'
in
merged
.
columns
:
merged
[
'Date of Diagnosis'
]
=
pd
.
to_datetime
(
merged
[
'Date of Diagnosis'
])
else
:
merged
[
'Date of Diagnosis'
]
=
pd
.
NaT
# Calculate date shift relative to 1800/01/01
...
...
tests/test_transform_germline.py
View file @
f4c3ba50
...
...
@@ -112,3 +112,11 @@ def test_check_convert_number_5(germline_5, significance_map):
converted_germline
=
transform_germline_mutations
.
convert_number
(
transform_germline
,
significance_map
)
row_selection
=
converted_germline
[
converted_germline
[
'Gene'
]
==
'SDHC'
].
index
.
item
()
assert
converted_germline
.
loc
[
row_selection
,
'Number'
]
==
'Pending'
@pytest.mark.unit
def test_check_reformat_gene(germline_5):
    '''Check that the SDHC gene is labelled with the (Sequencing) suffix.'''
    reformatted = transform_germline_mutations.reformat_record(germline_5)
    relabelled = transform_germline_mutations.reformat_gene(reformatted)
    # Select the one row whose gene name starts with SDHC; .item() fails
    # unless exactly one row matches.
    is_sdhc = relabelled['Gene'].str.match(r'SDHC') == True
    sdhc_index = relabelled[is_sdhc].index.item()
    assert relabelled.loc[sdhc_index, 'Gene'] == 'SDHC(Sequencing)'
tests/transform/germline_gene_table.csv
View file @
f4c3ba50
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment