Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
BICF
KCE
kce_etl
Commits
7d991f6f
Commit
7d991f6f
authored
Nov 17, 2019
by
Venkat Malladi
Browse files
First pass at medication and supplemental medication tables.
#4
#5
.
parent
d8d3a003
Changes
8
Expand all
Hide whitespace changes
Inline
Side-by-side
scripts/sql_queries/medications.sql
0 → 100644
View file @
7d991f6f
-- Medication observations from kidney_obs_fact for patients listed in
-- "MRN_Filter_KCE", restricted to clarity_medication entries whose name or
-- generic name matches one of the drug patterns below.
--
-- NOTE(review): the two inner joins below compare columns of the fact table
-- (obs.PATIENT_NUM), so rows that the right join keeps without a matching
-- observation are discarded again; the result is the same as an inner join.
-- Confirm whether medications without observations were meant to be kept.
select
    pt.MRN,
    med.*,
    obs.CONCEPT_CD,
    obs.START_DATE,
    obs.END_DATE,
    obs.VALTYPE_CD,
    obs.TVAL_CHAR,
    obs.NVAL_NUM,
    obs.VALUEFLAG_CD,
    obs.UNITS_CD,
    obs.MODIFIER_CD,
    obs.OBSERVATION_BLOB
from kidney_obs_fact obs
right join (
    select
        cm.name,
        cm.generic_name,
        cm.medication_id,
        cm.investigatl_med_yn
    from clarity_medication cm
    where (
        -- Matched on name or generic name.
           lower(cm.name) like '%aldesleukin%'
        or lower(cm.generic_name) like '%aldesleukin%'
        or lower(cm.name) like '%atezolizumab%'
        or lower(cm.generic_name) like '%atezolizumab%'
        or lower(cm.name) like '%avelumab%'
        or lower(cm.generic_name) like '%avelumab%'
        or lower(cm.name) like '%axitinib%'
        or lower(cm.generic_name) like '%axitinib%'
        or lower(cm.name) like '%bevacizumab%'
        or lower(cm.generic_name) like '%bevacizumab%'
        or lower(cm.name) like '%cabozantinib%'
        or lower(cm.generic_name) like '%cabozantinib%'
        or lower(cm.name) like '%everolimus%'
        or lower(cm.generic_name) like '%everolimus%'
        -- NOTE(review): unlike every other pattern, this one has no leading
        -- wildcard, so only values that START with the text match. Confirm.
        or lower(cm.name) like 'interferon alfa-2%'
        or lower(cm.generic_name) like 'interferon alfa-2%'
        or lower(cm.name) like '%ipilimumab%'
        or lower(cm.generic_name) like '%ipilimumab%'
        or lower(cm.name) like '%lenvatinib%'
        or lower(cm.generic_name) like '%lenvatinib%'
        or lower(cm.name) like '%nivolumab%'
        or lower(cm.generic_name) like '%nivolumab%'
        or lower(cm.name) like '%pazopanib%'
        or lower(cm.generic_name) like '%pazopanib%'
        or lower(cm.name) like '%pembrolizumab%'
        or lower(cm.generic_name) like '%pembrolizumab%'
        or lower(cm.name) like '%sirolimus%'
        or lower(cm.generic_name) like '%sirolimus%'
        or lower(cm.name) like '%sorafenib%'
        or lower(cm.generic_name) like '%sorafenib%'
        or lower(cm.name) like '%sunitinib%'
        or lower(cm.generic_name) like '%sunitinib%'
        -- Matched on name only.
        or lower(cm.name) like '%cpi-444%'
        or lower(cm.name) like '%nktr%'
        or lower(cm.name) like '%pt2385%'
        or lower(cm.name) like '%rad001%'
        -- Matched on name or generic name.
        or lower(cm.name) like '%denosumab%'
        or lower(cm.generic_name) like '%denosumab%'
        or lower(cm.name) like '%zoledronic acid%'
        or lower(cm.generic_name) like '%zoledronic acid%'
    )
) med
    -- Fact-table concept codes are the medication id prefixed with 'MED:'.
    on obs.concept_cd = 'MED:' || med.medication_id
inner join KIDNEY_MRN_LIST pt
    on obs.PATIENT_NUM = pt.PATIENT_NUM
inner join "MRN_Filter_KCE" kce
    on pt.MRN = kce.MRN
;
scripts/transform_medications.py
0 → 100644
View file @
7d991f6f
#!/usr/bin/env python3
'''Generate Medications Tables'''
import argparse
import datetime
import os
from html.parser import HTMLParser

import numpy as np
import pandas as pd
# Text passed to argparse as the help epilog; argparse substitutes the
# program name for %(prog)s when it formats the help output.
EPILOG = '''
For more details:
%(prog)s --help
'''
def get_args():
    '''Build the command-line parser and return the parsed arguments.

    All five options are required.  The returned namespace exposes them as
    ``file``, ``supportive``, ``medsmap``, ``date`` and ``out``.
    '''
    cli = argparse.ArgumentParser(
        description=__doc__,
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # (short flag, long flag, help text) for every required option.
    required_options = (
        ('-f', '--file', "Epic File (csv format)."),
        ('-s', '--supportive', "Supportive Meds Filter (tsv format)."),
        ('-m', '--medsmap', "Medications mapping (csv format)."),
        ('-d', '--date', "Date Shift (tsv format)."),
        ('-o', '--out', "The output path (csv format)."),
    )
    for short_flag, long_flag, help_text in required_options:
        cli.add_argument(short_flag, long_flag, help=help_text, required=True)

    return cli.parse_args()
def filter_supplemental(epic, sup, meds_map):
    '''Split Epic medication rows into supportive, mapped and unmapped sets.

    Rows are classified by looking up their ``CONCEPT_CD`` in the ``ID``
    column of each mapping table.

    Args:
        epic: DataFrame of medication rows with a ``CONCEPT_CD`` column.
        sup: supportive-medication mapping with an ``ID`` column.
        meds_map: medication mapping with an ``ID`` column.

    Returns:
        A tuple ``(supmeds_named, meds_named, missing_meds)``: the rows found
        in ``sup`` joined to ``sup``, the rows found in ``meds_map`` joined
        to ``meds_map``, and the rows of ``epic`` found in neither mapping.
    '''
    concept_codes = epic['CONCEPT_CD']
    in_supportive = concept_codes.isin(sup['ID'].tolist())
    in_meds_map = concept_codes.isin(meds_map['ID'].tolist())

    # Rows whose concept code is absent from both mappings.
    unmapped_rows = epic[~in_meds_map & ~in_supportive]

    # Attach the mapping columns to the matched rows; a concept code listed
    # more than once in a mapping yields one output row per listing.
    mapped_named = epic[in_meds_map].merge(
        meds_map, left_on='CONCEPT_CD', right_on='ID')
    supportive_named = epic[in_supportive].merge(
        sup, left_on='CONCEPT_CD', right_on='ID')

    return supportive_named, mapped_named, unmapped_rows
def calculate_supp_frequency(supp_meds):
    '''Count supportive-medication events per patient and medication.

    Only rows whose ``MODIFIER_CD`` is ``RX|ADMIN`` or ``RX|FILLED`` are
    counted.

    Args:
        supp_meds: DataFrame with ``MRN``, ``Meds``, ``MODIFIER_CD`` and
            ``START_DATE`` columns.

    Returns:
        DataFrame with one row per (``MRN``, ``Meds``) pair that occurs at
        least once, holding the number of rows in ``Frequency`` and the
        smallest ``START_DATE`` of the pair in ``START_DATE``.
    '''
    # Keep administered and filled entries only.
    meds_admin = supp_meds[
        supp_meds['MODIFIER_CD'].isin(['RX|ADMIN', 'RX|FILLED'])]

    # Count rows per MRN x medication.  Combinations that never occur are
    # turned into NaN and dropped explicitly, so the result does not depend
    # on whether stack() drops missing values by itself.
    counts = pd.crosstab(meds_admin.MRN, meds_admin.Meds).replace(0, np.nan)
    meds_frequency = counts.stack().dropna().reset_index().\
        rename(columns={0: 'Frequency'})

    # Earliest START_DATE per MRN x medication.
    meds_date = meds_admin.groupby(['MRN', 'Meds']).\
        agg({'START_DATE': 'min'}).reset_index()

    # Join on the shared MRN/Meds columns.
    return meds_frequency.merge(meds_date)
# Blob tags copied into the output, in output-column order.
_BLOB_TAGS = (
    'ordering_date', 'start_date', 'end_date', 'quantity', 'sig', 'refills',
    'frequency', 'pcori_freq', 'dispense_date', 'dispense_amt',
    'dispense_sup', 'ndc',
)


class _BlobTextCollector(HTMLParser):
    '''Record the text inside the first occurrence of every tag in a blob.'''

    def __init__(self):
        super().__init__()
        self.first_text = {}   # tag name -> list of text fragments
        self._open_tags = []   # stack of (tag name, fragment list or None)

    def handle_starttag(self, tag, attrs):
        # Only the first occurrence of a tag name collects text.
        fragments = None
        if tag not in self.first_text:
            fragments = self.first_text[tag] = []
        self._open_tags.append((tag, fragments))

    def handle_endtag(self, tag):
        # Close the innermost open tag of that name together with anything
        # left open inside it; a stray closing tag is ignored.
        for depth in range(len(self._open_tags) - 1, -1, -1):
            if self._open_tags[depth][0] == tag:
                del self._open_tags[depth:]
                return

    def handle_data(self, data):
        # Text belongs to every enclosing tag that is still collecting.
        for _, fragments in self._open_tags:
            if fragments is not None:
                fragments.append(data)


def _blob_fields(blob):
    '''Return ``{tag name: text}`` for the first occurrence of each tag.'''
    collector = _BlobTextCollector()
    collector.feed(str(blob))
    collector.close()  # flush text still buffered by the parser
    return {tag: ''.join(parts)
            for tag, parts in collector.first_text.items()}


def parse_blob(meds):
    '''Flatten the tagged values of ``OBSERVATION_BLOB`` into columns.

    The blob is parsed with the standard-library ``HTMLParser``, which
    reports tag names in lower case.  Each tag listed in ``_BLOB_TAGS``
    contributes the text of its first occurrence; a tag that is absent, or a
    row whose blob is null, yields an empty string.

    Args:
        meds: DataFrame with ``MRN``, ``Meds``, ``MODIFIER_CD``,
            ``START_DATE`` and ``OBSERVATION_BLOB`` columns.

    Returns:
        DataFrame with one row per input row and the columns ``mrn``,
        ``name``, ``order_type``, ``date``, ``ordering_date``, ``start``,
        ``end``, ``quantity``, ``sig``, ``refills``, ``frequency``,
        ``pcori_freq``, ``dispense_date``, ``dispanse_amt``,
        ``dispense_sup`` and ``ndc``.
    '''
    meds_tups = []
    for _, row in meds.iterrows():
        blob = row['OBSERVATION_BLOB']
        fields = _blob_fields(blob) if pd.notnull(blob) else {}
        meds_tups.append(
            (row['MRN'], row['Meds'], row['MODIFIER_CD'], row['START_DATE'])
            + tuple(fields.get(tag, '') for tag in _BLOB_TAGS))

    # NOTE(review): 'dispanse_amt' looks like a misspelling of
    # 'dispense_amt'; it is kept unchanged because renaming an output column
    # could break consumers of this frame.
    meds_columns = ["mrn", 'name', 'order_type', "date", 'ordering_date',
                    "start", "end", "quantity", 'sig', 'refills',
                    'frequency', 'pcori_freq', 'dispense_date',
                    'dispanse_amt', 'dispense_sup', 'ndc']
    return pd.DataFrame(meds_tups, columns=meds_columns)
def calculate_regime(meds, pull_date=None):
    '''Summarise each patient's medications as start date, end date, duration.

    Args:
        meds: DataFrame accepted by ``parse_blob``.
        pull_date: cutoff for believable end dates; an end date later than
            this is treated as unknown.  Defaults to 2019-01-01.

    Returns:
        DataFrame with one row per (``mrn``, ``name``) and the columns
        ``mrn``, ``name``, ``Start_date``, ``End_date`` and ``Duration``
        (whole days, stored as float so it can hold NaN).  ``End_date`` and
        ``Duration`` are both empty when the duration is 0 days or when the
        end date lies after ``pull_date``.
    '''
    if pull_date is None:
        pull_date = datetime.datetime(2019, 1, 1)

    meds_transformed = parse_blob(meds)

    # Convert every date-like column; empty strings become NaT.
    for column in ('date', 'ordering_date', 'start', 'end', 'dispense_date'):
        meds_transformed[column] = pd.to_datetime(meds_transformed[column])

    # Earliest/latest observation date plus earliest start and latest end
    # found in the blobs, per patient and medication.
    regime = meds_transformed.groupby(['mrn', 'name'], as_index=False).agg(
        {'date': ['min', 'max'], 'start': ['min'], 'end': ['max']})
    regime.columns = ['mrn', 'name', 'date_start', 'date_end', 'start', 'end']

    # The regime spans the extremes of all four candidate dates.
    candidates = regime[['date_start', 'date_end', 'start', 'end']]
    regime['Start_date'] = candidates.min(axis=1)
    regime['End_date'] = candidates.max(axis=1)
    regime['Duration'] = \
        (regime['End_date'] - regime['Start_date']).dt.days.astype(float)

    # Work on a copy so the .loc assignments below modify this frame itself.
    onco_meds_reformated = regime[
        ['mrn', 'name', 'Start_date', 'End_date', 'Duration']].copy()

    # Compute the mask once, before anything is cleared: clearing End_date
    # first and then testing ``End_date > pull_date`` again would never
    # match, leaving a stale Duration behind.
    unknown_end = (onco_meds_reformated['Duration'] == 0) | \
        (onco_meds_reformated['End_date'] > pull_date)
    onco_meds_reformated.loc[unknown_end, 'End_date'] = pd.NaT
    onco_meds_reformated.loc[unknown_end, 'Duration'] = np.nan

    return onco_meds_reformated.reset_index(drop=True)
def calculate_shift(labs, date_shift):
    '''Add each patient's date shift to ``START_DATE``.

    Both arguments are modified in place: ``date_shift['Shift']`` is
    converted to timedeltas (the values are read as seconds) and
    ``labs['START_DATE']`` is converted to datetimes.

    Args:
        labs: DataFrame with ``MRN`` and ``START_DATE`` columns.
        date_shift: DataFrame with ``MRN`` and ``Shift`` columns.

    Returns:
        New DataFrame holding the rows of ``labs`` whose ``MRN`` appears in
        ``date_shift``, with ``START_DATE`` moved by the patient's shift,
        the ``Shift`` column removed and ``MRN`` cast to ``int``.
    '''
    # Parse the raw values in place.
    date_shift['Shift'] = pd.to_timedelta(date_shift['Shift'], unit='s')
    labs['START_DATE'] = pd.to_datetime(labs['START_DATE'])

    # Inner join: rows without a shift entry are dropped.
    shifted = labs.merge(date_shift, on='MRN', how='inner')
    shifted['START_DATE'] = shifted['START_DATE'] + shifted['Shift']

    # The shift itself must not appear in the output.
    shifted = shifted.drop(columns=['Shift'])
    shifted['MRN'] = shifted['MRN'].astype(int)
    return shifted
def main():
    '''Read the input tables, build the medication tables and write them out.

    All four inputs are read with the ``pd.read_csv`` defaults.  Three files
    are written: ``med_table.csv``, ``unmapped_med_table.csv`` and
    ``suppmed_table.csv``.
    '''
    args = get_args()

    # Output names are appended directly to the --out value; no path
    # separator is inserted.
    # NOTE(review): --out therefore has to end with a separator to name a
    # directory - confirm this is intended.
    out_path = args.out
    meds_table = out_path + 'med_table.csv'
    unmapped_table = out_path + 'unmapped_med_table.csv'
    suppmeds_table = out_path + 'suppmed_table.csv'

    # Load the inputs.
    epic_df = pd.read_csv(args.file)
    supmedsmap_df = pd.read_csv(args.supportive)
    medsmap_df = pd.read_csv(args.medsmap)
    date_shift = pd.read_csv(args.date)

    # Split the Epic rows into supportive, mapped and unmapped medications.
    supp_meds, onco_meds, missing_meds = filter_supplemental(
        epic_df, supmedsmap_df, medsmap_df)

    # Frequency of supportive medications.
    meds_freq = calculate_supp_frequency(supp_meds)

    # Start and end dates of the mapped medications.
    # Need to include Follow-up Date
    onco_meds_regime = calculate_regime(onco_meds)

    # NOTE(review): the shifted frame returned here is never written; the
    # unshifted meds_freq is written below.  The call is kept because
    # calculate_shift converts meds_freq['START_DATE'] to datetimes in
    # place, which affects the written file.  Confirm whether the shifted
    # frame should be written instead.
    calculate_shift(meds_freq, date_shift)

    # Write the three tables without the index column.
    outputs = (
        (meds_freq, suppmeds_table),
        (onco_meds_regime, meds_table),
        (missing_meds, unmapped_table),
    )
    for frame, destination in outputs:
        frame.to_csv(destination, index=False)
# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    main()
standards/cancer_meds_map.csv
0 → 100644
View file @
7d991f6f
Meds,ID
Aldesleukin,MED:7846
Aldesleukin,MED:10672
Aldesleukin,MED:43802
Aldesleukin,MED:61700
Aldesleukin,MED:105635
Aldesleukin,MED:121140
Aldesleukin,MED:161431
Aldesleukin,MED:730442
Aldesleukin,MED:852524
Aldesleukin,MED:852525
Aldesleukin,MED:852526
Atezolizumab,MED:731401
Atezolizumab,MED:853026
Atezolizumab,MED:853120
Avelumab,MED:731859
Avelumab,MED:852013
Avelumab,MED:853172
Axitinib,MED:247563
Axitinib,MED:247564
Axitinib,MED:247682
Axitinib,MED:247683
Axitinib,MED:247876
Axitinib,MED:247974
Axitinib,MED:853099
Bevacizumab,MED:69065
Bevacizumab,MED:69067
Bevacizumab,MED:69324
Bevacizumab,MED:730448
Bevacizumab,MED:730888
Bevacizumab,MED:852245
Bevacizumab,MED:852278
Bevacizumab,MED:852573
Cabozantinib,MED:253271
Cabozantinib,MED:253273
Cabozantinib,MED:253392
Cabozantinib,MED:253394
Cabozantinib,MED:253578
Cabozantinib,MED:253596
Cabozantinib,MED:267418
Cabozantinib,MED:267419
Cabozantinib,MED:267420
Cabozantinib,MED:267422
Cabozantinib,MED:267423
Cabozantinib,MED:267424
Cabozantinib,MED:267528
CPI-444,MED:853024
Everolimus,MED:234836
Everolimus,MED:234837
Everolimus,MED:234843
Everolimus,MED:234844
Everolimus,MED:235136
Everolimus,MED:235199
Everolimus,MED:239612
Everolimus,MED:239613
Everolimus,MED:239614
Everolimus,MED:239615
Everolimus,MED:239616
Everolimus,MED:239617
Everolimus,MED:240573
Everolimus,MED:240574
Everolimus,MED:248182
Everolimus,MED:248239
Everolimus,MED:254968
Everolimus,MED:254976
Everolimus,MED:852158
Everolimus,MED:852427
IL-2,MED:10672
IL-2,MED:730442
IL-2,MED:852526
Interferon,MED:12108
Interferon,MED:12109
Interferon,MED:155209
Interferon,MED:155215
Interferon,MED:220970
Interferon,MED:23465
Interferon,MED:23497
Interferon,MED:27586
Interferon,MED:27595
Interferon,MED:28326
Interferon,MED:28327
Interferon,MED:28328
Interferon,MED:30197
Interferon,MED:4753
Interferon,MED:4754
Interferon,MED:4755
Interferon,MED:54858
Interferon,MED:54860
Interferon,MED:852532
Interferon,MED:97446
Interferon,MED:98144
Ipilimumab,MED:243828
Ipilimumab,MED:244146
Ipilimumab,MED:244227
Ipilimumab,MED:249027
Ipilimumab,MED:852537
Ipilimumab,MED:852766
Ipilimumab,MED:853032
Ipilimumab,MED:853174
Ipilimumab,MED:853253
Lenvatinib,MED:262146
Lenvatinib,MED:262147
Lenvatinib,MED:262159
Lenvatinib,MED:262160
Lenvatinib,MED:267829
Lenvatinib,MED:267830
Lenvatinib,MED:267856
Lenvatinib,MED:267857
Lenvatinib,MED:275251
Nivolumab,MED:261428
Nivolumab,MED:261429
Nivolumab,MED:261432
Nivolumab,MED:261496
Nivolumab,MED:261501
Nivolumab,MED:273507
Nivolumab,MED:731719
Nivolumab,MED:852764
Nivolumab,MED:852907
Nivolumab,MED:852977
Nivolumab,MED:852988
Nivolumab,MED:853030
Nivolumab,MED:853140
Nivolumab,MED:853167
Nivolumab,MED:853250
NKTR-214,MED:853361
NKTR-262,MED:853372
Pazopanib,MED:237450
Pazopanib,MED:237453
Pazopanib,MED:237614
Pazopanib,MED:237654
Pazopanib,MED:852748
Pembrolizumab,MED:260612
Pembrolizumab,MED:260627
Pembrolizumab,MED:261829
Pembrolizumab,MED:731190
Pembrolizumab,MED:852984
Pembrolizumab,MED:852994
Pembrolizumab,MED:853084
Pembrolizumab,MED:853098
Pembrolizumab,MED:853322
PT2385,MED:852791
Sirolimus,MED:107783
Sirolimus,MED:239512
Sirolimus,MED:239534
Sirolimus,MED:30774
Sirolimus,MED:35686
Sirolimus,MED:36020
Sirolimus,MED:40437
Sirolimus,MED:40441
Sirolimus,MED:62768
Sirolimus,MED:64482
Sorafenib,MED:226829
Sorafenib,MED:79714
Sorafenib,MED:79722
Sorafenib,MED:79825
Sunitinib,MED:227223
Sunitinib,MED:259330
Sunitinib,MED:259336
Sunitinib,MED:80076
Sunitinib,MED:80077
Sunitinib,MED:80082
Sunitinib,MED:80083
Sunitinib,MED:80084
Sunitinib,MED:80155
Sunitinib,MED:80276
Sunitinib,MED:853096
Temsirolimus,MED:730376
Temsirolimus,MED:92164
Temsirolimus,MED:92331
Temsirolimus,MED:92570
standards/supportive_meds_filter.tsv
0 → 100644
View file @
7d991f6f
Meds,ID
Denosumab,MED:239980
Denosumab,MED:239981
Denosumab,MED:240075
Denosumab,MED:240208
Denosumab,MED:240377
Denosumab,MED:242251
Denosumab,MED:242257
Denosumab,MED:242491
Denosumab,MED:852340
Denosumab,MED:852341
Zoledronic Acid,MED:35688
Zoledronic Acid,MED:36138
Zoledronic Acid,MED:40851
Zoledronic Acid,MED:40854
Zoledronic Acid,MED:68981
Zoledronic Acid,MED:68987
Zoledronic Acid,MED:90803
Zoledronic Acid,MED:91157
Zoledronic Acid,MED:91335
Zoledronic Acid,MED:115080
Zoledronic Acid,MED:155942
Zoledronic Acid,MED:155998
Zoledronic Acid,MED:163172
Zoledronic Acid,MED:164546
Zoledronic Acid,MED:229262
Zoledronic Acid,MED:246007
Zoledronic Acid,MED:246013
Zoledronic Acid,MED:253243
Zoledronic Acid,MED:256033
Zoledronic Acid,MED:256768
Zoledronic Acid,MED:271306
Zoledronic Acid,MED:730414
Zoledronic Acid,MED:731784
Zoledronic Acid,MED:50080112
Zoledronic Acid,MED:50080155
tests/data/medications.csv
0 → 100644
View file @
7d991f6f
This diff is collapsed.
Click to expand it.
tests/transform/med_table.csv
0 → 100644
View file @
7d991f6f
mrn,name,Start_date,End_date,Duration
88,Sorafenib,2010-04-04 00:00:00,2011-01-26 00:00:00,297.0
313,Everolimus,2010-05-14 00:00:00,2011-06-05 21:00:00,387.0
313,Sunitinib,2009-11-15 00:00:00,2010-09-29 00:00:00,318.0
313,Temsirolimus,2009-03-08 00:00:00,2009-12-27 08:37:00,294.0
822,Aldesleukin,2015-03-15 00:00:00,2015-10-07 05:07:00,206.0
822,Everolimus,2016-05-17 00:00:00,2017-01-04 00:00:00,232.0
822,IL-2,2015-03-15 00:00:00,2015-10-07 05:07:00,206.0
822,Nivolumab,2016-11-09 00:00:00,2017-04-19 00:00:00,161.0
822,Sunitinib,2016-02-28 00:00:00,2016-07-05 00:00:00,128.0
903,Sunitinib,2015-02-08 00:00:00,2016-05-17 00:00:00,464.0
934,Pazopanib,2017-12-05 00:00:00,2018-12-05 00:00:00,365.0
tests/transform/suppmed_table.csv
0 → 100644
View file @
7d991f6f
MRN,Meds,Frequency,START_DATE
313,Denosumab,2.0,2009-03-22 15:20:00
903,Zoledronic Acid,3.0,2015-03-22 10:45:00
tests/transform/unmapped_med_table.csv
0 → 100644
View file @
7d991f6f
MRN,NAME,GENERIC_NAME,CONCEPT_CD,START_DATE,END_DATE,MODIFIER_CD,TVAL_CHAR,UNITS_CD,OBSERVATION_BLOB
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment