Commit 5bf27624 authored by Venkat Malladi
Browse files

Add in vitals conversion.

parent 2f4a2b91
Pipeline #4941 passed with stage
in 39 seconds
#!/usr/bin/env python3
'''Generate Vitals'''
import argparse
import os
import re
import pandas as pd
# Text passed as the argparse epilog in get_args(); argparse replaces
# %(prog)s with the program name when the help is rendered.
EPILOG = '''
For more details:
%(prog)s --help
'''
def get_args():
    '''Define and parse the command-line arguments.

    All four options are required:
      -f/--file     vitals file
      -u/--unitmap  vitals unit map
      -d/--date     date-shift file
      -o/--out      output path

    Returns the argparse namespace with attributes ``file``, ``unitmap``,
    ``date`` and ``out``.
    '''
    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-f', '--file',
                        help="Vitals File (csv format).",
                        required=True)

    # This script maps vitals, not labs, so the help text says so.
    parser.add_argument('-u', '--unitmap',
                        help="Vitals unitmap (csv format).",
                        required=True)

    parser.add_argument('-d', '--date',
                        help="Date Shift (tsv format).",
                        required=True)

    parser.add_argument('-o', '--out',
                        help="The output path (csv format).",
                        required=True)

    args = parser.parse_args()
    return args
def filter_recorded(vitals, unitmap):
    '''Filter for recorded vitals values.

    Keeps the rows of ``vitals`` whose NVAL_NUM is present and is not the
    9999999 dummy value, and whose CONCEPT_CD (with the prefix before ':'
    removed) is listed in ``unitmap.Vitals``.

    The result is a new DataFrame; ``vitals`` itself is left untouched.
    Columns are renamed by position to mrn, vital, date, value and
    value_units, so the input must have exactly five columns (otherwise
    pandas raises ValueError on the rename).
    '''
    # Convert CONCEPT_CD to remove VISIT. The .str accessor keeps the
    # original index, so rows stay aligned whatever index the input has,
    # and values without a ':' become NaN instead of raising.
    concept = vitals['CONCEPT_CD'].str.split(':').str[1]

    # Remove missing and dummy values
    values = vitals['NVAL_NUM']
    recorded = values.notna() & (values != 9999999)

    # Filter for only vitals we are tracking
    tracked = concept.isin(unitmap.Vitals)

    # assign() builds a new frame, so the caller's CONCEPT_CD is preserved.
    df_cleaned_tracked = vitals.assign(CONCEPT_CD=concept)[recorded & tracked].copy()

    vital_columns = ["mrn", "vital", "date",
                     "value", "value_units"]
    df_cleaned_tracked.columns = vital_columns
    return df_cleaned_tracked
def remove_outliers(vitals, measurement, iqr_multiplier=3):
    '''Remove outliers for a specific measurement.

    Rows whose ``vital`` equals ``measurement`` are dropped when their
    ``value`` lies more than ``iqr_multiplier`` interquartile ranges above
    the third quartile or below the first quartile of that measurement.
    Rows for every other measurement are returned unchanged.

    ``iqr_multiplier`` defaults to 3, the fence width used previously.
    If no row matches ``measurement`` the quartiles are NaN, no comparison
    succeeds, and nothing is removed.
    '''
    ##TODO: Make this patient specific
    # Calculate extremes
    is_measurement = vitals['vital'] == measurement
    measured = vitals.loc[is_measurement, 'value']
    q1 = measured.quantile(0.25)
    q3 = measured.quantile(0.75)
    iqr = q3 - q1
    upper = (iqr * iqr_multiplier) + q3
    lower = q1 - (iqr * iqr_multiplier)

    # Filter for extremes; '~' is the boolean inversion operator for masks.
    is_extreme = (vitals['value'] > upper) | (vitals['value'] < lower)
    filter_df = vitals[~(is_measurement & is_extreme)]
    return filter_df
def calculate_shift(vitals, date_shift):
    '''Shift each vitals date by the offset recorded for its MRN.

    ``date_shift`` supplies an ``MRN`` column and a ``Shift`` column that is
    interpreted as a number of seconds. ``vitals`` supplies ``mrn`` and
    ``date``. The two are inner-joined on MRN, so vitals rows without a
    matching shift are dropped.

    Returns a new DataFrame in which ``date`` is the shifted calendar date
    (time of day discarded), ``mrn`` is cast to int, and the helper ``Shift``
    and ``MRN`` columns are removed. Neither input frame is modified.
    '''
    # Convert to DateTime on copies so the caller's frames keep their
    # original column values.
    shifts = date_shift.assign(
        Shift=pd.to_timedelta(date_shift['Shift'], unit='s'))
    dated = vitals.assign(date=pd.to_datetime(vitals['date']))

    # Merge data
    merged = dated.merge(shifts, left_on='mrn', right_on='MRN', how='inner')

    # Calculate Date Shift and keep only the calendar date
    merged['date'] = (merged['date'] + merged['Shift']).dt.date

    # Drop Shift and MRN columns brought in by the merge
    merged = merged.drop(columns=['Shift', 'MRN'])

    # Int MRN
    merged['mrn'] = merged['mrn'].astype(int)
    return merged
def main():
    '''Run the vitals transformation from the command line.

    Reads the vitals, unit-map and date-shift files named by the arguments,
    keeps recorded values for the tracked vitals, applies the date shift
    matched on MRN, and writes the result to ``vital_table.csv`` under the
    output path.
    '''
    args = get_args()
    vitals = args.file
    unitmap = args.unitmap
    date = args.date
    out_path = args.out

    # Make output files. Pass the directory and file name as separate
    # components so a separator is inserted when out_path has no trailing
    # slash (plain concatenation produced e.g. 'outvital_table.csv').
    vital_table = os.path.join(out_path, 'vital_table.csv')

    # Read in files
    vitals_df = pd.read_csv(vitals)
    unitmap_df = pd.read_csv(unitmap)
    # NOTE(review): --date is described as tsv in the help text but is read
    # with the default comma separator -- confirm the real file format.
    date_shift = pd.read_csv(date)

    # Filter for true records data
    vitals_filtered = filter_recorded(vitals_df, unitmap_df)

    # Remove outliers
    ##TODO: Will need to update this code
    #for m in unitmap_df['Vitals']:
    #    temp = remove_outliers(vitals_filtered, m)
    #    vitals_filtered = temp

    # Calculate Date Shift
    shifted_df = calculate_shift(vitals_filtered, date_shift)

    # Write out vital table
    shifted_df.to_csv(vital_table, index=False)


if __name__ == '__main__':
    main()
Vitals,Units
BMI,kg/m2
BP_DIAS,mm Hg
BP_SYS,mm Hg
HEIGHT,Inches
WEIGHT,Pounds
This diff is collapsed.
#!/usr/bin/env python3
import os

import pandas as pd
import pytest

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

import transform_vitals
# One-row vitals extract in CSV form, used to build the test DataFrame.
# The data row ends at the units value; no trailing quote character.
VITALS_STRING = """MRN,CONCEPT_CD,START_DATE,NVAL_NUM,UNITS_CD
33,VISIT:WEIGHT,9/14/2013 0:00,172,Pounds
"""
# Path prefix of the 'standards' directory that sits one level above the
# directory containing this test module; ends with '/' so file names can be
# appended directly.
standards_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../standards/'
@pytest.fixture
def vital():
    '''Return a DataFrame parsed from the in-memory VITALS_STRING CSV.'''
    return pd.read_csv(StringIO(VITALS_STRING))
@pytest.fixture
def unit_map():
    '''Return the vitals unit map read from the standards directory.'''
    unit_map_file = standards_path + 'vital_unitmap.csv'
    return pd.read_csv(unit_map_file)
@pytest.fixture
def vital_1(vital):
    '''Return the vitals frame with row 0's NVAL_NUM set to 9999999.'''
    vital.at[0, 'NVAL_NUM'] = 9999999
    return vital
@pytest.fixture
def vital_2(vital):
    '''Return the vitals frame with row 0's NVAL_NUM set to NaN.'''
    # NVAL_NUM is parsed as integers from the CSV. Cast to float first so the
    # NaN assignment does not depend on pandas silently upcasting the column,
    # which newer pandas versions deprecate.
    vital['NVAL_NUM'] = vital['NVAL_NUM'].astype(float)
    vital.loc[0, 'NVAL_NUM'] = float('nan')
    return vital
@pytest.mark.unit
def test_check_removal_missing_vitals(vital_1, unit_map):
    '''A row carrying the 9999999 value must be filtered out.'''
    filtered = transform_vitals.filter_recorded(vital_1, unit_map)
    row_count = filtered.shape[0]
    assert row_count == 0
@pytest.mark.unit
def test_check_removal_empty_vitals(vital_2, unit_map):
    '''A row whose value is NaN must be filtered out.'''
    filtered = transform_vitals.filter_recorded(vital_2, unit_map)
    row_count = filtered.shape[0]
    assert row_count == 0
@pytest.mark.unit
def test_check_removal_unwanted_vitals(vital, unit_map):
    '''A row whose concept is VISIT:PULSE must be filtered out.'''
    vital.loc[0, 'CONCEPT_CD'] = "VISIT:PULSE"
    filtered = transform_vitals.filter_recorded(vital, unit_map)
    row_count = filtered.shape[0]
    assert row_count == 0
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment