Commit 18c523aa authored by Venkat Malladi
Browse files

Update the patient data.

parent f569e8c7
Pipeline #7819 failed in 2 minutes and 33 seconds
......@@ -42,16 +42,33 @@ def filter_table(demographics, tumor_registry):
# Column filters
demo_filter_col = ["MRN", "GENDER", "ETHNICITY", "RACE", "BIRTH_DATE",
"DEATH_DATE", "DEATH_SOURCE"]
tumor_filter_col = ["Medical Record Number", "Date of Last Contact-Date", "Vital Status", 'Date of Diagnosis-Date']
tumor_filter_col = ["Medical Record Number", "Date of Last Contact-Date", "Vital Status",
'Date of Diagnosis-Date']
# Filter
demo_filter = demographics.loc[:, demo_filter_col]
tumor_filter = tumor_registry.loc[:, tumor_filter_col]
# Filter for dead status
tumor_dead = tumor_filter[tumor_filter["Vital Status"] == 0]
# merge
df_merged = demo_filter.merge(tumor_filter, left_on='MRN',
df_merged = demo_filter.merge(tumor_dead, left_on='MRN',
right_on='Medical Record Number', how='left')
# Filter for early diagnosis date
tumor_kidney = tumor_registry[tumor_registry['Primary Site'] == 'C649']
for mrn in df_merged.MRN:
tmp = tumor_kidney[tumor_kidney['Medical Record Number'] == mrn]
if tmp.shape[0] > 1:
date = str(int(tmp['Date of Diagnosis'].min()))
if len(date) == 4:
date = date + '0715'
elif len(date) == 6:
date = date + '15'
df_merged.loc[df_merged['MRN'] == mrn, 'Date of Diagnosis'] = date
return df_merged
......@@ -92,7 +109,7 @@ def calculate_shift(merged):
merged['BIRTH_DATE'] = pd.to_datetime(merged['BIRTH_DATE'])
merged['DEATH_DATE'] = pd.to_datetime(merged['DEATH_DATE'])
merged['Date of Last Contact-Date'] = pd.to_datetime(merged['Date of Last Contact-Date'])
merged['Date of Diagnosis-Date'] = pd.to_datetime(merged['Date of Diagnosis-Date'])
merged['Date of Diagnosis'] = pd.to_datetime(merged['Date of Diagnosis'])
# Calculate date shift relative to 1800/01/01
......@@ -108,7 +125,7 @@ def calculate_shift(merged):
# Calculate date Shift for death date
merged['DEATH_DATE'] = merged['DEATH_DATE'] + merged['Shift']
merged['DIAGNOSIS_DATE'] = merged['Date of Diagnosis-Date'] + merged['Shift']
merged['DIAGNOSIS_DATE'] = merged['Date of Diagnosis'] + merged['Shift']
# Shift to total seconds
time_delta_series = merged['Shift']
......@@ -151,10 +168,12 @@ def main():
shifted_filtered = shifted_df[["MRN", "GENDER", "ETHNICITY", "RACE",
'DEATH_DATE', 'DEATH_SOURCE', 'DIAGNOSIS_DATE']]
shifted_filtered.columns = ['mrn', 'sex', 'ethnicity', 'race', 'death_date', 'death_source', 'diagnosis_date']
shifted_filtered.drop_duplicates(inplace=True)
shifted_filtered.to_csv(patients, index=False)
# Write out shift table
shift_only = shifted_df[["MRN", "Shift"]]
shift_only.drop_duplicates(inplace=True)
shift_only.to_csv(shift_file, index=False)
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment