Resolve radiation issues.

parent 6978a10c
......@@ -113,6 +113,24 @@ def convert_treatment(merged):
return merged
def add_rt(rt_history, radiation_add):
    """Add additional RT history that is not already present in MOSAIC.

    Rows of ``radiation_add`` whose (MRN, SITE_GENERAL, STARTDATE) key
    already exists in ``rt_history`` are dropped; the remaining rows are
    appended to ``rt_history``.

    Neither input frame is modified: the date conversion and indexing are
    done on derived frames, so the caller's data keeps its columns.

    Args:
        rt_history: DataFrame with at least the columns MRN, SITE_GENERAL
            and STARTDATE.
        radiation_add: DataFrame of additional records with the same three
            key columns.

    Returns:
        A DataFrame indexed by (MRN, SITE_GENERAL, STARTDATE) holding every
        row of ``rt_history`` followed by the rows of ``radiation_add``
        whose key was not already present.
    """
    key_columns = ["MRN", "SITE_GENERAL", "STARTDATE"]

    # Parse STARTDATE on both sides so the two sources compare as datetimes
    # even when they were written in different date formats.
    history = rt_history.assign(
        STARTDATE=pd.to_datetime(rt_history["STARTDATE"])
    ).set_index(key_columns)
    additional = radiation_add.assign(
        STARTDATE=pd.to_datetime(radiation_add["STARTDATE"])
    ).set_index(key_columns)

    # Keep only the additional rows whose key is not already in the history.
    already_present = additional.index.isin(history.index)
    return pd.concat([history, additional[~already_present]])
def calculate_shift(radiation, date_shift):
'''Shift Date for start and last date'''
......@@ -141,14 +159,28 @@ def calculate_shift(radiation, date_shift):
merged['dose_units'] = 'cGy'
# Rename columns
radiation_columns = ["mrn", "site_specific", "site_general",
"dose", "fractions", 'start_date', 'end_date',
radiation_columns = ["mrn", "site_general", 'start_date', "site_specific",
"dose", "fractions", 'end_date',
'treatment_intention', "dose_units"]
merged.columns = radiation_columns
merged = merged[["mrn", "site_specific", "site_general",
"dose", "fractions", 'start_date', 'end_date',
'treatment_intention', "dose_units"]]
return merged
def fill_sites(df):
    """Copy site_general into site_specific where the latter is 'Not Available'.

    The frame is updated in place and the same object is returned.
    """
    unavailable = df['site_specific'].eq('Not Available')
    # The right-hand side is aligned on the index, so each flagged row
    # receives its own site_general value.
    df.loc[unavailable, 'site_specific'] = df['site_general']
    return df
def main():
args = get_args()
rthistory = args.file
......@@ -178,13 +210,11 @@ def main():
# Map Specific Sites to General Sites
fix_sites, unmapped_sites = convert_sites(fix_icd, site_map)
# Merge sites with additional data
# Merge sites with additional data and filter
if additional_rthistory:
radiation_add = pd.read_csv(additional_rthistory)
radiation_add.drop(['DIAGNOSIS'], inplace=True, axis=1)
merge_sites = pd.concat([fix_sites, radiation_add])
fix_sites = merge_sites
fix_sites = add_rt(fix_sites, radiation_add)
# Convert missing Treatment
fix_treatment = convert_treatment(fix_sites)
......@@ -192,11 +222,11 @@ def main():
# Calculate Date Shift
shifted_df = calculate_shift(fix_treatment, date_shift)
# Remove duplicate rows
shifted_df.drop_duplicates(["mrn","site_general", "start_date"], keep='first', inplace=True)
# Move site_general to site_specific if not present
fix_sites = fill_sites(shifted_df)
# Write out radiation table
shifted_df.to_csv(radiation_table, index=False)
fix_sites.to_csv(radiation_table, index=False)
# Write out unmapped sites if there are any
if unmapped_sites.shape[0] > 1:
