Skip to content
Snippets Groups Projects
Commit 52025320 authored by Cooper Mellema's avatar Cooper Mellema
Browse files

Readme and license update

parent a0143a85
No related merge requests found
LICENSE 0 → 100644
Copyright (c) 2018 [IH1] The University of Texas Southwestern Medical Center.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted for academic and research use only (subject to the limitations in the disclaimer below) provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holders nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
# Parkinson's Code Readme
## Purpose
Code to process and predict prognosis for the PDBP dataset, PPMI dataset, and internally acquired UTSW prospective dataset
\ No newline at end of file
This diff is collapsed.
......@@ -109,7 +109,7 @@ def fColumnSort(pdMeds):
pdMeds_Prior_OtherMeds = pdMeds[lsPrior_CommonCols + lsPrior_OtherMeds]
pdMeds_Park_NeuroMeds = pdMeds[lsPark_CommonCols + lsPark_NeuroMeds]
pdMeds_Park_OtherMeds = pdMeds[lsPark_CommonCols + lsPark_OtherMeds]
#rename columns
lsNewCols = ['Site', 'Visit', 'Date', 'GUID', 'Age', 'Med', 'Dose', 'Unit', 'Frequency']
pdMeds_Prior_NeuroMeds.columns = lsNewCols
......@@ -162,7 +162,7 @@ def fSelectPDMeds(pdMeds):
Return:
pdMeds (pd dataframe): medication table with only PD meds included, all renamed to common naming convention
"""
dMedicationLookup = ledd.fLoadMedicationLookup('/project/bioinformatics/DLLab/Cooper/Code/Parkinsons/PDBP_MedicationSynonyms.json')
dMedicationLookup = ledd.fLoadMedicationLookup('/archive/bioinformatics/DLLab/CooperMellema/src/parkinsons/medication/PDBP_MedicationSynonyms.json')
# rename medications, putting np.nan in for non-pd medications
pdMeds[['Med']] = \
......@@ -184,9 +184,9 @@ def fDoseCorrect(pdMeds):
# Load the doses/day lookup table
# Note: PRN medications are ignored, as we have no way of knowing how much was taken
# Note 2: Some medications were given as a range, the average value was taken for these
dDosesPerDay = ledd.fLoadJson(sJsonPath='/project/bioinformatics/DLLab/Cooper/Code/Parkinsons/PDBP_DosesPerDay.json')
dDoseLookup = ledd.fLoadJson(sJsonPath='/project/bioinformatics/DLLab/Cooper/Code/Parkinsons/PDBP_DoseStrings.json')
dDosesPerDay = ledd.fLoadJson(sJsonPath='/archive/bioinformatics/DLLab/CooperMellema/src/parkinsons/medication/PDBP_DosesPerDay.json')
dDoseLookup = ledd.fLoadJson(sJsonPath='/archive/bioinformatics/DLLab/CooperMellema/src/parkinsons/medication/PDBP_DoseStrings.json')
# Fill in frequency strings
# where there are defined values in the concomitant meds field, copy that value to the corresponding Parkinsonism_meds field
# the concom encoded tables used string frequencies, so we need to use another lookup table
......@@ -243,7 +243,7 @@ def fUnitCorrect(pdMeds):
Returns:
pdMeds (dataframe): dataframe corrected to all mg units
"""
dUnitConversions = ledd.fLoadJson(sJsonPath='/project/bioinformatics/DLLab/Cooper/Code/Parkinsons/PDBP_UnitConvert.json')
dUnitConversions = ledd.fLoadJson(sJsonPath='/archive/bioinformatics/DLLab/CooperMellema/src/parkinsons/medication/PDBP_UnitConvert.json')
for sKey in [x for x in dUnitConversions.keys() if not '__' in x]:
if sKey in pdMeds['Unit'].values:
if type(dUnitConversions[sKey])==list:
......@@ -266,9 +266,9 @@ def fDEPRECIATEDMedicationSort(pdMeds):
# Load the doses/day lookup table
# Note: PRN medications are ignored, as we have no way of knowing how much was taken
# Note 2: Some medications were given as a range, the average value was taken for these
dDosesPerDay = ledd.fLoadJson(sJsonPath='/project/bioinformatics/DLLab/Cooper/Code/Parkinsons/PDBP_DosesPerDay.json')
dDoseLookup = ledd.fLoadJson(sJsonPath='/project/bioinformatics/DLLab/Cooper/Code/Parkinsons/PDBP_DoseStrings.json')
dDosesPerDay = ledd.fLoadJson(sJsonPath='/archive/bioinformatics/DLLab/CooperMellema/src/parkinsons/medication/PDBP_DosesPerDay.json')
dDoseLookup = ledd.fLoadJson(sJsonPath='/archive/bioinformatics/DLLab/CooperMellema/src/parkinsons/medication/PDBP_DoseStrings.json')
# Fill in Parkinsons meds from the PriorAndConcomitantMeds fields (naming conventions are off, rewriting everything in Parkinsonism_Meds fields)
for sConcom, sPD in [('Required Fields.GUID', 'Required.GUID'),\
......@@ -320,7 +320,7 @@ def fDEPRECIATEDMedicationSort(pdMeds):
else:
pdMeds.loc[~pdMeds[f'PriorAndConcomitantMeds.{sConcom}'].isnull(), [f'Parkinsonism_Meds.{sPD}']] = \
pdMeds.loc[~pdMeds[f'PriorAndConcomitantMeds.{sConcom}'].isnull(), [f'PriorAndConcomitantMeds.{sConcom}']].values
# fill in cols that should have been duplicated (only first row filled for
# a given patient, i.e pt 1 has 3 entries, but info only in the first)
lsFillCols=['Study ID',
......@@ -331,7 +331,6 @@ def fDEPRECIATEDMedicationSort(pdMeds):
'Parkinsonism_Meds.Required.VisitDate',
'Parkinsonism_Meds.Required.AgeYrs',
'Parkinsonism_Meds.Required.AgeRemaindrMonths',
'Parkinsonism_Meds.Required.AgeVal']
pdMeds[lsFillCols]=pdMeds[lsFillCols].fillna(method='ffill')
pdMeds=pdMeds.set_index(['Parkinsonism_Meds.Required.GUID'])
......@@ -385,7 +384,7 @@ def fManualFixes(pdMeds):
lsSuspectEntries.append(pdMeds.loc[(pdMeds['Med']=='Rotigotine').values & (pdMeds['Dose']>=20).values])
pdMeds.loc[(pdMeds['Med']=='Rotigotine').values & (pdMeds['Dose']>=20).values, 'Dose'] = \
pdMeds.loc[(pdMeds['Med']=='Rotigotine').values & (pdMeds['Dose']>=20).values, 'Dose'].values/10.0
# Patient PDZJ672JCJ, PDCU336WW9, 'PDWF428BU7', 'PDPP163NDH','PDHE978YX5', 'PDDM149NRN','PDWG274ZZ4' and 'NIHTD106ZTUEA'
# have dosage for carb/levodopa recorded at 25,000 mg. I corrected these all to 25 mg based on the other doses nearby in
# the table (e.g. the patient wouldn't be taking 13 mg pills).
......@@ -393,7 +392,7 @@ def fManualFixes(pdMeds):
lsSuspectEntries.append(pdMeds.loc[pdMeds['Dose']>1000])
pdMeds.loc[(pdMeds['Med']=='Carbidopa-Levodopa IR').values & (pdMeds['Dose']>1000).values, 'Dose'] = \
pdMeds.loc[(pdMeds['Med']=='Carbidopa-Levodopa IR').values & (pdMeds['Dose']>1000).values, 'Dose'].values/1000.0
# Note: patient PDXM370FA1 is suspect as well, they appear to have gone from 0 LEDD at baseline to the highest at 12 months and
# look like they have duplicated rows in the medication table
......@@ -402,7 +401,7 @@ def fManualFixes(pdMeds):
lsSuspectEntries.append(pdMeds.loc[(pdMeds['Med']=='Carbidopa-Levodopa IR').values & (pdMeds['Dose']<1).values])
pdMeds.loc[(pdMeds['Med']=='Carbidopa-Levodopa IR').values & (pdMeds['Dose']<1).values, 'Dose'] = \
pdMeds.loc[(pdMeds['Med']=='Carbidopa-Levodopa IR').values & (pdMeds['Dose']<1).values, 'Dose'].values*10.0
# For Selegiline, patient PDRR084TGM is receiving '50 mg' per dose. Based on typical selegiline levels,
# this is likely 5 mg. (recommended dose for PD is in 5mg pills)
lsSuspectEntries.append(pdMeds.loc[(pdMeds['Med']=='Selegiline').values & (pdMeds['Dose']>10).values])
......@@ -602,7 +601,7 @@ def fCollectSuspect(pdErrors, pdLEDD_PerSub, pdMeds, pdManuallyAltered):
}
)
], ignore_index=True)
# collect all of the high delta LEDD in one visit
# (change greater than 500 mg of LEDD) from one visit to next
for i in range(len(pdLEDD_PerSub.index)):
......@@ -672,12 +671,11 @@ if __name__ == "__main__":
'24 months','30 months','36 months','42 months','60 months']]
# save
pdLEDD_PerSub.to_csv('/project/bioinformatics/DLLab/Cooper/Code/Parkinsons/PDBP_Metadata/PDBP_LEDD.csv')
pdLEDD_PerSub.to_csv('/archive/bioinformatics/DLLab/CooperMellema/src/parkinsons/index_files/PDBP_LEDD.csv')
# plot
ledd.fPlotErrors(pdErrors)
# collect suspect subjects and save
pdSuspect = fCollectSuspect(pdErrors, pdLEDD_PerSub, pdMeds, pdManuallyAltered)
pdSuspect.to_csv('/project/bioinformatics/DLLab/Cooper/Code/Parkinsons/PDBP_Metadata/PDBP_Suspect_Patients.csv')
#pdSuspect=pd.read_csv('/project/bioinformatics/DLLab/Cooper/Code/Parkinsons/PDBP_Metadata/PDBP_Suspect_Patients.csv', index_col=0)
\ No newline at end of file
pdSuspect.to_csv('/archive/bioinformatics/DLLab/CooperMellema/src/parkinsons/index_files/PDBP_LEDD_Suspect.csv')
......@@ -431,7 +431,7 @@ if __name__ == '__main__':
# loop through each experiment:
for sTarget in lsTargets:
sSaveRoot=f'/project/bioinformatics/DLLab/Cooper/Code/Parkinsons/TrainedModels/{sTarget}Regression'
iModel=int(sIniFile.split('Dense_')[-1].split('.ini')[0])
sSelection=None
if 'UnivariateSelected' in sTarget:
......@@ -460,7 +460,7 @@ if __name__ == '__main__':
for iInner in dSplits[iOuter].keys():
if not os.path.isfile(f'{sSaveRoot}/{sAtlas}/{sMetric}/{sLoss}_Optimized/Dense_{iModel}_Inner{iInner}_Outer{iOuter}.p'):
idxTr, idxVal, idxTe = dSplits[iOuter][iInner]
# separate data into tr, val, te
aTr, aVal, aTe, aTrY, aValY, aTeY = fFetchData(pdData, idxTr, idxVal, idxTe, sCol, sTarget, sSelection=sSelection)
......
{
"AAHead_Scout_64ch-head-coil": "anat_T1w_acq-scout",
"t1_mprage_sag_p3_1mm_iso": "anat_T1w_acq-sag",
"ABCD_fMRI_DistortionMap_PA": "fmap_epi_intendedfor_func_dir-PA",
"ABCD_fMRI_DistortionMap_AP": "fmap_epi_intendedfor_func_dir-AP",
"ABCD_fMRI_rest_Fixation": "func_bold_task-rest",
"ME_4e_2-2mm_G2M4_TR1500_42ms": "func_bold_task-rest_acq-multiband_echos-4_res-22mm_TR-1500",
"ME_4e_2.4mm_G2M4_TR1598": "func_bold_task-rest_acq-multiband_echos-4_res-24mm_TR-1598",
"ME_4e_2.4mm_G2M4_TR1598_PA": "func_bold_task-rest_acq-multiband_echos-4_res-24mm_TR-1598_dir-PA",
"ME_5e_2.4mm_G2M4_TR1598_AP": "func_bold_task-rest_acq-multiband_echos-5_res-24mm_TR-1598_dir-AP",
"ME_5e_2.4mm_G2M4_TR1598_PA": "func_bold_task-rest_acq-multiband_echos-5_res-24mm_TR-1598_dir-PA",
"ME_3e_2.4mm_G2M4_TR1274_MB4_AP":"func_bold_task-rest_acq-multiband_echos-3_res-24mm_TR-1274_dir-AP",
"ME_3e_2.4mm_G2M4_TR1274_MB4_PA": "func_bold_task-rest_acq-multiband_echos-3_res-24mm_TR-1274_dir-PA",
"ME_3e_2.0mm_G2M4_TR1500_MB6_AP": "func_bold_task-rest_acq-multiband_echos-3_res-2mm_TR-1500_dir-AP",
"ME_3e_2.0mm_G2M4_TR1500_MB6_PA": "func_bold_task-rest_acq-multiband_echos-3_res-2mm_TR-1500_dir-PA",
"ME_5e_2.4mm_G2M4_TR1355_MB6_AP": "func_bold_task-rest_acq-multiband_echos-5_res-24mm_TR-1355_dir-AP",
"ME_5e_2.4mm_G2M4_TR1355_MB6_PA": "func_bold_task-rest_acq-multiband_echos-5_res-24mm_TR-1355_dir-PA",
"3D_T2_Sagittal": "anat_T2w_acq-sag",
"qsm_Gradient mode Fast_0.8x0.8x0.8": "anat_QSM",
"ABCD_Task_Car vs Dog": "func_bold_task-gonogocardog",
"ABCD_Task_Cars vs Dogs": "func_bold_task-gonogocarsdogs",
"ABCD_Task_Living vs NonLiving": "func_bold_task-gonogoliving",
"ABCD_Task_Finger Tap": "func_bold_task-tap",
"ABCD_dMRI_DistortionMap_PA": "fmap_epi_intendedfor_dwi_dir-PA",,
"ABCD_dMRI_DistortionMap_AP": "fmap_epi_intendedfor_dwi_dir-AP",
"ABCD_dMRI_Changed_Rise_Time": "dwi",
"AMRI_GRE_STE_ref_2D": "anat_qsm",
"AMRI_GRE_STE_ref_2D_BDC": "anat_qsm_acq-BDC",
"AMRI_GRE_STE_0p8_S2x2": "anat_qsm_acq-S2x2",
"R2*_GRE": "anat_R2star",
"t2_space_dark-fluid_sag_c4_iso_fast": "anat_T2flair",
"MT_TR30_88slices": "anat_neuromelanin"
}
\ No newline at end of file
......@@ -20,6 +20,7 @@ import os
import pandas as pd
import numpy as np
import json
from pathlib import Path
def fGetData():
"""fetches the old LME data and the new LEDD scores to replace the old with
......@@ -28,10 +29,10 @@ def fGetData():
pdLME_Prepped_Data, pdNew_LEDD: dataframes with old and new data
"""
# set paths to LEDD data
sRoot = os.getcwd()
sRoot = Path(os.getcwd()).parent
# First, the pre-prepped LME data
sLME_Prepped_Data = os.path.join(sRoot, 'PDBP_Metadata', 'prepared_lme_data.csv')
sLME_Prepped_Data = os.path.join(sRoot, 'index_files', 'prepared_lme_data.csv')
pdLME_Prepped_Data = pd.read_csv(sLME_Prepped_Data, index_col=1).drop(['Unnamed: 0'], axis=1)
# Then, the UTSW data
......@@ -39,7 +40,7 @@ def fGetData():
pdUTSW_LEDD = pdUTSW_LEDD[['Site', 'Visit Description', 'Visit#', 'Total_LED']]
# Then, the new LEDD data
sNew_LEDD = os.path.join(sRoot, 'PDBP_Metadata', 'PDBP_LEDD.csv')
sNew_LEDD = os.path.join(sRoot, 'index_files', 'PDBP_LEDD.csv')
pdNew_LEDD = pd.read_csv(sNew_LEDD, index_col=0)
#Then, the Metadata for the new LEDD data
......@@ -47,7 +48,7 @@ def fGetData():
pdMetadata=pd.read_csv(sMetadata)
# then, the demographics metadata:
with open('./PDBP_Demographics_Paths.json', 'rb') as f:
with open(os.path.join(sRoot, 'configs', 'PDBP_Demographics_Paths.json'), 'rb') as f:
dFiles=json.load(f)
pdDemographics = pd.concat([pd.read_csv(x) for x in dFiles.values()])
pdDemographics.set_index('Demographics.Required Fields.GUID', inplace=True)
......@@ -63,7 +64,7 @@ def fGetData():
]]
# numerically encode ethnicity and Female
with open('./PDBP_Ethnicity_Codes.json', 'rb') as f:
with open(os.path.join(sRoot, 'configs', 'PDBP_Ethnicity_Codes.json'), 'rb') as f:
dEthnicities=json.load(f)
pdDemographics['Ethnicity'] = [dEthnicities[x] for x in pdDemographics['Demographics.Demographics.RaceExpndCatPDBP']]
pdDemographics.loc[pdDemographics['Demographics.Demographics.EthnUSACat']=='Hispanic or Latino', 'Ethnicity'] = 'Latino'
......@@ -83,7 +84,6 @@ def fGetData():
return pdLME_Prepped_Data, pdNew_LEDD, pdUTSW_LEDD, pdMetadata, pdDemographics
def fReformatData(pdLME_Prepped_Data, pdNew_LEDD, pdUTSW_LEDD, pdMetadata, pdDemographics, bTotalUPDRS=False):
# flatten out Table
pdNew_LEDD['GUID'] = pdNew_LEDD.index
pdNew_LEDD = pdNew_LEDD.melt(['GUID','Site'], var_name='Visit', value_name='LEDD')
......@@ -170,7 +170,7 @@ def fReformatData(pdLME_Prepped_Data, pdNew_LEDD, pdUTSW_LEDD, pdMetadata, pdDem
# assign ID to each subject
pdData['GUID'] = pdData.index
pdData['ID'] = pdData['GUID'].astype('category').cat.codes + 1 + pdLME_Prepped_Data['ID'].max()
# select out proper columns
pdData = pdData[['ID', 'Age', 'First_Score', 'First_LEDD', 'Months', 'Female',\
'Ethnicity', 'Total.Score', 'LEDD', 'LEDD_2', 'PDQ39', 'MoCA', 'Schwab',\
......@@ -186,7 +186,7 @@ def fReformatData(pdLME_Prepped_Data, pdNew_LEDD, pdUTSW_LEDD, pdMetadata, pdDem
pdData['Age'] = pdData['Age2']
dFirstLEDD = pdData.sort_values(by='Months').groupby(pdData.index).first()['LEDD'].to_dict()
pdData['First_LEDD'] = [dFirstLEDD[x] if x in dFirstLEDD.keys() else np.nan for x in pdData.index]
# z-score necessary columns
for sCol in ['First_LEDD', 'Age', 'GCO']:
pdData[sCol] = (pdData[sCol] - pdData[sCol].mean())/pdData[sCol].std(ddof=0)
......@@ -201,6 +201,6 @@ if __name__ == "__main__":
pdLME_Formatted_Data = fReformatData(pdLME_Prepped_Data, pdNew_LEDD, pdUTSW_LEDD, pdMetadata, pdDemographics)
# save trainable data
sRoot = os.getcwd()
sRoot = Path(os.getcwd()).parent
pdLME_Formatted_Data.dropna(subset=['Total.Score', 'LEDD']).sort_values(by=['GUID', 'Months'])\
.to_csv(os.path.join(sRoot, 'PPMI_Metadata', 'PDBP_Prepped_For_LME.csv'))
.to_csv(os.path.join(sRoot, 'index_files', 'PDBP_Prepped_For_LME.csv'))
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment