Commit f4c3ba50 authored by Venkat Malladi
Browse files

Merge branch '30-genomics_ihc' into 'master'

Resolve "Update cancer gene"

Closes #30

See merge request !12
parents 2d66e0c7 55155ee6
Pipeline #8204 failed with stage
...@@ -64,6 +64,19 @@ def convert_number(merged, significance): ...@@ -64,6 +64,19 @@ def convert_number(merged, significance):
return merged return merged
def reformat_gene(merged):
    '''Convert gene names to the appropriate string for the database.

    Every non-missing value in the ``Gene`` column gets the suffix
    ``(Sequencing)``.  A name of the form ``IHC<name>`` (where ``<name>`` is
    one or more word characters) is instead rewritten to ``<name>(IHC)``.

    Missing values (``None``/``NaN``) are left missing rather than being
    stringified into labels such as ``nan(Sequencing)``.

    The ``Gene`` column of ``merged`` is updated in place and the same
    DataFrame is returned.
    '''
    genes = merged['Gene']

    # Build the default label, but keep missing genes missing: astype(str)
    # alone would turn NaN/None into the text 'nan'/'None'.
    labelled = (genes.astype(str) + '(Sequencing)').where(genes.notna())

    # If IHC then switch
    pat = r"^IHC(?P<one>\w+)\(Sequencing\)"

    def repl(match):
        # Drop the 'IHC' prefix and mark the assay type in the suffix.
        return match.group('one') + '(IHC)'

    merged['Gene'] = labelled.str.replace(pat, repl, regex=True)
    return merged
def calculate_shift(cancer_gene, date_shift): def calculate_shift(cancer_gene, date_shift):
'''Shift Date for start date''' '''Shift Date for start date'''
...@@ -117,8 +130,11 @@ def main(): ...@@ -117,8 +130,11 @@ def main():
# Convert missing Number # Convert missing Number
fix_number = convert_number(cancer_gene_reformat, significance_map) fix_number = convert_number(cancer_gene_reformat, significance_map)
# Convert gene names
fix_gene = reformat_gene(fix_number)
# Calculate Date Shift # Calculate Date Shift
shifted_df = calculate_shift(fix_number, date_shift) shifted_df = calculate_shift(fix_gene, date_shift)
# Write out radiation table # Write out radiation table
shifted_df.to_csv(cancer_gene_table, index=False) shifted_df.to_csv(cancer_gene_table, index=False)
......
...@@ -109,7 +109,10 @@ def calculate_shift(merged): ...@@ -109,7 +109,10 @@ def calculate_shift(merged):
merged['BIRTH_DATE'] = pd.to_datetime(merged['BIRTH_DATE']) merged['BIRTH_DATE'] = pd.to_datetime(merged['BIRTH_DATE'])
merged['DEATH_DATE'] = pd.to_datetime(merged['DEATH_DATE']) merged['DEATH_DATE'] = pd.to_datetime(merged['DEATH_DATE'])
merged['Date of Last Contact-Date'] = pd.to_datetime(merged['Date of Last Contact-Date']) merged['Date of Last Contact-Date'] = pd.to_datetime(merged['Date of Last Contact-Date'])
merged['Date of Diagnosis'] = pd.to_datetime(merged['Date of Diagnosis']) if 'Date of Diagnosis' in merged.columns:
merged['Date of Diagnosis'] = pd.to_datetime(merged['Date of Diagnosis'])
else:
merged['Date of Diagnosis'] = pd.NaT
# Calculate date shift relative to 1800/01/01 # Calculate date shift relative to 1800/01/01
......
...@@ -112,3 +112,11 @@ def test_check_convert_number_5(germline_5, significance_map): ...@@ -112,3 +112,11 @@ def test_check_convert_number_5(germline_5, significance_map):
converted_germline = transform_germline_mutations.convert_number(transform_germline, significance_map) converted_germline = transform_germline_mutations.convert_number(transform_germline, significance_map)
row_selection = converted_germline[converted_germline['Gene'] == 'SDHC'].index.item() row_selection = converted_germline[converted_germline['Gene'] == 'SDHC'].index.item()
assert converted_germline.loc[row_selection, 'Number'] == 'Pending' assert converted_germline.loc[row_selection, 'Number'] == 'Pending'
@pytest.mark.unit
def test_check_reformat_gene(germline_5):
    '''The SDHC gene is labelled with the '(Sequencing)' suffix.'''
    reformatted = transform_germline_mutations.reformat_record(germline_5)
    labelled = transform_germline_mutations.reformat_gene(reformatted)
    # Exactly one row is expected to have a gene name starting with SDHC.
    is_sdhc = labelled['Gene'].str.match(r'SDHC').eq(True)
    sdhc_index = labelled.index[is_sdhc].item()
    assert labelled.loc[sdhc_index, 'Gene'] == 'SDHC(Sequencing)'
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment