test_transform_germline.py 4.82 KB
Newer Older
1
2
3
4
5
#!/usr/bin/env python3

import pytest
import os
import pandas as pd
Venkat Malladi's avatar
Venkat Malladi committed
6
from io import StringIO
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import transform_germline_mutations


# Inline CSV used as test input: one header row followed by a single record.
# In that record only BRCA1 and BRCA2 carry a value (2); every other gene
# column is left empty. The `germline` fixture parses this text with
# pd.read_csv, and the germline_N fixtures then fill in one extra gene each.
GERMLINE_STRING = """MRN,REGISTER_DATE,SERVICE_DATE,CLINIC,BRCA1,BRCA2,MULTISITE3,BART,MLH1,MSH2,MSH6,PMS2,MSI,IHCMLH1,IHCMSH2,IHCMSH6,IHCPMS2,BRAF,EPCAM,APC,MYH,MLH1METH,ALK,ATM,ATR,AXIN2,BAP1,BARD1,BMPR1A,BRIP1,CDH1,CDK4,CDKN2A,CHEK1,CHEK2,FAM175A,FLCN,FH,GALNT12,GEN1,GREM1,HOXB1B,MAX,MEN1,MET,MITF,MLH3,MRE11A,NBN,NF1,NF2,PHOX2B,PALB2,PTCH1,PTEN,P16,P53,PRSS1,RAD50,RAD51,RAD51C,RAD51D,RB,RET,SDHA,SDHAF2,SDHC,SDHD,SDHB,SMAD4,STK11,SUFU,TMEM127,TP53BP1,TSC1,TSC2,VHL,XRCC2,OTHER,RESEARCH
934,11/9/2016,11/13/2016,,2,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""

# Path (with trailing slash) of the sibling ``standards`` directory, expressed
# relative to the directory that contains this test module.
standards_path = os.path.split(os.path.abspath(__file__))[0] + '/../standards/'


@pytest.fixture
def germline():
    """Parse the inline GERMLINE_STRING CSV into a fresh DataFrame."""
    return pd.read_csv(StringIO(GERMLINE_STRING))


@pytest.fixture
def significance_map():
    """Load germline_significance.csv from the standards directory as a DataFrame."""
    csv_location = standards_path + 'germline_significance.csv'
    return pd.read_csv(csv_location)


@pytest.fixture
def germline_1(germline):
    """Return the base germline frame with PMS2 on row 0 set to 1."""
    gene, code = 'PMS2', 1
    germline.loc[0, gene] = code
    return germline


@pytest.fixture
def germline_2(germline):
    """Return the base germline frame with IHCMLH1 on row 0 set to 2."""
    gene, code = 'IHCMLH1', 2
    germline.loc[0, gene] = code
    return germline


@pytest.fixture
def germline_3(germline):
    """Return the base germline frame with ATR on row 0 set to 3."""
    gene, code = 'ATR', 3
    germline.loc[0, gene] = code
    return germline


@pytest.fixture
def germline_4(germline):
    """Return the base germline frame with BAP1 on row 0 set to 4."""
    gene, code = 'BAP1', 4
    germline.loc[0, gene] = code
    return germline


@pytest.fixture
def germline_5(germline):
    """Return the base germline frame with SDHC on row 0 set to 5."""
    gene, code = 'SDHC', 5
    germline.loc[0, gene] = code
    return germline


@pytest.mark.unit
def test_check_conversion(germline_1):
    """reformat_record output is expected to have 76 rows and 6 columns."""
    reformatted = transform_germline_mutations.reformat_record(germline_1)
    shape = reformatted.shape
    assert shape[0] == 76
    assert shape[1] == 6


@pytest.mark.unit
def test_check_convert_number_0(germline_1, significance_map):
    """The MULTISITE3 row (no value in the input) should read 'Not Available'."""
    reformatted = transform_germline_mutations.reformat_record(germline_1)
    converted = transform_germline_mutations.convert_number(reformatted, significance_map)
    gene_rows = converted[converted['Gene'] == 'MULTISITE3']
    # .item() requires exactly one matching row, so a duplicate or missing
    # gene fails here rather than in the assertion below.
    row_label = gene_rows.index.item()
    assert converted.loc[row_label, 'Number'] == 'Not Available'


@pytest.mark.unit
def test_check_convert_number_1(germline_1, significance_map):
    """The PMS2 row (set to 1 by the fixture) should read 'Positive'."""
    reformatted = transform_germline_mutations.reformat_record(germline_1)
    converted = transform_germline_mutations.convert_number(reformatted, significance_map)
    gene_rows = converted[converted['Gene'] == 'PMS2']
    # .item() requires exactly one matching row.
    row_label = gene_rows.index.item()
    assert converted.loc[row_label, 'Number'] == 'Positive'


@pytest.mark.unit
def test_check_convert_number_2(germline_2, significance_map):
    """The IHCMLH1 row (set to 2 by the fixture) should read 'Negative'."""
    reformatted = transform_germline_mutations.reformat_record(germline_2)
    converted = transform_germline_mutations.convert_number(reformatted, significance_map)
    gene_rows = converted[converted['Gene'] == 'IHCMLH1']
    # .item() requires exactly one matching row.
    row_label = gene_rows.index.item()
    assert converted.loc[row_label, 'Number'] == 'Negative'


@pytest.mark.unit
def test_check_convert_number_3(germline_3, significance_map):
    """The ATR row (set to 3 by the fixture) should read 'Positive and Variant'."""
    reformatted = transform_germline_mutations.reformat_record(germline_3)
    converted = transform_germline_mutations.convert_number(reformatted, significance_map)
    gene_rows = converted[converted['Gene'] == 'ATR']
    # .item() requires exactly one matching row.
    row_label = gene_rows.index.item()
    assert converted.loc[row_label, 'Number'] == 'Positive and Variant'


@pytest.mark.unit
def test_check_convert_number_4(germline_4, significance_map):
    """The BAP1 row (set to 4 by the fixture) should read 'Variant'."""
    reformatted = transform_germline_mutations.reformat_record(germline_4)
    converted = transform_germline_mutations.convert_number(reformatted, significance_map)
    gene_rows = converted[converted['Gene'] == 'BAP1']
    # .item() requires exactly one matching row.
    row_label = gene_rows.index.item()
    assert converted.loc[row_label, 'Number'] == 'Variant'


@pytest.mark.unit
def test_check_convert_number_5(germline_5, significance_map):
    """The SDHC row (set to 5 by the fixture) should read 'Pending'."""
    reformatted = transform_germline_mutations.reformat_record(germline_5)
    converted = transform_germline_mutations.convert_number(reformatted, significance_map)
    gene_rows = converted[converted['Gene'] == 'SDHC']
    # .item() requires exactly one matching row.
    row_label = gene_rows.index.item()
    assert converted.loc[row_label, 'Number'] == 'Pending'
115
116
117
118
119
120
121
122


@pytest.mark.unit
def test_check_reformat_gene(germline_5):
    """reformat_gene should label the SDHC entry as 'SDHC(Sequencing)'."""
    reformatted = transform_germline_mutations.reformat_record(germline_5)
    relabelled = transform_germline_mutations.reformat_gene(reformatted)
    # The explicit comparison with True is kept so the row mask is built
    # exactly as before (presumably to force a strict boolean mask - confirm).
    is_sdhc = relabelled['Gene'].str.match(r'SDHC') == True  # noqa: E712
    row_label = relabelled[is_sdhc].index.item()
    assert relabelled.loc[row_label, 'Gene'] == 'SDHC(Sequencing)'