Skip to content
Snippets Groups Projects
Commit b1c3fb87 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Add test for sample and experiment id conformance.

parent cec86acd
Branches
Tags
No related merge requests found
......@@ -5,6 +5,7 @@
import argparse
import logging
import pandas as pd
import re
EPILOG = '''
For more details:
......@@ -72,6 +73,46 @@ def check_design_headers(design, paired):
raise Exception("Missing column headers: %s" % list(missing_headers))
def check_samples(design):
    '''Check if design file has the correct sample name mapping.

    A sample_id is reported as malformed when it contains whitespace,
    a dash ('-') or a period ('.').

    Args:
        design: object exposing a pandas-style ``groupby`` with a
            'sample_id' column (the design file contents).

    Raises:
        Exception: if at least one sample_id is malformed; the message
            lists the offending ids in the order produced by groupby.
    '''

    logger.info("Running sample check.")

    # Only the distinct group keys are needed, so size() replaces the
    # original apply(list), which built a throwaway list for every group.
    samples = design.groupby('sample_id').size().index.tolist()

    forbidden = set('-.')
    malformated_samples = [
        sample for sample in samples
        # str() lets non-string ids (e.g. integers) be scanned instead of
        # failing with "object is not iterable".
        if any(char.isspace() or char in forbidden for char in str(sample))
    ]

    if malformated_samples:
        logger.error('Malformed samples from design file: %s',
                     malformated_samples)
        raise Exception("Malformed samples from design file: %s" %
                        malformated_samples)
def check_experiments(design):
    '''Check if design file has the correct experiment name mapping.

    An experiment_id is reported as malformed when it contains
    whitespace, a dash ('-') or a period ('.').

    Args:
        design: object exposing a pandas-style ``groupby`` with an
            'experiment_id' column (the design file contents).

    Raises:
        Exception: if at least one experiment_id is malformed; the
            message lists the offending ids in the order produced by
            groupby.
    '''

    logger.info("Running experiment check.")

    # Only the distinct group keys are needed, so size() replaces the
    # original apply(list), which built a throwaway list for every group.
    experiments = design.groupby('experiment_id').size().index.tolist()

    forbidden = set('-.')
    malformated_experiments = [
        experiment for experiment in experiments
        # str() lets non-string ids (e.g. integers) be scanned instead of
        # failing with "object is not iterable".
        if any(char.isspace() or char in forbidden
               for char in str(experiment))
    ]

    if malformated_experiments:
        logger.error('Malformed experiment from design file: %s',
                     malformated_experiments)
        raise Exception("Malformed experiment from design file: %s" %
                        malformated_experiments)
def check_controls(design):
'''Check if design file has the correct control mapping.'''
......
......@@ -63,6 +63,24 @@ def design_4(design):
return design
@pytest.fixture
def design_5(design):
    # Swap three sample_ids for ones holding a space, a period or a dash
    replacements = (
        ('A_1', 'A 1'),
        ('A_2', 'A.2'),
        ('B_1', 'B-1'),
    )
    for current_id, bad_id in replacements:
        rows = design['sample_id'] == current_id
        design.loc[rows, 'sample_id'] = bad_id
    return design
@pytest.fixture
def design_6(design):
    # Give three samples an experiment_id holding a space, a period or a dash
    replacements = (
        ('A_1', 'A ChIP'),
        ('A_2', 'A.ChIP'),
        ('B_1', 'B-ChIP'),
    )
    for sample_id, bad_experiment in replacements:
        rows = design['sample_id'] == sample_id
        design.loc[rows, 'experiment_id'] = bad_experiment
    return design
@pytest.fixture
def fastq_files_1(fastq_files):
# Drop B_2.fastq.gz
......@@ -115,10 +133,25 @@ def test_check_files_output_pairedend(design_3, fastq_files):
assert new_design.loc[0, 'fastq_read2'] == "/path/to/file/A_2.fastq.gz"
@pytest.mark.unit
def test_check_replicates(design_4):
    '''check_replicates must raise and name the offending experiment.'''
    with pytest.raises(Exception) as excinfo:
        check_design.check_replicates(design_4)
    # Checked after the with block: code placed after the raising call
    # inside the block would never execute.
    assert str(excinfo.value) == "Duplicate replicates in experiments: ['B']"
@pytest.mark.unit
def test_check_samples(design_5):
    '''check_samples must raise and list every malformed sample_id.'''
    with pytest.raises(Exception) as excinfo:
        check_design.check_samples(design_5)
    # Checked after the with block: code placed after the raising call
    # inside the block would never execute.
    assert str(excinfo.value) == "Malformed samples from design file: ['A 1', 'A.2', 'B-1']"
@pytest.mark.unit
def test_check_experiments(design_6):
    '''check_experiments must raise and list every malformed experiment_id.'''
    with pytest.raises(Exception) as excinfo:
        check_design.check_experiments(design_6)
    # Checked after the with block: code placed after the raising call
    # inside the block would never execute.
    assert str(excinfo.value) == "Malformed experiment from design file: ['A ChIP', 'A.ChIP', 'B-ChIP']"
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment