Skip to content
Snippets Groups Projects
Commit b1c3fb87 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Add test for sample and experiment id conformance.

parent cec86acd
1 merge request: !20 Resolve "Use SampleIds/ Experiment Id as file names throughout pipeline"
......@@ -5,6 +5,7 @@
import argparse
import logging
import pandas as pd
import re
EPILOG = '''
For more details:
......@@ -72,6 +73,46 @@ def check_design_headers(design, paired):
raise Exception("Missing column headers: %s" % list(missing_headers))
def check_samples(design):
    '''Check that no sample_id in the design file is malformed.

    A sample_id is considered malformed when it contains any whitespace
    character, a hyphen ('-') or a period ('.').

    Args:
        design: pandas DataFrame with a 'sample_id' column.

    Returns:
        None when every sample_id is well formed.

    Raises:
        Exception: if one or more sample_ids are malformed; the message
            lists them in the order of the groupby keys.
    '''

    logger.info("Running sample check.")

    # Only the distinct group keys are needed, so size() is used instead of
    # materialising a list per group.
    samples = design.groupby('sample_id').size().index

    forbidden = set('-.')
    malformated_samples = [
        sample for sample in samples
        # str() lets non-string ids (e.g. numeric) be checked rather than
        # raising TypeError when iterated.
        if any(char.isspace() or char in forbidden for char in str(sample))
    ]

    if malformated_samples:
        logger.error('Malformed samples from design file: %s',
                     list(malformated_samples))
        raise Exception("Malformed samples from design file: %s" %
                        list(malformated_samples))
def check_experiments(design):
    '''Check that no experiment_id in the design file is malformed.

    An experiment_id is considered malformed when it contains any
    whitespace character, a hyphen ('-') or a period ('.').

    Args:
        design: pandas DataFrame with an 'experiment_id' column.

    Returns:
        None when every experiment_id is well formed.

    Raises:
        Exception: if one or more experiment_ids are malformed; the message
            lists them in the order of the groupby keys.
    '''

    logger.info("Running experiment check.")

    # Only the distinct group keys are needed, so size() is used instead of
    # materialising a list per group.
    experiments = design.groupby('experiment_id').size().index

    forbidden = set('-.')
    malformated_experiments = [
        experiment for experiment in experiments
        # str() lets non-string ids (e.g. numeric) be checked rather than
        # raising TypeError when iterated.
        if any(char.isspace() or char in forbidden
               for char in str(experiment))
    ]

    if malformated_experiments:
        logger.error('Malformed experiment from design file: %s',
                     list(malformated_experiments))
        raise Exception("Malformed experiment from design file: %s" %
                        list(malformated_experiments))
def check_controls(design):
'''Check if design file has the correct control mapping.'''
......
......@@ -63,6 +63,24 @@ def design_4(design):
return design
@pytest.fixture
def design_5(design):
    '''Design whose sample_ids contain a space, a period and a hyphen.'''
    # (existing sample_id, malformed replacement), applied in this order.
    replacements = (
        ('A_1', 'A 1'),
        ('A_2', 'A.2'),
        ('B_1', 'B-1'),
    )
    for current_id, malformed_id in replacements:
        matching_rows = design['sample_id'] == current_id
        design.loc[matching_rows, 'sample_id'] = malformed_id
    return design
@pytest.fixture
def design_6(design):
    '''Design whose experiment_ids contain a space, a period and a hyphen.'''
    # (sample_id selecting the rows, malformed experiment_id to assign),
    # applied in this order.
    replacements = (
        ('A_1', 'A ChIP'),
        ('A_2', 'A.ChIP'),
        ('B_1', 'B-ChIP'),
    )
    for sample_id, malformed_experiment in replacements:
        matching_rows = design['sample_id'] == sample_id
        design.loc[matching_rows, 'experiment_id'] = malformed_experiment
    return design
@pytest.fixture
def fastq_files_1(fastq_files):
# Drop B_2.fastq.gz
......@@ -115,10 +133,25 @@ def test_check_files_output_pairedend(design_3, fastq_files):
assert new_design.loc[0, 'fastq_read2'] == "/path/to/file/A_2.fastq.gz"
@pytest.mark.unit
def test_check_replicates(design_4):
    '''check_replicates must raise and name the experiment with duplicates.'''
    with pytest.raises(Exception) as excinfo:
        check_design.check_replicates(design_4)
    # Asserted after the with-block: statements placed after the raising call
    # inside it would never execute.
    assert str(excinfo.value) == "Duplicate replicates in experiments: ['B']"
@pytest.mark.unit
def test_check_samples(design_5):
    '''check_samples must raise and list every malformed sample_id.'''
    with pytest.raises(Exception) as excinfo:
        check_design.check_samples(design_5)
    # Asserted after the with-block: statements placed after the raising call
    # inside it would never execute.
    assert str(excinfo.value) == "Malformed samples from design file: ['A 1', 'A.2', 'B-1']"
@pytest.mark.unit
def test_check_experiments(design_6):
    '''check_experiments must raise and list every malformed experiment_id.'''
    with pytest.raises(Exception) as excinfo:
        check_design.check_experiments(design_6)
    # Asserted after the with-block: statements placed after the raising call
    # inside it would never execute.
    assert str(excinfo.value) == "Malformed experiment from design file: ['A ChIP', 'A.ChIP', 'B-ChIP']"
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment