Skip to content
Snippets Groups Projects
Commit 6931f8c7 authored by Venkat Malladi
Browse files

Merge branch '10-grouping' into 'master'

Add in experiment_id and check to see if unique replicate ids are given.

Closes #10

See merge request !7
parents 674ba19c 2bb7f6ed
Branches
Tags
1 merge request: !7 Add in experiment_id and check to see if unique replicate ids are given.
Pipeline #1063 failed with stage
in 12 seconds
sample_id biosample factor treatment replicate control_id fastq_read1
ENCSR238SGC limb H3K4me1 None 1 ENCSR687ALB ENCFF833BLU.fastq.gz
ENCSR238SGC limb H3K4me1 None 2 ENCSR687ALB ENCFF646LXU.fastq.gz
ENCSR687ALB limb Control None 1 ENCSR687ALB ENCFF524CAC.fastq.gz
ENCSR687ALB limb Control None 2 ENCSR687ALB ENCFF163AJI.fastq.gz
sample_id experiment_id biosample factor treatment replicate control_id fastq_read1
ENCBS844FSC ENCSR238SGC limb H3K4me1 None 1 ENCBS844FSC ENCFF833BLU.fastq.gz
ENCBS892NXC ENCSR238SGC limb H3K4me1 None 2 ENCBS892NXC ENCFF646LXU.fastq.gz
ENCBS844FSC ENCSR687ALB limb Control None 1 ENCBS844FSC ENCFF524CAC.fastq.gz
ENCBS892NXC ENCSR687ALB limb Control None 2 ENCBS892NXC ENCFF163AJI.fastq.gz
sample_id biosample factor treatment replicate control_id fastq_read1 fastq_read2
ENCSR729LGA MCF-7 SP1 None 1 ENCSR217LRF ENCFF957SQS.fastq.gz ENCFF582IOZ.fastq.gz
ENCSR729LGA MCF-7 SP1 None 2 ENCSR217LRF ENCFF330MCZ.fastq.gz ENCFF293YFE.fastq.gz
ENCSR217LRF MCF-7 Control None 1 ENCSR217LRF ENCFF002DTU.fastq.gz ENCFF002EFI.fastq.gz
ENCSR217LRF MCF-7 Control None 1 ENCSR217LRF ENCFF002EFG.fastq.gz ENCFF002DTS.fastq.gz
sample_id experiment_id biosample factor treatment replicate control_id fastq_read1 fastq_read2
ENCBS609QTY ENCSR729LGA MCF-7 SP1 None 1 ENCBS216AOQ ENCFF957SQS.fastq.gz ENCFF582IOZ.fastq.gz
ENCBS200IWR ENCSR729LGA MCF-7 SP1 None 2 ENCBS034XKZ ENCFF330MCZ.fastq.gz ENCFF293YFE.fastq.gz
ENCBS216AOQ ENCSR217LRF MCF-7 Control None 1 ENCBS216AOQ ENCFF002DTU.fastq.gz ENCFF002EFI.fastq.gz
ENCBS034XKZ ENCSR217LRF MCF-7 Control None 2 ENCBS034XKZ ENCFF002EFG.fastq.gz ENCFF002DTS.fastq.gz
......@@ -48,6 +48,7 @@ def check_design_headers(design, paired):
# Default headers
design_template = [
'sample_id',
'experiment_id',
'biosample',
'factor',
'treatment',
......@@ -83,6 +84,27 @@ def check_controls(design):
list(missing_controls))
def check_replicates(design):
    '''Check if design file has unique replicate numbers for an experiment.

    Rows are grouped by 'experiment_id'; within each group every value in
    the 'replicate' column must occur only once.

    Args:
        design: design table with 'experiment_id' and 'replicate' columns
            (presumably a pandas DataFrame, given the groupby/apply usage).

    Raises:
        Exception: if at least one experiment lists the same replicate
            number more than once. The message names those experiments.
    '''

    logger.info("Running replicate check.")

    # One list of replicate numbers per experiment, indexed by experiment_id.
    experiment_replicates = design.groupby('experiment_id')['replicate'] \
        .apply(list)

    # A list that shrinks when converted to a set holds a repeated value.
    duplicated_replicates = [
        experiment
        for experiment, replicates in experiment_replicates.items()
        if len(replicates) != len(set(replicates))
    ]

    if duplicated_replicates:
        logger.error('Duplicate replicates in experiments: %s',
                     duplicated_replicates)
        raise Exception("Duplicate replicates in experiments: %s" %
                        duplicated_replicates)
def check_files(design, fastq, paired):
'''Check if design file has the files found.'''
......@@ -126,6 +148,7 @@ def main():
# Check design file
check_design_headers(design_file, args.paired)
check_controls(design_file)
check_replicates(design_file)
new_design = check_files(design_file, fastq_file, args.paired)
# Write out new design file
......
......@@ -6,11 +6,11 @@ from io import StringIO
import check_design
DESIGN_STRING = """sample_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\tfastq_read1
A_1\tLiver\tH3K27ac\tNone\t1\tB_1\tA_1.fastq.gz
A_2\tLiver\tH3K27ac\tNone\t2\tB_2\tA_2.fastq.gz
B_1\tLiver\tInput\tNone\t1\tB_1\tB_1.fastq.gz
B_2\tLiver\tInput\tNone\t2\tB_2\tB_2.fastq.gz
DESIGN_STRING = """sample_id\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\tfastq_read1
A_1\tA\tLiver\tH3K27ac\tNone\t1\tB_1\tA_1.fastq.gz
A_2\tA\tLiver\tH3K27ac\tNone\t2\tB_2\tA_2.fastq.gz
B_1\tB\tLiver\tInput\tNone\t1\tB_1\tB_1.fastq.gz
B_2\tB\tLiver\tInput\tNone\t2\tB_2\tB_2.fastq.gz
"""
FASTQ_STRING = """
......@@ -56,6 +56,13 @@ def design_3(design):
return design_df
@pytest.fixture
def design_4(design):
    '''Design in which sample B_2 carries replicate number 1.'''
    # Overwrite the replicate of sample B_2 (experiment B) so that it is 1
    b2_rows = design['sample_id'] == 'B_2'
    design.loc[b2_rows, 'replicate'] = 1
    return design
@pytest.fixture
def fastq_files_1(fastq_files):
# Drop B_2.fastq.gz
......@@ -100,3 +107,10 @@ def test_check_files_output_pairedend(design_3, fastq_files):
paired = True
new_design = check_design.check_files(design_3, fastq_files, paired)
assert new_design.loc[0, 'fastq_read2'] == "/path/to/file/A_2.fastq.gz"
def test_check_replicates(design_4):
    '''check_replicates raises when an experiment repeats a replicate.'''
    with pytest.raises(Exception) as excinfo:
        check_design.check_replicates(design_4)
    # The assertion sits outside the `with` block so it runs after the raise.
    assert str(excinfo.value) == "Duplicate replicates in experiments: ['B']"
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment