Skip to content
Snippets Groups Projects
Commit 3d6faead authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Add in making experiment design files.

parent 17b7de97
Branches
Tags
1 merge request!12Resolve "Pool and Pseudoreplication"
......@@ -31,6 +31,10 @@ process {
module = ['python/3.6.1-2-anaconda', 'phantompeakqualtools/1.2']
cpus = 32
}
// Settings for the defineExpDesignFiles process: load the Anaconda Python
// module and run the process with the 'local' executor.
$defineExpDesignFiles {
module = ['python/3.6.1-2-anaconda']
executor = 'local'
}
}
params {
......
......@@ -164,7 +164,7 @@ process filterReads {
}
// Define channel collecting new design file
// Define channel collecting dedup reads into new design file
dedupDesign = dedupReads
.map{ sampleId, bam, bai, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$bam\t$bai\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
......@@ -232,7 +232,7 @@ process crossReads {
output:
set sampleId, seTagAlign, tagAlign, file('*.cc.qc'), biosample, factor, treatment, replicate, controlId into xcorReads
set sampleId, tagAlign, file('*.cc.qc'), biosample, factor, treatment, replicate, controlId into xcorReads
set file('*.cc.qc'), file('*.cc.plot.pdf') into xcorReadsStats
script:
......@@ -249,3 +249,30 @@ process crossReads {
}
}
// Define channel collecting tagAlign and xcor into design file
// Each tuple emitted on xcorReads is rendered as one tab-separated row; the
// rows are then gathered by collectFile into design_xcor.tsv, which starts
// with the header given in `seed` and is stored under $baseDir/output/design.
xcorDesign = xcorReads
.map{ sampleId, tagAlign, xcor, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$tagAlign\t$xcor\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_xcor.tsv', seed:"sample_id\ttag_align\txcor\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
// Make Experiment design files to be read in for downstream analysis
// Runs scripts/experiment_design.py on the collected xcor design file and
// emits every *.tsv file found in the task directory on experimentObjs.
process defineExpDesignFiles {
// Copy the produced files into the shared design output directory.
publishDir "$baseDir/output/design", mode: 'copy'
input:
// Design table produced by the xcorDesign channel.
file xcorDesign
output:
file '*.tsv' into experimentObjs
script:
"""
python3 $baseDir/scripts/experiment_design.py -d $xcorDesign
"""
}
#!/usr/bin/env python3
'''Generate experiment design files for downstream processing.'''
import argparse
import logging
import pandas as pd
# Text passed as the argparse `epilog`; argparse substitutes %(prog)s with
# the program name when the help output is rendered.
EPILOG = '''
For more details:
%(prog)s --help
'''
# SETTINGS
# Module-level logger. It records INFO and above, does not propagate to
# ancestor loggers, and starts with only a NullHandler attached, so nothing
# is emitted until another handler is added.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.propagate = False
logger.addHandler(logging.NullHandler())
def get_args():
    '''Parse and return the command-line arguments.

    Returns:
        argparse.Namespace with a single attribute, ``design``: the path
        to the tab-separated design file given with ``-d``/``--design``
        (required).
    '''
    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        # Keep the line breaks of the description/epilog as written.
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-d', '--design',
                        help="The design file to make experiments (tsv format).",
                        required=True)

    return parser.parse_args()
def update_controls(design):
    '''Attach each sample's control tagAlign file and drop self-controls.

    Args:
        design: pandas DataFrame with at least the columns ``sample_id``,
            ``tag_align`` and ``control_id``. A ``control_tag_align``
            column is added to this frame in place.

    Returns:
        A DataFrame holding only the rows whose ``control_id`` differs
        from their ``sample_id``, including the new ``control_tag_align``
        column.

    Raises:
        KeyError: if a ``control_id`` value does not appear in the
            ``sample_id`` column.
    '''
    logger.info("Running control file update.")

    # Lookup table: sample_id -> tagAlign file of that sample.
    tag_align_by_sample = dict(zip(design['sample_id'], design['tag_align']))

    # Fail with a readable message instead of a bare KeyError raised from
    # inside a per-row lookup.
    missing = set(design['control_id']) - set(tag_align_by_sample)
    if missing:
        raise KeyError(
            "control_id values not found in sample_id column: "
            + ", ".join(sorted(str(control) for control in missing)))

    design['control_tag_align'] = \
        design['control_id'].map(tag_align_by_sample)

    logger.info("Removing rows that are their own control.")
    design = design[design['control_id'] != design['sample_id']]

    return design
def make_experiment_design(design):
    '''Write one tab-separated design file per experiment.

    The rows of ``design`` are grouped by the ``experiment_id`` column and
    each group is written, with a header and without the index, to
    ``<experiment_id>.tsv`` in the current working directory.

    Args:
        design: pandas DataFrame containing an ``experiment_id`` column.

    NOTE(review): the design_xcor.tsv header written by the pipeline
    (sample_id, tag_align, xcor, biosample, factor, treatment, replicate,
    control_id) has no ``experiment_id`` column, so ``groupby`` would raise
    KeyError on that file -- confirm where the column is meant to come from.
    '''
    logger.info("Running experiment design generation.")

    for experiment, df_experiment in design.groupby('experiment_id'):
        # str() keeps the filename valid when the group key is not a string.
        experiment_file = str(experiment) + '.tsv'
        # Write this experiment's rows only; the previous code referenced
        # `new_design`, which is not defined in this function.
        df_experiment.to_csv(
            experiment_file, header=True, sep='\t', index=False)
def main():
    '''Read the design file, add control files, and write experiment files.'''
    cli_args = get_args()

    # Send log records to a file in the current working directory.
    logger.addHandler(logging.FileHandler('experiment_generation.log'))

    # Load the tab-separated design table named on the command line.
    raw_design = pd.read_csv(cli_args.design, sep='\t')

    # Attach control tagAlign files, then write one file per experiment.
    with_controls = update_controls(raw_design)
    make_experiment_design(with_controls)


if __name__ == '__main__':
    main()
#!/usr/bin/env python3
import os
from io import StringIO

import pandas as pd
import pytest

import experiment_design
# Directory holding the experiment design files, resolved relative to this
# test module so the tests do not depend on the working directory.
# Requires `import os` at the top of the module.
test_output_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), '..', 'output', 'design')
# In-memory tab-separated design table used by the tests. Samples A_1 and A_2
# point at controls B_1 and B_2; the B rows list themselves as their control.
DESIGN_STRING = """sample_id\ttag_align\txcor\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id
A_1\tA_1.tagAlign.gz\tA\tLiver\tH3K27ac\tNone\t1\tB_1
A_2\tA_2.tagAlign.gz\tA\tLiver\tH3K27ac\tNone\t2\tB_2
B_1\tB_1.tagAlign.gz\tB\tLiver\tInput\tNone\t1\tB_1
B_2\tB_2.tagAlign.gz\tB\tLiver\tInput\tNone\t2\tB_2
"""
@pytest.fixture
def design_tag():
    '''Provide DESIGN_STRING parsed into a pandas DataFrame.'''
    return pd.read_csv(StringIO(DESIGN_STRING), sep="\t")
def test_check_update_controls_tag(design_tag):
    '''Row 0 (sample A_1) must get the tagAlign file of its control B_1.'''
    updated = experiment_design.update_controls(design_tag)
    control_file = updated.loc[0, 'control_tag_align']
    assert control_file == "B_1.tagAlign.gz"
def test_experiment_design_single_end():
    '''The design file for experiment ENCSR238SGC must exist on disk.'''
    expected_file = os.path.join(test_output_path, 'ENCSR238SGC.tsv')
    assert os.path.exists(expected_file)
def test_experiment_design_paired_end():
    '''Placeholder: do the same thing for paired end data.'''
    return None
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment