Skip to content
Snippets Groups Projects
Commit 3d6faead authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Add in making experiment design files.

parent 17b7de97
Branches
Tags
No related merge requests found
......@@ -31,6 +31,10 @@ process {
module = ['python/3.6.1-2-anaconda', 'phantompeakqualtools/1.2']
cpus = 32
}
$defineExpDesignFiles {
module = ['python/3.6.1-2-anaconda']
executor = 'local'
}
}
params {
......
......@@ -164,7 +164,7 @@ process filterReads {
}
// Define channel collecting new design file
// Define channel collecting dedup reads intp new design file
dedupDesign = dedupReads
.map{ sampleId, bam, bai, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$bam\t$bai\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
......@@ -232,7 +232,7 @@ process crossReads {
output:
set sampleId, seTagAlign, tagAlign, file('*.cc.qc'), biosample, factor, treatment, replicate, controlId into xcorReads
set sampleId, tagAlign, file('*.cc.qc'), biosample, factor, treatment, replicate, controlId into xcorReads
set file('*.cc.qc'), file('*.cc.plot.pdf') into xcorReadsStats
script:
......@@ -249,3 +249,30 @@ process crossReads {
}
}
// Define channel collecting tagAlign and xcor into design file
xcorDesign = xcorReads
.map{ sampleId, tagAlign, xcor, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$tagAlign\t$xcor\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_xcor.tsv', seed:"sample_id\ttag_align\txcor\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
// Make Experiment design files to be read in for downstream analysis
process defineExpDesignFiles {
publishDir "$baseDir/output/design", mode: 'copy'
input:
file xcorDesign
output:
file '*.tsv' into experimentObjs
script:
"""
python3 $baseDir/scripts/experiment_design.py -d $xcorDesign
"""
}
#!/usr/bin/env python3
'''Generate experiment design files for downstream processing.'''
import argparse
import logging
import pandas as pd
EPILOG = '''
For more details:
%(prog)s --help
'''
# SETTINGS
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.INFO)
def get_args():
'''Define arguments.'''
parser = argparse.ArgumentParser(
description=__doc__, epilog=EPILOG,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-d', '--design',
help="The design file to make experiemnts (tsv format).",
required=True)
args = parser.parse_args()
return args
def update_controls(design):
'''Update design file to append controls list.'''
logger.info("Running control file update.")
file_dict = design[['sample_id', 'tag_align']] \
.set_index('sample_id').T.to_dict()
design['control_tag_align'] = design['control_id'] \
.apply(lambda x: file_dict[x]['tag_align'])
logger.info("Removing rows that are there own control.")
design = design[design['control_id'] != design['sample_id']]
return design
def make_experiment_design(design):
'''Make design file by grouping for each experiment'''
logger.info("Running experiment design generation.")
for experiment, df_experiment in design.groupby('experiment_id'):
experiment_file = experiment + '.tsv'
new_design.to_csv(experiment_file, header=True, sep='\t', index=False)
def main():
args = get_args()
# Create a file handler
handler = logging.FileHandler('experiment_generation.log')
logger.addHandler(handler)
# Read files
design_file = pd.read_csv(args.design, sep='\t')
# Update design file for check_controls
new_design = update_controls(design_file)
# write out experiment design files
make_experiment_design(new_design)
if __name__ == '__main__':
main()
#!/usr/bin/env python3
import pytest
import pandas as pd
from io import StringIO
import experiment_design
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../output/design/'
DESIGN_STRING = """sample_id\ttag_align\txcor\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id
A_1\tA_1.tagAlign.gz\tA\tLiver\tH3K27ac\tNone\t1\tB_1
A_2\tA_2.tagAlign.gz\tA\tLiver\tH3K27ac\tNone\t2\tB_2
B_1\tB_1.tagAlign.gz\tB\tLiver\tInput\tNone\t1\tB_1
B_2\tB_2.tagAlign.gz\tB\tLiver\tInput\tNone\t2\tB_2
"""
@pytest.fixture
def design_tag():
design_file = StringIO(DESIGN_STRING)
design_df = pd.read_csv(design_file, sep="\t")
return design_df
def test_check_update_controls_tag(design_tag):
new_design = experiment_design.update_controls(design_tag)
assert new_design.loc[0, 'control_tag_align'] == "B_1.tagAlign.gz"
def test_experiment_design_single_end():
assert os.path.exists(os.path.join(test_output_path, 'ENCSR238SGC.tsv'))
def test_experiment_design_paired_end():
# Do the same thing for paired end data
pass
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment