Skip to content
Snippets Groups Projects
Commit ba3af111 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Merge branch '12-tagAlign' into 'master'

Resolve "Make tagAlign files"

Closes #12

See merge request !10
parents bdf276ec b801983f
1 merge request!10Resolve "Make tagAlign files"
Pipeline #1102 failed with stage
in 1 hour, 20 minutes, and 51 seconds
......@@ -21,7 +21,11 @@ process {
}
$experimentQC {
module = ['python/3.6.1-2-anaconda', 'deeptools/2.5.0.1']
executor = 'local'
cpus = 32
}
$convertReads {
module = ['python/3.6.1-2-anaconda', 'samtools/1.4.1', 'bedtools/2.26.0']
cpus = 32
}
}
......
......@@ -144,6 +144,7 @@ process filterReads {
output:
set sampleId, file('*.bam'), file('*.bai'), biosample, factor, treatment, replicate, controlId into dedupReads
set sampleId, file('*.bam'), biosample, factor, treatment, replicate, controlId into convertReads
file '*flagstat.qc' into dedupReadsStats
file '*pbc.qc' into dedupReadsComplexity
file '*dup.qc' into dupReads
......@@ -189,3 +190,32 @@ process experimentQC {
"""
}
// Convert reads to bam
process convertReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, deduped, biosample, factor, treatment, replicate, controlId from convertReads
output:
set sampleId, file('*.tagAlign.gz'), file('*.bed{pe,se}.gz'), biosample, factor, treatment, replicate, controlId into tagReads
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/convert_reads.py -b $deduped -p
"""
}
else {
"""
python3 $baseDir/scripts/convert_reads.py -b $deduped
"""
}
}
#!/usr/bin/env python3
'''Convert tagAlign files for further processing.'''
import os
import argparse
import shutil
import shlex
import logging
from multiprocessing import cpu_count
import utils
EPILOG = '''
For more details:
%(prog)s --help
'''
# SETTINGS
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.INFO)
def get_args():
'''Define arguments.'''
parser = argparse.ArgumentParser(
description=__doc__, epilog=EPILOG,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-b', '--bam',
help="The bam file to run filtering and qc on.",
required=True)
parser.add_argument('-p', '--paired',
help="True/False if paired-end or single end.",
default=False,
action='store_true')
args = parser.parse_args()
return args
# Functions
def check_tools():
'''Checks for required componenets on user system'''
logger.info('Checking for required libraries and components on this system')
bedtools_path = shutil.which("bedtools")
if bedtools_path:
logger.info('Found bedtools: %s', bedtools_path)
else:
logger.error('Missing bedtools')
raise Exception('Missing bedtools')
samtools_path = shutil.which("samtools")
if samtools_path:
logger.info('Found samtools: %s', samtools_path)
else:
logger.error('Missing samtools')
raise Exception('Missing samtools')
def convert_mapped(bam, tag_filename):
'''Use bedtools to convert to tagAlign.'''
out, err = utils.run_pipe([
"bamToBed -i %s" % (bam),
r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'""",
"gzip -nc"], outfile=tag_filename)
def convert_mapped_pe(bam, bam_basename):
'''Use bedtools to convert to tagAlign PE data.'''
bedpe_filename = bam_basename + ".bedpe.gz"
# Name sort bam to make BEDPE
nmsrt_bam_filename = bam_basename + ".nmsrt.bam"
samtools_sort_command = \
"samtools sort -n -@%d -o %s %s" \
% (cpu_count(), nmsrt_bam_filename, bam)
logger.info(samtools_sort_command)
utils.check_output(shlex.split(samtools_sort_command))
out, err = utils.run_pipe([
"bamToBed -bedpe -mate1 -i %s" % (nmsrt_bam_filename),
"gzip -nc"],
outfile=bedpe_filename)
def main():
args = get_args()
paired = args.paired
bam = args.bam
# Create a file handler
handler = logging.FileHandler('convert_reads.log')
logger.addHandler(handler)
# Check if tools are present
check_tools()
# Convert PE or SE to tagAlign
bam_basename = os.path.basename(
utils.strip_extensions(bam, ['.bam']))
tag_filename = bam_basename + 'tagAlign.gz'
convert_mapped(bam, tag_filename)
if paired: # paired-end data
convert_mapped_pe(bam, bam_basename)
else:
bedse_filename = bam_basename + ".bedse.gz"
shutil(tag_filename, bedse_filename)
if __name__ == '__main__':
main()
#!/usr/bin/env python3
import pytest
import os
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../output/convertReads/'
def test_convert_reads_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.tagAlign.gz'))
assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.bedse.gz'))
def test_map_qc_pairedend():
# Do the same thing for paired end data
# Also check that bedpe exists
pass
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment