Skip to content
Snippets Groups Projects
Commit 4c00f7ea authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Convert bam to tagAlign.

parent bdf276ec
Branches
Tags
No related merge requests found
...@@ -23,6 +23,10 @@ process { ...@@ -23,6 +23,10 @@ process {
module = ['python/3.6.1-2-anaconda', 'deeptools/2.5.0.1'] module = ['python/3.6.1-2-anaconda', 'deeptools/2.5.0.1']
executor = 'local' executor = 'local'
} }
$convertReads {
module = ['python/3.6.1-2-anaconda', 'samtools/1.4.1', 'bedtools/2.26.0']
cpus = 32
}
} }
params { params {
......
...@@ -144,6 +144,7 @@ process filterReads { ...@@ -144,6 +144,7 @@ process filterReads {
output: output:
set sampleId, file('*.bam'), file('*.bai'), biosample, factor, treatment, replicate, controlId into dedupReads set sampleId, file('*.bam'), file('*.bai'), biosample, factor, treatment, replicate, controlId into dedupReads
set sampleId, file('*.bam'), biosample, factor, treatment, replicate, controlId into convertReads
file '*flagstat.qc' into dedupReadsStats file '*flagstat.qc' into dedupReadsStats
file '*pbc.qc' into dedupReadsComplexity file '*pbc.qc' into dedupReadsComplexity
file '*dup.qc' into dupReads file '*dup.qc' into dupReads
...@@ -189,3 +190,32 @@ process experimentQC { ...@@ -189,3 +190,32 @@ process experimentQC {
""" """
} }
// Convert reads to bam
process convertReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, deduped, biosample, factor, treatment, replicate, controlId from convertReads
output:
set sampleId, file('*.tagAlign.gz'), file('*.bedpe.gz'), biosample, factor, treatment, replicate, controlId into dedupReads
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/convert_reads.py -b $deduped -p
"""
}
else {
"""
python3 $baseDir/scripts/convert_reads.py -b $deduped
"""
}
}
#!/usr/bin/env python3
'''Convert tagAlign files for further processing.'''
import os
import argparse
import shutil
import shlex
import logging
from multiprocessing import cpu_count
import utils
EPILOG = '''
For more details:
%(prog)s --help
'''
# SETTINGS
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.INFO)
def get_args():
'''Define arguments.'''
parser = argparse.ArgumentParser(
description=__doc__, epilog=EPILOG,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-b', '--bam',
help="The bam file to run filtering and qc on.",
required=True)
parser.add_argument('-p', '--paired',
help="True/False if paired-end or single end.",
default=False,
action='store_true')
args = parser.parse_args()
return args
# Functions
def check_tools():
'''Checks for required componenets on user system'''
logger.info('Checking for required libraries and components on this system')
bedtools_path = shutil.which("bedtools")
if bedtools_path:
logger.info('Found bedtools: %s', bedtools_path)
else:
logger.error('Missing bedtools')
raise Exception('Missing bedtools')
samtools_path = shutil.which("samtools")
if samtools_path:
logger.info('Found samtools: %s', samtools_path)
else:
logger.error('Missing samtools')
raise Exception('Missing samtools')
def convert_mapped(bam, bam_basename):
'''Use bedtools to convert to tagAlign.'''
tag_filename = bam_basename + '.tagAlign.gz'
out, err = utils.run_pipe([
"bamToBed -i %s" % (bam),
r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'""",
"gzip -nc"], outfile=tag_filename)
def convert_mapped_pe(bam, bam_basename):
'''Use bedtools to convert to tagAlign PE data.'''
bedpe_filename = bam_basename + ".bedpe.gz"
# Name sort bam to make BEDPE
nmsrt_bam_filename = bam_basename + ".nmsrt.bam"
samtools_sort_command = \
"samtools sort -n -@%d -o %s %s" \
% (cpu_count(), nmsrt_bam_filename, bam)
logger.info(samtools_sort_command)
utils.check_output(shlex.split(samtools_sort_command))
out, err = utils.run_pipe([
"bamToBed -bedpe -mate1 -i %s" % (nmsrt_bam_filename),
"gzip -nc"],
outfile=bedpe_filename)
def main():
args = get_args()
paired = args.paired
bam = args.bam
# Create a file handler
handler = logging.FileHandler('convert_reads.log')
logger.addHandler(handler)
# Check if tools are present
check_tools()
# Convert PE or SE to tagAlign
bam_basename = os.path.basename(
utils.strip_extensions(bam, ['.bam']))
convert_mapped(bam, bam_basename)
if paired: # paired-end data
convert_mapped_pe(bam, bam_basename)
if __name__ == '__main__':
main()
#!/usr/bin/env python3
import pytest
import os
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../output/convertReads/'
def test_convert_reads_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.tagAlign.gz'))
def test_map_qc_pairedend():
# Do the same thing for paired end data
# Also check that bedpe exists
pass
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment