Skip to content
Snippets Groups Projects
convert_reads.py 3.99 KiB
Newer Older
#!/usr/bin/env python3

Venkat Malladi's avatar
Venkat Malladi committed
#
# * --------------------------------------------------------------------------
# * Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/LICENSE.md)
# * --------------------------------------------------------------------------
#

'''Convert tagAlign files for further processing.'''

import os
import argparse
import shutil
Venkat Malladi's avatar
Venkat Malladi committed
import subprocess
import shlex
import logging
from multiprocessing import cpu_count
import utils

EPILOG = '''
For more details:
        %(prog)s --help
'''

# SETTINGS

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.INFO)


def get_args():
    '''Define arguments.'''
    parser = argparse.ArgumentParser(
        description=__doc__, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    parser.add_argument('-b', '--bam',
Venkat Malladi's avatar
Venkat Malladi committed
                        help="The bam file to convert.",
                        required=True)

    parser.add_argument('-p', '--paired',
                        help="True/False if paired-end or single end.",
                        default=False,
                        action='store_true')

    args = parser.parse_args()
    return args

# Functions


def check_tools():
    '''Checks for required componenets on user system'''

    logger.info('Checking for required libraries and components on this system')

    bedtools_path = shutil.which("bedtools")
    if bedtools_path:
        logger.info('Found bedtools: %s', bedtools_path)

        # Get Version
        bedtools_version_command = "bedtools --version"
        bedtools_version = subprocess.check_output(bedtools_version_command, shell=True)

        # Write to file
Venkat Malladi's avatar
Venkat Malladi committed
        bedtools_file = open("version_bedtools.txt", "wb")
        bedtools_file.write(bedtools_version)
        bedtools_file.close()
    else:
        logger.error('Missing bedtools')
        raise Exception('Missing bedtools')

    samtools_path = shutil.which("samtools")
    if samtools_path:
        logger.info('Found samtools: %s', samtools_path)

        # Get Version
        samtools_version_command = "samtools --version"
        samtools_version = subprocess.check_output(samtools_version_command, shell=True)

        # Write to file
Venkat Malladi's avatar
Venkat Malladi committed
        samtools_file = open("version_samtools.txt", "wb")
        samtools_file.write(samtools_version)
        samtools_file.close()
    else:
        logger.error('Missing samtools')
        raise Exception('Missing samtools')


def convert_mapped(bam, tag_filename):
    '''Use bedtools to convert to tagAlign.'''

    out, err = utils.run_pipe([
        "bamToBed -i %s" % (bam),
        r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'""",
        "gzip -nc"], outfile=tag_filename)


def convert_mapped_pe(bam, bam_basename):
    '''Use bedtools to convert to tagAlign PE data.'''

    bedpe_filename = bam_basename + ".bedpe.gz"

    # Name sort bam to make BEDPE
    nmsrt_bam_filename = bam_basename + ".nmsrt.bam"
    samtools_sort_command = \
        "samtools sort -n -@%d -o %s %s" \
        % (cpu_count(), nmsrt_bam_filename, bam)

    logger.info(samtools_sort_command)
    subprocess.check_output(shlex.split(samtools_sort_command))

    out, err = utils.run_pipe([
        "bamToBed -bedpe -mate1 -i %s" % (nmsrt_bam_filename),
Venkat Malladi's avatar
Venkat Malladi committed
        "gzip -nc"], outfile=bedpe_filename)


def main():
    args = get_args()
    paired = args.paired
    bam = args.bam

    # Create a file handler
    handler = logging.FileHandler('convert_reads.log')
    logger.addHandler(handler)

    # Check if tools are present
    check_tools()

    # Convert PE or SE to tagAlign
    bam_basename = os.path.basename(
        utils.strip_extensions(bam, ['.bam', '.dedup']))
Venkat Malladi's avatar
Venkat Malladi committed
    tag_filename = bam_basename + '.tagAlign.gz'
    convert_mapped(bam, tag_filename)

    if paired:  # paired-end data
        convert_mapped_pe(bam, bam_basename)
Venkat Malladi's avatar
Venkat Malladi committed
        bedse_filename = bam_basename + ".bedse.gz"
Venkat Malladi's avatar
Venkat Malladi committed
        shutil.copy(tag_filename, bedse_filename)


if __name__ == '__main__':
    main()