diff --git a/cross_correlation/xcor.py b/cross_correlation/xcor.py deleted file mode 100644 index 672fdbb4f14bc8edc64c018e6991a8c7b0d47be2..0000000000000000000000000000000000000000 --- a/cross_correlation/xcor.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python3 - -'''Compute cross-correlation analysis.''' - -import os -import argparse -import shutil -import logging -from multiprocessing import cpu_count -import sys -sys.path.append(os.path.abspath('../python_utils')) -import utils - -EPILOG = ''' -For more details: - %(prog)s --help -''' - -# SETTINGS - -logger = logging.getLogger(__name__) -logger.addHandler(logging.NullHandler()) -logger.propagate = False -logger.setLevel(logging.INFO) - - -def get_args(): - '''Define arguments.''' - - parser = argparse.ArgumentParser( - description=__doc__, epilog=EPILOG, - formatter_class=argparse.RawDescriptionHelpFormatter) - - parser.add_argument('-t', '--tag', - help="The tagAlign file to qc on.", - required=True) - - parser.add_argument('-p', '--paired', - help="True/False if paired-end or single end.", - default=False, - action='store_true') - - args = parser.parse_args() - return args - -# Functions - - -def check_tools(): - '''Checks for required componenets on user system''' - - logger.info('Checking for required libraries and components on this system') - - r_path = shutil.which("R") - if r_path: - logger.info('Found R: %s', r_path) - else: - logger.error('Missing R') - raise Exception('Missing R') - - -def xcor(tag, paired): - '''Use spp to calculate cross-correlation stats.''' - - tag_basename = os.path.basename(utils.strip_extensions(tag, ['.gz'])) - - uncompressed_tag_filename = tag_basename - out, err = utils.run_pipe([ - 'gzip -dc %s > ' % (tag)], outfile=uncompressed_tag_filename) - - # Subsample tagAlign file - numReads = 15000000 - - subsampled_tag_filename = \ - tag_basename + ".%d.tagAlign.gz" % (numReads/1000000) - steps = [ - 'grep -v "chrM" %s' % (uncompressed_tag_filename), - 'shuf -n %d --random-source=%s' % (numReads, uncompressed_tag_filename) - ] - if paired: - steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""]) - - steps.extend(['gzip -nc']) - - out, err = utils.run_pipe(steps, outfile=subsampled_tag_filename) - - # Calculate Cross-correlation QC scores - cc_scores_filename = subsampled_tag_filename + ".cc.qc" - cc_plot_filename = subsampled_tag_filename + ".cc.plot.pdf" - - # CC_SCORE FILE format - # Filename <tab> - # numReads <tab> - # estFragLen <tab> - # corr_estFragLen <tab> - # PhantomPeak <tab> - # corr_phantomPeak <tab> - # argmin_corr <tab> - # min_corr <tab> - # phantomPeakCoef <tab> - # relPhantomPeakCoef <tab> - # QualityTag - - run_spp_command = shutil.which("run_spp.R") - out, err = utils.run_pipe([ - "Rscript %s -c=%s -p=%d -filtchr=chrM -savp=%s -out=%s" % - (run_spp_command, subsampled_tag_filename, cpu_count(), - cc_plot_filename, cc_scores_filename) - ]) - - return cc_scores_filename - - -def main(): - args = get_args() - paired = args.paired - tag = args.tag - - # Create a file handler - handler = logging.FileHandler('xcor.log') - logger.addHandler(handler) - - # Check if tools are present - check_tools() - - # Calculate Cross-correlation - xcor_filename = xcor(tag, paired) - - -if __name__ == '__main__': - main()