From 45277ef8ea8cf3dfa10ed436f6414a4128c13b05 Mon Sep 17 00:00:00 2001 From: Venkat Malladi <venkat.malladi@utsouthwestern.edu> Date: Wed, 15 Nov 2017 12:34:50 -0600 Subject: [PATCH] Fix syntax. --- workflow/conf/biohpc.config | 8 ++++---- workflow/scripts/call_peaks_macs.py | 29 ++++++++++++++++++++++++----- workflow/scripts/utils.py | 13 +++++++------ 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config index d382fe8..74ee96d 100644 --- a/workflow/conf/biohpc.config +++ b/workflow/conf/biohpc.config @@ -40,7 +40,7 @@ process { executor = 'local' } $callPeaksMACS { - module = ['python/3.6.1-2-anaconda', 'phantompeakqualtools/1.2', 'macs/2.1.0-20151222', 'UCSC_userApps/v317'] + module = ['python/3.6.1-2-anaconda', 'phantompeakqualtools/1.2', 'macs/2.1.0-20151222', 'UCSC_userApps/v317', 'bedtools/2.26.0'] cpus = 32 } } @@ -51,17 +51,17 @@ params { 'GRCh38' { bwa = '/project/shared/bicf_workflow_ref/GRCh38' genomesize = 'hs' - chromsizes = '/project/shared/bicf_workflow_ref/GRCh38/chrom.sizes' + chromsizes = '/project/shared/bicf_workflow_ref/GRCh38/genomefile.txt' } 'GRCh37' { bwa = '/project/shared/bicf_workflow_ref/GRCh37' genomesize = 'hs' - chromsizes = '/project/shared/bicf_workflow_ref/GRCh37/chrom.sizes' + chromsizes = '/project/shared/bicf_workflow_ref/GRCh37/genomefile.txt' } 'GRCm38' { bwa = '/project/shared/bicf_workflow_ref/GRCm38' genomesize = 'mm' - chromsizes = '/project/shared/bicf_workflow_ref/GRCm38/chrom.sizes' + chromsizes = '/project/shared/bicf_workflow_ref/GRCm38/genomefile.txt' } } } diff --git a/workflow/scripts/call_peaks_macs.py b/workflow/scripts/call_peaks_macs.py index c89c12e..e670496 100644 --- a/workflow/scripts/call_peaks_macs.py +++ b/workflow/scripts/call_peaks_macs.py @@ -85,12 +85,19 @@ def check_tools(): raise Exception('Missing MACS2') bg_bw_path = shutil.which("bedGraphToBigWig") - if r_path: + if bg_bw_path: logger.info('Found bedGraphToBigWig: %s', bg_bw_path) else: logger.error('Missing bedGraphToBigWig') raise Exception('Missing bedGraphToBigWig') + bedtools_path = shutil.which("bedtools") + if bedtools_path: + logger.info('Found bedtools: %s', bedtools_path) + else: + logger.error('Missing bedtools') + raise Exception('Missing bedtools') + def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes): @@ -119,7 +126,7 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes) # Remove coordinates outside chromosome sizes narrowpeak_fn = '%s_peaks.narrowPeak' % (prefix) - clipped_narrowpeak_fn = '%s-clipped' % (filename) + clipped_narrowpeak_fn = 'clipped-%s' % (narrowpeak_fn) steps = ['slopBed -i %s -g %s -b 0' % (narrowpeak_fn, chrom_sizes), @@ -159,15 +166,21 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes) # Remove coordinates outside chromosome sizes (MACS2 bug) fc_bedgraph_fn = '%s.fc.signal.bedgraph' % (prefix) + fc_bedgraph_sorted_fn = 'sorted-%s' % (fc_bedgraph_fn) fc_signal_fn = "%s.fc_signal.bw" % (prefix) steps = ['slopBed -i %s_FE.bdg -g %s -b 0' % (prefix, chrom_sizes), 'bedClip stdin %s %s' % (chrom_sizes, fc_bedgraph_fn)] out, err = utils.run_pipe(steps) + # Sort file + out, err = utils.run_pipe([ + 'sort -k1,1 -k2,2n %s' % (fc_bedgraph_fn)], + fc_bedgraph_sorted_fn) + # Convert bedgraph to bigwig command = 'bedGraphToBigWig ' + \ - '%s ' % (fc_bedgraph_fn) + \ + '%s ' % (fc_bedgraph_sorted_fn) + \ '%s ' % (chrom_sizes) + \ '%s' % (fc_signal_fn) @@ -182,7 +195,7 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes) # min(no. of reads in ChIP, no. of reads in control) / 1,000,000 out, err = utils.run_pipe(['gzip -dc %s' % (experiment), 'wc -l']) chip_reads = out.strip() - out, err = common.run_pipe(['gzip -dc %s' % (control), 'wc -l']) + out, err = utils.run_pipe(['gzip -dc %s' % (control), 'wc -l']) control_reads = out.strip() sval = str(min(float(chip_reads), float(control_reads)) / 1000000) @@ -201,15 +214,21 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes) # Remove coordinates outside chromosome sizes (MACS2 bug) pvalue_bedgraph_fn = '%s.pval.signal.bedgraph' % (prefix) + pvalue_bedgraph_sorted_fn = 'sort-%s' % (pvalue_bedgraph_fn) pvalue_signal_fn = "%s.pvalue_signal.bw" % (prefix) steps = ['slopBed -i %s_ppois.bdg -g %s -b 0' % (prefix, chrom_sizes), 'bedClip stdin %s %s' % (chrom_sizes, pvalue_bedgraph_fn)] out, err = utils.run_pipe(steps) + # Sort file + out, err = utils.run_pipe([ + 'sort -k1,1 -k2,2n %s' % (fc_bedgraph_fn)], + pvalue_bedgraph_sorted_fn) + # Convert bedgraph to bigwig command = 'bedGraphToBigWig ' + \ - '%s ' % (pvalue_bedgraph_fn) + \ + '%s ' % (pvalue_bedgraph_sorted_fn) + \ '%s ' % (chrom_sizes) + \ '%s' % (fc_signal_fn) diff --git a/workflow/scripts/utils.py b/workflow/scripts/utils.py index ffedd7f..5fe8e4f 100644 --- a/workflow/scripts/utils.py +++ b/workflow/scripts/utils.py @@ -6,6 +6,7 @@ import shlex import logging import subprocess +import sys logger = logging.getLogger(__name__) @@ -48,9 +49,9 @@ def run_pipe(steps, outfile=None): def block_on(command): process = subprocess.Popen(shlex.split(command), stderr=subprocess.STDOUT, stdout=subprocess.PIPE) - for line in iter(process.stdout.readline, ''): - sys.stdout.buffer.write(line) - process.wait() + for line in iter(process.stdout.readline, b''): + sys.stdout.write(line.decode('utf-8')) + process.communicate() return process.returncode @@ -85,11 +86,11 @@ def count_lines(filename): def rescale_scores(filename, scores_col, new_min=10, new_max=1000): n_peaks = count_lines(filename) - sorted_fn = '%s-sorted' % (filename) - rescaled_fn = '%s-rescaled' % (filename) + sorted_fn = 'sorted-%s' % (filename) + rescaled_fn = 'rescaled-%s' % (filename) out, err = run_pipe([ - 'sort -k %dgr,%dgr %s' % (scores_col, scores_col, fn), + 'sort -k %dgr,%dgr %s' % (scores_col, scores_col, filename), r"""awk 'BEGIN{FS="\t";OFS="\t"}{if (NF != 0) print $0}'"""], sorted_fn) -- GitLab