Skip to content
Snippets Groups Projects

Resolve "fix consensus peaks"

Merged Holly Ruess requested to merge 24-fixConsensusPeaks into master
Compare and
6 files
+ 178
173
Preferences
File browser
Compare changes
@@ -5,8 +5,8 @@
import os
import argparse
import shutil
import subprocess
import logging
from multiprocessing import cpu_count
import utils
from xcor import xcor as calculate_xcor
@@ -38,10 +38,6 @@ def get_args():
help="The cross-correlation file (if already calculated).",
required=True)
parser.add_argument('-c', '--con',
help="The control tagAling file used for peak calling.",
default='none')
parser.add_argument('-s', '--sample',
help="The sample id to name the peak files.",
required=True)
@@ -59,11 +55,6 @@ def get_args():
default=False,
action='store_true')
parser.add_argument('-a', '--atac',
help="True/False if ATAC-seq or ChIP-seq.",
default=False,
action='store_true')
args = parser.parse_args()
return args
@@ -75,16 +66,19 @@ def check_tools():
logger.info('Checking for required libraries and components on this system')
r_path = shutil.which("R")
if r_path:
logger.info('Found R: %s', r_path)
else:
logger.error('Missing R')
raise Exception('Missing R')
macs_path = shutil.which("macs2")
if r_path:
if macs_path:
logger.info('Found MACS2: %s', macs_path)
# Get Version
macs_version_command = "macs2 --version"
macs_version = subprocess.check_output(macs_version_command, shell=True, stderr=subprocess.STDOUT)
# Write to file
macs_file = open("version_macs.txt", "wb")
macs_file.write(macs_version)
macs_file.close()
else:
logger.error('Missing MACS2')
raise Exception('Missing MACS2')
@@ -92,6 +86,18 @@ def check_tools():
bg_bw_path = shutil.which("bedGraphToBigWig")
if bg_bw_path:
logger.info('Found bedGraphToBigWig: %s', bg_bw_path)
# Get Version
bg_bw_version_command = "bedGraphToBigWig"
try:
subprocess.check_output(bg_bw_version_command, shell=True, stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as e:
bg_bw_version = e.output
# Write to file
bg_bw_file = open("version_bedGraphToBigWig.txt", "wb")
bg_bw_file.write(bg_bw_version)
bg_bw_file.close()
else:
logger.error('Missing bedGraphToBigWig')
raise Exception('Missing bedGraphToBigWig')
@@ -99,37 +105,30 @@ def check_tools():
bedtools_path = shutil.which("bedtools")
if bedtools_path:
logger.info('Found bedtools: %s', bedtools_path)
# Get Version
bedtools_version_command = "bedtools --version"
bedtools_version = subprocess.check_output(bedtools_version_command, shell=True)
# Write to file
bedtools_file = open("version_bedtools.txt", "wb")
bedtools_file.write(bedtools_version)
bedtools_file.close()
else:
logger.error('Missing bedtools')
raise Exception('Missing bedtools')
def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes, atac):
# Extract the fragment length estimate from column 3 of the
# cross-correlation scores file
if not atac:
with open(xcor, 'r') as xcor_fh:
firstline = xcor_fh.readline()
frag_lengths = firstline.split()[2] # third column
fragment_length = frag_lengths.split(',')[0] # grab first value
logger.info("Fraglen %s" % (fragment_length))
else:
fragment_length = int(round(float(73)/2.0))
logger.info("Fraglen %s" % (fragment_length))
def call_peaks_macs(experiment, xcor, prefix, genome_size, chrom_sizes):
'''Call peaks and signal tracks'''
# Generate narrow peaks and preliminary signal tracks
if atac:
command = 'macs2 callpeak ' + \
# For ATAC-seq use a shift of 37 and extsize of 73
command = 'macs2 callpeak ' + \
'-t %s ' % (experiment) + \
'-f BED -n %s ' % (prefix) + \
'-g %s -p 1e-2 --nomodel --shift 73 --extsize %s --keep-dup all -B --SPMR' % (genome_size, fragment_length)
else:
command = 'macs2 callpeak ' + \
'-t %s -c %s ' % (experiment, control) + \
'-f BED -n %s ' % (prefix) + \
'-g %s -p 1e-2 --nomodel --shift 0 --extsize %s --keep-dup all -B --SPMR' % (genome_size, fragment_length)
'-g %s -p 1e-2 --nomodel --shift 37 --extsize 73 --keep-dup all -B --SPMR' % (genome_size)
logger.info(command)
returncode = utils.block_on(command)
@@ -138,12 +137,11 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes,
# MACS2 sometimes calls features off the end of chromosomes.
# Remove coordinates outside chromosome sizes
narrowpeak_fn = '%s_peaks.narrowPeak' % (prefix)
int_narrowpeak_fn = '%s_peaks.narrowPeak' % (prefix)
narrowpeak_fn = '%s.narrowPeak' % (prefix)
clipped_narrowpeak_fn = 'clipped-%s' % (narrowpeak_fn)
steps = ['slopBed -i %s -g %s -b 0' % (narrowpeak_fn, chrom_sizes),
steps = ['slopBed -i %s -g %s -b 0' % (int_narrowpeak_fn, chrom_sizes),
'bedClip stdin %s %s' % (chrom_sizes, clipped_narrowpeak_fn)]
out, err = utils.run_pipe(steps)
@@ -155,10 +153,9 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes,
# Sort by Col8 in descending order and replace long peak names in Column 4
# with Peak_<peakRank>
steps = [
'sort -k 8gr,8gr %s' % (rescaled_narrowpeak_fn),
r"""awk 'BEGIN{OFS="\t"}{$4="Peak_"NR ; print $0}'"""
]
steps = ['sort -k 8gr,8gr %s' % (rescaled_narrowpeak_fn),
r"""awk 'BEGIN{OFS="\t"}{$4="Peak_"NR ; print $0}'""",
]
out, err = utils.run_pipe(steps, '%s' % (narrowpeak_fn))
@@ -168,10 +165,10 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes,
# Col2 (chromosome size in bp).
command = 'macs2 bdgcmp ' + \
'-t %s_treat_pileup.bdg ' % (prefix) + \
'-c %s_control_lambda.bdg ' % (prefix) + \
'-o %s_FE.bdg ' % (prefix) + \
'-m FE'
'-t %s_treat_pileup.bdg ' % (prefix) + \
'-c %s_control_lambda.bdg ' % (prefix) + \
'-o %s_FE.bdg ' % (prefix) + \
'-m FE'
logger.info(command)
returncode = utils.block_on(command)
@@ -193,9 +190,9 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes,
# Convert bedgraph to bigwig
command = 'bedGraphToBigWig ' + \
'%s ' % (fc_bedgraph_sorted_fn) + \
'%s ' % (chrom_sizes) + \
'%s' % (fc_signal_fn)
'%s ' % (fc_bedgraph_sorted_fn) + \
'%s ' % (chrom_sizes) + \
'%s' % (fc_signal_fn)
logger.info(command)
returncode = utils.block_on(command)
@@ -203,31 +200,11 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes,
assert returncode == 0, "bedGraphToBigWig non-zero return"
# For -log10(p-value) signal tracks
if atac:
command = 'macs2 bdgcmp ' + \
'-t %s_treat_pileup.bdg ' % (prefix) + \
'-c %s_control_lambda.bdg ' % (prefix) + \
'-o %s_ppois.bdg ' % (prefix) + \
'-m FE'
else:
# Compute sval =
# min(no. of reads in ChIP, no. of reads in control) / 1,000,000
out, err = utils.run_pipe(['gzip -dc %s' % (experiment), 'wc -l'])
chip_reads = out.strip()
out, err = utils.run_pipe(['gzip -dc %s' % (control), 'wc -l'])
control_reads = out.strip()
sval = str(min(float(chip_reads), float(control_reads)) / 1000000)
logger.info("chip_reads = %s, control_reads = %s, sval = %s" %
(chip_reads, control_reads, sval))
command = 'macs2 bdgcmp ' + \
command = 'macs2 bdgcmp ' + \
'-t %s_treat_pileup.bdg ' % (prefix) + \
'-c %s_control_lambda.bdg ' % (prefix) + \
'-o %s_ppois.bdg ' % (prefix) + \
'-m ppois -S %s' % (sval)
'-m FE'
logger.info(command)
returncode = utils.block_on(command)
@@ -248,9 +225,9 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes,
# Convert bedgraph to bigwig
command = 'bedGraphToBigWig ' + \
'%s ' % (pvalue_bedgraph_sorted_fn) + \
'%s ' % (chrom_sizes) + \
'%s' % (pvalue_signal_fn)
'%s ' % (pvalue_bedgraph_sorted_fn) + \
'%s ' % (chrom_sizes) + \
'%s' % (pvalue_signal_fn)
logger.info(command)
returncode = utils.block_on(command)
@@ -260,18 +237,16 @@ def call_peaks_macs(experiment, xcor, control, prefix, genome_size, chrom_sizes,
# Remove temporary files
os.remove(clipped_narrowpeak_fn)
os.remove(rescaled_narrowpeak_fn)
os.remove(int_narrowpeak_fn)
def main():
args = get_args()
tag = args.tag
xcor = args.xcor
con = args.con
sample = args.sample
genome_size = args.genome
chrom_size = args.size
paired = args.paired
atac = args.atac
# Create a file handler
handler = logging.FileHandler('call_peaks.log')
@@ -281,13 +256,14 @@ def main():
check_tools()
# Calculate Cross-correlation if not already calcualted
if xcor == 'Calculate' and not atac:
if xcor == 'Calculate':
xcor_file = calculate_xcor(tag, paired)
else:
xcor_file = xcor
# Call Peaks using MACS2
call_peaks_macs(tag, xcor_file, con, sample, genome_size, chrom_size, atac)
call_peaks_macs(tag, xcor_file, sample, genome_size, chrom_size)
if __name__ == '__main__':