Skip to content
Snippets Groups Projects
Commit 17b7de97 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Merge branch '5-phantompeakcalls' into 'master'

Resolve "Include Phantompeakqualtools step"

Closes #5

See merge request !11
parents ea889a62 48fa7dbb
Branches
Tags
1 merge request!11Resolve "Include Phantompeakqualtools step"
Pipeline #1119 passed with stage
in 2 hours, 11 minutes, and 53 seconds
......@@ -27,6 +27,10 @@ process {
module = ['python/3.6.1-2-anaconda', 'samtools/1.4.1', 'bedtools/2.26.0']
cpus = 32
}
$crossReads {
module = ['python/3.6.1-2-anaconda', 'phantompeakqualtools/1.2']
cpus = 32
}
}
params {
......
install.packages("snow", repos="http://cran.us.r-project.org")
install.packages("snowfall", repos="http://cran.us.r-project.org")
install.packages("bitops", repos="http://cran.us.r-project.org")
install.packages("caTools", repos="http://cran.us.r-project.org")
source("http://bioconductor.org/biocLite.R")
biocLite("Rsamtools",suppressUpdates=TRUE)
install.packages("../../phantompeakqualtools/spp_1.14.tar.gz")
......@@ -219,3 +219,33 @@ process convertReads {
}
}
// Calculate Cross-correlation using phantompeaktools
process crossReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, seTagAlign, tagAlign, biosample, factor, treatment, replicate, controlId from tagReads
output:
set sampleId, seTagAlign, tagAlign, file('*.cc.qc'), biosample, factor, treatment, replicate, controlId into xcorReads
set file('*.cc.qc'), file('*.cc.plot.pdf') into xcorReadsStats
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/xcor.py -t $seTagAlign -p
"""
}
else {
"""
python3 $baseDir/scripts/xcor.py -t $seTagAlign
"""
}
}
#!/usr/bin/env python3
'''Compute cross-correlation analysis.'''
import os
import argparse
import shutil
import logging
from multiprocessing import cpu_count
import utils
EPILOG = '''
For more details:
%(prog)s --help
'''
# SETTINGS
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.INFO)
def get_args():
'''Define arguments.'''
parser = argparse.ArgumentParser(
description=__doc__, epilog=EPILOG,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-t', '--tag',
help="The tagAlign file to qc on.",
required=True)
parser.add_argument('-p', '--paired',
help="True/False if paired-end or single end.",
default=False,
action='store_true')
args = parser.parse_args()
return args
# Functions
def check_tools():
'''Checks for required componenets on user system'''
logger.info('Checking for required libraries and components on this system')
r_path = shutil.which("R")
if r_path:
logger.info('Found R: %s', r_path)
else:
logger.error('Missing R')
raise Exception('Missing R')
def xcor(tag, paired):
'''Use spp to calculate cross-correlation stats.'''
tag_basename = os.path.basename(utils.strip_extensions(tag, ['.gz']))
uncompressed_tag_filename = tag_basename
out, err = utils.run_pipe([
'gzip -dc %s > ' % (tag)], outfile=uncompressed_tag_filename)
# Subsample tagAlign file
NREADS = 15000000
subsampled_tag_filename = \
tag_basename + ".%d.tagAlign.gz" % (NREADS/1000000)
steps = [
'grep -v "chrM" %s' % (uncompressed_tag_filename),
'shuf -n %d --random-source=%s' % (NREADS, uncompressed_tag_filename)
]
if paired:
steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""])
steps.extend(['gzip -nc'])
out, err = utils.run_pipe(steps, outfile=subsampled_tag_filename)
# Calculate Cross-correlation QC scores
cc_scores_filename = subsampled_tag_filename + ".cc.qc"
cc_plot_filename = subsampled_tag_filename + ".cc.plot.pdf"
# CC_SCORE FILE format
# Filename <tab>
# numReads <tab>
# estFragLen <tab>
# corr_estFragLen <tab>
# PhantomPeak <tab>
# corr_phantomPeak <tab>
# argmin_corr <tab>
# min_corr <tab>
# phantomPeakCoef <tab>
# relPhantomPeakCoef <tab>
# QualityTag
run_spp_command = shutil.which("run_spp.R")
out, err = utils.run_pipe([
"Rscript %s -c=%s -p=%d -filtchr=chrM -savp=%s -out=%s" %
(run_spp_command, subsampled_tag_filename, cpu_count(),
cc_plot_filename, cc_scores_filename)
])
def main():
args = get_args()
paired = args.paired
tag = args.tag
# Create a file handler
handler = logging.FileHandler('xcor.log')
logger.addHandler(handler)
# Check if tools are present
check_tools()
# Calculate Cross-correlation
xcor(tag, paired)
if __name__ == '__main__':
main()
......@@ -8,8 +8,8 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
def test_convert_reads_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.tagAlign.gz'))
assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.bedse.gz'))
assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.tagAlign.gz'))
assert os.path.exists(os.path.join(test_output_path, 'ENCFF646LXU.filt.nodup.bedse.gz'))
def test_map_qc_pairedend():
......
#!/usr/bin/env python3
import pytest
import os
import pandas as pd
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../output/crossReads/'
def test_convert_reads_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCFF833BLU.filt.nodup.tagAlign.15.tagAlign.gz.cc.plot.pdf'))
qc_file = os.path.join(test_output_path,"ENCFF833BLU.filt.nodup.tagAlign.15.tagAlign.gz.cc.qc")
df_xcor = pd.read_csv(qc_file, sep="\t", header=None)
assert df_xcor[2].iloc[0] == '195,215,230'
assert df_xcor[8].iloc[0] == 1.024836
assert df_xcor[9].iloc[0] == 1.266678
def test_map_qc_pairedend():
# Do the same thing for paired end data
pass
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment