Skip to content
Snippets Groups Projects
Commit b5d90522 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Add in xcor step and qc values.

parent 853c4d18
Branches
Tags
No related merge requests found
......@@ -27,6 +27,10 @@ process {
module = ['python/3.6.1-2-anaconda', 'samtools/1.4.1', 'bedtools/2.26.0']
cpus = 32
}
$crossReads {
module = ['python/3.6.1-2-anaconda', 'phantompeakqualtools/1.2']
cpus = 32
}
}
params {
......
install.packages("snow", repos="http://cran.us.r-project.org")
install.packages("snowfall", repos="http://cran.us.r-project.org")
install.packages("bitops", repos="http://cran.us.r-project.org")
install.packages("caTools", repos="http://cran.us.r-project.org")
source("http://bioconductor.org/biocLite.R")
biocLite("Rsamtools",suppressUpdates=TRUE)
install.packages("../../phantompeakqualtools/spp_1.14.tar.gz")
......@@ -219,3 +219,33 @@ process convertReads {
}
}
// Calculate Cross-correlation using phantompeaktools
process crossReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
input:
set sampleId, seTagAlign, tagAlign, biosample, factor, treatment, replicate, controlId from tagReads
output:
set sampleId, seTagAlign, tagAlign, file('*.cc.qc') biosample, factor, treatment, replicate, controlId into xcorReads
set file('*.cc.qc'), file('*.cc.plot.pdf') into xcorReadsStats
script:
if (pairedEnd) {
"""
python3 $baseDir/scripts/xcor.py -b $seTagAlign -p
"""
}
else {
"""
python3 $baseDir/scripts/xcor.py.py -b $seTagAlign
"""
}
}
#!/usr/bin/env python3
'''Compute cross-correlation analysis.'''
import os
import argparse
import shutil
import shlex
import logging
from multiprocessing import cpu_count
import utils
EPILOG = '''
For more details:
%(prog)s --help
'''
# SETTINGS
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.INFO)
def get_args():
'''Define arguments.'''
parser = argparse.ArgumentParser(
description=__doc__, epilog=EPILOG,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-t', '--tag',
help="The tagAlign file to qc on.",
required=True)
parser.add_argument('-p', '--paired',
help="True/False if paired-end or single end.",
default=False,
action='store_true')
args = parser.parse_args()
return args
# Functions
def check_tools():
'''Checks for required componenets on user system'''
logger.info('Checking for required libraries and components on this system')
r_path = shutil.which("R")
if r_path:
logger.info('Found R: %s', r_path)
else:
logger.error('Missing R')
raise Exception('Missing R')
def xcor(tag, paired):
'''Use spp to calculate cross-correlation stats.'''
tag_basename = tag.rstrip('.gz')
uncompressed_tag_filename = tag_basename
out, err = utils.run_pipe(['gzip -d %s' % (tag_basename)])
# Subsample tagAlign file
NREADS = 15000000
subsampled_tag_filename = \
tag_basename + \
".%d.tagAlign.gz" % (NREADS/1000000)
steps = [
'grep -v "chrM" %s' % (uncompressed_tag_filename),
'shuf -n %d --random-source=%s' % (NREADS, uncompressed_tag_filename)
]
if paired:
steps.extend([r"""awk 'BEGIN{OFS="\t"}{$4="N";$5="1000";print $0}'"""])
steps.extend(['gzip -nc'])
out, err = utils.run_pipe(steps, outfile=subsampled_tag_filename)
# Calculate Cross-correlation QC scores
cc_scores_filename = subsampled_tag_filename + ".cc.qc"
cc_plot_filename = subsampled_tag_filename + ".cc.plot.pdf"
# CC_SCORE FILE format
# Filename <tab>
# numReads <tab>
# estFragLen <tab>
# corr_estFragLen <tab>
# PhantomPeak <tab>
# corr_phantomPeak <tab>
# argmin_corr <tab>
# min_corr <tab>
# phantomPeakCoef <tab>
# relPhantomPeakCoef <tab>
# QualityTag
run_spp_command = shutil.which("run_spp.R")
out, err = utils.run_pipe([
"Rscript %s -c=%s -p=%d -filtchr=chrM -savp=%s -out=%s" %
(run_spp_command, subsampled_tag_filename, cpu_count(),
cc_plot_filename, cc_scores_filename)
])
def main():
args = get_args()
paired = args.paired
tag = args.tag
# Create a file handler
handler = logging.FileHandler('xcor.log')
logger.addHandler(handler)
# Check if tools are present
check_tools()
# Calculate Cross-correlation
xcor(tag, paired)
if __name__ == '__main__':
main()
#!/usr/bin/env python3
import pytest
import os
import pandas as pd
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../output/crossReads/'
def test_convert_reads_singleend():
assert os.path.exists(os.path.join(test_output_path, 'ENCFF833BLU.filt.nodup.tagAlign.15.tagAlign.gz.cc.plot.pdf'))
df_xcor = pd.read_csv("ENCFF833BLU.filt.nodup.tagAlign.15.tagAlign.gz.cc.qc", sep="\t", header=None)
assert df_xcor[2].iloc[0] = '195,215,230'
assert df_xcor[8].iloc[0] = 1.024836
assert df_xcor[9].iloc[0] = 1.266678
def test_map_qc_pairedend():
# Do the same thing for paired end data
pass
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment