Skip to content
Snippets Groups Projects
Commit 78b410c7 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Resolve merge conflicts.

parents ac16a48e cbc05001
Branches
Tags
1 merge request!18Resolve "Add in current chip-analysis functionality."
Pipeline #2730 failed with stages
in 6 hours, 23 minutes, and 3 seconds
......@@ -14,18 +14,10 @@ params.genomeSize = params.genome ? params.genomes[ params.genome ].genomesize ?
params.chromSizes = params.genome ? params.genomes[ params.genome ].chromsizes ?: false : false
params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false
params.cutoffRatio = 1.2
params.outDir= "$baseDir/output"
params.extendReadsLen = 100
params.topPeakCount = 600
// Define regular variables
pairedEnd = params.pairedEnd
designFile = params.designFile
genomeSize = params.genomeSize
genome = params.genome
chromSizes = params.chromSizes
fasta = params.fasta
cutoffRatio = params.cutoffRatio
topPeakCount = params.topPeakCount
// Check inputs
if( params.bwaIndex ){
bwaIndex = Channel
......@@ -42,11 +34,21 @@ readsList = Channel
.map { file -> [ file.getFileName().toString(), file.toString() ].join("\t")}
.collectFile( name: 'fileList.tsv', newLine: true )
// Define regular variables
pairedEnd = params.pairedEnd
designFile = params.designFile
genomeSize = params.genomeSize
chromSizes = params.chromSizes
fasta = params.fasta
cutoffRatio = params.cutoffRatio
outDir = params.outDir
extendReadsLen = params.extendReadsLen
topPeakCount = params.topPeakCount
// Check design file for errors
process checkDesignFile {
publishDir "$baseDir/output/design", mode: 'copy'
publishDir "$outDir/design", mode: 'copy'
input:
......@@ -87,7 +89,7 @@ rawReads = designFilePaths
process trimReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
publishDir "$outDir/${task.process}", mode: 'copy'
input:
......@@ -117,7 +119,7 @@ process trimReads {
process alignReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
publishDir "$outDir/${task.process}", mode: 'copy'
input:
......@@ -148,7 +150,7 @@ process alignReads {
process filterReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
publishDir "$outDir/${task.process}", mode: 'copy'
input:
......@@ -181,13 +183,13 @@ process filterReads {
dedupReads
.map{ sampleId, bam, bai, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$bam\t$bai\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_dedup.tsv', seed:"sample_id\tbam_reads\tbam_index\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
.collectFile(name:'design_dedup.tsv', seed:"sample_id\tbam_reads\tbam_index\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$outDir/design")
.into { dedupDesign; preDiffDesign }
// Quality Metrics using deeptools
process experimentQC {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
publishDir "$outDir/${task.process}", mode: 'copy'
input:
......@@ -200,7 +202,7 @@ process experimentQC {
script:
"""
python3 $baseDir/scripts/experiment_qc.py -d $dedupDesign
python3 $baseDir/scripts/experiment_qc.py -d $dedupDesign -e $extendReadsLen
"""
}
......@@ -209,7 +211,7 @@ process experimentQC {
process convertReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
publishDir "$outDir/${task.process}", mode: 'copy'
input:
......@@ -238,7 +240,7 @@ process convertReads {
process crossReads {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
publishDir "$outDir/${task.process}", mode: 'copy'
input:
......@@ -268,12 +270,12 @@ process crossReads {
xcorDesign = xcorReads
.map{ sampleId, seTagAlign, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$seTagAlign\t$tagAlign\t$xcor\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_xcor.tsv', seed:"sample_id\tse_tag_align\ttag_align\txcor\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
.collectFile(name:'design_xcor.tsv', seed:"sample_id\tse_tag_align\ttag_align\txcor\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$outDir/design")
// Make Experiment design files to be read in for downstream analysis
process defineExpDesignFiles {
publishDir "$baseDir/output/design", mode: 'copy'
publishDir "$outDir/design", mode: 'copy'
input:
......@@ -297,7 +299,7 @@ process poolAndPsuedoReads {
tag "${experimentObjs.baseName}"
publishDir "$baseDir/output/design", mode: 'copy'
publishDir "$outDir/design", mode: 'copy'
input:
......@@ -331,7 +333,7 @@ experimentRows = experimentPoolObjs
process callPeaksMACS {
tag "$sampleId-$replicate"
publishDir "$baseDir/output/${task.process}", mode: 'copy'
publishDir "$outDir/${task.process}", mode: 'copy'
input:
set sampleId, tagAlign, xcor, experimentId, biosample, factor, treatment, replicate, controlId, controlTagAlign from experimentRows
......@@ -359,12 +361,12 @@ process callPeaksMACS {
peaksDesign = experimentPeaks
.map{ sampleId, peak, fcSignal, pvalueSignal, experimentId, biosample, factor, treatment, replicate, controlId ->
"$sampleId\t$peak\t$fcSignal\t$pvalueSignal\t$experimentId\t$biosample\t$factor\t$treatment\t$replicate\t$controlId\n"}
.collectFile(name:'design_peak.tsv', seed:"sample_id\tpeaks\tfc_signal\tpvalue_signal\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$baseDir/output/design")
.collectFile(name:'design_peak.tsv', seed:"sample_id\tpeaks\tfc_signal\tpvalue_signal\texperiment_id\tbiosample\tfactor\ttreatment\treplicate\tcontrol_id\n", storeDir:"$outDir/design")
// Calculate Consensus Peaks
process consensusPeaks {
publishDir "$baseDir/output/${task.process}", mode: 'copy'
publishDir "$outDir/${task.process}", mode: 'copy'
input:
......
......@@ -34,6 +34,11 @@ def get_args():
help="The design file to run QC (tsv format).",
required=True)
parser.add_argument('-e', '--extension',
help="Number of base pairs to extend the reads",
type=int,
required=True)
args = parser.parse_args()
return args
......@@ -51,7 +56,7 @@ def check_tools():
raise Exception('Missing deeptools')
def generate_read_summary(design):
def generate_read_summary(design, extension):
'''Generate summary of data based on read counts.'''
bam_files = ' '.join(design['bam_reads'])
......@@ -59,8 +64,8 @@ def generate_read_summary(design):
mbs_filename = 'sample_mbs.npz'
mbs_command = (
"multiBamSummary bins -p %d --bamfiles %s --labels %s -out %s"
% (cpu_count(), bam_files, labels, mbs_filename)
"multiBamSummary bins -p %d --bamfiles %s --extendReads %d --labels %s -out %s"
% (cpu_count(), bam_files, extension, labels, mbs_filename)
)
logger.info("Running deeptools with %s", mbs_command)
......@@ -90,7 +95,7 @@ def check_correlation(mbs):
out, err = spearman_correlation.communicate()
def check_coverage(design):
def check_coverage(design, extension):
'''Asses the sequencing depth of samples.'''
bam_files = ' '.join(design['bam_reads'])
......@@ -100,8 +105,8 @@ def check_coverage(design):
" --ignoreDuplicates --minMappingQuality 10"
coverage_command = (
"plotCoverage -b %s --labels %s %s --plotFile %s"
% (bam_files, labels, coverage_params, coverage_filename)
"plotCoverage -b %s --extendReads %d --labels %s %s --plotFile %s"
% (bam_files, extension, labels, coverage_params, coverage_filename)
)
logger.info("Running deeptools with %s", coverage_command)
......@@ -128,14 +133,14 @@ def update_controls(design):
return design
def check_enrichment(sample_id, control_id, sample_reads, control_reads):
def check_enrichment(sample_id, control_id, sample_reads, control_reads, extension):
'''Asses the enrichment per sample.'''
fingerprint_filename = sample_id + '_fingerprint.png'
fingerprint_command = (
"plotFingerprint -b %s %s --labels %s %s --plotFile %s"
% (sample_reads, control_reads, sample_id, control_id, fingerprint_filename)
"plotFingerprint -b %s %s --extendReads %d --labels %s %s --plotFile %s"
% (sample_reads, control_reads, extension, sample_id, control_id, fingerprint_filename)
)
logger.info("Running deeptools with %s", fingerprint_command)
......@@ -147,6 +152,7 @@ def check_enrichment(sample_id, control_id, sample_reads, control_reads):
def main():
args = get_args()
design = args.design
extension = args.extension
# Create a file handler
handler = logging.FileHandler('experiment_qc.log')
......@@ -159,11 +165,11 @@ def main():
design_df = pd.read_csv(design, sep='\t')
# Run correlation
mbs_filename = generate_read_summary(design_df)
mbs_filename = generate_read_summary(design_df, extension)
check_correlation(mbs_filename)
# Run coverage
check_coverage(design_df)
check_coverage(design_df, extension)
# Run enrichment
new_design_df = update_controls(design_df)
......@@ -172,7 +178,8 @@ def main():
row['sample_id'],
row['control_id'],
row['bam_reads'],
row['control_reads'])
row['control_reads'],
extension)
if __name__ == '__main__':
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment