Compare revisions

Changes are shown as if the source revision was being merged into the target revision.
singularity {
  enabled = true
  runOptions = '' // Please do NOT use "--disable-cache" in this runOptions.
                  // Starting from version 2.0.0, the astrocyte_cli will clean up the cache automatically.
  // runOptions = '--bind /vagrant:/vagrant' // Use this one for vagrant development env only
  cacheDir = "$baseDir/images/singularity" // Singularity images specified in `workflow_containers` of astrocyte_pkg.yml
                  // will be saved to this folder automatically, before running the workflow. The images will be renamed
                  // as "NAME-TAG.img", e.g. ubuntu-latest.img, centos-centos8.img, r-4.1.1.img, etc.
}
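The comments above describe how Astrocyte caches the images listed under `workflow_containers` in astrocyte_pkg.yml: each is pulled into `$baseDir/images/singularity` and renamed to `NAME-TAG.img` (ubuntu-latest.img, centos-centos8.img, r-4.1.1.img). A minimal Python sketch of that naming rule, purely for illustration — `cached_image_name` is a hypothetical helper, not part of astrocyte_cli:

import os

def cached_image_name(container_uri):
    """Map a docker:// URI to the cached NAME-TAG.img filename described above (illustrative only)."""
    reference = container_uri.split('/')[-1]      # e.g. "r:3.3.2" or "ubuntu"
    name, _, tag = reference.partition(':')
    return '%s-%s.img' % (name, tag or 'latest')  # untagged images fall back to "latest"

# Prints images/singularity/r-3.3.2.img
print(os.path.join('images/singularity',
                   cached_image_name('docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/r:3.3.2')))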
process {
  executor = 'slurm'
  queue = 'super'

@@ -5,69 +15,73 @@ process {
   beforeScript = 'ulimit -Ss unlimited'

   // Process specific configuration
-  withName: checkDesignFile {
-    module = ['python/3.6.1-2-anaconda']
+  withName: trackStart {
     executor = 'local'
   }
+  withName: checkDesignFile {
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/py36:1.0.0'
+    queue = 'super'
+  }
   withName: trimReads {
-    module = ['python/3.6.1-2-anaconda', 'trimgalore/0.4.1']
-    cpus = 32
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = '256GB,256GBv1'
   }
   withName: alignReads{
-    module = ['python/3.6.1-2-anaconda', 'bwa/intel/0.7.12', 'samtools/1.6']
-    queue = '128GB,256GB,256GBv1'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = '256GB,256GBv1'
   }
   withName: filterReads{
-    module = ['python/3.6.1-2-anaconda', 'samtools/1.6', 'sambamba/0.6.6', 'bedtools/2.26.0']
-    queue = '128GB,256GB,256GBv1'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = '256GB,256GBv1'
   }
   withName: experimentQC {
-    module = ['python/3.6.1-2-anaconda', 'deeptools/2.5.0.1']
-    queue = '128GB,256GB,256GBv1'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = '256GB,256GBv1'
   }
   withName: convertReads {
-    module = ['python/3.6.1-2-anaconda', 'samtools/1.6', 'bedtools/2.26.0']
-    queue = '128GB,256GB,256GBv1'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = '256GB,256GBv1'
   }
   withName: crossReads {
-    module = ['python/3.6.1-2-anaconda', 'phantompeakqualtools/1.2']
-    cpus = 32
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = 'super'
   }
   withName: defineExpDesignFiles {
-    module = ['python/3.6.1-2-anaconda']
-    executor = 'local'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/py36:1.0.0'
+    queue = 'super'
   }
   withName: poolAndPsuedoReads {
-    module = ['python/3.6.1-2-anaconda']
-    executor = 'local'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/py36:1.0.0'
+    queue = 'super'
   }
   withName: callPeaksMACS {
-    module = ['python/3.6.1-2-anaconda', 'macs/2.1.0-20151222', 'UCSC_userApps/v317', 'bedtools/2.26.0', 'phantompeakqualtools/1.2']
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
     queue = '128GB,256GB,256GBv1'
   }
   withName: plotProfile {
-    module = ['deeptools/2.5.0.1']
-    cpus = 32
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = 'super'
   }
   withName: consensusPeaks {
-    module = ['python/3.6.1-2-anaconda', 'bedtools/2.26.0']
-    executor = 'local'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = 'super'
   }
   withName: peakAnnotation {
-    module = ['R/3.3.2-gccmkl']
-    executor = 'local'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/r:3.3.2'
+    queue = 'super'
   }
   withName: diffPeaks {
-    module = ['R/3.3.2-gccmkl']
-    cpus = 32
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/r:3.3.2'
+    queue = '128GB,256GB,256GBv1'
   }
   withName: motifSearch {
-    module = ['python/3.6.1-2-anaconda', 'meme/4.11.1-gcc-openmpi', 'bedtools/2.26.0']
-    cpus = 32
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/motif-search:meme-5.5.4'
+    queue = 'super'
   }
   withName: multiqcReport {
-    module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'singularity/3.0.2']
-    executor = 'local'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = 'super'
   }
 }

@@ -116,3 +130,16 @@ report {
   enabled = true
   file = 'report.html'
 }
env {
  http_proxy = 'http://proxy.swmed.edu:3128'
  https_proxy = 'http://proxy.swmed.edu:3128'
  all_proxy = 'http://proxy.swmed.edu:3128'
}

manifest {
  mainScript = 'main.nf'
  version = '2.0.1'
  nextflowVersion = '>=19.07.0'
}
@@ -27,7 +27,7 @@ params.skipDiff = false
 params.skipMotif = false
 params.skipPlotProfile = false
 params.references = "$baseDir/../docs/references.md"
-params.multiqc = "$baseDir/conf/multiqc_config.yaml"
+params.multiqc = "$baseDir/configs/multiqc_config.yaml"
 params.ci = false
 params.dev = false

@@ -147,14 +147,14 @@ process checkDesignFile {
   if (pairedEnd) {
     """
-    module load python/3.6.1-2-anaconda
+    source /python361/entrypoint.sh
     python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
     """
   }
   else {
     """
-    module load python/3.6.1-2-anaconda
-    python $baseDir/scripts/check_design.py -d $designFile -f $readsList
+    source /python361/entrypoint.sh
+    python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList
     """
   }

@@ -176,7 +176,7 @@ process trimReads {
   tag "$sampleId-$replicate"
   publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
+  maxForks 4

   input:
   set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from rawReads

@@ -191,15 +191,13 @@ process trimReads {
   if (pairedEnd) {
     """
-    module load python/3.6.1-2-anaconda
-    module load trimgalore/0.4.1
+    source /chipseq/entrypoint.sh
     python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -s $sampleId -p
     """
   }
   else {
     """
-    module load python/3.6.1-2-anaconda
-    module load trimgalore/0.4.1
+    source /chipseq/entrypoint.sh
     python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} -s $sampleId
     """
   }

@@ -209,10 +207,10 @@ process trimReads {
 // Align trimmed reads using bwa
 process alignReads {
-  queue '128GB,256GB,256GBv1'
+  queue '256GB,256GBv1'
   tag "$sampleId-$replicate"
   publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
+  maxForks 4

   input:
   set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from trimmedReads

@@ -228,17 +226,15 @@ process alignReads {
   if (pairedEnd) {
     """
-    module load python/3.6.1-2-anaconda
-    module load bwa/intel/0.7.12
-    module load samtools/1.6
+    source /chipseq/entrypoint.sh
+    free -hm
     python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -s $sampleId -p
     """
   }
   else {
     """
-    module load python/3.6.1-2-anaconda
-    module load bwa/intel/0.7.12
-    module load samtools/1.6
+    source /chipseq/entrypoint.sh
+    free -hm
     python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa -s $sampleId
     """
   }

@@ -251,7 +247,7 @@ process filterReads {
   queue '128GB,256GB,256GBv1'
   tag "$sampleId-$replicate"
   publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
+  maxForks 4

   input:
   set sampleId, mapped, experimentId, biosample, factor, treatment, replicate, controlId from mappedReads

@@ -269,19 +265,13 @@ process filterReads {
   if (pairedEnd) {
     """
-    module load python/3.6.1-2-anaconda
-    module load samtools/1.6
-    module load sambamba/0.6.6
-    module load bedtools/2.26.0
+    source /chipseq/entrypoint.sh
     python3 $baseDir/scripts/map_qc.py -b $mapped -p
     """
   }
   else {
     """
-    module load python/3.6.1-2-anaconda
-    module load samtools/1.6
-    module load sambamba/0.6.6
-    module load bedtools/2.26.0
+    source /chipseq/entrypoint.sh
     python3 $baseDir/scripts/map_qc.py -b $mapped
     """
   }

@@ -313,8 +303,7 @@ process experimentQC {
   script:
   """
-  module load python/3.6.1-2-anaconda
-  module load deeptools/2.5.0.1
+  source /chipseq/entrypoint.sh
   python3 $baseDir/scripts/experiment_qc.py -d $dedupDesign -e $extendReadsLen
   """

@@ -340,17 +329,13 @@ process convertReads {
   if (pairedEnd) {
     """
-    module load python/3.6.1-2-anaconda
-    module load samtools/1.6
-    module load bedtools/2.26.0
+    source /chipseq/entrypoint.sh
     python3 $baseDir/scripts/convert_reads.py -b $deduped -p
     """
   }
   else {
     """
-    module load python/3.6.1-2-anaconda
-    module load samtools/1.6
-    module load bedtools/2.26.0
+    source /chipseq/entrypoint.sh
     python3 $baseDir/scripts/convert_reads.py -b $deduped
     """
   }

@@ -377,15 +362,13 @@ process crossReads {
   if (pairedEnd) {
     """
-    module load python/3.6.1-2-anaconda
-    module load phantompeakqualtools/1.2
+    source /chipseq/entrypoint.sh
     python3 $baseDir/scripts/xcor.py -t $seTagAlign -p
     """
   }
   else {
     """
-    module load python/3.6.1-2-anaconda
-    module load phantompeakqualtools/1.2
+    source /chipseq/entrypoint.sh
     python3 $baseDir/scripts/xcor.py -t $seTagAlign
     """
   }

@@ -414,7 +397,7 @@ process defineExpDesignFiles {
   script:
   """
-  module load python/3.6.1-2-anaconda
+  source /python361/entrypoint.sh
   python3 $baseDir/scripts/experiment_design.py -d $xcorDesign
   """

@@ -440,13 +423,13 @@ process poolAndPsuedoReads {
   if (pairedEnd) {
     """
-    module load python/3.6.1-2-anaconda
+    source /python361/entrypoint.sh
     python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio -p
     """
   }
   else {
     """
-    module load python/3.6.1-2-anaconda
+    source /python361/entrypoint.sh
     python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio
     """
   }

@@ -478,21 +461,13 @@ process callPeaksMACS {
   if (pairedEnd) {
     """
-    module load python/3.6.1-2-anaconda
-    module load macs/2.1.0-20151222
-    module load UCSC_userApps/v317
-    module load bedtools/2.26.0
-    module load phantompeakqualtools/1.2
+    source /chipseq/entrypoint.sh
     python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes -p
     """
   }
   else {
     """
-    module load python/3.6.1-2-anaconda
-    module load macs/2.1.0-20151222
-    module load UCSC_userApps/v317
-    module load bedtools/2.26.0
-    module load phantompeakqualtools/1.2
+    source /chipseq/entrypoint.sh
     python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes
     """
   }

@@ -523,8 +498,8 @@ process plotProfile {
   script:
   """
-  module load deeptools/2.5.0.1
-  bash $baseDir/scripts/plot_profile.sh -g $gtfFile
+  source /chipseq/entrypoint.sh
+  /bin/bash $baseDir/scripts/plot_profile.sh -g $gtfFile
   """
 }

@@ -551,8 +526,7 @@ process consensusPeaks {
   script:
   """
-  module load python/3.6.1-2-anaconda
-  module load bedtools/2.26.0
+  source /chipseq/entrypoint.sh
   python3 $baseDir/scripts/overlap_peaks.py -d $peaksDesign -f $preDiffDesign
   """

@@ -575,7 +549,7 @@ process peakAnnotation {
   script:
   """
-  module load R/3.3.2-gccmkl
+  source /r-332/entrypoint.sh
   Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $gtfFile $geneNames
   """

@@ -603,10 +577,8 @@ process motifSearch {
   script:
   """
-  module load python/3.6.1-2-anaconda
-  module load meme/4.11.1-gcc-openmpi
-  module load bedtools/2.26.0
-  python3 $baseDir/scripts/motif_search.py -d $designMotifSearch -g $fasta -p $topPeakCount
+  source /motif-search/entrypoint.sh
+  python $baseDir/scripts/motif_search.py -d $designMotifSearch -g $fasta -p $topPeakCount
   """
 }

@@ -639,13 +611,12 @@ process diffPeaks {
   script:
   """
-  module load python/3.6.1-2-anaconda
-  module load R/3.3.2-gccmkl
+  source /r-332/entrypoint.sh
   Rscript $baseDir/scripts/diff_peaks.R $designDiffPeaks
   """
 }

-// Generate Multiqc Report, gerernate Software Versions and references
+// Generate Multiqc Report, generate software versions and references
 process multiqcReport {
   publishDir "$outDir/${task.process}", mode: 'copy'

@@ -659,8 +630,8 @@ process multiqcReport {
   file ('callPeaksMACS_vf/*') from callPeaksMACSVersions.first()
   file ('consensusPeaks_vf/*') from consensusPeaksVersions.first()
   file ('peakAnnotation_vf/*') from peakAnnotationVersions.first()
-  file ('motifSearch_vf/*') from motifSearchVersions.first().ifEmpty()
-  file ('diffPeaks_vf/*') from diffPeaksVersions.first().ifEmpty("No Meme-chip is found")
+  file ('motifSearch_vf/*') from motifSearchVersions.first().ifEmpty("No Meme-chip is found")
+  file ('diffPeaks_vf/*') from diffPeaksVersions.first().ifEmpty("No Rstudio is found")
   file ('experimentQC_vf/*') from experimentQCVersions.first()
   file ('trimReads/*') from trimgaloreResults.collect()
   file ('alignReads/*') from mappedReadsStats.collect()

@@ -677,14 +648,12 @@ process multiqcReport {
   script:
   """
-  module load python/3.6.1-2-anaconda
-  module load pandoc/2.7
-  module load singularity/3.0.2
+  source /chipseq/entrypoint.sh
   echo $workflow.nextflow.version > version_nextflow.txt
-  singularity exec /project/shared/bicf_workflow_ref/singularity_images/bicf-multiqc-2.0.0.img multiqc --version > version_multiqc.txt
+  multiqc --version > version_multiqc.txt
   python --version &> version_python.txt
   python3 $baseDir/scripts/generate_references.py -r $references -o software_references
   python3 $baseDir/scripts/generate_versions.py -o software_versions
-  singularity exec /project/shared/bicf_workflow_ref/singularity_images/bicf-multiqc-2.0.0.img multiqc -c $multiqc .
+  multiqc -c $multiqc .
   """
 }
\ No newline at end of file
profiles {
  standard {
    includeConfig 'conf/biohpc.config'
  }
}

trace {
  enabled = true
  file = 'pipeline_trace.txt'
  fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
}

timeline {
  enabled = true
  file = 'timeline.html'
}

report {
  enabled = true
  file = 'report.html'
}

manifest {
  name = 'chipseq_analysis'
  description = 'BICF ChIP-seq Analysis Workflow.'
  homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis'
  version = '1.1.2'
  mainScript = 'main.nf'
  nextflowVersion = '>=0.31.0'
}
@@ -106,9 +106,16 @@ def convert_mapped_pe(bam, bam_basename):
     # Name sort bam to make BEDPE
     nmsrt_bam_filename = bam_basename + ".nmsrt.bam"
+    # Allocate memory for each thread used during samtools sort.
+    # Determine the available memory on the host system (kB).
+    avail_sys_mem = int(subprocess.check_output("grep MemTotal /proc/meminfo | grep -o '[0-9]*'", shell=True).decode("utf-8").strip())
+    # Divide the total by the number of processors and apply a scaling factor (0.85) to not use all memory.
+    mem_per_thread = math.floor((avail_sys_mem) * 0.7)
+    # DEBUGGING
+    mem_per_thread = mem_per_thread * 4
     samtools_sort_command = \
-        "samtools sort -n -@%d -o %s %s" \
-        % (cpu_count(), nmsrt_bam_filename, bam)
+        "samtools sort -m %d -n -@%d -o %s %s" \
+        % (mem_per_thread, cpu_count(), nmsrt_bam_filename, bam)
     logger.info(samtools_sort_command)
     subprocess.check_output(shlex.split(samtools_sort_command))
...
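The lines added above budget memory for `samtools sort` by reading MemTotal from /proc/meminfo. One detail worth keeping in mind is that `samtools sort -m` interprets a bare integer as bytes, so a unit suffix (K/M/G) makes the intent explicit. A standalone sketch of the same idea, for illustration only — the helper name and the 0.85 safety factor are assumptions, not the pipeline's values, and like the pipeline's own grep it only works on Linux:

from multiprocessing import cpu_count

def sort_mem_per_thread(safety_factor=0.85):
    """Per-thread memory budget for samtools sort, returned as a string like '1536K' (sketch)."""
    with open('/proc/meminfo') as handle:
        mem_total_kb = int(handle.readline().split()[1])  # first line: "MemTotal:  N kB"
    per_thread_kb = int(mem_total_kb * safety_factor / cpu_count())
    return '%dK' % (per_thread_kb)

if __name__ == '__main__':
    print('samtools sort -n -@%d -m %s -o out.nmsrt.bam in.bam'
          % (cpu_count(), sort_mem_per_thread()))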
@@ -14,6 +14,7 @@ import argparse
 import shutil
 import shlex
 import logging
+import math
 from multiprocessing import cpu_count
 import utils

@@ -133,12 +134,18 @@ def align_se(fastq, sai, reference, fastq_basename):
     bam_filename = '%s.bam' % (fastq_basename)
+    # Allocate memory for each thread used during samtools sort.
+    # Determine the available memory on the host system (kB).
+    avail_sys_mem = int(subprocess.check_output("grep MemTotal /proc/meminfo | grep -o '[0-9]*'", shell=True).decode("utf-8").strip())
+    # Divide the total by the number of processors and apply a scaling factor (0.85) to not use all memory.
+    mem_per_thread = math.floor((avail_sys_mem) * 0.7)
     steps = [
         "bwa samse %s %s %s"
         % (reference, sai[0], fastq[0]),
         "samtools view -@%d -Su -" % (cpu_count()),
-        "samtools sort -@%d -o %s"
-        % (cpu_count(), bam_filename)]
+        "samtools sort -m %d -@%d -o %s"
+        % (mem_per_thread, cpu_count(), bam_filename)]
     out, err = utils.run_pipe(steps)
     if err:

@@ -168,12 +175,19 @@ def align_pe(fastq, sai, reference, fastq_basename):
     if err:
         logger.error("sampe error: %s", err)
+    # Allocate memory for each thread used during samtools sort.
+    # Determine the available memory on the host system (kB).
+    avail_sys_mem = int(subprocess.check_output("grep MemTotal /proc/meminfo | grep -o '[0-9]*'", shell=True).decode("utf-8").strip())
+    # Divide the total by the number of processors and apply a scaling factor (0.85) to not use all memory.
+    mem_per_thread = math.floor((avail_sys_mem // cpu_count()) * 0.85)
+    # DEBUGGING
+    mem_per_thread = mem_per_thread * 4
     steps = [
         "cat %s" % (sam_filename),
         "grep -v -F -f %s" % (badcigar_filename),
         "samtools view -@%d -Su -" % (cpu_count()),
-        "samtools sort -@%d -o %s"
-        % (cpu_count(), bam_filename)]
+        "samtools sort -m %d -@%d -o %s"
+        % (mem_per_thread, cpu_count(), bam_filename)]
     out, err = utils.run_pipe(steps)
     if err:
...
-#!/usr/bin/env python3
+#!/usr/bin/env python2
 #
 # * --------------------------------------------------------------------------

@@ -15,7 +15,7 @@ import shutil
 import subprocess
 from multiprocessing import Pool
 import pandas as pd
-import utils
+import utils2 as utils

 EPILOG = '''

@@ -67,8 +67,10 @@ def check_tools():
     '''Checks for required componenets on user system'''
     logger.info('Checking for required libraries and components on this system')
-    meme_path = shutil.which("meme")
+    stream = os.popen('which meme')
+    output = stream.read()
+    meme_path = output.strip()
+    #meme_path = shutil.which("meme")
     if meme_path:
         logger.info('Found meme: %s', meme_path)

@@ -83,8 +85,10 @@ def check_tools():
     else:
         logger.error('Missing meme')
         raise Exception('Missing meme')
-    bedtools_path = shutil.which("bedtools")
+    stream = os.popen('which bedtools')
+    output = stream.read()
+    bedtools_path = output.strip()
     if bedtools_path:
         logger.info('Found bedtools: %s', bedtools_path)

@@ -135,8 +139,10 @@ def motif_search(filename, genome, experiment, peak):
         logger.error("bedtools error: %s", err)

     # Call memechip
+    #out, err = utils.run_pipe([
+    #    'meme-chip -oc %s -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 %s -norand' % (out_motif, out_fa)])
     out, err = utils.run_pipe([
-        'meme-chip -oc %s -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 %s -norand' % (out_motif, out_fa)])
+        'meme-chip -oc %s -minw 5 -maxw 15 -meme-nmotifs 10 %s -meme-norand ' % (out_motif, out_fa)])
     if err:
         logger.error("meme-chip error: %s", err)
...
#!/usr/bin/env python3
#
# * --------------------------------------------------------------------------
# * Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/LICENSE.md)
# * --------------------------------------------------------------------------
#
'''Call Motifs on called peaks.'''
import os
import argparse
import logging
import shutil
import subprocess
from multiprocessing import Pool
import pandas as pd
import utils
EPILOG = '''
For more details:
%(prog)s --help
'''
# SETTINGS
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = False
logger.setLevel(logging.INFO)
# the order of this list is important.
# strip_extensions strips from the right inward, so
# the expected right-most extensions should appear first (like .gz)
# Modified from J. Seth Strattan
STRIP_EXTENSIONS = ['.narrowPeak', '.replicated']
def get_args():
'''Define arguments.'''
parser = argparse.ArgumentParser(
description=__doc__, epilog=EPILOG,
formatter_class=argparse.RawDescriptionHelpFormatter)
parser.add_argument('-d', '--design',
help="The design file to run motif search.",
required=True)
parser.add_argument('-g', '--genome',
help="The genome FASTA file.",
required=True)
parser.add_argument('-p', '--peak',
help="The number of peaks to use.",
required=True)
args = parser.parse_args()
return args
# Functions
def check_tools():
'''Checks for required components on user system'''
logger.info('Checking for required libraries and components on this system')
meme_path = shutil.which("meme")
if meme_path:
logger.info('Found meme: %s', meme_path)
# Get Version
memechip_version_command = "meme-chip --version"
memechip_version = subprocess.check_output(memechip_version_command, shell=True)
# Write to file
meme_file = open("version_memechip.txt", "wb")
meme_file.write(b"Version %s" % (memechip_version))
meme_file.close()
else:
logger.error('Missing meme')
raise Exception('Missing meme')
bedtools_path = shutil.which("bedtools")
if bedtools_path:
logger.info('Found bedtools: %s', bedtools_path)
# Get Version
bedtools_version_command = "bedtools --version"
bedtools_version = subprocess.check_output(bedtools_version_command, shell=True)
# Write to file
bedtools_file = open("version_bedtools.txt", "wb")
bedtools_file.write(bedtools_version)
bedtools_file.close()
else:
logger.error('Missing bedtools')
raise Exception('Missing bedtools')
def run_wrapper(args):
motif_search(*args)
def motif_search(filename, genome, experiment, peak):
'''Run motif search on peaks.'''
file_basename = os.path.basename(
utils.strip_extensions(filename, STRIP_EXTENSIONS))
out_fa = '%s.fa' % (experiment)
out_motif = '%s_memechip' % (experiment)
# Sort Bed file and limit number of peaks
if peak == -1:
peak = utils.count_lines(filename)
peak_no = 'all'
else:
peak_no = peak
sorted_fn = '%s.%s.narrowPeak' % (file_basename, peak_no)
out, err = utils.run_pipe([
'sort -k %dgr,%dgr %s' % (5, 5, filename),
'head -n %s' % (peak)], outfile=sorted_fn)
# Get fasta file
out, err = utils.run_pipe([
'bedtools getfasta -fi %s -bed %s -fo %s' % (genome, sorted_fn, out_fa)])
if err:
logger.error("bedtools error: %s", err)
# Call memechip
out, err = utils.run_pipe([
'meme-chip -oc %s -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 %s -norand' % (out_motif, out_fa)])
if err:
logger.error("meme-chip error: %s", err)
def main():
args = get_args()
design = args.design
genome = args.genome
peak = args.peak
# Create a file handler
handler = logging.FileHandler('motif.log')
logger.addHandler(handler)
# Check if tools are present
check_tools()
# Read files
design_df = pd.read_csv(design, sep='\t')
meme_arglist = zip(design_df['Peaks'].tolist(), [genome]*design_df.shape[0], design_df['Condition'].tolist(), [peak]*design_df.shape[0])
work_pool = Pool(min(12, design_df.shape[0]))
return_list = work_pool.map(run_wrapper, meme_arglist)
work_pool.close()
work_pool.join()
if __name__ == '__main__':
main()
#!/usr/bin/env python2
#
# * --------------------------------------------------------------------------
# * Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/LICENSE.md)
# * --------------------------------------------------------------------------
#
'''General utilities.'''
import shlex
import logging
import subprocess
import sys
import os
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = True
def run_pipe(steps, outfile=None):
from subprocess import Popen, PIPE
p = None
p_next = None
first_step_n = 1
last_step_n = len(steps)
for n, step in enumerate(steps, start=first_step_n):
logger.debug("step %d: %s" % (n, step))
if n == first_step_n:
if n == last_step_n and outfile: # one-step pipeline with outfile
with open(outfile, 'w') as fh:
print("one step shlex: %s to file: %s" % (shlex.split(step), outfile))
p = Popen(shlex.split(step), stdout=fh)
break
print("first step shlex to stdout: %s" % (shlex.split(step)))
p = Popen(shlex.split(step), stdout=PIPE)
elif n == last_step_n and outfile: # only treat the last step specially if you're sending stdout to a file
with open(outfile, 'w') as fh:
print("last step shlex: %s to file: %s" % (shlex.split(step), outfile))
p_last = Popen(shlex.split(step), stdin=p.stdout, stdout=fh)
p.stdout.close()
p = p_last
else: # handles intermediate steps and, in the case of a pipe to stdout, the last step
print("intermediate step %d shlex to stdout: %s" % (n, shlex.split(step)))
p_next = Popen(shlex.split(step), stdin=p.stdout, stdout=PIPE)
p.stdout.close()
p = p_next
out, err = p.communicate()
return out, err
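# Usage sketch (editor's illustration, not part of the original module):
# run_pipe() chains shell steps through anonymous pipes, which is how the
# workflow scripts (e.g. motif_search.py above) rank peaks and keep the top N.
# The file names and the helper name below are placeholders for illustration.
def _example_top_peaks(peak_file='peaks.narrowPeak', top_n=600):
    '''Illustrative only: sort a narrowPeak file by score and keep the best top_n lines.'''
    out, err = run_pipe([
        'sort -k 5gr,5gr %s' % (peak_file),   # rank by column 5 (score), descending
        'head -n %d' % (top_n)],              # keep the strongest peaks
        outfile='top%d.%s' % (top_n, peak_file))
    if err:
        logger.error("sort/head error: %s", err)
    return out, err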
def block_on(command):
process = subprocess.Popen(shlex.split(command), stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
for line in iter(process.stdout.readline, b''):
sys.stdout.write(line.decode('utf-8'))
process.communicate()
return process.returncode
def strip_extensions(filename, extensions):
'''Strips extensions to get basename of file.'''
basename = filename
for extension in extensions:
basename = basename.rpartition(extension)[0] or basename
return basename
def count_lines(filename):
import mimetypes
compressed_mimetypes = [
"compress",
"bzip2",
"gzip"
]
mime_type = mimetypes.guess_type(filename)[1]
if mime_type in compressed_mimetypes:
catcommand = 'gzip -dc'
else:
catcommand = 'cat'
out, err = run_pipe([
'%s %s' % (catcommand, filename),
'wc -l'
])
return int(out)
def rescale_scores(filename, scores_col, new_min=10, new_max=1000):
sorted_fn = 'sorted-%s' % (filename)
rescaled_fn = 'rescaled-%s' % (filename)
out, err = run_pipe([
'sort -k %dgr,%dgr %s' % (scores_col, scores_col, filename),
r"""awk 'BEGIN{FS="\t";OFS="\t"}{if (NF != 0) print $0}'"""],
sorted_fn)
out, err = run_pipe([
'head -n 1 %s' % (sorted_fn),
'cut -f %s' % (scores_col)])
max_score = float(out.strip())
logger.info("rescale_scores: max_score = %s", max_score)
out, err = run_pipe([
'tail -n 1 %s' % (sorted_fn),
'cut -f %s' % (scores_col)])
min_score = float(out.strip())
logger.info("rescale_scores: min_score = %s", min_score)
a = min_score
b = max_score
x = new_min
y = new_max
if min_score == max_score: # give all peaks new_min
rescale_formula = "x"
else: # n is the unscaled score from scores_col
rescale_formula = "((n-a)*(y-x)/(b-a))+x"
out, err = run_pipe(
[
'cat %s' % (sorted_fn),
r"""awk 'BEGIN{OFS="\t"}{n=$%d;a=%d;b=%d;x=%d;y=%d}"""
% (scores_col, a, b, x, y) +
r"""{$%d=int(%s) ; print $0}'"""
% (scores_col, rescale_formula)
],
rescaled_fn)
os.remove(sorted_fn)
return rescaled_fn
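rescale_scores() drives an awk one-liner, but the mapping it applies is a plain linear rescale of the score column from the observed range [a, b] onto [new_min, new_max]. A small standalone check of the formula (editor's sketch, not part of the module):

def rescale(n, a, b, x=10, y=1000):
    """Linear map of n from [a, b] onto [x, y], mirroring rescale_scores()."""
    if a == b:                       # degenerate range: every peak gets new_min
        return x
    return ((n - a) * (y - x) / (b - a)) + x

assert rescale(5, a=5, b=50) == 10      # the minimum score maps to new_min
assert rescale(50, a=5, b=50) == 1000   # the maximum score maps to new_max
print(rescale(27.5, a=5, b=50))         # a mid-range score lands mid-interval: 505.0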