f4474da5 · f4474da5 · f4474da5 · e4c84105 · f4474da5 · f4474da5
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
+singularity {
+  enabled = true
+  runOptions = ''                             // Please do NOT use "--disable-cache" in this runOptions. 
+                                              // Starting from version 2.0.0, the astrocyte_cli will clean up the cache automatically.
+  // runOptions = '--bind /vagrant:/vagrant'  // Use this one for vagrant development env only
+  cacheDir = "$baseDir/images/singularity"    // Singularity images specified in `workflow_containers` of astrocyte_pkg.yml will be saved to 
+                                              // this folder automatically, before running the workflow. The images will be renamed as
+                                              // "NAME-TAG.img", e.g. ubuntu-latest.img, centos-centos8.img, r-4.1.1.img, etc.
+}
+
 process {
  executor = 'slurm'
  queue = 'super'
@@ -5,69 +15,73 @@ process {
  beforeScript= 'ulimit -Ss unlimited'

  // Process specific configuration
-  withName: checkDesignFile {
-    module = ['python/3.6.1-2-anaconda']
+  withName: trackStart {
    executor = 'local'
  }
+
+  withName: checkDesignFile {
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/py36:1.0.0'
+    queue = 'super'
+  }
  withName: trimReads {
-    module = ['python/3.6.1-2-anaconda', 'trimgalore/0.4.1']
-    cpus = 32
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+	queue = '256GB,256GBv1'
  }
  withName: alignReads{
-    module = ['python/3.6.1-2-anaconda', 'bwa/intel/0.7.12', 'samtools/1.6']
-    queue = '128GB,256GB,256GBv1'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = '256GB,256GBv1'
  }
  withName: filterReads{
-    module = ['python/3.6.1-2-anaconda', 'samtools/1.6', 'sambamba/0.6.6', 'bedtools/2.26.0']
-    queue = '128GB,256GB,256GBv1'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = '256GB,256GBv1'
  }
  withName: experimentQC {
-    module = ['python/3.6.1-2-anaconda', 'deeptools/2.5.0.1']
-    queue = '128GB,256GB,256GBv1'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = '256GB,256GBv1'
  }
  withName: convertReads {
-    module = ['python/3.6.1-2-anaconda',  'samtools/1.6', 'bedtools/2.26.0']
-    queue = '128GB,256GB,256GBv1'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = '256GB,256GBv1'
  }
  withName: crossReads {
-    module = ['python/3.6.1-2-anaconda', 'phantompeakqualtools/1.2']
-    cpus = 32
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = 'super'
  }
  withName: defineExpDesignFiles {
-    module = ['python/3.6.1-2-anaconda']
-    executor = 'local'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/py36:1.0.0'
+    queue = 'super'
  }
  withName: poolAndPsuedoReads {
-    module = ['python/3.6.1-2-anaconda']
-    executor = 'local'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/py36:1.0.0'
+    queue = 'super'
  }
  withName: callPeaksMACS {
-    module = ['python/3.6.1-2-anaconda', 'macs/2.1.0-20151222', 'UCSC_userApps/v317', 'bedtools/2.26.0', 'phantompeakqualtools/1.2']
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
    queue = '128GB,256GB,256GBv1'
  }
  withName: plotProfile {
-    module = ['deeptools/2.5.0.1']
-    cpus = 32
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = 'super'
  }
  withName: consensusPeaks {
-    module = ['python/3.6.1-2-anaconda', 'bedtools/2.26.0']
-    executor = 'local'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+	queue = 'super'
  }
  withName: peakAnnotation {
-    module = ['R/3.3.2-gccmkl']
-    executor = 'local'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/r:3.3.2'
+	queue = 'super'
  }
  withName: diffPeaks {
-    module = ['R/3.3.2-gccmkl']
-    cpus = 32
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/r:3.3.2'
+	queue = '128GB,256GB,256GBv1'
  }
  withName: motifSearch {
-    module = ['python/3.6.1-2-anaconda', 'meme/4.11.1-gcc-openmpi', 'bedtools/2.26.0']
-    cpus = 32
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/motif-search:meme-5.5.4'
+	queue = 'super'
  }
  withName: multiqcReport {
-    module = ['python/3.6.1-2-anaconda', 'pandoc/2.7', 'singularity/3.0.2']
-    executor = 'local'
+    container = 'docker://git.biohpc.swmed.edu:5050/astrocyte/workflows/bicf/chipseq_analysis/chipseq:1.0.0'
+    queue = 'super'
  }
 }

@@ -116,3 +130,16 @@ report {
  enabled = true
  file = 'report.html'
 }
+
+
+env {
+  http_proxy = 'http://proxy.swmed.edu:3128'
+  https_proxy = 'http://proxy.swmed.edu:3128'
+  all_proxy = 'http://proxy.swmed.edu:3128'
+}
+
+manifest {
+  mainScript = 'main.nf'
+  version = '2.0.1'
+  nextflowVersion = '>=19.07.0'
+}
--- a/workflow/conf/multiqc_config.yaml
+++ b/workflow/conf/multiqc_config.yaml
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -27,7 +27,7 @@ params.skipDiff = false
 params.skipMotif = false
 params.skipPlotProfile = false
 params.references = "$baseDir/../docs/references.md"
-params.multiqc =  "$baseDir/conf/multiqc_config.yaml"
+params.multiqc =  "$baseDir/configs/multiqc_config.yaml"
 params.ci = false
 params.dev = false

@@ -147,14 +147,14 @@ process checkDesignFile {

  if (pairedEnd) {
    """
-    module load python/3.6.1-2-anaconda
+	source /python361/entrypoint.sh
    python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList -p
    """
  }
  else {
    """
-    module load python/3.6.1-2-anaconda
-    python $baseDir/scripts/check_design.py -d $designFile -f $readsList
+	source /python361/entrypoint.sh
+    python3 $baseDir/scripts/check_design.py -d $designFile -f $readsList
    """
  }

@@ -176,7 +176,7 @@ process trimReads {

  tag "$sampleId-$replicate"
  publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
-
+  maxForks 4
  input:

  set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from rawReads
@@ -191,15 +191,13 @@ process trimReads {

  if (pairedEnd) {
    """
-    module load python/3.6.1-2-anaconda
-    module load trimgalore/0.4.1
+    source /chipseq/entrypoint.sh
    python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} ${reads[1]} -s $sampleId -p
    """
  }
  else {
    """
-    module load python/3.6.1-2-anaconda
-    module load trimgalore/0.4.1
+    source /chipseq/entrypoint.sh
    python3 $baseDir/scripts/trim_reads.py -f ${reads[0]} -s $sampleId
    """
  }
@@ -209,10 +207,10 @@ process trimReads {
 // Align trimmed reads using bwa
 process alignReads {

-  queue '128GB,256GB,256GBv1'
+  queue '256GB,256GBv1'
  tag "$sampleId-$replicate"
  publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
-
+  maxForks 4
  input:

  set sampleId, reads, experimentId, biosample, factor, treatment, replicate, controlId from trimmedReads
@@ -228,17 +226,15 @@ process alignReads {

  if (pairedEnd) {
    """
-    module load python/3.6.1-2-anaconda
-    module load bwa/intel/0.7.12
-    module load samtools/1.6
+    source /chipseq/entrypoint.sh
+    free -hm
    python3 $baseDir/scripts/map_reads.py -f ${reads[0]} ${reads[1]} -r ${index}/genome.fa -s $sampleId -p
    """
  }
  else {
    """
-    module load python/3.6.1-2-anaconda
-    module load bwa/intel/0.7.12
-    module load samtools/1.6
+    source /chipseq/entrypoint.sh
+    free -hm
    python3 $baseDir/scripts/map_reads.py -f $reads -r ${index}/genome.fa -s $sampleId
    """
  }
@@ -251,7 +247,7 @@ process filterReads {
  queue '128GB,256GB,256GBv1'
  tag "$sampleId-$replicate"
  publishDir "$outDir/${task.process}/${sampleId}", mode: 'copy'
-
+  maxForks 4
  input:

  set sampleId, mapped, experimentId, biosample, factor, treatment, replicate, controlId from mappedReads
@@ -269,19 +265,13 @@ process filterReads {

  if (pairedEnd) {
    """
-    module load python/3.6.1-2-anaconda
-    module load samtools/1.6
-    module load sambamba/0.6.6
-    module load bedtools/2.26.0
+    source /chipseq/entrypoint.sh
    python3 $baseDir/scripts/map_qc.py -b $mapped -p
    """
  }
  else {
    """
-    module load python/3.6.1-2-anaconda
-    module load samtools/1.6
-    module load sambamba/0.6.6
-    module load bedtools/2.26.0
+    source /chipseq/entrypoint.sh
    python3 $baseDir/scripts/map_qc.py -b $mapped
    """
  }
@@ -313,8 +303,7 @@ process experimentQC {
  script:

  """
-  module load python/3.6.1-2-anaconda
-  module load deeptools/2.5.0.1
+  source /chipseq/entrypoint.sh
  python3 $baseDir/scripts/experiment_qc.py -d $dedupDesign -e $extendReadsLen
  """

@@ -340,17 +329,13 @@ process convertReads {

  if (pairedEnd) {
    """
-    module load python/3.6.1-2-anaconda
-    module load samtools/1.6
-    module load bedtools/2.26.0
+    source /chipseq/entrypoint.sh
    python3 $baseDir/scripts/convert_reads.py -b $deduped -p
    """
  }
  else {
    """
-    module load python/3.6.1-2-anaconda
-    module load samtools/1.6
-    module load bedtools/2.26.0
+    source /chipseq/entrypoint.sh
    python3 $baseDir/scripts/convert_reads.py -b $deduped
    """
  }
@@ -377,15 +362,13 @@ process crossReads {

  if (pairedEnd) {
    """
-    module load python/3.6.1-2-anaconda
-    module load phantompeakqualtools/1.2
+    source /chipseq/entrypoint.sh
    python3 $baseDir/scripts/xcor.py -t $seTagAlign -p
    """
  }
  else {
    """
-    module load python/3.6.1-2-anaconda
-    module load phantompeakqualtools/1.2
+    source /chipseq/entrypoint.sh
    python3 $baseDir/scripts/xcor.py -t $seTagAlign
    """
  }
@@ -414,7 +397,7 @@ process defineExpDesignFiles {
  script:

  """
-  module load python/3.6.1-2-anaconda
+  source /python361/entrypoint.sh
  python3 $baseDir/scripts/experiment_design.py -d $xcorDesign
  """

@@ -440,13 +423,13 @@ process poolAndPsuedoReads {

  if (pairedEnd) {
    """
-    module load python/3.6.1-2-anaconda
+	source /python361/entrypoint.sh
    python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio -p
    """
  }
  else {
    """
-    module load python/3.6.1-2-anaconda
+	source /python361/entrypoint.sh
    python3 $baseDir/scripts/pool_and_psuedoreplicate.py -d $experimentObjs -c $cutoffRatio
    """
  }
@@ -478,21 +461,13 @@ process callPeaksMACS {

  if (pairedEnd) {
    """
-    module load python/3.6.1-2-anaconda
-    module load macs/2.1.0-20151222
-    module load UCSC_userApps/v317
-    module load bedtools/2.26.0
-    module load phantompeakqualtools/1.2
+    source /chipseq/entrypoint.sh
    python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes -p
    """
  }
  else {
    """
-    module load python/3.6.1-2-anaconda
-    module load macs/2.1.0-20151222
-    module load UCSC_userApps/v317
-    module load bedtools/2.26.0
-    module load phantompeakqualtools/1.2
+    source /chipseq/entrypoint.sh
    python3 $baseDir/scripts/call_peaks_macs.py -t $tagAlign -x $xcor -c $controlTagAlign -s $sampleId -g $genomeSize -z $chromSizes
    """
  }
@@ -523,8 +498,8 @@ process plotProfile {

  script:
  """
-  module load deeptools/2.5.0.1
-  bash $baseDir/scripts/plot_profile.sh -g $gtfFile
+  source /chipseq/entrypoint.sh
+  /bin/bash $baseDir/scripts/plot_profile.sh -g $gtfFile
  """
 }

@@ -551,8 +526,7 @@ process consensusPeaks {
  script:

  """
-  module load python/3.6.1-2-anaconda
-  module load bedtools/2.26.0
+  source /chipseq/entrypoint.sh
  python3 $baseDir/scripts/overlap_peaks.py -d $peaksDesign -f $preDiffDesign
  """

@@ -575,7 +549,7 @@ process peakAnnotation {
  script:

  """
-  module load R/3.3.2-gccmkl
+  source /r-332/entrypoint.sh
  Rscript $baseDir/scripts/annotate_peaks.R $designAnnotatePeaks $gtfFile $geneNames
  """

@@ -603,10 +577,8 @@ process motifSearch {
  script:

  """
-  module load python/3.6.1-2-anaconda
-  module load meme/4.11.1-gcc-openmpi
-  module load bedtools/2.26.0
-  python3 $baseDir/scripts/motif_search.py -d $designMotifSearch -g $fasta -p $topPeakCount
+  source /motif-search/entrypoint.sh
+  python $baseDir/scripts/motif_search.py -d $designMotifSearch -g $fasta -p $topPeakCount
  """
 }

@@ -639,13 +611,12 @@ process diffPeaks {
  script:

  """
-  module load python/3.6.1-2-anaconda
-  module load R/3.3.2-gccmkl
+  source /r-332/entrypoint.sh
  Rscript $baseDir/scripts/diff_peaks.R $designDiffPeaks
  """
 }

-// Generate Multiqc Report, gerernate Software Versions and references
+// Generate Multiqc Report, generate software versions and references
 process multiqcReport {
  publishDir "$outDir/${task.process}", mode: 'copy'

@@ -659,8 +630,8 @@ process multiqcReport {
  file ('callPeaksMACS_vf/*') from callPeaksMACSVersions.first()
  file ('consensusPeaks_vf/*') from consensusPeaksVersions.first()
  file ('peakAnnotation_vf/*') from peakAnnotationVersions.first()
-  file ('motifSearch_vf/*') from motifSearchVersions.first().ifEmpty()
-  file ('diffPeaks_vf/*') from diffPeaksVersions.first().ifEmpty()
+  file ('motifSearch_vf/*') from motifSearchVersions.first().ifEmpty("No Meme-chip is found")
+  file ('diffPeaks_vf/*') from diffPeaksVersions.first().ifEmpty("No Rstudio is found")
  file ('experimentQC_vf/*') from experimentQCVersions.first()
  file ('trimReads/*') from trimgaloreResults.collect()
  file ('alignReads/*') from mappedReadsStats.collect()
@@ -677,14 +648,12 @@ process multiqcReport {
  script:

  """
-  module load python/3.6.1-2-anaconda
-  module load pandoc/2.7
-  module load singularity/3.0.2
+  source /chipseq/entrypoint.sh
  echo $workflow.nextflow.version > version_nextflow.txt
-  singularity exec /project/shared/bicf_workflow_ref/singularity_images/bicf-multiqc-2.0.0.img multiqc --version > version_multiqc.txt
+  multiqc --version > version_multiqc.txt
  python --version &> version_python.txt
  python3 $baseDir/scripts/generate_references.py -r $references -o software_references
  python3 $baseDir/scripts/generate_versions.py -o software_versions
-  singularity exec /project/shared/bicf_workflow_ref/singularity_images/bicf-multiqc-2.0.0.img multiqc -c $multiqc .
+  multiqc -c $multiqc .
  """
-}
+}
\ No newline at end of file
--- a/workflow/nextflow.config
+++ b/workflow/nextflow.config
-profiles {
-  standard {
-    includeConfig 'conf/biohpc.config'
-  }
-}
-
-trace {
-  enabled = true
-  file = 'pipeline_trace.txt'
-  fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
-}
-
-timeline {
-  enabled = true
-  file = 'timeline.html'
-}
-
-report {
-  enabled = true
-  file = 'report.html'
-}
-
-
-manifest {
-  name = 'chipseq_analysis'
-  description = 'BICF ChIP-seq Analysis Workflow.'
-  homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis'
-  version = '1.1.2'
-  mainScript = 'main.nf'
-  nextflowVersion = '>=0.31.0'
-}
--- a/workflow/scripts/convert_reads.py
+++ b/workflow/scripts/convert_reads.py
@@ -106,9 +106,16 @@ def convert_mapped_pe(bam, bam_basename):

    # Name sort bam to make BEDPE
    nmsrt_bam_filename = bam_basename + ".nmsrt.bam"
+    # Allocate memory for each thread used during samtools sort.
+    # Determine the available memory on the host system (kB).
+    avail_sys_mem = int(subprocess.check_output("grep MemTotal /proc/meminfo | grep -o '[0-9]*'", shell=True).decode("utf-8").strip())
+    # Divide the total by the number of processors and apply a scaling factor (0.85) to not use all memory.
+    mem_per_thread = math.floor((avail_sys_mem ) * 0.7)
+    # DEBUGGING
+    mem_per_thread = mem_per_thread * 4
    samtools_sort_command = \
-        "samtools sort -n -@%d -o %s %s" \
-        % (cpu_count(), nmsrt_bam_filename, bam)
+        "samtools sort -m %d  -n -@%d -o %s %s" \
+        % (mem_per_thread, cpu_count(), nmsrt_bam_filename, bam)

    logger.info(samtools_sort_command)
    subprocess.check_output(shlex.split(samtools_sort_command))

--- a/workflow/scripts/map_reads.py
+++ b/workflow/scripts/map_reads.py
@@ -14,6 +14,7 @@ import argparse
 import shutil
 import shlex
 import logging
+import math
 from multiprocessing import cpu_count
 import utils

@@ -133,12 +134,18 @@ def align_se(fastq, sai, reference, fastq_basename):

    bam_filename = '%s.bam' % (fastq_basename)

+    # Allocate memory for each thread used during samtools sort.
+    # Determine the available memory on the host system (kB).
+    avail_sys_mem = int(subprocess.check_output("grep MemTotal /proc/meminfo | grep -o '[0-9]*'", shell=True).decode("utf-8").strip())
+    # Divide the total by the number of processors and apply a scaling factor (0.85) to not use all memory.
+    mem_per_thread = math.floor((avail_sys_mem) * 0.7)
+
    steps = [
        "bwa samse %s %s %s"
        % (reference, sai[0], fastq[0]),
        "samtools view -@%d -Su -" % (cpu_count()),
-        "samtools sort -@%d -o %s"
-        % (cpu_count(), bam_filename)]
+        "samtools sort -m %d -@%d -o %s"
+        % (mem_per_thread, cpu_count(), bam_filename)]

    out, err = utils.run_pipe(steps)
    if err:
@@ -168,12 +175,19 @@ def align_pe(fastq, sai, reference, fastq_basename):
    if err:
        logger.error("sampe error: %s", err)

+    # Allocate memory for each thread used during samtools sort.
+    # Determine the available memory on the host system (kB).
+    avail_sys_mem = int(subprocess.check_output("grep MemTotal /proc/meminfo | grep -o '[0-9]*'", shell=True).decode("utf-8").strip())
+    # Divide the total by the number of processors and apply a scaling factor (0.85) to not use all memory.
+    mem_per_thread = math.floor((avail_sys_mem // cpu_count()) * 0.85)
+    # DEBUGGING
+    mem_per_thread = mem_per_thread * 4
    steps = [
        "cat %s" % (sam_filename),
        "grep -v -F -f %s" % (badcigar_filename),
        "samtools view -@%d -Su -" % (cpu_count()),
-        "samtools sort -@%d -o %s"
-        % (cpu_count(), bam_filename)]
+        "samtools sort -m %d -@%d -o %s"
+        % (mem_per_thread, cpu_count(), bam_filename)]

    out, err = utils.run_pipe(steps)
    if err:

--- a/workflow/scripts/motif_search.py
+++ b/workflow/scripts/motif_search.py
-#!/usr/bin/env python3
+#!/usr/bin/env python2

 #
 # * --------------------------------------------------------------------------
@@ -15,7 +15,7 @@ import shutil
 import subprocess
 from multiprocessing import Pool
 import pandas as pd
-import utils
+import utils2 as utils


 EPILOG = '''
@@ -67,8 +67,10 @@ def check_tools():
    '''Checks for required componenets on user system'''

    logger.info('Checking for required libraries and components on this system')
-
-    meme_path = shutil.which("meme")
+    stream = os.popen('which meme')
+    output = stream.read()
+    meme_path = output.strip()
+    #meme_path = shutil.which("meme")
    if meme_path:
        logger.info('Found meme: %s', meme_path)

@@ -83,8 +85,10 @@ def check_tools():
    else:
        logger.error('Missing meme')
        raise Exception('Missing meme')
-
-    bedtools_path = shutil.which("bedtools")
+    stream = os.popen('which bedtools')
+    output = stream.read()
+    bedtools_path = output.strip()
+    
    if bedtools_path:
        logger.info('Found bedtools: %s', bedtools_path)

@@ -135,8 +139,10 @@ def motif_search(filename, genome, experiment, peak):
        logger.error("bedtools error: %s", err)

    # Call memechip
+    #out, err = utils.run_pipe([
+    #    'meme-chip -oc %s -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 %s -norand' % (out_motif, out_fa)])
    out, err = utils.run_pipe([
-        'meme-chip -oc %s -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 %s -norand' % (out_motif, out_fa)])
+        'meme-chip -oc %s -minw 5 -maxw 15 -meme-nmotifs 10 %s -meme-norand ' % (out_motif, out_fa)])
    if err:
        logger.error("meme-chip error: %s", err)


--- a/workflow/scripts/motif_search.py.original
+++ b/workflow/scripts/motif_search.py.original
+#!/usr/bin/env python3
+
+#
+# * --------------------------------------------------------------------------
+# * Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/LICENSE.md)
+# * --------------------------------------------------------------------------
+#
+
+'''Call Motifs on called peaks.'''
+
+import os
+import argparse
+import logging
+import shutil
+import subprocess
+from multiprocessing import Pool
+import pandas as pd
+import utils
+
+
+EPILOG = '''
+For more details:
+        %(prog)s --help
+'''
+
+# SETTINGS
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+logger.propagate = False
+logger.setLevel(logging.INFO)
+
+
+# the order of this list is important.
+# strip_extensions strips from the right inward, so
+# the expected right-most extensions should appear first (like .gz)
+# Modified from J. Seth Strattan
+STRIP_EXTENSIONS = ['.narrowPeak', '.replicated']
+
+
+def get_args():
+    '''Define arguments.'''
+
+    parser = argparse.ArgumentParser(
+        description=__doc__, epilog=EPILOG,
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+
+    parser.add_argument('-d', '--design',
+                        help="The design file to run motif search.",
+                        required=True)
+
+    parser.add_argument('-g', '--genome',
+                        help="The genome FASTA file.",
+                        required=True)
+
+    parser.add_argument('-p', '--peak',
+                        help="The number of peaks to use.",
+                        required=True)
+
+    args = parser.parse_args()
+    return args
+
+# Functions
+
+
+def check_tools():
+    '''Checks for required componenets on user system'''
+
+    logger.info('Checking for required libraries and components on this system')
+
+    meme_path = shutil.which("meme")
+    if meme_path:
+        logger.info('Found meme: %s', meme_path)
+
+        # Get Version
+        memechip_version_command = "meme-chip --version"
+        memechip_version = subprocess.check_output(memechip_version_command, shell=True)
+
+        # Write to file
+        meme_file = open("version_memechip.txt", "wb")
+        meme_file.write(b"Version %s" % (memechip_version))
+        meme_file.close()
+    else:
+        logger.error('Missing meme')
+        raise Exception('Missing meme')
+
+    bedtools_path = shutil.which("bedtools")
+    if bedtools_path:
+        logger.info('Found bedtools: %s', bedtools_path)
+
+        # Get Version
+        bedtools_version_command = "bedtools --version"
+        bedtools_version = subprocess.check_output(bedtools_version_command, shell=True)
+
+        # Write to file
+        bedtools_file = open("version_bedtools.txt", "wb")
+        bedtools_file.write(bedtools_version)
+        bedtools_file.close()
+    else:
+        logger.error('Missing bedtools')
+        raise Exception('Missing bedtools')
+
+
+def run_wrapper(args):
+    motif_search(*args)
+
+
+def motif_search(filename, genome, experiment, peak):
+    '''Run motif serach on peaks.'''
+
+    file_basename = os.path.basename(
+        utils.strip_extensions(filename, STRIP_EXTENSIONS))
+
+    out_fa = '%s.fa' % (experiment)
+    out_motif = '%s_memechip' % (experiment)
+
+    # Sort Bed file and limit number of peaks
+    if peak == -1:
+        peak = utils.count_lines(filename)
+        peak_no = 'all'
+    else:
+        peak_no = peak
+
+    sorted_fn = '%s.%s.narrowPeak' % (file_basename, peak_no)
+
+    out, err = utils.run_pipe([
+        'sort -k %dgr,%dgr %s' % (5, 5, filename),
+        'head -n %s' % (peak)], outfile=sorted_fn)
+
+    # Get fasta file
+    out, err = utils.run_pipe([
+        'bedtools getfasta -fi %s -bed %s -fo %s' % (genome, sorted_fn, out_fa)])
+
+    if err:
+        logger.error("bedtools error: %s", err)
+
+    # Call memechip
+    out, err = utils.run_pipe([
+        'meme-chip -oc %s -meme-minw 5 -meme-maxw 15 -meme-nmotifs 10 %s -norand' % (out_motif, out_fa)])
+    if err:
+        logger.error("meme-chip error: %s", err)
+
+
+def main():
+    args = get_args()
+    design = args.design
+    genome = args.genome
+    peak = args.peak
+
+    # Create a file handler
+    handler = logging.FileHandler('motif.log')
+    logger.addHandler(handler)
+
+    # Check if tools are present
+    check_tools()
+
+    # Read files
+    design_df = pd.read_csv(design, sep='\t')
+
+    meme_arglist = zip(design_df['Peaks'].tolist(), [genome]*design_df.shape[0], design_df['Condition'].tolist(), [peak]*design_df.shape[0])
+    work_pool = Pool(min(12, design_df.shape[0]))
+    return_list = work_pool.map(run_wrapper, meme_arglist)
+    work_pool.close()
+    work_pool.join()
+
+
+if __name__ == '__main__':
+    main()
--- a/workflow/scripts/utils2.py
+++ b/workflow/scripts/utils2.py
+#!/usr/bin/env python2
+
+#
+# * --------------------------------------------------------------------------
+# * Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/chipseq_analysis/LICENSE.md)
+# * --------------------------------------------------------------------------
+#
+
+'''General utilities.'''
+
+
+import shlex
+import logging
+import subprocess
+import sys
+import os
+
+
+logger = logging.getLogger(__name__)
+logger.addHandler(logging.NullHandler())
+logger.propagate = True
+
+
+def run_pipe(steps, outfile=None):
+    from subprocess import Popen, PIPE
+    p = None
+    p_next = None
+    first_step_n = 1
+    last_step_n = len(steps)
+    for n, step in enumerate(steps, start=first_step_n):
+        logger.debug("step %d: %s" % (n, step))
+        if n == first_step_n:
+            if n == last_step_n and outfile:  # one-step pipeline with outfile
+                with open(outfile, 'w') as fh:
+                    print("one step shlex: %s to file: %s" % (shlex.split(step), outfile))
+                    p = Popen(shlex.split(step), stdout=fh)
+                break
+            print("first step shlex to stdout: %s" % (shlex.split(step)))
+
+            p = Popen(shlex.split(step), stdout=PIPE)
+        elif n == last_step_n and outfile:  # only treat the last step specially if you're sending stdout to a file
+            with open(outfile, 'w') as fh:
+                print("last step shlex: %s to file: %s" % (shlex.split(step), outfile))
+                p_last = Popen(shlex.split(step), stdin=p.stdout, stdout=fh)
+                p.stdout.close()
+                p = p_last
+        else:  # handles intermediate steps and, in the case of a pipe to stdout, the last step
+            print("intermediate step %d shlex to stdout: %s" % (n, shlex.split(step)))
+            p_next = Popen(shlex.split(step), stdin=p.stdout, stdout=PIPE)
+            p.stdout.close()
+            p = p_next
+    out, err = p.communicate()
+    return out, err
+
+
+def block_on(command):
+    process = subprocess.Popen(shlex.split(command), stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
+    for line in iter(process.stdout.readline, b''):
+        sys.stdout.write(line.decode('utf-8'))
+    process.communicate()
+    return process.returncode
+
+
+def strip_extensions(filename, extensions):
+    '''Strips extensions to get basename of file.'''
+
+    basename = filename
+    for extension in extensions:
+        basename = basename.rpartition(extension)[0] or basename
+
+    return basename
+
+
+def count_lines(filename):
+    import mimetypes
+    compressed_mimetypes = [
+        "compress",
+        "bzip2",
+        "gzip"
+        ]
+    mime_type = mimetypes.guess_type(filename)[1]
+    if mime_type in compressed_mimetypes:
+        catcommand = 'gzip -dc'
+    else:
+        catcommand = 'cat'
+    out, err = run_pipe([
+        '%s %s' % (catcommand, filename),
+        'wc -l'
+        ])
+    return int(out)
+
+
+def rescale_scores(filename, scores_col, new_min=10, new_max=1000):
+    sorted_fn = 'sorted-%s' % (filename)
+    rescaled_fn = 'rescaled-%s' % (filename)
+
+    out, err = run_pipe([
+        'sort -k %dgr,%dgr %s' % (scores_col, scores_col, filename),
+        r"""awk 'BEGIN{FS="\t";OFS="\t"}{if (NF != 0) print $0}'"""],
+        sorted_fn)
+
+    out, err = run_pipe([
+        'head -n 1 %s' % (sorted_fn),
+        'cut -f %s' % (scores_col)])
+    max_score = float(out.strip())
+    logger.info("rescale_scores: max_score = %s", max_score)
+
+    out, err = run_pipe([
+        'tail -n 1 %s' % (sorted_fn),
+        'cut -f %s' % (scores_col)])
+    min_score = float(out.strip())
+    logger.info("rescale_scores: min_score = %s", min_score)
+
+    a = min_score
+    b = max_score
+    x = new_min
+    y = new_max
+
+    if min_score == max_score:  # give all peaks new_min
+        rescale_formula = "x"
+    else:  # n is the unscaled score from scores_col
+        rescale_formula = "((n-a)*(y-x)/(b-a))+x"
+
+    out, err = run_pipe(
+        [
+            'cat %s' % (sorted_fn),
+            r"""awk 'BEGIN{OFS="\t"}{n=$%d;a=%d;b=%d;x=%d;y=%d}"""
+            % (scores_col, a, b, x, y) +
+            r"""{$%d=int(%s) ; print $0}'"""
+            % (scores_col, rescale_formula)
+        ],
+        rescaled_fn)
+
+    os.remove(sorted_fn)
+
+    return rescaled_fn
No results found