From 014508e5d1d0a2113f0ad20376dff37c18f6f60e Mon Sep 17 00:00:00 2001
From: Beibei Chen <beibei.chen@utsouthwestern.edu>
Date: Wed, 8 Feb 2017 09:09:17 -0600
Subject: [PATCH] Got rid of abs path

---
 astrocyte_package.yml           | 132 --------------------------------
 workflow/main.nf                |  66 +++++++++-------
 workflow/scripts/runMemechip.py |   2 +-
 3 files changed, 38 insertions(+), 162 deletions(-)
 delete mode 100644 astrocyte_package.yml

diff --git a/astrocyte_package.yml b/astrocyte_package.yml
deleted file mode 100644
index 2a9f17c..0000000
--- a/astrocyte_package.yml
+++ /dev/null
@@ -1,132 +0,0 @@
-#
-# metadata for the example astrocyte ChipSeq workflow package
-#
-
-# -----------------------------------------------------------------------------
-# BASIC INFORMATION
-# -----------------------------------------------------------------------------
-
-# A unique identifier for the workflow package, text/underscores only
-name: 'chipseq_analysis_bicf'
-# Who wrote this?
-author: 'Beibei Chen'
-# A contact email address for questions
-email: 'biohpc-help@utsouthwestern.edu'
-# A more informative title for the workflow package
-title: 'BICF ChIP-seq Analysis Workflow'
-# A summary of the workflow package in plain text
-description: |
-  This is a workflow package for the BioHPC/BICF ChIP-seq workflow system.
-  It implements a simple ChIP-seq analysis workflow using deepTools, Diffbind, ChipSeeker and MEME-ChIP, visualization application.
-
-# -----------------------------------------------------------------------------
-# DOCUMENTATION
-# -----------------------------------------------------------------------------
-
-# A list of documentation file in .md format that should be viewable from the
-# web interface. These files are in the 'docs' subdirectory. The first file
-# listed will be used as a documentation index and is index.md by convention
-documentation_files:
-  - 'index.md'
-
-# -----------------------------------------------------------------------------
-# NEXTFLOW WORKFLOW CONFIGURATION
-# -----------------------------------------------------------------------------
-
-# Remember - The workflow file is always named 'workflow/main.f'
-#            The workflow must publish all final output into $baseDir
-
-# A list of clueter environment modules that this workflow requires to run.
-# Specify versioned module names to ensure reproducability.
-workflow_modules:
-  - 'deeptools/2.3.5'
-  - 'meme/4.11.1-gcc-openmpi'
-
-# A list of parameters used by the workflow, defining how to present them,
-# options etc in the web interface. For each parameter:
-#
-# REQUIRED INFORMATION
-#  id:         The name of the parameter in the NEXTFLOW workflow
-#  type:       The type of the parameter, one of:
-#                string    - A free-format string
-#                integer   - An integer
-#                real      - A real number
-#                file      - A single file from user data
-#                files     - One or more files from user data
-#                select    - A selection from a list of values
-#  required:    true/false, must the parameter be entered/chosen?
-#  description: A user friendly description of the meaning of the parameter
-#
-# OPTIONAL INFORMATION
-#  default:   A default value for the parameter (optional)
-#  min:       Minium value/characters/files for number/string/files types
-#  max:       Maxumum value/characters/files for number/string/files types
-#  regex:     A regular expression that describes valid entries / filenames
-#
-# SELECT TYPE
-#  choices:   A set of choices presented to the user for the parameter.
-#             Each choice is a pair of value and description, e.g.
-#
-#             choices:
-#               - [ 'myval', 'The first option']
-#               - [ 'myval', 'The second option']
-#
-# NOTE - All parameters are passed to NEXTFLOW as strings... but they
-#        are validated by astrocyte using the information provided above
-
-workflow_parameters:
-
-  - id: design
-    type: file
-    required: true
-    regex: ".*csv"
-    description: |
-      A design file listing pairs of sample name and sample group.
-      Columns must include: SampleID,Tissue, Factor, Condition, Replicate, Peaks, bamReads, bamControl, ControlID, PeakCaller
-
-  - id: genome
-    type: select
-    choices:
-      - [ '/project/shared/bicf_workflow_ref/GRCh38', 'Human GRCh38']
-      - [ '/project/shared/bicf_workflow_ref/GRCh37', 'Human GRCh37']
-      - [ '/project/shared/bicf_workflow_ref/GRCm38', 'Mouse GRCh38']
-    required: true
-    description: |
-      Reference genome for annotation
-
-  - id: toppeak
-    type: integer
-    required: true
-    description: |
-      The number of top peaks to use for motif discovery.
-# -----------------------------------------------------------------------------
-# SHINY APP CONFIGURATION
-# -----------------------------------------------------------------------------
-
-# Remember - The vizapp is always 'vizapp/server.R' 'vizapp/ui.R'
-#            The workflow must publish all final output into $baseDir
-
-# Name of the R module that the vizapp will run against
-vizapp_r_module: 'R/3.2.1-intel'
-
-# List of any CRAN packages, not provided by the modules, that must be made
-# available to the vizapp
-vizapp_cran_packages:
-  - sqldf
-  - shiny
-  - Vennerable
-  - DT
-  - ggplot2
-  - gplots
-  - gtools
-  - RColorBrewer
-
-
-# # List of any Bioconductor packages, not provided by the modules, that must be made
-# available to the vizapp
-vizapp_bioc_packages:
-  - qusage
-  - ballgown
-vizapp_github_packages:
-  - js229/Vennerable
-
diff --git a/workflow/main.nf b/workflow/main.nf
index 2095ec6..937e9ba 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -1,36 +1,37 @@
 #!/usr/bin/env nextflow
-	
-// Default parameter values to run tests
-// params.bams="$baseDir/../test/*.bam"
-   params.testpath="/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/"
-   params.design="/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/samplesheet.csv"
-   params.genomepath="/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/genome/hg19/"
+   params.design="$baseDir/../test/samplesheet.csv"
+   params.bams = "$baseDir/../test/*.bam"
+   params.peaks = "$baseDir/../test/*/broadPeak"
+   params.genomepath="/project/shared/bicf_workflow_ref/hg19/"
    species = "hg19"
    toppeakcount = 200
-// design_file = file(params.design)
-// bams=file(params.bams)
-//gtf_file = file("$params.genome/gencode.gtf")
-//genenames = file("$params.genome/genenames.txt")
-//geneset = file("$params.genome/gsea_gmt/$params.geneset")
+   design_file = file(params.design)
+   deeptools_design = Channel.fromPath(params.design)
+   diffbind_design = Channel.fromPath(params.design)
+   chipseeker_design = Channel.fromPath(params.design)
+   meme_design = Channel.fromPath(params.design)
+   index_bams = Channel.fromPath(params.bams)
+   deeptools_bams = Channel.fromPath(params.bams) 
+   deeptools_peaks = Channel.fromPath(params.peaks) 
+   chipseeker_peaks = Channel.fromPath(params.peaks) 
+   diffbind_bams = Channel.fromPath(params.bams) 
+   diffbind_peaks = Channel.fromPath(params.peaks) 
+   meme_peaks = Channel.fromPath(params.peaks) 
 
-
-process processdesign {
+process bamindex {
    publishDir "$baseDir/output/", mode: 'copy'
-//   input:
-//   file design_file from input
-//   file annotation Tdx
+   input:
+     file index_bam_files from index_bams
    output:
-     file "new_design" into deeptools_design
-     file "new_design" into diffbind_design
-     file "new_design" into chipseeker_design
-     file "new_design" into meme_design
- 
+     file "*bai" into deeptools_bamindex
+     file "*bai" into diffbind_bamindex
 
-     script:
+   script:
      """
      module load python/2.7.x-anaconda
      source activate /project/shared/bicf_workflow_ref/chipseq_bchen4/
-     python $baseDir/scripts/preprocessDesign.py -i ${params.design} 
+     module load samtools/intel/1.3
+     samtools index ${index_bam_files} 
      """
 }
 
@@ -38,23 +39,28 @@ process run_deeptools {
    publishDir "$baseDir/output", mode: 'copy'
    input:
      file deeptools_design_file from deeptools_design
-   file annotation Tdx
+     file deeptools_bam_files from deeptools_bams.toList()
+     file deeptools_peak_files from deeptools_peaks.toList()
+     file deeptools_bam_indexes from deeptools_bamindex.toList()
    output:
      stdout result
      script:
      """
      module load python/2.7.x-anaconda
      source activate /project/shared/bicf_workflow_ref/chipseq_bchen4/
-     module load deeptools/2.3.5 
-     python $baseDir/scripts/runDeepTools.py -i $deeptools_design_file -g ${params.genomepath}}
+     module load deeptools/2.3.5
+     python $baseDir/scripts/runDeepTools.py -i ${params.design} -g ${params.genomepath}
 """
 }
 
 
 process run_diffbind {
-//   publishDir "$baseDir/output", mode: 'copy'
+   publishDir "$baseDir/output", mode: 'copy'
    input:
      file diffbind_design_file from diffbind_design
+     file diffbind_bam_files from diffbind_bams.toList()
+     file diffbind_peak_files from diffbind_peaks.toList()
+     file diffbind_bam_indexes from diffbind_bamindex.toList()
    output:
      file "diffpeak.design" into diffpeaksdesign_chipseeker
      file "diffpeak.design" into diffpeaksdesign_meme
@@ -74,7 +80,7 @@ process run_chipseeker_diffpeak {
      file diffpeak_design_file from diffpeaksdesign_chipseeker
      file diffpeaks from diffpeaks_chipseeker
    output:
-     stdout result
+     stdout result_chipseeker
    script:
      """
      module load python/2.7.x-anaconda
@@ -84,9 +90,10 @@ process run_chipseeker_diffpeak {
 }
 
 process run_chipseeker_originalpeak {
-//   publishDir "$baseDir/output", mode: 'copy'
+   publishDir "$baseDir/output", mode: 'copy'
    input:
      file design_file from chipseeker_design
+     file chipseeker_peak_files from chipseeker_peaks
    output:
      stdout result1
    script:
@@ -101,6 +108,7 @@ process run_meme_original {
    publishDir "$baseDir/output", mode: 'copy'
    input:
      file design_meme from meme_design
+     file meme_peak_files from meme_peaks
    output:
      stdout result_meme_original
    script:
diff --git a/workflow/scripts/runMemechip.py b/workflow/scripts/runMemechip.py
index 0bc79d6..b2a15a3 100644
--- a/workflow/scripts/runMemechip.py
+++ b/workflow/scripts/runMemechip.py
@@ -40,7 +40,7 @@ def main():
   #run(args.infile, args.genome, args.limit, args.output)
   #get Pool ready
   dfile = pd.read_csv(args.infile)
-  meme_arglist =  zip(dfile['Peaks'].tolist(),[args.genome+"hg19.2bit"]*dfile.shape[0],[str(args.limit)]*dfile.shape[0],dfile['SampleID'].tolist())  
+  meme_arglist =  zip(dfile['Peaks'].tolist(),[args.genome+"genome.2bit"]*dfile.shape[0],[str(args.limit)]*dfile.shape[0],dfile['SampleID'].tolist())  
   work_pool = Pool(min(12,dfile.shape[0]))
   resultList =  work_pool.map(run_wrapper, meme_arglist)
   work_pool.close()
-- 
GitLab