Got rid of abs path

014508e5 · Beibei Chen · 6e0bfcf3 · 6e0bfcf3 · 014508e5 · 014508e5
Commit 014508e5 authored 8 years ago by Beibei Chen
--- a/astrocyte_package.yml
+++ b/astrocyte_package.yml
-#
-# metadata for the example astrocyte ChipSeq workflow package
-#
-# -----------------------------------------------------------------------------
-# BASIC INFORMATION
-# -----------------------------------------------------------------------------
-# A unique identifier for the workflow package, text/underscores only
-name: 'chipseq_analysis_bicf'
-# Who wrote this?
-author: 'Beibei Chen'
-# A contact email address for questions
-email: 'biohpc-help@utsouthwestern.edu'
-# A more informative title for the workflow package
-title: 'BICF ChIP-seq Analysis Workflow'
-# A summary of the workflow package in plain text
-description: |
-  This is a workflow package for the BioHPC/BICF ChIP-seq workflow system.
-  It implements a simple ChIP-seq analysis workflow using deepTools, Diffbind, ChipSeeker and MEME-ChIP, visualization application.
-# -----------------------------------------------------------------------------
-# DOCUMENTATION
-# -----------------------------------------------------------------------------
-# A list of documentation files in .md format that should be viewable from the
-# web interface. These files are in the 'docs' subdirectory. The first file
-# listed will be used as a documentation index and is index.md by convention
-documentation_files:
-  - 'index.md'
-# -----------------------------------------------------------------------------
-# NEXTFLOW WORKFLOW CONFIGURATION
-# -----------------------------------------------------------------------------
-# Remember - The workflow file is always named 'workflow/main.nf'
-#            The workflow must publish all final output into $baseDir
-# A list of cluster environment modules that this workflow requires to run.
-# Specify versioned module names to ensure reproducibility.
-workflow_modules:
-  - 'deeptools/2.3.5'
-  - 'meme/4.11.1-gcc-openmpi'
-# A list of parameters used by the workflow, defining how to present them,
-# options etc in the web interface. For each parameter:
-#
-# REQUIRED INFORMATION
-#  id:         The name of the parameter in the NEXTFLOW workflow
-#  type:       The type of the parameter, one of:
-#                string    - A free-format string
-#                integer   - An integer
-#                real      - A real number
-#                file      - A single file from user data
-#                files     - One or more files from user data
-#                select    - A selection from a list of values
-#  required:    true/false, must the parameter be entered/chosen?
-#  description: A user friendly description of the meaning of the parameter
-#
-# OPTIONAL INFORMATION
-#  default:   A default value for the parameter (optional)
-#  min:       Minimum value/characters/files for number/string/files types
-#  max:       Maximum value/characters/files for number/string/files types
-#  regex:     A regular expression that describes valid entries / filenames
-#
-# SELECT TYPE
-#  choices:   A set of choices presented to the user for the parameter.
-#             Each choice is a pair of value and description, e.g.
-#
-#             choices:
-#               - [ 'myval1', 'The first option']
-#               - [ 'myval2', 'The second option']
-#
-# NOTE - All parameters are passed to NEXTFLOW as strings... but they
-#        are validated by astrocyte using the information provided above
-workflow_parameters:
-  - id: design
-    type: file
-    required: true
-    regex: ".*csv"
-    description: |
-      A design file listing pairs of sample name and sample group.
-      Columns must include: SampleID,Tissue, Factor, Condition, Replicate, Peaks, bamReads, bamControl, ControlID, PeakCaller
-  - id: genome
-    type: select
-    choices:
-      - [ '/project/shared/bicf_workflow_ref/GRCh38', 'Human GRCh38']
-      - [ '/project/shared/bicf_workflow_ref/GRCh37', 'Human GRCh37']
-      - [ '/project/shared/bicf_workflow_ref/GRCm38', 'Mouse GRCh38']
-    required: true
-    description: |
-      Reference genome for annotation
-  - id: toppeak
-    type: integer
-    required: true
-    description: |
-      The number of top peaks to use for motif discovery.
-# -----------------------------------------------------------------------------
-# SHINY APP CONFIGURATION
-# -----------------------------------------------------------------------------
-# Remember - The vizapp is always 'vizapp/server.R' 'vizapp/ui.R'
-#            The workflow must publish all final output into $baseDir
-# Name of the R module that the vizapp will run against
-vizapp_r_module: 'R/3.2.1-intel'
-# List of any CRAN packages, not provided by the modules, that must be made
-# available to the vizapp
-vizapp_cran_packages:
-  - sqldf
-  - shiny
-  - Vennerable
-  - DT
-  - ggplot2
-  - gplots
-  - gtools
-  - RColorBrewer
-# List of any Bioconductor packages, not provided by the modules, that must be made
-# available to the vizapp
-vizapp_bioc_packages:
-  - qusage
-  - ballgown
-vizapp_github_packages:
-  - js229/Vennerable
--- a/workflow/main.nf
+++ b/workflow/main.nf
 #!/usr/bin/env nextflow
+   params.design="$baseDir/../test/samplesheet.csv"
-// Default parameter values to run tests
+   params.bams = "$baseDir/../test/*.bam"
-// params.bams="$baseDir/../test/*.bam"
+   params.peaks = "$baseDir/../test/*/broadPeak"
-   params.testpath="/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/"
+   params.genomepath="/project/shared/bicf_workflow_ref/hg19/"
-   params.design="/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/samplesheet.csv"
-   params.genomepath="/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/genome/hg19/"
   species = "hg19"
   toppeakcount = 200
-// design_file = file(params.design)
+   design_file = file(params.design)
-// bams=file(params.bams)
+   deeptools_design = Channel.fromPath(params.design)
-//gtf_file = file("$params.genome/gencode.gtf")
+   diffbind_design = Channel.fromPath(params.design)
-//genenames = file("$params.genome/genenames.txt")
+   chipseeker_design = Channel.fromPath(params.design)
-//geneset = file("$params.genome/gsea_gmt/$params.geneset")
+   meme_design = Channel.fromPath(params.design)
+   index_bams = Channel.fromPath(params.bams)
+   deeptools_bams = Channel.fromPath(params.bams) 
+   deeptools_peaks = Channel.fromPath(params.peaks) 
+   chipseeker_peaks = Channel.fromPath(params.peaks) 
+   diffbind_bams = Channel.fromPath(params.bams) 
+   diffbind_peaks = Channel.fromPath(params.peaks) 
+   meme_peaks = Channel.fromPath(params.peaks) 
+process bamindex {
-process processdesign {
   publishDir "$baseDir/output/", mode: 'copy'
-//   input:
+   input:
-//   file design_file from input
+     file index_bam_files from index_bams
-//   file annotation Tdx
   output:
-     file "new_design" into deeptools_design
+     file "*bai" into deeptools_bamindex
-     file "new_design" into diffbind_design
+     file "*bai" into diffbind_bamindex
-     file "new_design" into chipseeker_design
-     file "new_design" into meme_design
-     script:
+   script:
     """
     module load python/2.7.x-anaconda
     source activate /project/shared/bicf_workflow_ref/chipseq_bchen4/
-     python $baseDir/scripts/preprocessDesign.py -i ${params.design} 
+     module load samtools/intel/1.3
+     samtools index ${index_bam_files} 
     """
 }
@@ -38,23 +39,28 @@ process run_deeptools {
   publishDir "$baseDir/output", mode: 'copy'
   input:
     file deeptools_design_file from deeptools_design
-   file annotation Tdx
+     file deeptools_bam_files from deeptools_bams.toList()
+     file deeptools_peak_files from deeptools_peaks.toList()
+     file deeptools_bam_indexes from deeptools_bamindex.toList()
   output:
     stdout result
     script:
     """
     module load python/2.7.x-anaconda
     source activate /project/shared/bicf_workflow_ref/chipseq_bchen4/
-     module load deeptools/2.3.5 
+     module load deeptools/2.3.5
-     python $baseDir/scripts/runDeepTools.py -i $deeptools_design_file -g ${params.genomepath}}
+     python $baseDir/scripts/runDeepTools.py -i ${params.design} -g ${params.genomepath}}
 """
 }
 process run_diffbind {
-//   publishDir "$baseDir/output", mode: 'copy'
+   publishDir "$baseDir/output", mode: 'copy'
   input:
     file diffbind_design_file from diffbind_design
+     file diffbind_bam_files from diffbind_bams.toList()
+     file diffbind_peak_files from diffbind_peaks.toList()
+       file diffbind_ban_indexes from diffbind_bamindex.toList()
   output:
     file "diffpeak.design" into diffpeaksdesign_chipseeker
     file "diffpeak.design" into diffpeaksdesign_meme
@@ -74,7 +80,7 @@ process run_chipseeker_diffpeak {
     file diffpeak_design_file from diffpeaksdesign_chipseeker
     file diffpeaks from diffpeaks_chipseeker
   output:
-     stdout result
+     stdout result_chipseeker
   script:
     """
     module load python/2.7.x-anaconda
@@ -84,9 +90,10 @@ process run_chipseeker_diffpeak {
 }
 process run_chipseeker_originalpeak {
-//   publishDir "$baseDir/output", mode: 'copy'
+   publishDir "$baseDir/output", mode: 'copy'
   input:
     file design_file from chipseeker_design
+     file chipseeker_peak_files from chipseeker_peaks
   output:
     stdout result1
   script:
@@ -101,6 +108,7 @@ process run_meme_original {
   publishDir "$baseDir/output", mode: 'copy'
   input:
     file design_meme from meme_design
+     file meme_peak_files from meme_peaks
   output:
     stdout result_meme_original
   script:

--- a/workflow/scripts/runMemechip.py
+++ b/workflow/scripts/runMemechip.py
@@ -40,7 +40,7 @@ def main():
  #run(args.infile, args.genome, args.limit, args.output)
  #get Pool ready
  dfile = pd.read_csv(args.infile)
-  meme_arglist =  zip(dfile['Peaks'].tolist(),[args.genome+"hg19.2bit"]*dfile.shape[0],[str(args.limit)]*dfile.shape[0],dfile['SampleID'].tolist())  
+  meme_arglist =  zip(dfile['Peaks'].tolist(),[args.genome+"genome.2bit"]*dfile.shape[0],[str(args.limit)]*dfile.shape[0],dfile['SampleID'].tolist())  
  work_pool = Pool(min(12,dfile.shape[0]))
  resultList =  work_pool.map(run_wrapper, meme_arglist)
  work_pool.close()