From 014508e5d1d0a2113f0ad20376dff37c18f6f60e Mon Sep 17 00:00:00 2001 From: Beibei Chen <beibei.chen@utsouthwestern.edu> Date: Wed, 8 Feb 2017 09:09:17 -0600 Subject: [PATCH] Got rid of abs path --- astrocyte_package.yml | 132 -------------------------------- workflow/main.nf | 66 +++++++++------- workflow/scripts/runMemechip.py | 2 +- 3 files changed, 38 insertions(+), 162 deletions(-) delete mode 100644 astrocyte_package.yml diff --git a/astrocyte_package.yml b/astrocyte_package.yml deleted file mode 100644 index 2a9f17c..0000000 --- a/astrocyte_package.yml +++ /dev/null @@ -1,132 +0,0 @@ -# -# metadata for the example astrocyte ChipSeq workflow package -# - -# ----------------------------------------------------------------------------- -# BASIC INFORMATION -# ----------------------------------------------------------------------------- - -# A unique identifier for the workflow package, text/underscores only -name: 'chipseq_analysis_bicf' -# Who wrote this? -author: 'Beibei Chen' -# A contact email address for questions -email: 'biohpc-help@utsouthwestern.edu' -# A more informative title for the workflow package -title: 'BICF ChIP-seq Analysis Workflow' -# A summary of the workflow package in plain text -description: | - This is a workflow package for the BioHPC/BICF ChIP-seq workflow system. - It implements a simple ChIP-seq analysis workflow using deepTools, Diffbind, ChipSeeker and MEME-ChIP, visualization application. - -# ----------------------------------------------------------------------------- -# DOCUMENTATION -# ----------------------------------------------------------------------------- - -# A list of documentation file in .md format that should be viewable from the -# web interface. These files are in the 'docs' subdirectory. The first file -# listed will be used as a documentation index and is index.md by convention -documentation_files: - - 'index.md' - -# ----------------------------------------------------------------------------- -# NEXTFLOW WORKFLOW CONFIGURATION -# ----------------------------------------------------------------------------- - -# Remember - The workflow file is always named 'workflow/main.f' -# The workflow must publish all final output into $baseDir - -# A list of clueter environment modules that this workflow requires to run. -# Specify versioned module names to ensure reproducability. -workflow_modules: - - 'deeptools/2.3.5' - - 'meme/4.11.1-gcc-openmpi' - -# A list of parameters used by the workflow, defining how to present them, -# options etc in the web interface. For each parameter: -# -# REQUIRED INFORMATION -# id: The name of the parameter in the NEXTFLOW workflow -# type: The type of the parameter, one of: -# string - A free-format string -# integer - An integer -# real - A real number -# file - A single file from user data -# files - One or more files from user data -# select - A selection from a list of values -# required: true/false, must the parameter be entered/chosen? -# description: A user friendly description of the meaning of the parameter -# -# OPTIONAL INFORMATION -# default: A default value for the parameter (optional) -# min: Minium value/characters/files for number/string/files types -# max: Maxumum value/characters/files for number/string/files types -# regex: A regular expression that describes valid entries / filenames -# -# SELECT TYPE -# choices: A set of choices presented to the user for the parameter. -# Each choice is a pair of value and description, e.g. -# -# choices: -# - [ 'myval', 'The first option'] -# - [ 'myval', 'The second option'] -# -# NOTE - All parameters are passed to NEXTFLOW as strings... but they -# are validated by astrocyte using the information provided above - -workflow_parameters: - - - id: design - type: file - required: true - regex: ".*csv" - description: | - A design file listing pairs of sample name and sample group. - Columns must include: SampleID,Tissue, Factor, Condition, Replicate, Peaks, bamReads, bamControl, ControlID, PeakCaller - - - id: genome - type: select - choices: - - [ '/project/shared/bicf_workflow_ref/GRCh38', 'Human GRCh38'] - - [ '/project/shared/bicf_workflow_ref/GRCh37', 'Human GRCh37'] - - [ '/project/shared/bicf_workflow_ref/GRCm38', 'Mouse GRCh38'] - required: true - description: | - Reference genome for annotation - - - id: toppeak - type: integer - required: true - description: | - The number of top peaks to use for motif discovery. -# ----------------------------------------------------------------------------- -# SHINY APP CONFIGURATION -# ----------------------------------------------------------------------------- - -# Remember - The vizapp is always 'vizapp/server.R' 'vizapp/ui.R' -# The workflow must publish all final output into $baseDir - -# Name of the R module that the vizapp will run against -vizapp_r_module: 'R/3.2.1-intel' - -# List of any CRAN packages, not provided by the modules, that must be made -# available to the vizapp -vizapp_cran_packages: - - sqldf - - shiny - - Vennerable - - DT - - ggplot2 - - gplots - - gtools - - RColorBrewer - - -# # List of any Bioconductor packages, not provided by the modules, that must be made -# available to the vizapp -vizapp_bioc_packages: - - qusage - - ballgown -vizapp_github_packages: - - js229/Vennerable - diff --git a/workflow/main.nf b/workflow/main.nf index 2095ec6..937e9ba 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -1,36 +1,37 @@ #!/usr/bin/env nextflow - -// Default parameter values to run tests -// params.bams="$baseDir/../test/*.bam" - params.testpath="/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/" - params.design="/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/samplesheet.csv" - params.genomepath="/project/BICF/BICF_Core/bchen4/chipseq_analysis/test/genome/hg19/" + params.design="$baseDir/../test/samplesheet.csv" + params.bams = "$baseDir/../test/*.bam" + params.peaks = "$baseDir/../test/*/broadPeak" + params.genomepath="/project/shared/bicf_workflow_ref/hg19/" species = "hg19" toppeakcount = 200 -// design_file = file(params.design) -// bams=file(params.bams) -//gtf_file = file("$params.genome/gencode.gtf") -//genenames = file("$params.genome/genenames.txt") -//geneset = file("$params.genome/gsea_gmt/$params.geneset") + design_file = file(params.design) + deeptools_design = Channel.fromPath(params.design) + diffbind_design = Channel.fromPath(params.design) + chipseeker_design = Channel.fromPath(params.design) + meme_design = Channel.fromPath(params.design) + index_bams = Channel.fromPath(params.bams) + deeptools_bams = Channel.fromPath(params.bams) + deeptools_peaks = Channel.fromPath(params.peaks) + chipseeker_peaks = Channel.fromPath(params.peaks) + diffbind_bams = Channel.fromPath(params.bams) + diffbind_peaks = Channel.fromPath(params.peaks) + meme_peaks = Channel.fromPath(params.peaks) - -process processdesign { +process bamindex { publishDir "$baseDir/output/", mode: 'copy' -// input: -// file design_file from input -// file annotation Tdx + input: + file index_bam_files from index_bams output: - file "new_design" into deeptools_design - file "new_design" into diffbind_design - file "new_design" into chipseeker_design - file "new_design" into meme_design - + file "*bai" into deeptools_bamindex + file "*bai" into diffbind_bamindex - script: + script: """ module load python/2.7.x-anaconda source activate /project/shared/bicf_workflow_ref/chipseq_bchen4/ - python $baseDir/scripts/preprocessDesign.py -i ${params.design} + module load samtools/intel/1.3 + samtools index ${index_bam_files} """ } @@ -38,23 +39,28 @@ process run_deeptools { publishDir "$baseDir/output", mode: 'copy' input: file deeptools_design_file from deeptools_design - file annotation Tdx + file deeptools_bam_files from deeptools_bams.toList() + file deeptools_peak_files from deeptools_peaks.toList() + file deeptools_bam_indexes from deeptools_bamindex.toList() output: stdout result script: """ module load python/2.7.x-anaconda source activate /project/shared/bicf_workflow_ref/chipseq_bchen4/ - module load deeptools/2.3.5 - python $baseDir/scripts/runDeepTools.py -i $deeptools_design_file -g ${params.genomepath}} + module load deeptools/2.3.5 + python $baseDir/scripts/runDeepTools.py -i ${params.design} -g ${params.genomepath}} """ } process run_diffbind { -// publishDir "$baseDir/output", mode: 'copy' + publishDir "$baseDir/output", mode: 'copy' input: file diffbind_design_file from diffbind_design + file diffbind_bam_files from diffbind_bams.toList() + file diffbind_peak_files from diffbind_peaks.toList() + file diffbind_ban_indexes from diffbind_bamindex.toList() output: file "diffpeak.design" into diffpeaksdesign_chipseeker file "diffpeak.design" into diffpeaksdesign_meme @@ -74,7 +80,7 @@ process run_chipseeker_diffpeak { file diffpeak_design_file from diffpeaksdesign_chipseeker file diffpeaks from diffpeaks_chipseeker output: - stdout result + stdout result_chipseeker script: """ module load python/2.7.x-anaconda @@ -84,9 +90,10 @@ process run_chipseeker_diffpeak { } process run_chipseeker_originalpeak { -// publishDir "$baseDir/output", mode: 'copy' + publishDir "$baseDir/output", mode: 'copy' input: file design_file from chipseeker_design + file chipseeker_peak_files from chipseeker_peaks output: stdout result1 script: @@ -101,6 +108,7 @@ process run_meme_original { publishDir "$baseDir/output", mode: 'copy' input: file design_meme from meme_design + file meme_peak_files from meme_peaks output: stdout result_meme_original script: diff --git a/workflow/scripts/runMemechip.py b/workflow/scripts/runMemechip.py index 0bc79d6..b2a15a3 100644 --- a/workflow/scripts/runMemechip.py +++ b/workflow/scripts/runMemechip.py @@ -40,7 +40,7 @@ def main(): #run(args.infile, args.genome, args.limit, args.output) #get Pool ready dfile = pd.read_csv(args.infile) - meme_arglist = zip(dfile['Peaks'].tolist(),[args.genome+"hg19.2bit"]*dfile.shape[0],[str(args.limit)]*dfile.shape[0],dfile['SampleID'].tolist()) + meme_arglist = zip(dfile['Peaks'].tolist(),[args.genome+"genome.2bit"]*dfile.shape[0],[str(args.limit)]*dfile.shape[0],dfile['SampleID'].tolist()) work_pool = Pool(min(12,dfile.shape[0])) resultList = work_pool.map(run_wrapper, meme_arglist) work_pool.close() -- GitLab