# astrocyte_pkg.yml

#
# metadata for the example astrocyte ChipSeq workflow package
#

# -----------------------------------------------------------------------------
# BASIC INFORMATION
# -----------------------------------------------------------------------------

# Unique identifier of the workflow package (text and underscores only)
name: chipseq_analysis_bicf
# Author of the package
author: Beibei Chen
# Contact email address for questions about the package
email: biohpc-help@utsouthwestern.edu
# Longer, more informative title for the package
title: BICF ChIP-seq Analysis Workflow
# Plain-text summary of the package. The literal block scalar ('|') keeps the
# line break between the two sentences and a single trailing newline.
description: |
  This is a workflow package for the BioHPC/BICF ChIP-seq workflow system.
  It implements a simple ChIP-seq analysis workflow using deepTools, Diffbind, ChipSeeker and MEME-ChIP, visualization application.

# -----------------------------------------------------------------------------
# DOCUMENTATION
# -----------------------------------------------------------------------------

# A list of documentation files in .md format that should be viewable from the
# web interface. These files are in the 'docs' subdirectory. The first file
# listed will be used as the documentation index and is index.md by convention.
# NOTE(review): each entry is a two-element pair; the first element is the file
# name, the second is presumably the label shown for it — confirm against the
# astrocyte documentation.
documentation_files:
  - ['index.md', 'chipseq-analysis']

# -----------------------------------------------------------------------------
# NEXTFLOW WORKFLOW CONFIGURATION
# -----------------------------------------------------------------------------

# Remember - The workflow file is always named 'workflow/main.nf'
#            The workflow must publish all final output into $baseDir

# A list of cluster environment modules that this workflow requires to run.
# Specify versioned module names to ensure reproducibility.
workflow_modules:
  - 'deeptools/2.3.5'
  - 'meme/4.11.1-gcc-openmpi'

# A list of parameters used by the workflow, defining how to present them,
# options etc in the web interface. For each parameter:
#
# REQUIRED INFORMATION
#  id:         The name of the parameter in the NEXTFLOW workflow
#  type:       The type of the parameter, one of:
#                string    - A free-format string
#                integer   - An integer
#                real      - A real number
#                file      - A single file from user data
#                files     - One or more files from user data
#                select    - A selection from a list of values
#  required:    true/false, must the parameter be entered/chosen?
#  description: A user friendly description of the meaning of the parameter
#
# OPTIONAL INFORMATION
#  default:   A default value for the parameter (optional)
#  min:       Minimum value/characters/files for number/string/files types
#  max:       Maximum value/characters/files for number/string/files types
#  regex:     A regular expression that describes valid entries / filenames
#
# SELECT TYPE
#  choices:   A set of choices presented to the user for the parameter.
#             Each choice is a pair of value and description, e.g.
#
#             choices:
#               - [ 'myval1', 'The first option']
#               - [ 'myval2', 'The second option']
#
# NOTE - All parameters are passed to NEXTFLOW as strings... but they
#        are validated by astrocyte using the information provided above

# Parameters exposed to the user for this workflow. Every entry lists its keys
# in the same order: id, type, required, description, then any constraint
# (regex / choices / default).
workflow_parameters:

  # Alignment files: one or more user files whose names match the regex.
  # NOTE(review): the pattern has no literal dot before the extension and no
  # end anchor; whether names such as 'x.bam.bai' are accepted depends on how
  # astrocyte applies the regex — confirm.
  - id: bams
    type: files
    required: true
    description: |
      Bam files of all samples
    regex: '.*(bam|BAM)'

  # Peak files: one or more user files; the description states they must be
  # pre-sorted by the user.
  - id: peaks
    type: files
    required: true
    description: |
      Peak files of all samples. Peaks should be sorted by user using either p_value or intensity of the signals.Bed format.
    regex: '.*(narrowPeak|broadPeak|bed|BED)'

  # Experimental design table in csv format.
  # NOTE(review): declared as 'files' although the description speaks of a
  # single design file — confirm whether the 'file' type was intended.
  - id: design
    type: files
    required: true
    description: |
      A design file listing pairs of sample name and sample group. Must be in csv format
      Columns must include: SampleID,Tissue, Factor, Condition, Replicate, Peaks, bamReads, bamControl, ControlID, PeakCaller
    regex: '.*(csv)'

  # Reference genome selection; each choice is a [value, label] pair where the
  # value is a reference directory path.
  - id: genomepath
    type: select
    required: true
    description: |
      Reference genome for annotation
    choices:
      - ['/project/shared/bicf_workflow_ref/GRCh38', 'human GRCh38']
      - ['/project/shared/bicf_workflow_ref/GRCh37', 'human GRCh37']
      - ['/project/shared/bicf_workflow_ref/GRCm38', 'mouse GRCm38']

  # Number of top peaks used for motif discovery; per the description, the
  # default of -1 means all peaks are used.
  - id: toppeakcount
    type: integer
    required: true
    description: |
      The number of top peaks to use for motif discovery. This program won't sort peak BED files for you, so please make sure your peak files are already sorted. Default : -1, use all the peaks
    default: -1

# -----------------------------------------------------------------------------
# SHINY APP CONFIGURATION
# -----------------------------------------------------------------------------

# Remember - The vizapp is always 'vizapp/server.R' 'vizapp/ui.R'
#            The workflow must publish all final output into $baseDir

# Name of the R environment module that the vizapp (the Shiny app in
# 'vizapp/server.R' and 'vizapp/ui.R') will run against. The module name
# carries an explicit version.
vizapp_r_module: 'R/3.2.1-intel'

# CRAN packages that the vizapp needs and that are not already provided by
# the modules
vizapp_cran_packages:
  - 'shiny'
  - 'shinyFiles'

# Bioconductor packages that the vizapp needs and that are not already
# provided by the modules
vizapp_bioc_packages:
  - 'qusage'
  # - 'ballgown'
# Packages for the vizapp that are presumably obtained from GitHub; the entry
# appears to use 'owner/repository' form — confirm against the astrocyte docs.
vizapp_github_packages:
  - 'js229/Vennerable'