# astrocyte_pkg.yml

#
# metadata for the BICF RNASeq astrocyte workflow package
#

# -----------------------------------------------------------------------------
# BASIC INFORMATION
# -----------------------------------------------------------------------------

# Unique identifier of the workflow package (text and underscores only)
name: 'rnaseq_bicf'
# Human-readable title of the workflow package
title: 'BICF RNASeq Analysis Workflow'
# Plain-text summary of what the workflow package does
description: |
  This is a workflow package for the BioHPC/BICF RNASeq workflow system.
  It implements differential expression analysis, gene set enrichment analysis,
  gene fusion analysis and variant identification using RNASeq data.
# Author of the workflow package
author: 'Brandi Cantarel'
# Contact email address for questions about the package
email: 'biohpc-help@utsouthwestern.edu'
  

# -----------------------------------------------------------------------------
# DOCUMENTATION
# -----------------------------------------------------------------------------

# A list of documentation files in .md format that should be viewable from the
# web interface. These files are in the 'docs' subdirectory. The first file
# listed will be used as the documentation index and is index.md by convention.
documentation_files:
  - 'index.md'

# -----------------------------------------------------------------------------
# NEXTFLOW WORKFLOW CONFIGURATION
# -----------------------------------------------------------------------------

# Remember - The workflow file is always named 'workflow/main.nf'
#            The workflow must publish all final output into $baseDir

# A list of cluster environment modules that this workflow requires to run.
# Specify versioned module names to ensure reproducibility.
# NOTE(review): the 'align' parameter offers STAR and the 'fusion' parameter
# runs Star Fusion, but no STAR module is listed here - confirm whether the
# workflow loads it elsewhere or whether it should be added to this list.
workflow_modules:
  - 'trimgalore/0.4.1'
  - 'cutadapt/1.9.1'
  - 'hisat2/2.0.1-beta-intel'
  - 'samtools/intel/1.3'
  - 'picard/1.127'
  - 'subread/1.5.0-intel'
  - 'stringtie/1.1.2-intel'
  - 'speedseq/20160506'
  - 'python/2.7.x-anaconda'
# A list of parameters used by the workflow, defining how to present them,
# options etc in the web interface. For each parameter:
#
# REQUIRED INFORMATION
#  id:         The name of the parameter in the NEXTFLOW workflow
#  type:       The type of the parameter, one of:
#                string    - A free-format string
#                integer   - An integer
#                real      - A real number
#                file      - A single file from user data
#                files     - One or more files from user data
#                select    - A selection from a list of values
#  required:    true/false, must the parameter be entered/chosen?
#  description: A user friendly description of the meaning of the parameter
#
# OPTIONAL INFORMATION
#  default:   A default value for the parameter (optional)
#  min:       Minimum value/characters/files for number/string/files types
#  max:       Maximum value/characters/files for number/string/files types
#  regex:     A regular expression that describes valid entries / filenames
#
# SELECT TYPE
#  choices:   A set of choices presented to the user for the parameter.
#             Each choice is a pair of value and description, e.g.
#
#             choices:
#               - [ 'myval1', 'The first option']
#               - [ 'myval2', 'The second option']
#
# NOTE - All parameters are passed to NEXTFLOW as strings... but they
#        are validated by astrocyte using the information provided above

workflow_parameters:

  # Input read files (at least one). The regex requires 'fastq' or 'fq' to
  # appear in the file name, so compressed files such as 'x.fastq.gz' still
  # match. A '*' quantifier on the group would make it optional and let any
  # file name through.
  - id: fastqs
    type: files
    required: true
    description: >
      One or more input paired-end FASTQ files from a RNASeq experiment and a
      design file with the link between the sample name and the sample group
    regex: ".*(fastq|fq).*"
    min: 1

  # Library strandedness. Choice values are the quoted strings '0', '1' and
  # '2' so they are passed as text rather than parsed as integers.
  - id: stranded
    type: select
    required: true
    choices:
      - ['0', 'Unstranded']
      - ['1', 'Stranded']
      - ['2', 'Reverse Stranded']
    description: |
      In the case that the sequence libraries were generated using a strand-specific protocol.
      
  # Sequencing layout: paired-end ('pe') or single-end ('se') reads.
  - id: pairs
    type: select
    required: true
    description: >
      In single-end sequencing, the sequencer reads a fragment from only one
      end to the other, generating the sequence of base pairs. In paired-end
      reading it starts at one read, finishes this direction at the specified
      read length, and then starts another round of reading from the opposite
      end of the fragment.
    choices:
      - ['pe', 'Paired End']
      - ['se', 'Single End']

  # Aligner selection: 'hisat' (HiSAT2) or 'star' (STAR).
  - id: align
    type: select
    required: true
    description: >
      Alignment tool
    choices:
      - ['hisat', 'HiSAT2']
      - ['star', 'STAR']
      
  # Gene fusion detection toggle: 'skip' or 'detect'.
  - id: fusion
    type: select
    required: true
    description: >
      Run Star Fusion
    choices:
      - ['skip', 'No Fusion Detection']
      - ['detect', 'Fusion Detection']

  # Duplicate handling. The 'null' choice value is a quoted string, not a
  # YAML null, and must stay quoted.
  - id: markdups
    type: select
    required: true
    description: >
      Duplicate reads are defined as originating from the same original
      fragment of DNA. Duplicates are identified as read pairs having
      identical 5-prime positions (coordinate and strand) for both reads in a
      mate pair and optionally, matching unique molecular identifier reads.
    choices:
      - ['picard', 'Remove Duplicates']
      - ['null', 'Keep All Sequences']

  # Differential expression analysis toggle: 'detect' runs the statistical
  # analysis, 'skip' omits it. Tool names in the description are spelled as
  # in vizapp_bioc_packages (DESeq2, edgeR).
  - id: dea
    type: select
    required: true
    choices:
      - ['detect', 'Run Statistical Analysis']
      - ['skip', 'Skip Statistical Analysis']
    description: |
      Runs DESeq2 and edgeR

  # Design file mapping samples to groups; a single file whose name matches
  # the regex below. The description keeps its line break, hence the literal
  # block scalar.
  - id: design
    type: file
    required: true
    description: |
      A design file listing pairs of sample name and sample group.
      Columns must include: SampleID,SampleName,SampleGroup,FullPathToFqR1,FullPathToFqR2
    regex: ".*txt"

  # Reference genome. Each choice value is the path of a reference directory
  # that is passed to the workflow as a string.
  - id: genome
    type: select
    required: true
    description: >
      Reference genome for alignment
    choices:
      - ['/project/shared/bicf_workflow_ref/human/GRCh38', 'Human GRCh38']
      - ['/project/shared/bicf_workflow_ref/human/GRCh37', 'Human GRCh37']
      - ['/project/shared/bicf_workflow_ref/mouse/GRCm38', 'Mouse GRCm38']

  # Gene set collection; each choice value is a .gmt file name.
  # NOTE(review): c5 and c7 use the 'entrez' files while the other
  # collections use 'symbols' - confirm this mix is intentional.
  - id: geneset
    type: select
    required: true
    description: >
      Gene Set Definitions used for QuSAGE Analysis -- see
      http://software.broadinstitute.org/gsea/msigdb/ for geneset descriptions
    choices:
      - ['h.all.v5.1.symbols.gmt', 'Hallmark Gene Sets']
      - ['c2.all.v5.1.symbols.gmt', 'Curated Gene Sets']
      - ['c3.all.v5.1.symbols.gmt', 'Motif Gene Sets']
      - ['c5.all.v5.1.entrez.gmt', 'Gene Ontology Gene Sets']
      - ['c6.all.v5.1.symbols.gmt', 'Oncogenic Signatures']
      - ['c7.all.v5.1.entrez.gmt', 'Immunological Signatures']


# -----------------------------------------------------------------------------
# SHINY APP CONFIGURATION
# -----------------------------------------------------------------------------

# Remember - The vizapp is always 'vizapp/server.R' and 'vizapp/ui.R'
#            The workflow must publish all final output into $baseDir

# Name of the versioned R environment module that the vizapp will run against
vizapp_r_module: 'R/3.4.1-gccmkl'

# List of any CRAN packages, not provided by the modules, that must be made
# available to the vizapp.
# Vennerable is not listed here because it is not distributed on CRAN; it is
# provided through vizapp_github_packages instead. 'grid' is not listed
# because it is a base R package and cannot be installed from CRAN.
vizapp_cran_packages:
  - sqldf
  - crosstalk
  - htmltools
  - htmlwidgets
  - httpuv
  - shiny
  - DT
  - gridExtra
  - ggplot2
  - gplots
  - gtools
  - RColorBrewer
  - tidyverse


# List of any Bioconductor packages, not provided by the modules, that must be
# made available to the vizapp
vizapp_bioc_packages:
  - qusage
  - ballgown
  - edgeR
  - DESeq2
# List of any packages that must be made available to the vizapp from GitHub.
# NOTE(review): entries look like 'owner/repository' - confirm the expected
# format against the astrocyte documentation.
vizapp_github_packages:
  - js229/Vennerable