#
# Metadata for the Astrocyte CellRanger count workflow package
#

# -----------------------------------------------------------------------------
# BASIC INFORMATION
# -----------------------------------------------------------------------------

# A unique identifier for the workflow package (text and underscores only).
name: 'cellranger_count'
# The authors of the workflow package.
author: 'Gervaise Henry and Venkat Malladi'
# A contact email address for questions about the package.
email: 'bicf@utsouthwestern.edu'
# A more informative, human-readable title for the workflow package.
title: 'BICF CellRanger count Workflow'
# A plain-text summary of the workflow package. Written as a literal block
# scalar ('|'), so the line breaks below are kept in the value.
description: |
  This is a workflow package for the BICF/Strand Lab CellRanger count workflow system.
  It implements 10x CellRanger count analysis workflow application.

# -----------------------------------------------------------------------------
# DOCUMENTATION
# -----------------------------------------------------------------------------

# A list of documentation files in .md format that should be viewable from the
# web interface. These files are in the 'docs' subdirectory. The first file
# listed will be used as a documentation index and is index.md by convention.
documentation_files:
  - 'index.md'

# -----------------------------------------------------------------------------
# NEXTFLOW WORKFLOW CONFIGURATION
# -----------------------------------------------------------------------------

# Remember - The workflow file is always named 'workflow/main.nf'
#            The workflow must publish all final output into $baseDir

# A list of cluster environment modules that this workflow requires to run.
# Specify versioned module names to ensure reproducibility.
workflow_modules:
  - 'python/3.6.1-2-anaconda'
  # One cellranger module per value offered by the 'version' parameter below.
  - 'cellranger/2.1.1'
  - 'cellranger/3.0.1'
  - 'cellranger/3.0.2'
  - 'bcl2fastq/2.17.1.14'

# A list of parameters used by the workflow, defining how to present them,
# options etc in the web interface. For each parameter:
#
# REQUIRED INFORMATION
#  id:         The name of the parameter in the NEXTFLOW workflow
#  type:       The type of the parameter, one of:
#                string    - A free-format string
#                integer   - An integer
#                real      - A real number
#                file      - A single file from user data
#                files     - One or more files from user data
#                select    - A selection from a list of values
#  required:    true/false, must the parameter be entered/chosen?
#  description: A user friendly description of the meaning of the parameter
#
# OPTIONAL INFORMATION
#  default:   A default value for the parameter (optional)
#  min:       Minimum value/characters/files for number/string/files types
#  max:       Maximum value/characters/files for number/string/files types
#  regex:     A regular expression that describes valid entries / filenames
#
# SELECT TYPE
#  choices:   A set of choices presented to the user for the parameter.
#             Each choice is a pair of value and description, e.g.
#
#             choices:
#               - [ 'myval1', 'The first option']
#               - [ 'myval2', 'The second option']
#
# NOTE - All parameters are passed to NEXTFLOW as strings... but they
#        are validated by astrocyte using the information provided above

workflow_parameters:

  # Input reads: the paired fastq.gz files to be counted (at least two files).
  # NOTE(review): the dots in the regex are unescaped and the pattern has no
  # end anchor, so it accepts more than names ending in '.fastq.gz'. Confirm
  # how Astrocyte applies the regex before tightening it.
  - id: fastq
    type: files
    required: true
    min: 2
    regex: '.*fastq.gz'
    description: |
      Pairs (read1 and read2) of fastq.gz files from sequencing of a 10x single-cell experiment. Index fastq not required.

  # Sample sheet (CSV) listing each sample with its read1 and read2 filenames.
  - id: designFile
    type: file
    required: true
    regex: '.*csv'
    description: |
      A design file listing sample, corresponding read1 filename, corresponding read2 filename. There can be multiple rows with the same sample name, if there are multiple fastq's for that sample.

  # Reference genome selection. Each choice is a [value, label] pair; the
  # value is what gets passed to the workflow, the label is shown to the user.
  - id: genome
    type: select
    required: true
    choices:
      - ['GRCh38-3.0.0', 'Human GRCh38 release 93']
      - ['GRCh38-1.2.0', 'Human GRCh38 release 84']
      - ['hg19-3.0.0', 'Human GRCh37 (hg19) release 87']
      - ['hg19-1.2.0', 'Human GRCh37 (hg19) release 84']
      - ['mm10-3.0.0', 'Mouse GRCm38 (mm10) release 93']
      - ['mm10-1.2.0', 'Mouse GRCm38 (mm10) release 84']
      # The mouse half of the combined references is mm10 (GRCm38), matching
      # the choice value and the mouse-only labels above.
      - ['hg19_and_mm10-3.0.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93']
      - ['hg19_and_mm10-1.2.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84']
      - ['ercc92-1.2.0', 'ERCC.92 Spike-In']
    description: |
      Reference species and genome used for alignment and subsequent analysis.

  # Optional hint for the number of cells; defaults to 3000, range 0-10000.
  - id: expectCells
    type: integer
    required: false
    default: 3000
    min: 0
    max: 10000
    description: |
      Expected number of recovered cells.

  # Optional override of cell detection; the default of 0 leaves it disabled.
  - id: forceCells
    type: integer
    required: false
    default: 0
    min: 0
    max: 10000
    description: |
      Force pipeline to use this number of cells, bypassing the cell detection algorithm. Use this if the number of cells estimated by Cell Ranger is not consistent with the barcode rank plot. A value of 0 ignores this option. Any value other than 0 overrides expect-cells.

  # 10x chemistry selection; 'auto' is preselected.
  # Choice values are strings, so the numeric-looking labels stay quoted.
  - id: kitVersion
    type: select
    required: true
    default: 'auto'
    choices:
      - ['auto', 'Auto Detect']
      - ['three', '3']
      - ['two', '2']
    description: |
      10x single cell gene expression chemistry version (only used in cellranger version 2.x).

  # cellranger release to run; the newest listed release is preselected.
  # Version strings stay quoted so they are never parsed as numbers.
  - id: version
    type: select
    required: true
    default: '3.0.2'
    choices:
      - ['3.0.2', '3.0.2']
      - ['3.0.1', '3.0.1']
      - ['2.1.1', '2.1.1']
    description: |
      10x cellranger version.

  # Single-choice flag whose only permitted (and default) value is 'true'.
  # NOTE(review): presumably tells the workflow it is running under
  # Astrocyte - confirm against workflow/main.nf.
  - id: astrocyte
    type: select
    required: true
    default: 'true'
    choices:
      - ['true', 'true']
    description: |
      Ensure configuration for astrocyte.

# -----------------------------------------------------------------------------
# SHINY APP CONFIGURATION
# -----------------------------------------------------------------------------

# Remember - The vizapp is always 'vizapp/server.R' 'vizapp/ui.R'
#            The workflow must publish all final output into $baseDir

# R environment module that the vizapp runs against
vizapp_r_module: 'R/3.2.1-intel'

# CRAN packages the vizapp needs that the modules do not already provide
vizapp_cran_packages:
  - 'shiny'
  - 'shinyFiles'

# Bioconductor packages the vizapp needs that the modules do not already
# provide.
# NOTE(review): 'chipseq' looks unrelated to a CellRanger count workflow and
# may be left over from a template - confirm the vizapp really uses it.
vizapp_bioc_packages:
  - 'chipseq'