Add new file

3d70fc56 · Beibei Chen · 4e1e0b7c · 3d70fc56
Commit 3d70fc56 authored 8 years ago by Beibei Chen
--- a/astrocyte_package.yml
+++ b/astrocyte_package.yml
+#
+# metadata for the BICF RNASeq astrocyte workflow package
+#
+
+# -----------------------------------------------------------------------------
+# BASIC INFORMATION
+# -----------------------------------------------------------------------------
+
+# A unique identifier for the workflow package, text/underscores only
+name: 'rnaseq_bicf'
+# The author of the workflow package
+author: 'Brandi Cantarel'
+# A contact email address for questions about the package
+email: 'biohpc-help@utsouthwestern.edu'
+# A more informative, human-readable title for the workflow package
+title: 'BICF RNASeq Analysis Workflow'
+# A summary of the workflow package in plain text. This is a literal block
+# scalar, so the line breaks below are kept exactly as written.
+description: |
+  This is a workflow package for the BioHPC/BICF RNASeq workflow system.
+  It implements a simple RNASeq analysis workflow using TrimGalore, HiSAT,FeatureCounts,
+  StringTie and statistical analysis using EdgeR and Ballgown, plus a simple R Shiny
+  visualization application.
+
+# -----------------------------------------------------------------------------
+# DOCUMENTATION
+# -----------------------------------------------------------------------------
+
+# A list of documentation files in .md format that should be viewable from the
+# web interface. These files are in the 'docs' subdirectory. The first file
+# listed will be used as a documentation index and is index.md by convention
+documentation_files:
+  - 'index.md'
+
+# -----------------------------------------------------------------------------
+# NEXTFLOW WORKFLOW CONFIGURATION
+# -----------------------------------------------------------------------------
+
+# Remember - The workflow file is always named 'workflow/main.nf'
+#            The workflow must publish all final output into $baseDir
+
+# A list of cluster environment modules that this workflow requires to run.
+# Specify versioned module names to ensure reproducibility.
+# NOTE(review): the 'align' parameter below offers STAR as a choice, but no
+# STAR module is listed here - confirm whether one needs to be added.
+workflow_modules:
+  - 'trimgalore/0.4.1'
+  - 'cutadapt/1.9.1'
+  - 'hisat2/2.0.1-beta-intel'
+  - 'samtools/intel/1.3'
+  - 'picard/1.127'
+  - 'subread/1.5.0-intel'
+  - 'stringtie/1.1.2-intel'
+  - 'speedseq/20160506'
+# A list of parameters used by the workflow, defining how to present them,
+# options etc in the web interface. For each parameter:
+#
+# REQUIRED INFORMATION
+#  id:         The name of the parameter in the NEXTFLOW workflow
+#  type:       The type of the parameter, one of:
+#                string    - A free-format string
+#                integer   - An integer
+#                real      - A real number
+#                file      - A single file from user data
+#                files     - One or more files from user data
+#                select    - A selection from a list of values
+#  required:    true/false, must the parameter be entered/chosen?
+#  description: A user friendly description of the meaning of the parameter
+#
+# OPTIONAL INFORMATION
+#  default:   A default value for the parameter (optional)
+#  min:       Minimum value/characters/files for number/string/files types
+#  max:       Maximum value/characters/files for number/string/files types
+#  regex:     A regular expression that describes valid entries / filenames
+#
+# SELECT TYPE
+#  choices:   A set of choices presented to the user for the parameter.
+#             Each choice is a pair of value and description, e.g.
+#
+#             choices:
+#               - [ 'myval', 'The first option']
+#               - [ 'myval2', 'The second option']
+#
+# NOTE - All parameters are passed to NEXTFLOW as strings... but they
+#        are validated by astrocyte using the information provided above
+
+workflow_parameters:
+
+  # Input FASTQ files (at least one). The regex requires 'fastq' or 'fq' to
+  # appear somewhere in each file name, which also admits compressed names
+  # such as sample.fastq.gz. The previous pattern '.*(fastq|fq)*' made the
+  # group optional and therefore accepted every file name.
+  - id: fastqs
+    type: files
+    required: true
+    description: |
+      One or more input FASTQ files (single-end or paired-end) from an RNASeq experiment. The design file, which links each sample name to its sample group, is supplied separately.
+    regex: '.*(fastq|fq).*'
+    min: 1
+
+  # Library strandedness. The choice values '0', '1' and '2' are quoted
+  # because all parameters are passed to the workflow as strings.
+  - id: stranded
+    type: select
+    required: true
+    choices:
+      - ['0', 'Unstranded']
+      - ['1', 'Stranded']
+      - ['2', 'Reverse Stranded']
+    description: |
+      Whether the sequence libraries were generated using a strand-specific protocol.
+
+  # Sequencing layout of the input reads: paired-end ('pe') or single-end ('se').
+  - id: pairs
+    type: select
+    required: true
+    choices:
+      - ['pe', 'Paired End']
+      - ['se', 'Single End']
+    # Folded scalar: the wrapped lines below are joined with single spaces, so
+    # the value is one line of text followed by a newline.
+    description: >
+      In single-end sequencing, the sequencer reads a fragment from only one
+      end to the other, generating the sequence of base pairs. In paired-end
+      reading it starts at one read, finishes this direction at the specified
+      read length, and then starts another round of reading from the opposite
+      end of the fragment.
+
+  # Choice of alignment tool; one of the two listed values must be selected.
+  - id: align
+    type: select
+    required: true
+    choices:
+      - ['hisat', 'HiSAT2']
+      - ['star', 'STAR']
+    description: |
+      Alignment tool
+
+  # Duplicate-read handling.
+  # NOTE(review): the value 'mark' is labelled 'Remove Duplicates' - confirm
+  # whether the workflow marks or actually removes duplicates.
+  - id: markdups
+    type: select
+    required: true
+    choices:
+      - ['mark', 'Remove Duplicates']
+      - ['keep', 'Keep All Sequences']
+    # Folded scalar: the wrapped lines below are joined with single spaces, so
+    # the value is one line of text followed by a newline.
+    description: >
+      Duplicate reads are defined as originating from the same original
+      fragment of DNA. Duplicates are identified as read pairs having identical
+      5-prime positions (coordinate and strand) for both reads in a mate pair
+      and optionally, matching unique molecular identifier reads.
+
+  # Sample design sheet: a single required file whose name must match the regex.
+  - id: design
+    type: file
+    required: true
+    regex: '.*txt'
+    description: |
+      A design file listing pairs of sample name and sample group.
+      Columns must include: SampleID,SampleName,SampleGroup,FullPathToFqR1,FullPathToFqR2
+
+  # Reference genome for alignment. Each choice value is a path under
+  # /project/shared/bicf_workflow_ref; the second element is the label shown
+  # to the user. The mouse label is 'GRCm38' to match its path (it previously
+  # read 'GRCh38', which is a human assembly name).
+  - id: genome
+    type: select
+    required: true
+    choices:
+      - ['/project/shared/bicf_workflow_ref/GRCh38', 'Human GRCh38']
+      - ['/project/shared/bicf_workflow_ref/GRCh37', 'Human GRCh37']
+      - ['/project/shared/bicf_workflow_ref/GRCm38', 'Mouse GRCm38']
+    description: |
+      Reference genome for alignment
+
+  # Gene set collection (.gmt file name) used for the QuSAGE analysis.
+  - id: geneset
+    type: select
+    required: true
+    choices:
+      - ['h.all.v5.1.symbols.gmt', 'Hallmark Gene Sets']
+      - ['c2.all.v5.1.symbols.gmt', 'Curated Gene Sets']
+      - ['c3.all.v5.1.symbols.gmt', 'Motif Gene Sets']
+      - ['c5.all.v5.1.entrez.gmt', 'Gene Ontology Gene Sets']
+      - ['c6.all.v5.1.symbols.gmt', 'Oncogenic Signatures']
+      - ['c7.all.v5.1.entrez.gmt', 'Immunological Signatures']
+    # Folded scalar: the two lines below are joined with a single space.
+    description: >
+      Gene Set Definitions used for QuSAGE Analysis -- see
+      http://software.broadinstitute.org/gsea/msigdb/ for geneset descriptions
+
+
+# -----------------------------------------------------------------------------
+# SHINY APP CONFIGURATION
+# -----------------------------------------------------------------------------
+
+# Remember - The vizapp is always 'vizapp/server.R' 'vizapp/ui.R'
+#            The workflow must publish all final output into $baseDir
+
+# Name (including version) of the R module that the vizapp will run against
+vizapp_r_module: 'R/3.2.1-intel'
+
+# CRAN packages, not provided by the modules, that must be made available to
+# the vizapp.
+# NOTE(review): 'Vennerable' is also listed under vizapp_github_packages
+# (js229/Vennerable) - confirm whether it should be installed from CRAN at all.
+vizapp_cran_packages:
+  - 'sqldf'
+  - 'shiny'
+  - 'Vennerable'
+  - 'DT'
+  - 'ggplot2'
+  - 'gplots'
+  - 'gtools'
+  - 'RColorBrewer'
+
+
+# Bioconductor packages, not provided by the modules, that must be made
+# available to the vizapp
+vizapp_bioc_packages:
+  - 'qusage'
+  - 'ballgown'
+  - 'edgeR'
+  - 'DESeq2'
+
+# GitHub packages, not provided by the modules, that must be made available
+# to the vizapp (presumably given in 'owner/repository' form - confirm)
+vizapp_github_packages:
+  - 'js229/Vennerable'
+