Newer
Older
#
# metadata for the BICF RNASeq astrocyte workflow package
#
# -----------------------------------------------------------------------------
# BASIC INFORMATION
# -----------------------------------------------------------------------------
# A unique identifier for the workflow package, text/underscores only
name: 'rnaseq_bicf'
# Who wrote this?
author: 'Brandi Cantarel'
# A contact email address for questions
email: 'biohpc-help@utsouthwestern.edu'
# A more informative title for the workflow package
title: 'BICF RNASeq Analysis Workflow'
# A summary of the workflow package in plain text. The body of a literal block
# scalar must be indented deeper than its key.
description: |
  This is a workflow package for the BioHPC/BICF RNASeq workflow system.
  It implements differential expression analysis, gene set enrichment analysis,
  gene fusion analysis and variant identification using RNASeq data.
# The minimum Astrocyte version required to run this workflow. Older pipelines
# that do not set this key are automatically assigned a default of 0.3.1. A
# requested minimum version below 0.4.0 is ignored.
minimum_astrocyte_version: '0.4.1'
# The Nextflow version required to run this workflow. Older pipelines that do
# not set this key are automatically assigned a default of 0.31.0. Make sure the
# requested Nextflow version is available in the module list.
nextflow_version: '20.01.0'
# (Optional) The Nextflow config file to use for this workflow. If provided, the file should exist in workflow/configs
nextflow_config: 'nextflow.config'
# The container to use for this workflow, none/singularity. If omitted, the default value 'none' will be used.
container: 'none'
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# -----------------------------------------------------------------------------
# DOCUMENTATION
# -----------------------------------------------------------------------------
# A list of documentation files in .md format that should be viewable from the
# web interface. These files are in the 'docs' subdirectory. The first file
# listed will be used as a documentation index and is index.md by convention
documentation_files:
- 'index.md'
# -----------------------------------------------------------------------------
# NEXTFLOW WORKFLOW CONFIGURATION
# -----------------------------------------------------------------------------
# Remember - The workflow file is always named 'workflow/main.nf'
# The workflow must publish all final output into $baseDir
# A list of cluster environment modules that this workflow requires to run.
# Specify versioned module names to ensure reproducibility.
# NOTE(review): the 'align' and 'fusion' parameters in this file offer STAR and
# Star Fusion, but no STAR module is listed here - confirm whether one is needed.
workflow_modules:
- 'trimgalore/0.4.1'
- 'cutadapt/1.9.1'
- 'hisat2/2.0.1-beta-intel'
- 'samtools/intel/1.3'
- 'picard/1.127'
- 'subread/1.5.0-intel'
- 'stringtie/1.1.2-intel'
- 'speedseq/20160506'
- 'python/2.7.x-anaconda'
# A list of parameters used by the workflow, defining how to present them,
# options etc in the web interface. For each parameter:
#
# REQUIRED INFORMATION
# id: The name of the parameter in the NEXTFLOW workflow
# type: The type of the parameter, one of:
# string - A free-format string
# integer - An integer
# real - A real number
# file - A single file from user data
# files - One or more files from user data
# select - A selection from a list of values
# required: true/false, must the parameter be entered/chosen?
# description: A user friendly description of the meaning of the parameter
#
# OPTIONAL INFORMATION
# default: A default value for the parameter (optional)
# min: Minimum value/characters/files for number/string/files types
# max: Maximum value/characters/files for number/string/files types
# regex: A regular expression that describes valid entries / filenames
#
# SELECT TYPE
# choices: A set of choices presented to the user for the parameter.
# Each choice is a pair of value and description, e.g.
#
# choices:
# - [ 'myval1', 'The first option']
# - [ 'myval2', 'The second option']
#
# NOTE - All parameters are passed to NEXTFLOW as strings... but they
# are validated by astrocyte using the information provided above
workflow_parameters:
# Input read files (at least one). The regex requires 'fastq' or 'fq' to appear
# somewhere in the file name; leaving that group optional would accept every
# file name and make the check meaningless.
- id: fastqs
  type: files
  required: true
  description: |
    One or more input paired-end FASTQ files from a RNASeq experiment
  regex: ".*(fastq|fq).*"
  min: 1
# Library strandedness, chosen from the three value/label pairs below.
- id: stranded
  type: select
  required: true
  choices:
  - ['0', 'Unstranded']
  - ['1', 'Stranded']
  - ['2', 'Reverse Stranded']
  description: |
    In the case that the sequence libraries were generated using a stranded specific protocol.
# Sequencing layout: paired-end ('pe') or single-end ('se').
- id: pairs
  type: select
  required: true
  choices:
  - ['pe', 'Paired End']
  - ['se', 'Single End']
  description: |
    In single-end sequencing, the sequencer reads a fragment from only one end to the other, generating the sequence of base pairs. In paired-end reading it starts at one read, finishes this direction at the specified read length, and then starts another round of reading from the opposite end of the fragment.
# Which aligner to use: 'hisat' or 'star'.
- id: align
  type: select
  required: true
  choices:
  - ['hisat', 'HiSAT2']
  - ['star', 'STAR']
  description: |
    Alignment tool
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# Toggle for gene fusion detection ('skip' or 'detect').
- id: fusion
  type: select
  required: true
  choices:
  - ['skip', 'No Fusion Detection']
  - ['detect', 'Fusion Detection']
  description: |
    Run Star Fusion
# Duplicate handling: 'picard' removes duplicates; the string 'null' keeps all
# sequences. 'null' is quoted so that it stays a string, not a YAML null.
- id: markdups
  type: select
  required: true
  choices:
  - ['picard', 'Remove Duplicates']
  - ['null', 'Keep All Sequences']
  description: |
    Duplicate reads are defined as originating from the same original fragment of DNA. Duplicates are identified as read pairs having identical 5-prime positions (coordinate and strand) for both reads in a mate pair and optionally, matching unique molecular identifier reads.
# Toggle for the statistical (differential expression) analysis step.
- id: dea
  type: select
  required: true
  choices:
  - ['detect', 'Run Statistical Analysis']
  - ['skip', 'Skip Statistical Analysis']
  description: |
    Runs deSEq2 and EdgeR
# Single design file; the regex accepts names containing 'txt'.
- id: design
  type: file
  required: true
  regex: ".*txt"
  description: |
    A design file listing pairs of sample name and sample group.
    Columns must include: SampleID,SampleName,SampleGroup,FullPathToFqR1,FullPathToFqR2
# Reference genome; each choice value is the directory path handed to the workflow.
- id: genome
  type: select
  required: true
  choices:
  - ['/project/shared/bicf_workflow_ref/human/GRCh38', 'Human GRCh38']
  - ['/project/shared/bicf_workflow_ref/human/GRCh37', 'Human GRCh37']
  - ['/project/shared/bicf_workflow_ref/mouse/GRCm38', 'Mouse GRCm38']
  - ['/project/shared/bicf_workflow_ref/mouse/GRCm39', 'Mouse GRCm39']
  description: |
    Reference genome for alignment
# Gene set definition file (.gmt) used for the QuSAGE analysis.
- id: geneset
  type: select
  required: true
  choices:
  - ['h.all.v6.2.symbols.gmt', 'Hallmark Gene Sets']
  - ['c2.all.v5.1.symbols.gmt', 'Curated Gene Sets']
  - ['c3.all.v5.1.symbols.gmt', 'Motif Gene Sets']
  - ['c5.all.v6.2.symbols.gmt', 'Gene Ontology Gene Sets']
  - ['c7.all.v5.1.symbols.gmt', 'Immunological Signatures']
  description: |
    Gene Set Definitions used for QuSAGE Analysis -- see http://software.broadinstitute.org/gsea/msigdb/ for geneset descriptions
# -----------------------------------------------------------------------------
# SHINY APP CONFIGURATION
# -----------------------------------------------------------------------------
# Remember - The vizapp is always 'vizapp/server.R' and 'vizapp/ui.R'
# The workflow must publish all final output into $baseDir
# Name of the R module that the vizapp will run against
# NOTE(review): no key follows the comment above in this file - confirm whether
# an R module entry is missing.
# List of any CRAN packages, not provided by the modules, that must be made
# available to the vizapp
vizapp_cran_packages:
- sqldf
- crosstalk
- htmltools
- htmlwidgets
- httpuv
# List of any Bioconductor packages, not provided by the modules, that must be made
# available to the vizapp
vizapp_bioc_packages:
- qusage
- ballgown
- edgeR
- DESeq2
vizapp_github_packages: