#
# Metadata for the BICF astrocyte RNASeq workflow package
#

# -----------------------------------------------------------------------------
# BASIC INFORMATION
# -----------------------------------------------------------------------------

# A unique identifier for the workflow package, text/underscores only
name: 'rnaseq_bicf'
# Who wrote this?
author: 'Brandi Cantarel'
# A contact email address for questions
email: 'biohpc-help@utsouthwestern.edu'
# A more informative title for the workflow package
title: 'BICF RNASeq Analysis Workflow'
# A summary of the workflow package in plain text.
# Folded style (>) joins the wrapped lines below with single spaces.
description: >
  This is a workflow package for the BioHPC/BICF RNASeq workflow system.
  It implements differential expression analysis, gene set enrichment
  analysis, gene fusion analysis and variant identification using RNASeq
  data.

# -----------------------------------------------------------------------------
# DOCUMENTATION
# -----------------------------------------------------------------------------

# A list of documentation files in .md format that should be viewable from the
# web interface. These files are in the 'docs' subdirectory. The first file
# listed will be used as a documentation index and is index.md by convention
documentation_files:
  - 'index.md'

# -----------------------------------------------------------------------------
# NEXTFLOW WORKFLOW CONFIGURATION
# -----------------------------------------------------------------------------

# Remember - The workflow file is always named 'workflow/main.nf'
#            The workflow must publish all final output into $baseDir

# A list of cluster environment modules that this workflow requires to run.
# Specify versioned module names to ensure reproducibility.
workflow_modules:
  - 'trimgalore/0.4.1'
  - 'cutadapt/1.9.1'
  - 'hisat2/2.0.1-beta-intel'
  - 'samtools/intel/1.3'
  - 'picard/1.127'
  - 'subread/1.5.0-intel'
  - 'stringtie/1.1.2-intel'
  - 'speedseq/20160506'
  - 'python/2.7.x-anaconda'

# A list of parameters used by the workflow, defining how to present them,
# options etc in the web interface. For each parameter:
#
# REQUIRED INFORMATION
#  id:          The name of the parameter in the NEXTFLOW workflow
#  type:        The type of the parameter, one of:
#                 string    - A free-format string
#                 integer   - An integer
#                 real      - A real number
#                 file      - A single file from user data
#                 files     - One or more files from user data
#                 select    - A selection from a list of values
#  required:    true/false, must the parameter be entered/chosen?
#  description: A user friendly description of the meaning of the parameter
#
# OPTIONAL INFORMATION
#  default:     A default value for the parameter (optional)
#  min:         Minimum value/characters/files for number/string/files types
#  max:         Maximum value/characters/files for number/string/files types
#  regex:       A regular expression that describes valid entries / filenames
#
# SELECT TYPE
#  choices:     A set of choices presented to the user for the parameter.
#               Each choice is a pair of value and description, e.g.
#
#               choices:
#                 - ['myval1', 'The first option']
#                 - ['myval2', 'The second option']
#
# NOTE - All parameters are passed to NEXTFLOW as strings... but they
#        are validated by astrocyte using the information provided above
#
# Descriptions that are wrapped over several lines use folded style (>), so
# the wrapped lines are joined with single spaces; literal style (|) is used
# where a description fits on one line or needs a real line break.
workflow_parameters:

  - id: fastqs
    type: files
    required: true
    description: |
      One or more input paired-end FASTQ files from a RNASeq experiment
    # The extension group must not be starred: '(fastq|fq)*' also matches
    # zero occurrences and would therefore accept every file name.
    regex: ".*(fastq|fq).*"
    min: 1

  - id: stranded
    type: select
    required: true
    choices:
      - ['0', 'Unstranded']
      - ['1', 'Stranded']
      - ['2', 'Reverse Stranded']
    description: >
      In the case that the sequence libraries were generated using a
      stranded specific protocol.

  - id: pairs
    type: select
    required: true
    choices:
      - ['pe', 'Paired End']
      - ['se', 'Single End']
    description: >
      In single-end sequencing, the sequencer reads a fragment from only one
      end to the other, generating the sequence of base pairs. In paired-end
      reading it starts at one read, finishes this direction at the specified
      read length, and then starts another round of reading from the opposite
      end of the fragment.

  - id: align
    type: select
    required: true
    choices:
      - ['hisat', 'HiSAT2']
      - ['star', 'STAR']
    description: |
      Alignment tool

  - id: fusion
    type: select
    required: true
    choices:
      - ['skip', 'No Fusion Detection']
      - ['detect', 'Fusion Detection']
    description: |
      Run Star Fusion

  - id: markdups
    type: select
    required: true
    choices:
      - ['picard', 'Remove Duplicates']
      # Quoted on purpose: this is the string 'null', not a YAML null.
      - ['null', 'Keep All Sequences']
    description: >
      Duplicate reads are defined as originating from the same original
      fragment of DNA. Duplicates are identified as read pairs having
      identical 5-prime positions (coordinate and strand) for both reads in a
      mate pair and optionally, matching unique molecular identifier reads.

  - id: dea
    type: select
    required: true
    choices:
      - ['detect', 'Run Statistical Analysis']
      - ['skip', 'Skip Statistical Analysis']
    description: |
      Runs DESeq2 and edgeR

  - id: design
    type: file
    required: true
    regex: ".*txt"
    description: |
      A design file listing pairs of sample name and sample group.
      Columns must include: SampleID,SampleName,SampleGroup,FullPathToFqR1,FullPathToFqR2

  - id: genome
    type: select
    required: true
    # Each choice value is the reference directory path for that genome build.
    choices:
      - ['/project/shared/bicf_workflow_ref/human/GRCh38', 'Human GRCh38']
      - ['/project/shared/bicf_workflow_ref/human/GRCh37', 'Human GRCh37']
      - ['/project/shared/bicf_workflow_ref/mouse/GRCm38', 'Mouse GRCm38']
      - ['/project/shared/bicf_workflow_ref/mouse/GRCm39', 'Mouse GRCm39']
    description: |
      Reference genome for alignment

  - id: geneset
    type: select
    required: true
    choices:
      - ['h.all.v6.2.symbols.gmt', 'Hallmark Gene Sets']
      - ['c2.all.v5.1.symbols.gmt', 'Curated Gene Sets']
      - ['c3.all.v5.1.symbols.gmt', 'Motif Gene Sets']
      - ['c5.all.v6.2.symbols.gmt', 'Gene Ontology Gene Sets']
      - ['c6.all.v5.1.symbols.gmt', 'Oncogenic Signatures']
      - ['c7.all.v5.1.symbols.gmt', 'Immunological Signatures']
    description: >
      Gene Set Definitions used for QuSAGE Analysis -- see
      http://software.broadinstitute.org/gsea/msigdb/ for geneset
      descriptions

# -----------------------------------------------------------------------------
# SHINY APP CONFIGURATION
# -----------------------------------------------------------------------------

# Remember - The vizapp is always 'vizapp/server.R' 'vizapp/ui.R'
#            The workflow must publish all final output into $baseDir

# Name of the R module that the vizapp will run against
vizapp_r_module: 'R/3.4.1-gccmkl'

# List of any CRAN packages, not provided by the modules, that must be made
# available to the vizapp
# NOTE(review): Vennerable is also listed under vizapp_github_packages
# (js229/Vennerable) - confirm whether it needs to appear in both lists.
vizapp_cran_packages:
  - sqldf
  - crosstalk
  - htmltools
  - htmlwidgets
  - httpuv
  - shiny
  - Vennerable
  - DT
  - grid
  - gridExtra
  - ggplot2
  - gplots
  - gtools
  - RColorBrewer
  - tidyverse

# List of any Bioconductor packages, not provided by the modules, that must
# be made available to the vizapp
vizapp_bioc_packages:
  - qusage
  - ballgown
  - edgeR
  - DESeq2

# List of GitHub packages for the vizapp; the entry below is written as
# 'owner/repository' (presumably installed from GitHub - confirm against the
# astrocyte documentation).
vizapp_github_packages:
  - js229/Vennerable