From 3d70fc56c0c0e1e94fa3134f16c42ad6ba7ab722 Mon Sep 17 00:00:00 2001
From: Beibei Chen <beibei.chen@utsouthwestern.edu>
Date: Mon, 21 Nov 2016 13:07:17 -0600
Subject: [PATCH] Add new file

---
 astrocyte_package.yml | 196 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 196 insertions(+)
 create mode 100644 astrocyte_package.yml

diff --git a/astrocyte_package.yml b/astrocyte_package.yml
new file mode 100644
index 0000000..c12f063
--- /dev/null
+++ b/astrocyte_package.yml
@@ -0,0 +1,196 @@
+#
+# metadata for the BICF RNASeq astrocyte workflow package
+#
+
+# -----------------------------------------------------------------------------
+# BASIC INFORMATION
+# -----------------------------------------------------------------------------
+
+# A unique identifier for the workflow package, text/underscores only
+name: 'rnaseq_bicf'
+# Who wrote this?
+author: 'Brandi Cantarel'
+# A contact email address for questions
+email: 'biohpc-help@utsouthwestern.edu'
+# A more informative title for the workflow package
+title: 'BICF RNASeq Analysis Workflow'
+# A summary of the workflow package in plain text
+description: |
+  This is a workflow package for the BioHPC/BICF RNASeq workflow system.
+  It implements a simple RNASeq analysis workflow using TrimGalore, HiSAT,FeatureCounts,
+  StringTie and statistical analysis using EdgeR and Ballgown, plus a simple R Shiny
+  visualization application.
+
+# -----------------------------------------------------------------------------
+# DOCUMENTATION
+# -----------------------------------------------------------------------------
+
+# A list of documentation files in .md format that should be viewable from the
+# web interface. These files are in the 'docs' subdirectory. The first file
+# listed will be used as a documentation index and is index.md by convention
+documentation_files:
+  - 'index.md'
+
+# -----------------------------------------------------------------------------
+# NEXTFLOW WORKFLOW CONFIGURATION
+# -----------------------------------------------------------------------------
+
+# Remember - The workflow file is always named 'workflow/main.nf'
+#            The workflow must publish all final output into $baseDir
+
+# A list of cluster environment modules that this workflow requires to run.
+# Specify versioned module names to ensure reproducibility.
+workflow_modules:
+  - 'trimgalore/0.4.1'
+  - 'cutadapt/1.9.1'
+  - 'hisat2/2.0.1-beta-intel'
+  - 'samtools/intel/1.3'
+  - 'picard/1.127'
+  - 'subread/1.5.0-intel'
+  - 'stringtie/1.1.2-intel'
+  - 'speedseq/20160506'
+# A list of parameters used by the workflow, defining how to present them,
+# options etc in the web interface. For each parameter:
+#
+# REQUIRED INFORMATION
+#  id:          The name of the parameter in the NEXTFLOW workflow
+#  type:        The type of the parameter, one of:
+#                 string    - A free-format string
+#                 integer   - An integer
+#                 real      - A real number
+#                 file      - A single file from user data
+#                 files     - One or more files from user data
+#                 select    - A selection from a list of values
+#  required:    true/false, must the parameter be entered/chosen?
+#  description: A user friendly description of the meaning of the parameter
+#
+# OPTIONAL INFORMATION
+#  default:     A default value for the parameter (optional)
+#  min:         Minimum value/characters/files for number/string/files types
+#  max:         Maximum value/characters/files for number/string/files types
+#  regex:       A regular expression that describes valid entries / filenames
+#
+# SELECT TYPE
+#  choices:     A set of choices presented to the user for the parameter.
+#               Each choice is a pair of value and description, e.g.
+#
+#               choices:
+#                 - [ 'myval', 'The first option']
+#                 - [ 'myval2', 'The second option']
+#
+# NOTE - All parameters are passed to NEXTFLOW as strings... but they
+#        are validated by astrocyte using the information provided above
+
+workflow_parameters:
+
+  - id: fastqs
+    type: files
+    required: true
+    description: |
+      One or more input paired-end FASTQ files from a RNASeq experiment and a design file with the link between the same name and the sample group
+    regex: ".*(fastq|fq).*"  # name must contain fastq/fq; the former trailing '*' made the group optional, so every name matched
+    min: 1
+
+  - id: stranded
+    type: select
+    required: true
+    choices:
+      - [ '0', 'Unstranded']
+      - [ '1', 'Stranded']
+      - [ '2', 'Reverse Stranded']
+    description: |
+      In the case that the sequence libraries where generated using a stranded specific protocol.
+
+  - id: pairs
+    type: select
+    required: true
+    choices:
+      - [ 'pe', 'Paired End']
+      - [ 'se', 'Single End']
+    description: |
+      In single-end sequencing, the sequencer reads a fragment from only one end to the other, generating the sequence of base pairs. In paired-end reading it starts at one read, finishes this direction at the specified read length, and then starts another round of reading from the opposite end of the fragment.
+
+  - id: align
+    type: select
+    required: true
+    choices:
+      - [ 'hisat', 'HiSAT2']
+      - [ 'star', 'STAR']  # NOTE(review): no STAR module is listed in workflow_modules - confirm
+    description: |
+      Alignment tool
+
+  - id: markdups
+    type: select
+    required: true
+    choices:
+      - [ 'mark', 'Remove Duplicates']
+      - [ 'keep', 'Keep All Sequences']
+    description: |
+      Duplicate reads are defined as originating from the same original fragment of DNA. Duplicates are identified as read pairs having identical 5-prime positions (coordinate and strand) for both reads in a mate pair and optionally, matching unique molecular identifier reads.
+
+  - id: design
+    type: file
+    required: true
+    regex: ".*txt"
+    description: |
+      A design file listing pairs of sample name and sample group.
+      Columns must include: SampleID,SampleName,SampleGroup,FullPathToFqR1,FullPathToFqR2
+
+  - id: genome
+    type: select
+    choices:
+      - [ '/project/shared/bicf_workflow_ref/GRCh38', 'Human GRCh38']
+      - [ '/project/shared/bicf_workflow_ref/GRCh37', 'Human GRCh37']
+      - [ '/project/shared/bicf_workflow_ref/GRCm38', 'Mouse GRCm38']
+    required: true
+    description: |
+      Reference genome for alignment
+
+  - id: geneset
+    type: select
+    choices:
+      - ['h.all.v5.1.symbols.gmt','Hallmark Gene Sets']
+      - ['c2.all.v5.1.symbols.gmt','Curated Gene Sets']
+      - ['c3.all.v5.1.symbols.gmt','Motif Gene Sets']
+      - ['c5.all.v5.1.entrez.gmt','Gene Ontology Gene Sets']
+      - ['c6.all.v5.1.symbols.gmt','Oncogenic Signatures']
+      - ['c7.all.v5.1.entrez.gmt','Immunological Signatures']
+
+    required: true
+    description: |
+      Gene Set Definitions used for QuSAGE Analysis -- see http://software.broadinstitute.org/gsea/msigdb/ for geneset descriptions
+
+
+# -----------------------------------------------------------------------------
+# SHINY APP CONFIGURATION
+# -----------------------------------------------------------------------------
+
+# Remember - The vizapp is always 'vizapp/server.R' 'vizapp/ui.R'
+#            The workflow must publish all final output into $baseDir
+
+# Name of the R module that the vizapp will run against
+vizapp_r_module: 'R/3.2.1-intel'
+
+# List of any CRAN packages, not provided by the modules, that must be made
+# available to the vizapp
+vizapp_cran_packages:
+  - sqldf
+  - shiny
+  - Vennerable  # NOTE(review): also listed under vizapp_github_packages; presumably not on CRAN - confirm
+  - DT
+  - ggplot2
+  - gplots
+  - gtools
+  - RColorBrewer
+
+
+# List of any Bioconductor packages, not provided by the modules, that must be made
+# available to the vizapp
+vizapp_bioc_packages:
+  - qusage
+  - ballgown
+  - edgeR
+  - DESeq2
+vizapp_github_packages:  # GitHub packages for the vizapp, given as 'owner/repo'
+  - js229/Vennerable
+
+-- 
+GitLab