diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 72b63f1e9b8ea52b84cc02386a72aad9ce0db440..eb44569784c41e056da3f7d23181c8a6550f4849 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,10 +1,12 @@ # Gitlab CI Script for astrocyte/rnaseq # Brandi L. Cantarel - 2017 +variables: + GIT_SUBMODULE_STRATEGY: recursive + before_script: - - module load nextflow/0.31.0 - - git submodule sync --recursive - - git submodule update --init --recursive + - module load nextflow/20.01.0 + - module load singularity/3.0.2 stages: - integration @@ -12,13 +14,13 @@ stages: test_human: stage: integration script: - - nextflow run -with-dag flowchart.png -with-timeline human_timeline.html -with-report human_report.html workflow/main.nf --design /project/shared/bicf_workflow_ref/workflow_testdata/rnaseq/design.rnaseq.txt --input /project/shared/bicf_workflow_ref/workflow_testdata/rnaseq --output human_output + - nextflow run -c nextflow.config -with-dag flowchart.png -with-timeline human_timeline.html -with-report human_report.html workflow/main.nf --design /project/shared/bicf_workflow_ref/workflow_testdata/rnaseq/design.rnaseq.txt --input /project/shared/bicf_workflow_ref/workflow_testdata/rnaseq --output human_output artifacts: expire_in: 2 days test_mouse: stage: integration script: - - nextflow run -with-dag flowchart.png -with-timeline mouse_timeline.html -with-report mouse_report.html workflow/main.nf --input /project/shared/bicf_workflow_ref/workflow_testdata/rnaseq --design /project/shared/bicf_workflow_ref/workflow_testdata/rnaseq/mouse_se.design.txt --pairs se --fusion skip --genome /project/shared/bicf_workflow_ref/mouse/GRCm38 --markdups null --output mouse_output + - nextflow run -c nextflow.config -with-dag flowchart.png -with-timeline mouse_timeline.html -with-report mouse_report.html workflow/main.nf --input /project/shared/bicf_workflow_ref/workflow_testdata/rnaseq --design /project/shared/bicf_workflow_ref/workflow_testdata/rnaseq/mouse_se.design.txt --pairs se 
--fusion skip --genome /project/shared/bicf_workflow_ref/mouse/GRCm38 --markdups null --output mouse_output artifacts: expire_in: 2 days diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 0242dee412031e1290cd19266bcb68035939fcaf..0000000000000000000000000000000000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "workflow/process_scripts"] - path = workflow/process_scripts - url = git@git.biohpc.swmed.edu:ngsclialab/process_scripts.git diff --git a/README.md b/README.md index f8a7b3bbf894221a847d590b2afe703b88b77ac2..d9798bc20f0f0f2d6e2569109b936ebd55d0fdb7 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,82 @@ -## RNASeq Analysis Worklow +# RNASeq Analysis Workflow + +This workflow can be run with the whole genome, or with a specific list of genes of interest. + +## Initiate Nextflow Workflows + +### Required Tools + +This pipeline uses [Nextflow](https://www.nextflow.io/docs/latest/index.html), a bioinformatics workflow tool and [Singularity](https://sylabs.io/docs/), a containerization tool. + +Make sure both tools are installed before running this pipeline. If running on an HPC cluster then load required modules. + +``` +module load nextflow/20.01.0 singularity/3.5.3 +``` + +### RNA Design File + +The design file must be named design.txt and be in tab-separated format for the workflows. All RNA workflows can be run using the same design file format. You can run in single-end mode with blank cells in the FqR2 column. 
+ +| SampleID | CaseID | FqR1 | FqR2 | +|---|---|---|---| +| Sample1 | Fam1 | Sample1.R1.fastq.gz | Sample1.R2.fastq.gz | +| Sample2 | Fam1 | Sample2.R1.fastq.gz | Sample2.R2.fastq.gz | +| Sample3 | Fam2 | Sample3.R1.fastq.gz | Sample3.R2.fastq.gz | +| Sample4 | Fam2 | Sample4.R1.fastq.gz | Sample4.R2.fastq.gz | + + +### RNA Parameters +* **--input** + * directory containing the design file and fastq files + * default is set to *'${basedir}/fastq'* + * eg: **--input '/project/shared/bicf_workflow_ref/workflow_testdata/rnaseq/fastq'** +* **--output** + * directory for the analysis output + * default is set to *'${basedir}/analysis'* + * eg: **--output '${basedir}/output'** +* **--genome** + * directory containing all reference files for the various tools. This includes the genome.fa, gencode.gtf, genenames.txt, etc. + * default is set for use on UTSW BioHPC. + * eg: **--genome '/project/shared/bicf_workflow_ref/human/grch38_cloud/rnaref'** +* **--stranded** + * option for -s flag in featureCounts used in gene abundance calculations + * default is set to *'0'* + * eg: **--stranded '0'** +* **--pairs** + * select either 'pe' (paired-end) or 'se' (single-end) based on read inputs. Select 'pe' when both R1 and R2 are present. If only R1, then select 'se'. 
+ * default is set to *'pe'* + * eg: **--pairs 'pe'** +* **--align** + * select the algorithm/tool for alignment from 'hisat' or 'star' + * default is set to *'hisat'* + * eg: **--align 'hisat'** +* **--markdups** + * select either picard (Mark Duplicates) or null (do not Mark Duplicates) + * default is set to *'picard'* + * eg: **--markdups 'picard'** + +### RNA Run Workflow Testing + +Human PE + +``` +module load nextflow/20.01.0 singularity/3.5.3 +base=$repoClonedDirectory +datadir='/project/shared/bicf_workflow_ref/workflow_testdata/rnaseq' + +nextflow -C ${base}/nextflow.config run ${base}/workflow/main.nf --design ${datadir}/design.rnaseq.txt --input ${datadir} --output analysis ``` -module load nextflow -nextflow run workflow/main.nf + +Mouse SE + +``` +module load nextflow/20.01.0 singularity/3.5.3 +base=$repoClonedDirectory +datadir='/project/shared/bicf_workflow_ref/workflow_testdata/rnaseq' + +nextflow -C ${base}/nextflow.config run -with-dag flowchart.png -with-timeline mouse_timeline.html -with-report mouse_report.html ${base}/workflow/main.nf --design ${datadir}/mouse_se.design.txt --input ${datadir} --pairs se --output analysis + ``` + diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index 1b88f581051246bad1da5ff77010f9f8bb6fac92..47962cd4987298242134038d30852a0a66188825 100644 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -161,6 +161,7 @@ workflow_parameters: - [ '/project/shared/bicf_workflow_ref/human/GRCh38', 'Human GRCh38'] - [ '/project/shared/bicf_workflow_ref/human/GRCh37', 'Human GRCh37'] - [ '/project/shared/bicf_workflow_ref/mouse/GRCm38', 'Mouse GRCm38'] + - [ '/project/shared/bicf_workflow_ref/mouse/GRCm39', 'Mouse GRCm39'] required: true description: | Reference genome for alignment diff --git a/nextflow.config b/nextflow.config index b187420c973977efc28feba14febdc18a8c51fa8..150748a34379cf059276d0144009eb1943283927 100644 --- a/nextflow.config +++ b/nextflow.config @@ -6,32 +6,41 @@ process { clusterOptions = '--hold --no-kill' 
queue = '128GB,256GB,256GBv1' withLabel: trim { - container = 'trim_galore.sif' + container = 'goalconsortium/trim_galore:1.0.9' } - withLabel: dnaalign { - container = 'dna_alignment.sif' + withLabel: abra2 { + container = 'goalconsortium/abra2:1.0.9' } withLabel: profiling_qc { - container = 'profiling_qc.sif' + container = 'goalconsortium/profiling_qc:1.0.9' + } + withLabel: dnaalign { + container = 'goalconsortium/dna_alignment:1.0.9' + } + withLabel: variantcalling { + container = 'goalconsortium/variantcalling:1.0.9' + } + withLabel: structuralvariant { + container = 'goalconsortium/structuralvariant:1.1.2' } withLabel: starfusion { - container = 'starfusion.sif' + container = 'goalconsortium/starfusion:1.0.9' } withLabel: ralign { - container = 'rna_alignment.sif' + container = 'goalconsortium/rna_alignment:1.0.9' } withLabel: geneabund { - container = 'rna_gene_abundance.sif' + container = 'goalconsortium/rna_gene_abundance:1.1.3' } withLabel: rnaseqstat { - container = 'rnaseq_dea.sif' + container = 'goalconsortium/rna_statanal:1.1.4' } } singularity { enabled = true runOptions='--no-home --cleanenv' - cacheDir = '/project/shared/bicf_workflow_ref/seqprg/singularity/' + cacheDir = "$PWD" } trace { @@ -57,9 +66,9 @@ env { } manifest { - homePage = 'https://git.biohpc.swmed.edu/ngsclialab/school' - description = 'School is a collection of genomics analysis workflows that are used for detecting single nucleotide variants (SNVs), insertions/deletions (indels), copy number variants (CNVs) and translocations from RNA and DNA sequencing. 
These workflows have been validated in a CLIA laboratory at UTSW' + homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/rnaseq' + description = 'RNA sequencing gene abundance analysis' mainScript = 'rna.nf' version = '1.0.0' - nextflowVersion = '>=0.31.0' + nextflowVersion = '>=20.01.0' } diff --git a/testing/human_pe_test/run_test.sh b/testing/human_pe_test/run_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..e8624c09d8caa4d85134c0c3f2ddbe7f55b9e952 --- /dev/null +++ b/testing/human_pe_test/run_test.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +module load nextflow/20.01.0 singularity/3.5.3 +base='/project/BICF/BICF_Core/s166458/rnaseq_astrocyte' +datadir='/project/shared/bicf_workflow_ref/workflow_testdata/rnaseq' + +nextflow -C ${base}/nextflow.config run ${base}/workflow/main.nf --design ${datadir}/design.rnaseq.txt --input ${datadir} --output analysis diff --git a/testing/mouse_se_test/run_test.sh b/testing/mouse_se_test/run_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..2c105e3bcdb151a336dc6e0302704828291c4570 --- /dev/null +++ b/testing/mouse_se_test/run_test.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +module load nextflow/20.01.0 singularity/3.5.3 +base='/project/BICF/BICF_Core/s166458/rnaseq_astrocyte' +datadir='/project/shared/bicf_workflow_ref/workflow_testdata/rnaseq' + +nextflow -C ${base}/nextflow.config run -with-dag flowchart.png -with-timeline mouse_timeline.html -with-report mouse_report.html ${base}/workflow/main.nf --design ${datadir}/mouse_se.design.txt --input ${datadir} --pairs se --output analysis diff --git a/testing/run_all_tests.sh b/testing/run_all_tests.sh new file mode 100644 index 0000000000000000000000000000000000000000..62911f89ebfb66669043f90d60d75a893807ee15 --- /dev/null +++ b/testing/run_all_tests.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Submit each test's run_test.sh with sbatch from inside that test's own directory. + +# Resolve to an absolute path: a relative dirname would break the second cd once the first cd has changed the working directory. +baseDir="$(cd "$(dirname "$0")" && pwd)" + +cd "${baseDir}/mouse_se_test/" || exit 1 +sbatch -p 32GB,super run_test.sh +cd "${baseDir}/human_pe_test/" || exit 1 +sbatch -p 32GB,super run_test.sh 
diff --git a/workflow/process_scripts b/workflow/process_scripts deleted file mode 160000 index 8706de703b8b5933f89651e5be0312af96cddb7c..0000000000000000000000000000000000000000 --- a/workflow/process_scripts +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 8706de703b8b5933f89651e5be0312af96cddb7c