Skip to content
Snippets Groups Projects
Commit 67e3eb6e authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch 'develop' into 'master'

v0.0.1

See merge request !37
parents 4442b00e 8a8ce097
Branches
Tags v0.0.1
2 merge requests!78Master,!37v0.0.1
Pipeline #7843 passed with stages
in 1 hour, 19 minutes, and 39 seconds
Showing
with 652 additions and 100 deletions
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Created by https://www.gitignore.io/api/r,perl,macos,linux,python,windows
# Edit at https://www.gitignore.io/?templates=r,perl,macos,linux,python,windows
# Created by https://www.gitignore.io/api/r,perl,linux,macos,python,windows,microsoftoffice
# Edit at https://www.gitignore.io/?templates=r,perl,linux,macos,python,windows,microsoftoffice
### Linux ###
*~
......@@ -71,6 +45,27 @@ Network Trash Folder
Temporary Items
.apdisk
### MicrosoftOffice ###
*.tmp
# Word temporary
~$*.doc*
# Word Auto Backup File
Backup of *.doc*
# Excel temporary
~$*.xls*
# Excel Backup File
*.xlk
# PowerPoint temporary
~$*.ppt*
# Visio autosave temporary files
*.~vsd*
### Perl ###
!Build/
.last_cover_stats
......@@ -165,15 +160,6 @@ coverage.xml
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
......@@ -183,31 +169,22 @@ docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
......@@ -215,6 +192,11 @@ venv.bak/
# Rope project settings
.ropeproject
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# mkdocs documentation
/site
......@@ -226,9 +208,6 @@ dmypy.json
# Pyre type checker
.pyre/
### Python Patch ###
.venv/
### R ###
# History files
.Rhistory
......@@ -237,6 +216,9 @@ dmypy.json
# Session Data files
.RData
# User-specific files
.Ruserdata
# Example code in package build process
*-Ex.R
......@@ -257,7 +239,7 @@ vignettes/*.pdf
.httr-oauth
# knitr and R markdown default cache directories
/*_cache/
*_cache/
/cache/
# Temporary files created by R markdown
......@@ -271,6 +253,7 @@ vignettes/*.pdf
### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
......@@ -293,11 +276,11 @@ $RECYCLE.BIN/
# Windows shortcuts
*.lnk
# End of https://www.gitignore.io/api/r,perl,macos,linux,python,windows
# End of https://www.gitignore.io/api/r,perl,linux,macos,python,windows,microsoftoffice
# nextflow analysis folders/files
/test_data/*
/workflow/docker/images/*
!/test_data/createTestData.sh
/workflow/.nextflow/*
/workflow/work/*
/workflow/output/*
......@@ -314,6 +297,8 @@ timeline*.html*
*.tmp
*.swp
*.out
*_studyRID.json
*_studyRID.csv
run*.sh
!.gitkeep
before_script:
- module add python/3.6.4-anaconda
- pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1
- module load singularity/3.5.3
- module load nextflow/20.01.0
- ln -sfn /project/BICF/BICF_Core/shared/gudmap/test_data/* ./test_data/
- mkdir -p ~/.deriva
- mkdir -p ~/.bdbag
stages:
- unit
- integration
- consistency
getBag:
stage: unit
script:
- ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' deriva-download-cli dev.gudmap.org --catalog 2 ./workflow/conf/replicate_export_config.json . rid=Q-Y5F6
- pytest -m getBag
getData:
stage: unit
script:
- ln -sfn `readlink -e ./test_data/auth/cookies.txt` ~/.bdbag/deriva-cookies.txt
- unzip ./test_data/bagit/Replicate_Q-Y5F6.zip
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bash ./workflow/scripts/bdbagFetch.sh Replicate_Q-Y5F6 Replicate_Q-Y5F6 TEST
- pytest -m getData
parseMetadata:
stage: unit
script:
- rep=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p repRID)
- exp=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p expRID)
- study=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p studyRID)
- endsMeta=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsMeta)
- endsManual=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p endsManual)
- stranded=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded)
- spike=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike)
- species=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species)
- readLength=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.stageNew.csv" -p readLength)
- echo -e "${endsMeta},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
- pytest -m parseMetadata
inferMetadata:
stage: unit
script:
- >
align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5F6_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%')) &&
if [[ ${align} == "" ]]; then exit 1; fi
- >
singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed" -i "./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam" 1>> Q-Y5F6_1M.se.inferMetadata.log &&
ended=`singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/inferMeta.sh endness Q-Y5F6_1M.se.inferMetadata.log` &&
if [[ ${ended} == "" ]]; then exit 1; fi
- pytest -m inferMetadata
getRef:
stage: unit
script:
- mkdir -p hu
- mkdir -p mo
- cp -R /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2 ./hu/
- cp -R /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2 ./mo/
trimData:
stage: unit
script:
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --basename Q-Y5F6_1M.se -j 20 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --paired --basename Q-Y5F6_1M.pe -j 20 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
- readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
- readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
- pytest -m trimData
downsampleData:
stage: unit
script:
- singularity run 'docker://bicf/seqtk:2.0.1_indev' seqtk sample -s100 ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz 1000 1> sampled.1.fq
- pytest -m downsampleData
alignData:
stage: unit
script:
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./test_data/fastq/small/Q-Y5F6_1M.pe_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_R2_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
- singularity run 'docker://bicf/gudmaprbkaligner:2.0.1_indev' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
- pytest -m alignData
dedupData:
stage: unit
script:
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
- >
for i in {"chr8","chr4","chrY"}; do
echo "samtools view -b Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;";
done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k
- pytest -m dedupData
countData:
stage: unit
script:
- singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' -o Q-Y5F6_1M.se.featureCounts -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
- singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.featureCounts
- assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
- pytest -m makeFeatureCounts
makeBigWig:
stage: unit
script:
- singularity run 'docker://bicf/deeptools3.3:2.0.1_indev' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
- pytest -m makeBigWig
fastqc:
stage: unit
script:
- singularity run 'docker://bicf/fastqc:2.0.1_indev' fastqc ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
- pytest -m fastqc
dataQC:
stage: unit
script:
- echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls
- for i in {"chr8","chr4","chrY"}; do
echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
- pytest -m dataQC
integration_se:
stage: integration
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID 16-1ZX4 -with-dag dag.png --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./SE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- output/qc/
- SE_multiqc_data.json
expire_in: 7 days
integration_pe:
stage: integration
script:
- hostname
- ulimit -a
- nextflow -q run ./workflow/rna-seq.nf --deriva ./test_data/auth/credential.json --bdbag ./test_data/auth/cookies.txt --repRID Q-Y5JA -with-dag dag.png --ci true
- find . -type f -name "multiqc_data.json" -exec cp {} ./PE_multiqc_data.json \;
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- dag.png
- output/qc/
- PE_multiqc_data.json
expire_in: 7 days
consistency:
stage: consistency
script:
- grep -m 1 \"Assigned\":.[0-9] SE_multiqc_data.json | grep -oe '\([0-9.]*\)' > assignedSE.txt
- grep -m 1 \"Assigned\":.[0-9] PE_multiqc_data.json | grep -oe '\([0-9.]*\)' > assignedPE.txt
- echo 7742416 > assignedExpectSE.txt
- echo 2599140 > assignedExpectPE.txt
- pytest -m consistencySE
- pytest -m consistencyPE
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- SE_multiqc_data.json
- PE_multiqc_data.json
- assignedSE.txt
- assignedPE.txt
- assignedExpectSE.txt
- assignedExpectPE.txt
expire_in: 7 days
# Summary
# Steps to reproduce
# Observed bug behavior
# Expected behavior
# Relevant logs and/or screenshots
# Potential fixes
/label ~bug ~"To Do"
/cc @ghenry @venkat.malladi @s181706
\ No newline at end of file
Please fill in the appropriate checklist below (delete those which are not relevant).
These are the most common things requested on pull requests.
## PR checklist
- [ ] This comment contains a description of changes (with reason)
- [ ] If you've fixed a bug or added code that should be tested, add tests!
- [ ] Documentation in `docs` is updated
- [ ] Replace dag.png with the most recent CI pipleine integrated_pe artifact
- [ ] `CHANGELOG.md` is updated
- [ ] `README.md` is updated
- [ ] `LICENSE.md` is updated with new contributors
- [ ] Docker images moved to production release and changed in pipeline
- [ ] Docker images used in the CI unit tests match those used in pipeline
* [ ] **Close issue**\
Closes #
/cc @ghenry @venkat.malladi
/assign @ghenry
# v0.0.1 (in development)
# v0.0.2 (in development)
**User Facing**
**Background**
*Known Bugs*
<hr>
# v0.0.1
**INITIAL BETA VERSION**\
Does not include automatic data upload\
This version is for initial upload of test data to GUDMAP/RBK data-hub for internal integration
<hr>
NOT YET LICENSED
MIT License
Copyright (c) 2019 University of Texas Southwestern Medical Center.
Contributors: Gervaise H. Henry, Jon Gesell, Jeremy Mathews, and Venkat Malladi
Department: Bioinformatic Core Facility, Department of Bioinformatics
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
\ No newline at end of file
|*master*|*develop*|
|:-:|:-:|
|[![Build Status](https://git.biohpc.swmed.edu/BICF/gudmap_rbk/rna-seq/badges/master/build.svg)](https://git.biohpc.swmed.edu/BICF/gudmap_rbk/rna-seq/commits/master)|[![Build Status](https://git.biohpc.swmed.edu/BICF/gudmap_rbk/rna-seq/badges/develop/build.svg)](https://git.biohpc.swmed.edu/BICF/gudmap_rbk/rna-seq/commits/develop)|
|[![pipeline status](https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/badges/master/pipeline.svg)](https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/commits/master)|[![pipeline status](https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/badges/develop/pipeline.svg)](https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq/commits/develop)|
<!--
[![DOI]()]()
GUDMAP/RBK RNA-Seq Pipeline
===========================
-->
RNA-Seq Analytic Pipeline for GUDMAP/RBK
========================================
Introduction
------------
This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub. It is designed to run on the HPC cluster ([BioHPC](https://portal.biohpc.swmed.edu)) at UT Southwestern Medical Center (in conjunction with the standard nextflow profile: config `biohpc.config`)
![flowchart](docs/RNA-Seq%20Pipeline%20Design%20Flowchart.jpg "Flowchart")
Cloud Compatibility:
--------------------
This pipeline is also capable of being run on AWS. To do so:
* Build a AWS batch queue and environment either manually or with [aws-cloudformantion](https://console.aws.amazon.com/cloudformation/home?#/stacks/new?stackName=Nextflow&templateURL=https://s3.amazonaws.com/aws-genomics-workflows/templates/nextflow/nextflow-aio.template.yaml)
* Edit one of the aws configs in workflow/config/
* Replace workDir with the S3 bucket generated
* Change region if different
* Change queue to the aws batch queue generated
* The user must have awscli configured with an appropriate authentication (with `aws configure` and access keys) in the environment which nextflow will be run
* Add `-profile` with the name aws config which was customized
To Run:
-------
* Available parameters:
* FULL EXAMPLE:
```
nextflow run workflow/main.nf
```
* Design example:
* `--deriva` active **credential.json** file from [deriva-auth](https://github.com/informatics-isi-edu/gudmap-rbk/wiki/Uploading-files-via-Deriva-client-tools#from-a-remote-server)
* `--bdbag` active **cookies.txt** file from [deriva-auth](https://github.com/informatics-isi-edu/gudmap-rbk/wiki/Uploading-files-via-Deriva-client-tools#from-a-remote-server)
* `--repRID` mRNA-seq replicate RID
* `--source` consortium server source
* **dev** = [dev.gudmap.org](dev.gudmap.org) (default, does not contain all data)
* **staging** = [staging.gudmap.org](staging.gudmap.org) (does not contain all data)
* **production** = [www.gudmap.org](www.gudmap.org) (***does contain all data***)
* `--refMoVersion` mouse reference version ***(optional)***
* `--refHuVersion` human reference version ***(optional)***
* `--refERCCVersion` human reference version ***(optional)***
* `-profile` config profile to use ***(optional)***:
* defaut = processes on BioHPC cluster
* **biohpc** = process on BioHPC cluster
* **biohpc_max** = process on high power BioHPC cluster nodes (=> 128GB nodes), for resource testing
* **aws_ondemand** = AWS Batch on-demand instant requests
* **aws_spot** = AWS Batch spot instance requests
* NOTES:
* once deriva-auth is run and authenticated, the two files above are saved in ```~/.deriva/``` (see official documents from [deriva](https://github.com/informatics-isi-edu/deriva-client#installer-packages-for-windows-and-macosx) on the lifetime of the credentials)
* reference version consists of Genome Reference Consortium version, patch release and GENCODE annotation release # (leaving the params blank will use the default version tied to the pipeline version)
* *current mouse* **38.p6.vM22** = GRCm38.p6 with GENCODE annotation release M22
* *current human* **38.p6.v31** = GRCh38.p12 with GENCODE annotation release 31
* Tracking parameters ([Tracking Site](http://bicf.pipeline.tracker.s3-website-us-east-1.amazonaws.com/)):
* `--ci` boolean (default = false)
* `--dev` boolean (default = false)
FULL EXAMPLE:
-------------
```
nextflow run workflow/rna-seq.nf --deriva ./data/credential.json --bdbag ./data/cookies.txt --repRID Q-Y5JA
```
To run a set of replicates from study RID:
------------------------------------------
Run in repo root dir:
* `sh workflow/scripts/splitStudy.sh [studyRID]`
It will run in parallel in batches of 5 replicatesRID
<hr>
[**CHANGELOG**](https://git.biohpc.swmed.edu/BICF/gudmap_rbk/rna-seq/blob/develop/CHANGELOG.md)
<hr>
Credits
-------
This worklow is was developed by [Bioinformatic Core Facility (BICF), Department of Bioinformatics](http://www.utsouthwestern.edu/labs/bioinformatics/)
=======
This workflow is was developed by [Bioinformatic Core Facility (BICF), Department of Bioinformatics](http://www.utsouthwestern.edu/labs/bioinformatics/)
PI
--
Venkat S. Malladi\
*Faculty Associate & Director*\
Bioinformatics Core Facility\
UT Southwestern Medical Center\
<a href="https://orcid.org/0000-0002-0144-0564" target="orcid.widget" rel="noopener noreferrer" style="vertical-align:top;"><img src="https://orcid.org/sites/default/files/images/orcid_16x16.png" style="width:1em;margin-right:.5em;" alt="ORCID iD icon">orcid.org/0000-0002-0144-0564</a>\
[venkat.malladi@utsouthwestern.edu](mailto:venkat.malladi@utsouthwestern.edu)
Developers
----------
Gervaise H. Henry\
*Computational Biologist*\
Department of Urology\
UT Southwestern Medical Center\
<a href="https://orcid.org/0000-0001-7772-9578" target="orcid.widget" rel="noopener noreferrer" style="vertical-align:top;"><img src="https://orcid.org/sites/default/files/images/orcid_16x16.png" style="width:1em;margin-right:.5em;" alt="ORCID iD icon">orcid.org/0000-0001-7772-9578</a>\
[gervaise.henry@utsouthwestern.edu](mailto:gervaise.henry@utsouthwestern.edu)
Jonathan Gesell\
*Computational Biologist*\
Bioinformatics Core Facility\
UT Southwestern Medical Center\
<a href="https://orcid.org/0000-0001-5902-3299" target="orcid.widget" rel="noopener noreferrer" style="vertical-align:top;"><img src="https://orcid.org/sites/default/files/images/orcid_16x16.png" style="width:1em;margin-right:.5em;" alt="ORCID iD icon">orcid.org/0000-0001-5902-3299</a>\
[johnathan.gesell@utsouthwestern.edu](mailto:jonathn.gesell@utsouthwestern.edu)
Jeremy A. Mathews\
*Computational Intern*\
Bioinformatics Core Facility\
UT Southwestern Medical Center\
<a href="https://orcid.org/0000-0002-2931-1430" target="orcid.widget" rel="noopener noreferrer" style="vertical-align:top;"><img src="https://orcid.org/sites/default/files/images/orcid_16x16.png" style="width:1em;margin-right:.5em;" alt="ORCID iD icon">orcid.org/0000-0002-2931-1430</a>\
[jeremy.mathews@utsouthwestern.edu](mailto:jeremy.mathews@utsouthwestern.edu)
Please cite in publications: Pipeline was developed by BICF from funding provided by **Cancer Prevention and Research Institute of Texas (RP150596)**.
<hr>
<hr>
Pipeline Directed Acyclic Graph
-------------------------------
![dag](docs/dag.png "DAG")
......@@ -5,3 +5,5 @@ rm timeline*.html*
rm .nextflow*.log*
rm -r .nextflow/
rm -r work/
rm *_studyRID.json
rm *_studyRID.csv
File deleted
<mxfile host="Electron" modified="2020-03-23T23:17:50.947Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/12.6.5 Chrome/80.0.3987.86 Electron/8.0.0 Safari/537.36" etag="EoueA3AcDhzhFmxKZ9Lg" version="12.6.5" type="device"><diagram name="Page-1" id="74e2e168-ea6b-b213-b513-2b3c1d86103e">7V1Zd5u6Fv41Xuveh2QxD49Nk7S9pzlNTpq0PS9dMsg2CQYXcBP3118xCDMIAWZ2nJcYIWGs/e1J2ntrxr9fv35wwGZ1Y+vQnHGM/jrjL2ccJzASj/75LbuwheNlOWxZOoYetrH7hnvjD4wamah1a+jQTXX0bNv0jE26UbMtC2peqg04jv2S7rawzfS3bsAS5hruNWDmW78ZurcKW3mGYfY3PkJjuYq+WsE35kB7Xjr21oq+b8bxi+AvvL0G+FlRf3cFdPsl0cRfzfj3jm174af163to+pOLpy0cd11wN35vB1pelQE3Px8/6OrD5hMrf9S/boXLT/faGX6M6+3whEAdzU90aTveyl7aFjCv9q0XwY+G/mMZdLXv89m2N6iRRY1P0PN2EbHB1rNR08pbm9Fd+Gp43xOff/iPOhejq8vX6MnBxS5xcQsdYw096OA2y3N235MX4ZM4EV/vnxVc4YeFP9r/pYWTiSfG3jpa1Ovix7+vd/zPn9zPP3f2Vn76+SIaZ6wU4d0DzhJ6lLkWYqIjboI2+iHODo1zoAk843f6TUCE6mXcLxr6znHALtFhYxuW5yaefOs3oA6vmM0Y9VyImDTiUUHOIKXCGJ4pGaOq7LnAqfGf0upwQUwORx/CecBXiQndNwX4r8ELotyYF5626w3uj/jd2Zz4gxW5PH8sdVa5+3PPvucerj5yc9eRTDgMf3CyjIDni/vorx6rlAxH2oIOe0lSKbBvOLwXruHUkwapzCHPl1/+dVcf/r57v/g4N73ftzdX5pkg5BmEyErKEAyiCLX1B2GIVCL/OVak8VHD4b0wAiZjS+rDsi34pnjj01f54euvp9fvPyF8+iOrMnv11xmLXYIEb9z9fOJvP/z1+Pjp7yX439L2ft1aZyIzG4A5WJ5TzxVpjzu2nqFFHy5KJajnGX944zH9sMfJ0+jC0xgTMzB5aFVyNbJwLBP2JAgfMqYX2GPv/wT7NmFPVBb8YKjvwcEuM3HGZSHJMpQ0VV6wqqQDReHPomn/DcxtRIgZJ5mImhcLO3jPPYNIv7Y2vnHmBhB/hzqwzOZ1fxN9Wvr/b4BhoZvvEHfsPENz8VPRC4YPDrvhZncDrOIve4nW+fyvE/wJDaHMaLZpO2Gzs5yD/wR3ODQJDPHTf+OBwVMXYG2Yu3D42rZs9A4I8Kku+19J/JEzTswuwYpo3v3WYP0wvsJ0EANKoJZL/7P/YqI/4SKib1lfNu6LufWgx3D7x4Q0j+8gwbbdBDfmYL3vFZIk7uV6jv0crcj6jX6nSMT5l2xw6a7AJhyzfl36k3O+MO0XbQUc71y3te0avX448GVlePA+nHjU/QV1Dp/gT7zfEsg0v2VhmOb7gN74XfiFokFNC/sHr5W5P1dEwVe5YkAp/Kuh48HXSvMa34qndC+9RJ+D9z2TRGB2iRtM8sZLPG3RzcS9VYjx+KaUuIdkXty+TLxClubBZUz4ZGMajlG/HG5jFg3ZEYM8oyfdF2NtgsALSqg2bWWY+mews7e+KnA9oD3jK0QdRPlIKQb8lVak8Qq8f2GCOTQv4jX8kKSx25WEX/i9Pp9eR5x8+QgdHVggao6+UUGXwDSWFvqsIQL7WjQBJ/838boIFV2YpYEU3FG4OS9J8TwQNGWEqH1TXtXFwj5n7LCRXxBhA7dFjavEBgf2IIgqMqEg6sl/Lif/kS6/AMuKVM9Rto15LVRS5RONF+iIk0qYU6GFOSVaRxzTfP0tuewAHK2ZyYlNydBQlEtszgL7ssy8DK6yxmqO9KGhU430XN4aJS7WiRVdMJbliTZw7+vbTEoMIDOQbiZmuqtMBqHNzELalJaLhj1qfcAlVHqk0FN4BHPXNrcefOdoEWaD1v2VkBPzXFdyBQu/ygKcT5OBFQhyhiS748YmcoZIJAznYfzYve/6I3GnTKbsxciPlBQhy5S8FGxRbJDWMWnM0J7UaEb1rtw2B25MQwOe7wf98+my0GvLQC5lEWRZN/QGkC8ArC0wP1mbwCYkiYm0IHEjs0JKywe2H96uytptWGV0TdUlkcPHzR18aw6WhleV6nlSJ3GAqU7xAMkoiOgerWb1QGqOZIN3Revc2+epjCTRJfDARE3weA88M8ltmOTRt/0DNQ9YS39dosbX8YRvk/nMtwETGa4W4o0LXxa5OcrWs6sKIi+kQXX2YI7AQZoc2zdlmlyqqMh5tm1NfgT2fwFOmwdenXB6ME65I8ApL/fgp0qVFehkHVVsI0zYUcV4bt2aXQDX+9Wxybremp5xhg3XAux0Qsfs1niejorSjaFK1Z6tUxG9HtADnn0ThCS5HF0RMvfyQo6G6Le68CYmwZT9Dj6rsQbfCmDFQUNxe7GjDjKZcGBmmclUNbyKk4/BZBLaNZmI0U/88XugRCh0B0jyNA+TOtHMtVR6MNnlmhposoY7VrXTMNypUpVg8m3Nww0+/AzTSJqAV8GaW2wC7m+Su997DrD0OgM2xjOs1R9qBoxfab9QTn0GavSnpsB0RUjwcriK4luSIIyaEsgmAd+HlqEB810U4bI2dD0Q1za64xu+qG2F2qA1i4Jrbm3X8Aw7FQ6Dn/I50yF+Gg6gMeEiitfTDGuJGs4CdkqE27BcP6yBIytLWUNpgTMo4r2QM4pw1ZLD5O+mZdiFFFk5LYBmcVWAkSrQan33pw0HIfeq+SALzzHW09/+EZTRuWESd/RGb3vL2WJFU1jJm8LUZMpJ+2Ztb7vczXcPmug+Wupy/rz6fPttd2Wcyc3zFccO0wOshQZ4JM/yMYQB8lIPvppSXUNN1k3DWngabhpVwLa+Mu9Tew31vAs0pY2X+gQebr2eKq8G8TV+acFY4B2Xs9GDYCBt2PXnXBB3xv+Bizx3Tsm1kITxuRbHv8PTnmuB9x5KE4wJsdn0TOShjTmunjGX6d5LTFd+/75AJEzWlsNCr8aO8YC2XAGgp5PVQa1O0EOmB0GakO1jpqo0aT1AtJldz3Rk1ztwAdG8BinvS2ih1y227tox5v2tvTlw4eFmfG3eFtO8TWBtkqXQhhVP11StU3PlJ9RfLyzklF8vPSTOr+d7n23kzll9iZ0pxzdkMFWeoMFW2fRX8OOCuuMxs+XTCn51xYiD98sUo5rXi8QqPtI4bOzuFvBzg7MbhWoG2OGURYMy2G7DVlfaLXw7jRIC3SKbGOGijsR9HP9egFpD2U3WgcQafRqbAVREn1aLe10tboqcXleLaUq+dfckKCl2PQfGeFySukFFMp2Sw+0XXRiG96rc3jnr/ylPf4wvW12QCHX9gvpu03dL1H7zyit8HTGvXBwgr1wWc6Q9fnuR6DDlgUdjktROBKkfnzckqVWxJ21ITsxHUt4g5ruHN7kiDcMdAb7bdpRoU1pJ/U7FUaLaGRN2lmJYn7ylLr2lTuAzvMfUWSm26blMxRQeo9tElgV5UbAGz/DCWH4zploTF99lu8iML/acFDYj6XmlIq9ynfpO4O9b8GDdws3u+9f5w43woG7WcebvG9tYyuOMyhVJS5LYkVDciNhPOQI7svVkEMqEVhNHUzEk6ZJ3wpZkPlWiJVMgoveIynJmcBAdxUDAQXQIQ3846LWaJ/EnEIokaYg1j6GoJzdkWif5nMvmi54TVt3U+iZUcJbqcpZw/CW5o3hS3tQZrCQHJq28+WNYBsqn4rejvQOaz7gLb4McekZzf78NRd4YE70qcuLJ13k9vt2YNtCPQJELQyZRFR8zXnWypyItqbCahrAkWl2D5ruN+fTMItYsN7dYwjZccb3jvs0tVkxjkGdTBlRpf1bp4VDXeJ3qBMsDYEkt+FuKyoLwqG5RybP1UJnt3w8qB03mO05UchVRqYwClWo9VAoCvT8+xvjQ/v2gvnlgzgn15BIvpahXh0C9lN2aL5HF2f5lKM7uJ9bt3wvqsRt1Qv0hqK9+zvx4YC+r9WCf7d8PLE+GcQNY0jIlUuUECiMB+walWtNby/bvB5Qnu7htUJJK5hTHFfS+hsDWdNdyA/rB5aDBQG8bl+wgJyY0B6bAlAzg1IYDeoE+P+iJf7WgPzDMqQXhx7kskV0G4EsgOMiygdBuzYMosv0Ny2PqXncSqMWbcr2LY6HmUkJuQD9QbTf1zF0BZ3PCalFQ5UihymWFahlUcwOkMmxnfbm6A9pmBgNou49fXu7ZL+wXV/vr31fWuCWESsQFo1MsUhA5I1LPCKoaf0N+yq2xgaZh+akJl9D1c5c45hpHy1BP9ilIw5qXJVwlYw+iZKncSTu5LCoHvdsfMA8e5fNUBD30XPFiJl76z0KM7+4DGPI8VjuwgMtEgRAyNOJqu8nAguyp6a2V+FKKq5i7G2BVAQE5COvufYKA4aOKoq6IcTzayjD1z2Bnb/0pcD2gPeOrfIxPWsgjwa4HyXH+RXAC0wUavQw6ZTLw0kEobPTjrsHaMH0KPUJHBxaYpU5QUEgoayN2pX5QkcKq54rExH9sWg4R4rlUYoxRGwFdRJUtEcpi2C+WC9YbEw4R1VWWUVfMI3kqjCYk+3TQTrpMH9UuouK01OOUCEFD5I5TPIpypJUoqJSoLlrGHMNYIpcooC3UDuMLYpS6Mjb2hNdRn+w5KXTrY+gk7oNIO1wWNzn2JEfXwEC6Pwo1jx/TRR2sZplYJ63fTOtXVfqEVWYy155UfqcqP68+ysTMsan8yWn8QoX/1mq1HELO4bQ8eVM5R0zDWkDnpugQ+enpeL7fii1qlYotMuH7ui3YQjY2Bg0v7sXaqGBG0PiiNCij6tIBN0hqR8MifwzFjjgAg7QJJAmgY9P+xUCbkPbn6FXRD1X++cqAAQyuAslHqd2X6X/vOcDSa43YGM+w3gCoGZD+VrOxVhQMdk5ubdfwDJu4o/Y50yF+Gt4dMeEiWtvRDGuJGs4CJiKfRt05Q1ROb1da4Aei9ZG3hoOlKq1vy6mimVRnK0rook7NcVhOx284HVovh8YkpWkqhDAVoow4tn2YA2BJm79yYTRmK6oqmiZS+qEYwB2soWByT2QBpZi8Y1xAIUbWCVyOlJ5hHYH27+SEcIr2Vypof7F37U+cTK6F7JbpHZhAtAhywAyDlojApPJPaQgrpn15+gu2I0+VccuEF57TEuE1FWuBLqCnYS7QIZ0lFONqtgPdATR7B/OORwyg2slJ80Lenj4Kx76TU4mPRbWfzv86QLXT+ae8PIVIyG0t6MmdlHtVKEsnKI8ayvwRQJmXe4ByPKl0TTwVO7XE2piGoUoXzxlCMU+uTVgUmUpt5o4I1mtxZroMSh6O4jsBfqrXoD5FR1NOOuus17DqFpTykSY3U52b8iSevNYtjjfpW8Xy+HTEignO2f69pOKLzUtJvaFU/Mpope3/VLARBzEJJb4eXrP9ceRAYf9Mdn3d/v3wQ7uL/Cd+mFVY4BwrR8hqPY7I9u8Hse2uXfWN2EPRqQN3FXt9nRsaHKGS6piQKsgZpHJ0pGb7H4Xsltpd+jrJbtpcV02lG6a4kFhPcGf791MycDp13lsR0iTeqoDTqrmdJBE9nmLXfqTrucDvK7zwteRpyXCurLIVw6nnAqfGf0qbw3mhj5pxzQ9OegtmThNuGVFd2YHZRWQasQt9eC/sgu2vN8Yuh3EIEfjqyE1+lpVoNcPEkkM+SoZLJcM5IRjeeEw/zIBpfmKGA5mBxWvTo+UGnlpBr5Qb6MOPixu4EzdU54b57kET3UdLXc6fV59vv+2ujDOZkAA+JmZQBN/6qcUAhCGVAFxsZDUc3g8rTHvFaECngqglxr5KKslqji9KFkoJQ0qBLfE056Ph8IZ8gS4d288e23f3QzdubB36Pf4P</diagram></mxfile>
\ No newline at end of file
docs/RNA-Seq Pipeline Design Flowchart.jpg

213 KiB

File added
File added
docs/dag.png

665 KiB

docs/gudmap-rbk_logo.png

29.2 KiB

profiles {
standard {
includeConfig 'workflow/conf/biohpc.config'
}
}
[pytest]
python_paths = workflow/scripts
#!/bin/bash
#This script regenerates test data from replicate RID Q-Y5F6
module load singularity/3.5.3
module load pigz/2.4
mkdir -p NEW_test_data
ln -sfn `readlink -e ./test_data/auth/credential.json` ~/.deriva/credential.json
mkdir -p ./NEW_test_data/bagit
singularity run 'docker://bicf/gudmaprbkfilexfer:1.3' deriva-download-cli dev.gudmap.org --catalog 2 ./workflow/conf/replicate_export_config.json . rid=Q-Y5F6
cp Replicate_Q-Y5F6.zip ./NEW_test_data/bagit/Replicate_Q-Y5F6.zip
mkdir -p ./NEW_test_data/fastq
unzip ./test_data/bagit/Replicate_Q-Y5F6.zip
singularity run 'docker://bicf/gudmaprbkfilexfer:1.3' bash ./workflow/scripts/bdbagFetch.sh Replicate_Q-Y5F6 Replicate_Q-Y5F6
cp Replicate_Q-Y5F6.R1.fastq.gz ./NEW_test_data/fastq/Replicate_Q-Y5F6.R1.fastq.gz
cp Replicate_Q-Y5F6.R2.fastq.gz ./NEW_test_data/fastq/Replicate_Q-Y5F6.R2.fastq.gz
mkdir -p ./NEW_test_data/fastq/small
singularity exec 'docker://bicf/seqtk:2.0.0' seqtk sample -s100 ./NEW_test_data/fastq/Replicate_Q-Y5F6.R1.fastq.gz 1000000 1> Q-Y5F6_1M.R1.fastq
singularity exec 'docker://bicf/seqtk:2.0.0' seqtk sample -s100 ./NEW_test_data/fastq/Replicate_Q-Y5F6.R2.fastq.gz 1000000 1> Q-Y5F6_1M.R2.fastq
pigz Q-Y5F6_1M.R1.fastq
pigz Q-Y5F6_1M.R2.fastq
cp Q-Y5F6_1M.R1.fastq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
cp Q-Y5F6_1M.R2.fastq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
mkdir -p ./NEW_test_data/meta
singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --basename Q-Y5F6_1M.se -j 20 ./NEW_test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --paired --basename Q-Y5F6_1M.pe -j 20 ./NEW_test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
cp Q-Y5F6_1M.se_trimmed.fq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz
cp Q-Y5F6_1M.pe_R1_val_1.fq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_R1_val_1.fq.gz
cp Q-Y5F6_1M.pe_R2_val_2.fq.gz ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_R2_val_2.fq.gz
cp Q-Y5F6_1M.R1.fastq.gz_trimming_report.txt ./NEW_test_data/meta/Q-Y5F6_1M.R1.fastq.gz_trimming_report.txt
cp Q-Y5F6_1M.R2.fastq.gz_trimming_report.txt ./NEW_test_data/meta/Q-Y5F6_1M.R2.fastq.gz_trimming_report.txt
touch metaTest.csv
echo 'Replicate_RID,Experiment_RID,Study_RID,Paired_End,File_Type,Has_Strand_Specific_Information,Used_Spike_Ins,Species' > metaTest.csv
echo 'Replicate_RID,Experiment_RID,Study_RID,uk,FastQ,no,no,Homo sapiens' >> metaTest.csv
cp metaTest.csv ./NEW_test_data/meta/metaTest.csv
mkdir -p ./NEW_test_data/bam
mkdir -p ./NEW_test_data/bam/small
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.se.unal.gz -S Q-Y5F6_1M.se.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness F -U ./NEW_test_data/fastq/small/Q-Y5F6_1M.se_trimmed.fq.gz --summary-file Q-Y5F6_1M.se.alignSummary.txt --new-summary
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.se.bam Q-Y5F6_1M.se.sam
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.bam
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.bam Q-Y5F6_1M.se.sorted.bam.bai
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' hisat2 -p 20 --add-chrname --un-gz Q-Y5F6_1M.pe.unal.gz -S Q-Y5F6_1M.pe.sam -x /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/hisat2/genome --rna-strandness FR --no-mixed --no-discordant -1 ./NEW_test_data/fastq/small/Q-Y5F6_1M.pe_R1_val_1.fq.gz -2 ./test_data/fastq/small/Q-Y5F6_1M.pe_R2_val_2.fq.gz --summary-file Q-Y5F6_1M.pe.alignSummary.txt --new-summary
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools view -1 -@ 20 -F 4 -F 8 -F 256 -o Q-Y5F6_1M.pe.bam Q-Y5F6_1M.pe.sam
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.bam
singularity run 'docker://bicf/gudmaprbkaligner:2.0.0' samtools index -@ 20 -b Q-Y5F6_1M.pe.sorted.bam Q-Y5F6_1M.pe.sorted.bam.bai
cp Q-Y5F6_1M.se.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.bam
cp Q-Y5F6_1M.pe.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.pe.bam
cp Q-Y5F6_1M.se.sorted.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.bam
cp Q-Y5F6_1M.se.sorted.bam.bai ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.bam.bai
cp Q-Y5F6_1M.pe.sorted.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.pe.sorted.bam
cp Q-Y5F6_1M.pe.sorted.bam.bai ./NEW_test_data/bam/small/Q-Y5F6_1M.pe.sorted.bam.bai
cp Q-Y5F6_1M.se.alignSummary.txt ./NEW_test_data/meta/Q-Y5F6_1M.se.alignSummary.txt
cp Q-Y5F6_1M.pe.alignSummary.txt ./NEW_test_data/meta/Q-Y5F6_1M.pe.alignSummary.txt
singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.bam O=Q-Y5F6_1M.se.deduped.bam M=Q-Y5F6_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.deduped.bam
singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
cp Q-Y5F6_1M.se.deduped.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
cp Q-Y5F6_1M.se.sorted.deduped.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
cp Q-Y5F6_1M.se.sorted.deduped.bam.bai ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam.bai
cp Q-Y5F6_1M.se.deduped.Metrics.txt /NEW_test_data/meta/Q-Y5F6_1M.se.deduped.Metrics.txt
cp Q-Y5F6_1M.se.deduped.Metrics.txt ./NEW_test_data/meta/Q-Y5F6_1M.se.deduped.Metrics.txt
for i in {"chr8","chr4","chrY"}; do
echo "samtools view -b ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;";
done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k
cp Q-Y5F6_1M.se.sorted.deduped.chr4.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.chr4.bam
cp Q-Y5F6_1M.se.sorted.deduped.chr4.bam.bai ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.chr4.bam.bai
cp Q-Y5F6_1M.se.sorted.deduped.chr8.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.chr8.bam
cp Q-Y5F6_1M.se.sorted.deduped.chr8.bam.bai ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.chr8.bam.bai
cp Q-Y5F6_1M.se.sorted.deduped.chrY.bam ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.chrY.bam
cp Q-Y5F6_1M.se.sorted.deduped.chrY.bam.bai ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.chrY.bam.bai
mkdir -p ./NEW_test_data/counts
mkdir -p ./NEW_test_data/counts/small
singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' -o Q-Y5F6_1M.se.featureCounts -s 1 -R SAM --primary --ignoreDup ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count Q-Y5F6_1M.se.featureCounts
cp Q-Y5F6_1M.se.featureCounts ./NEW_test_data/counts/small/Q-Y5F6_1M.se.featureCounts
cp Q-Y5F6_1M.se.countTable.csv ./NEW_test_data/counts/small/Q-Y5F6_1M.se.countTable.csv
mkdir -p ./NEW_test_data/bw
mkdir -p ./NEW_test_data/bw/small
singularity run 'docker://bicf/deeptools3.3:2.0.0' bamCoverage -p 20 -b ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam -o Q-Y5F6_1M.se.bw
cp Q-Y5F6_1M.se.bw ./NEW_test_data/bw/small/Q-Y5F6_1M.se.bw
mkdir -p ./NEW_test_data/fastqc
mkdir -p ./NEW_test_data/fastqc/small
singularity run 'docker://bicf/fastqc:2.0.0' ./NEW_test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz -o .
cp Q-Y5F6_1M.R1_fastqc.html ./NEW_test_data/fastqc/small/Q-Y5F6_1M.R1_fastqc.html
cp Q-Y5F6_1M.R1_fastqc.zip ./NEW_test_data/fastqc/small/Q-Y5F6_1M.R1_fastqc.zip
echo -e "geneID\tchrom\ttx_start\ttx_end\tTIN" > Q-Y5F6_1M.se.sorted.deduped.tin.xls
for i in {"chr8","chr4","chrY"}; do
echo "tin.py -i ./NEW_test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://bicf/rseqc3.0:2.0.0' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
cp Q-Y5F6_1M.se.sorted.deduped.tin.xls ./NEW_test_data/meta/Q-Y5F6_1M.se.sorted.deduped.tin.xls
workDir = 's3://gudmap-rbk.output/work'
aws.client.storageEncryption = 'AES256'
aws {
region = 'us-east-2'
batch {
cliPath = '/home/ec2-user/miniconda/bin/aws'
}
}
process {
executor = 'awsbatch'
cpus = 1
memory = '1 GB'
withName: trackStart {
cpus = 1
memory = '1 GB'
}
withName: getBag {
cpus = 1
memory = '1 GB'
}
withName: getData {
cpus = 1
memory = '1 GB'
}
withName: parseMetadata {
cpus = 15
memory = '1 GB'
}
withName: trimData {
cpus = 20
memory = '2 GB'
}
withName: getRefInfer {
cpus = 1
memory = '1 GB'
}
withName: downsampleData {
cpus = 1
memory = '1 GB'
}
withName: alignSampleData {
cpus = 50
memory = '5 GB'
}
withName: inferMetadata {
cpus = 5
memory = '1 GB'
}
withName: getRef {
cpus = 1
memory = '1 GB'
}
withName: alignData {
cpus = 50
memory = '10 GB'
}
withName: dedupData {
cpus = 5
memory = '20 GB'
}
withName: countData {
cpus = 2
memory = '5 GB'
}
withName: makeBigWig {
cpus = 15
memory = '5 GB'
}
withName: fastqc {
cpus = 1
memory = '1 GB'
}
withName: dataQC {
cpus = 15
memory = '2 GB'
}
withName: aggrQC {
cpus = 2
memory = '1 GB'
}
}
process {
executor = 'slurm'
queue='super'
queue = 'super'
clusterOptions = '--hold'
// Process specific configuration
withLabel:getData {
executor = 'super'
withName: trackStart {
executor = 'local'
}
withName: getBag {
executor = 'local'
}
withName: getData {
executor = 'local'
}
withName: parseMetadata {
executor = 'local'
}
withName: trimData {
queue = 'super'
}
withName: getRefInfer {
executor = 'local'
}
withName: downsampleData {
executor = 'local'
}
withName: alignSampleData {
queue = 'super'
}
withName: inferMetadata {
queue = 'super'
}
withName: getRef {
executor = 'local'
}
withName: alignData {
queue = '256GB,256GBv1'
}
withName: dedupData {
queue = 'super'
}
withName: countData {
queue = 'super'
}
withName: makeBigWig {
queue = 'super'
}
withName: fastqc {
queue = 'super'
}
withName: dataQC {
queue = 'super'
}
withName: aggrQC {
executor = 'local'
}
}
trace {
enabled = true
file = 'pipeline_trace.txt'
fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
}
timeline {
singularity {
enabled = true
file = 'timeline.html'
cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/'
}
report {
enabled = true
file = 'report.html'
env {
http_proxy = 'http://proxy.swmed.edu:3128'
https_proxy = 'http://proxy.swmed.edu:3128'
all_proxy = 'http://proxy.swmed.edu:3128'
}
tower {
accessToken = '3ade8f325d4855434b49aa387421a44c63e3360f'
enabled = true
}
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment