Commit 20daf0f9 authored by Gervaise Henry

Merge branch 'develop' into '15-auto.bagit.download'

Develop

See merge request !6
parents f6b9f1f2 0147ece8
4 merge requests: !37 v0.0.1, !11 Develop, !8 Resolve "Add automated download of bagit through deriva", !6 Develop
Pipeline #5263 passed with stage in 58 seconds
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-# C extensions
-*.so
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-# PyInstaller
-# Created by https://www.gitignore.io/api/r,perl,macos,linux,python,windows
-# Edit at https://www.gitignore.io/?templates=r,perl,macos,linux,python,windows
+# Created by https://www.gitignore.io/api/r,perl,linux,macos,python,windows,microsoftoffice
+# Edit at https://www.gitignore.io/?templates=r,perl,linux,macos,python,windows,microsoftoffice

 ### Linux ###
 *~
@@ -71,6 +45,27 @@ Network Trash Folder
 Temporary Items
 .apdisk

+### MicrosoftOffice ###
+*.tmp
+
+# Word temporary
+~$*.doc*
+
+# Word Auto Backup File
+Backup of *.doc*
+
+# Excel temporary
+~$*.xls*
+
+# Excel Backup File
+*.xlk
+
+# PowerPoint temporary
+~$*.ppt*
+
+# Visio autosave temporary files
+*.~vsd*
+
 ### Perl ###
 !Build/
 .last_cover_stats
@@ -165,15 +160,6 @@ coverage.xml
 *.mo
 *.pot

-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-
-# Flask stuff:
-instance/
-.webassets-cache

 # Scrapy stuff:
 .scrapy
@@ -183,31 +169,22 @@ docs/_build/
 # PyBuilder
 target/

-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py

 # pyenv
 .python-version

+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock

 # celery beat schedule file
 celerybeat-schedule

 # SageMath parsed files
 *.sage.py

-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/

 # Spyder project settings
 .spyderproject
 .spyproject
@@ -215,6 +192,11 @@ venv.bak/
 # Rope project settings
 .ropeproject

+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject

 # mkdocs documentation
 /site
@@ -226,9 +208,6 @@ dmypy.json
 # Pyre type checker
 .pyre/

-### Python Patch ###
-.venv/

 ### R ###
 # History files
 .Rhistory
@@ -237,6 +216,9 @@ dmypy.json
 # Session Data files
 .RData

+# User-specific files
+.Ruserdata

 # Example code in package build process
 *-Ex.R
@@ -257,7 +239,7 @@ vignettes/*.pdf
 .httr-oauth

 # knitr and R markdown default cache directories
-/*_cache/
+*_cache/
 /cache/
@@ -271,6 +253,7 @@ vignettes/*.pdf
 ### Windows ###
 # Windows thumbnail cache files
 Thumbs.db
+Thumbs.db:encryptable
 ehthumbs.db
 ehthumbs_vista.db
@@ -293,7 +276,7 @@ $RECYCLE.BIN/
 # Windows shortcuts
 *.lnk

-# End of https://www.gitignore.io/api/r,perl,macos,linux,python,windows
+# End of https://www.gitignore.io/api/r,perl,linux,macos,python,windows,microsoftoffice

 # nextflow analysis folders/files
 /test_data/*
...
before_script:
  - module add python/3.6.1-2-anaconda
  - pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1
  - module load singularity/3.0.2
  - ln -sfn /project/BICF/BICF_Core/shared/gudmap/cookies/deriva-cookies.txt ./test_data/
  - ln -sfn /project/BICF/BICF_Core/shared/gudmap/test_data/File_Q-Y53P.zip ./test_data/

stages:
  - unit

unit:
  stage: unit
  script:
    - singularity run 'docker://bicf/gudmaprbkfilexfer:1.1' bdbag --materialize ./test_data/File_Q-Y53P.zip
    - singularity run 'docker://bicf/gudmaprbkfilexfer:1.1' bdbag --validate full ./test_data/File_Q-Y53P/
    - if [[ $(md5sum test_data/File_Q-Y53P/data/assets/Study/Q-Y4H0/Experiment/Q-Y4BY/Replicate/Q-Y5F8/hMARIS_SIX2+_RiboDep#1.gene.rpkm.txt | awk '{ print $1 }') != $(cat test_data/File_Q-Y53P/data/File.csv | cut -d ',' -f10 | tail -1) ]]; then exit 21; fi;
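The last script line packs the whole integrity check into one conditional: it recomputes the MD5 of one fetched file and compares it to the checksum recorded in the bag's File.csv. Unrolled for readability, the same logic is (a sketch; the paths are the test fixture above, and column 10 of File.csv is assumed to hold the recorded MD5, as the one-liner implies):

  # Recompute the checksum of the file bdbag fetched
  fetched='test_data/File_Q-Y53P/data/assets/Study/Q-Y4H0/Experiment/Q-Y4BY/Replicate/Q-Y5F8/hMARIS_SIX2+_RiboDep#1.gene.rpkm.txt'
  actual=$(md5sum "${fetched}" | awk '{ print $1 }')
  # Pull the checksum recorded in the bag's file manifest
  expected=$(cut -d ',' -f10 test_data/File_Q-Y53P/data/File.csv | tail -1)
  # Fail the job with a distinctive exit code if they differ
  if [[ "${actual}" != "${expected}" ]]; then exit 21; fi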
@@ -2,5 +2,6 @@
 **User Facing**

 **Background**
+* Implementation of CI

 *Known Bugs*
@@ -5,10 +5,14 @@ process {
   // Process specific configuration
   withName:splitData {
-    container = 'docker://bicf/bdbag:1.0'
+    container = 'docker://bicf/gudmaprbkfilexfer:1.1'
   }
   withName:getData {
-    container = 'docker://bicf/bdbag:1.0'
+    container = 'docker://bicf/gudmaprbkfilexfer:1.1'
+  }
+  withName:trimData {
+    container = 'docker://bicf/trimgalore:1.1'
+    queue = '256GB,256GBv1,384GB'
   }
 }
...
name: bdbag
dependencies:
  - pandas=0.23.3=py36_0
  - pip:
    - bdbag==1.5.5
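This conda spec pins pandas for the helper scripts and pip-installs bdbag itself. To rebuild the environment outside the container, something like the following should work (a sketch; it assumes the spec above is saved as environment.yml and that conda's default channels carry the pinned pandas build):

  conda env create -f environment.yml   # creates an environment named "bdbag" from the spec
  conda activate bdbag
  bdbag --version                       # sanity check that the pip-installed bdbag 1.5.5 is on PATH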
@@ -13,12 +13,15 @@ bdbag = Channel
   .ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }

 outDir = params.outDir
+logsDir = "${outDir}/Logs"

 /*
  * splitData: split bdbag files by replicate so fetch can occur in parallel, and rename files to replicate rid
 */
 process splitData {
   tag "${bdbag.baseName}"
+  executor 'local'
+  publishDir "${logsDir}/splitData", mode: 'symlink', pattern: "${bdbag.baseName}.splitData.err"

   input:
   file bdbag
@@ -29,23 +32,24 @@ process splitData {
   file("${bdbag.baseName}/data/File.csv") into fileMeta
   file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta
   file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta
+  file ("${bdbag.baseName}.splitData.err")

   script:
   """
-  hostname
-  ulimit -a
-  ln -sf `readlink -e cookies.txt` ~/.bdbag/deriva-cookies.txt
-  echo "LOG: deriva cookie linked"
-  study=`echo "${bdbag}" | cut -d '.' -f1`
-  echo "LOG: \${study}"
-  unzip ${bdbag}
-  echo "LOG: bdgag unzipped"
-  python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
-  echo "LOG: fetch file filtered for only .fastq.gz"
-  python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study}
-  echo "LOG: fetch file split by replicates"
-  sh ${baseDir}/scripts/splitBag.sh \${study}
-  echo "LOG: bag recreated with replicate split fetch file"
+  hostname >> ${bdbag.baseName}.splitData.err
+  ulimit -a >> ${bdbag.baseName}.splitData.err
+  ln -sf `readlink -e cookies.txt` ~/.bdbag/deriva-cookies.txt 2>>${bdbag.baseName}.splitData.err
+  echo "LOG: deriva cookie linked" >> ${bdbag.baseName}.splitData.err
+  study=`echo "${bdbag}" | cut -d '.' -f1` 2>>${bdbag.baseName}.splitData.err
+  echo "LOG: \${study}" >> ${bdbag.baseName}.splitData.err
+  unzip ${bdbag} 2>>${bdbag.baseName}.splitData.err
+  echo "LOG: bdbag unzipped" >> ${bdbag.baseName}.splitData.err
+  python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study} 2>>${bdbag.baseName}.splitData.err
+  echo "LOG: fetch file filtered for only .fastq.gz" >> ${bdbag.baseName}.splitData.err
+  python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study} 2>>${bdbag.baseName}.splitData.err
+  echo "LOG: fetch file split by replicates" >> ${bdbag.baseName}.splitData.err
+  sh ${baseDir}/scripts/splitBag.sh \${study} 2>>${bdbag.baseName}.splitData.err
+  echo "LOG: bag recreated with replicate split fetch file" >> ${bdbag.baseName}.splitData.err
   """
 }
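The logging convention introduced here is the same across every reworked process: informational echo lines are appended to a per-task .err file with >>, command stderr is captured with 2>>, and that file is both declared as an output and symlinked by publishDir into ${logsDir}. Stripped to its bones (a sketch with illustrative names, not pipeline code):

  log='Q-Y53P.splitData.err'     # hypothetical per-task log name
  hostname >> "${log}"           # progress notes go to the log via >>
  unzip Q-Y53P.zip 2>> "${log}"  # command errors are captured via 2>>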
@@ -54,24 +58,45 @@ process splitData {
 */
 process getData {
   tag "${rep.baseName}"
-  publishDir "${outDir}/tempOut/fastqs", mode: "symlink"
+  publishDir "${logsDir}/getData", mode: 'symlink', pattern: "${rep.baseName}.getData.err"

   input:
   each rep from bdbagSplit

   output:
-  path ("*.R*.fastq.gz", type: 'file', maxDepth: '0') into fastq
+  set val ("${rep.baseName}"), file ("*.R{1,2}.fastq.gz") into trimming

   script:
   """
-  hostname
-  ulimit -a
+  hostname >>${rep.baseName}.getData.err
+  ulimit -a >>${rep.baseName}.getData.err
   export https_proxy=\${http_proxy}
   replicate=\$(basename "${rep}" | cut -d '.' -f1)
-  echo "LOG: \${replicate}"
-  unzip ${rep}
-  echo "LOG: replicate bdbag unzipped"
-  sh ${baseDir}/scripts/bdbagFetch.sh \${replicate}
-  echo "LOG: replicate bdbag fetched"
+  echo "LOG: \${replicate}" >>${rep.baseName}.getData.err
+  unzip ${rep} 2>>${rep.baseName}.getData.err
+  echo "LOG: replicate bdbag unzipped" >>${rep.baseName}.getData.err
+  sh ${baseDir}/scripts/bdbagFetch.sh \${replicate} 2>>${rep.baseName}.getData.err
+  echo "LOG: replicate bdbag fetched" >>${rep.baseName}.getData.err
   """
 }
+/*
+ * trimData: trims any adapter or non-host sequences from the data
+ */
+process trimData {
+  tag "trim-${repID}"
+  publishDir "${outDir}/tempOut/trimmed", mode: "symlink", pattern: "*_val_{1,2}.fq.gz"
+  publishDir "${logsDir}/trimData", mode: 'symlink', pattern: "\${rep}.trimData.*"
+
+  input:
+  set repID, reads from trimming
+
+  output:
+  path ("*_val_{1,2}.fq.gz", type: 'file', maxDepth: '0')
+
+  script:
+  """
+  rep=`echo ${repID} | cut -f2- -d '_'`;
+  trim_galore --gzip --max_n 1 --paired --basename \${rep} -j `nproc` ${reads[0]} ${reads[1]} 1>>\${rep}.trimData.log 2>>\${rep}.trimData.err;
+  """
+}
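For reference, trimData reduces to one Trim Galore call per replicate, using exactly the flags in the process script. A standalone equivalent (a sketch; the FASTQ names are hypothetical and -j 4 stands in for `nproc`):

  trim_galore --gzip --max_n 1 --paired --basename Q-Y5F8 -j 4 \
    Q-Y5F8.R1.fastq.gz Q-Y5F8.R2.fastq.gz
  # emits Q-Y5F8_val_1.fq.gz and Q-Y5F8_val_2.fq.gz, matching the publishDir pattern above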