Skip to content
Snippets Groups Projects
Commit 20daf0f9 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch 'develop' into '15-auto.bagit.download'

Develop

See merge request !6
parents f6b9f1f2 0147ece8
Branches
Tags
4 merge requests!37v0.0.1,!11Develop,!8Resolve "Add automated download of bagit through deriva",!6Develop
Pipeline #5263 passed with stage
in 58 seconds
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Created by https://www.gitignore.io/api/r,perl,macos,linux,python,windows
# Edit at https://www.gitignore.io/?templates=r,perl,macos,linux,python,windows
# Created by https://www.gitignore.io/api/r,perl,linux,macos,python,windows,microsoftoffice
# Edit at https://www.gitignore.io/?templates=r,perl,linux,macos,python,windows,microsoftoffice
### Linux ###
*~
......@@ -71,6 +45,27 @@ Network Trash Folder
Temporary Items
.apdisk
### MicrosoftOffice ###
*.tmp
# Word temporary
~$*.doc*
# Word Auto Backup File
Backup of *.doc*
# Excel temporary
~$*.xls*
# Excel Backup File
*.xlk
# PowerPoint temporary
~$*.ppt*
# Visio autosave temporary files
*.~vsd*
### Perl ###
!Build/
.last_cover_stats
......@@ -165,15 +160,6 @@ coverage.xml
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
......@@ -183,31 +169,22 @@ docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
......@@ -215,6 +192,11 @@ venv.bak/
# Rope project settings
.ropeproject
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# mkdocs documentation
/site
......@@ -226,9 +208,6 @@ dmypy.json
# Pyre type checker
.pyre/
### Python Patch ###
.venv/
### R ###
# History files
.Rhistory
......@@ -237,6 +216,9 @@ dmypy.json
# Session Data files
.RData
# User-specific files
.Ruserdata
# Example code in package build process
*-Ex.R
......@@ -257,7 +239,7 @@ vignettes/*.pdf
.httr-oauth
# knitr and R markdown default cache directories
/*_cache/
*_cache/
/cache/
# Temporary files created by R markdown
......@@ -271,6 +253,7 @@ vignettes/*.pdf
### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
......@@ -293,7 +276,7 @@ $RECYCLE.BIN/
# Windows shortcuts
*.lnk
# End of https://www.gitignore.io/api/r,perl,macos,linux,python,windows
# End of https://www.gitignore.io/api/r,perl,linux,macos,python,windows,microsoftoffice
# nextflow analysis folders/files
/test_data/*
......
# GitLab CI fragment: shared setup plus a single "unit" stage.
# NOTE(review): indentation appears flattened by the page scrape — confirm
# nesting against the original .gitlab-ci.yml before reusing this text.
before_script:
# Load cluster modules for Python/pytest tooling and Singularity.
- module add python/3.6.1-2-anaconda
- pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1
- module load singularity/3.0.2
# Symlink shared deriva cookies and a known test bdbag zip into test_data/.
- ln -sfn /project/BICF/BICF_Core/shared/gudmap/cookies/deriva-cookies.txt ./test_data/
- ln -sfn /project/BICF/BICF_Core/shared/gudmap/test_data/File_Q-Y53P.zip ./test_data/
stages:
- unit
# unit job: materialize and validate the test bdbag, then compare the md5sum
# of one fetched file against the checksum recorded in File.csv; exits 21 on
# mismatch so the pipeline fails with a distinguishable code.
unit:
stage: unit
script:
- singularity run 'docker://bicf/gudmaprbkfilexfer:1.1' bdbag --materialize ./test_data/File_Q-Y53P.zip
- singularity run 'docker://bicf/gudmaprbkfilexfer:1.1' bdbag --validate full ./test_data/File_Q-Y53P/
- if [[ $(md5sum test_data/File_Q-Y53P/data/assets/Study/Q-Y4H0/Experiment/Q-Y4BY/Replicate/Q-Y5F8/hMARIS_SIX2+_RiboDep#1.gene.rpkm.txt | awk '{ print $1 }') != $(cat test_data/File_Q-Y53P/data/File.csv | cut -d ',' -f10 | tail -1) ]]; then exit 21; fi;
......@@ -2,5 +2,6 @@
**User Facing**
**Background**
* Implementation of CI
**Known Bugs**
......@@ -5,10 +5,14 @@ process {
// Process specific configuration
// NOTE(review): each withName block below shows two `container` assignments;
// this looks like the old/new sides of a rendered diff. Only one should exist
// in the real file — presumably the gudmaprbkfilexfer:1.1 line (the new side);
// confirm against the post-merge nextflow.config.
withName:splitData {
container = 'docker://bicf/bdbag:1.0'
container = 'docker://bicf/gudmaprbkfilexfer:1.1'
}
withName:getData {
container = 'docker://bicf/bdbag:1.0'
container = 'docker://bicf/gudmaprbkfilexfer:1.1'
}
// trimData additionally requests large-memory queues for trim_galore.
withName:trimData {
container = 'docker://bicf/trimgalore:1.1'
queue = '256GB,256GBv1,384GB'
}
}
......
# Conda environment fragment for the bdbag tooling: pandas pinned to a py36
# build, bdbag itself installed via pip.
# NOTE(review): channels section not visible in this fragment — confirm the
# full environment.yml before reuse.
name: bdbag
dependencies:
- pandas=0.23.3=py36_0
- pip:
- bdbag==1.5.5
......@@ -13,12 +13,15 @@ bdbag = Channel
.ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }
outDir = params.outDir
logsDir = "${outDir}/Logs"
/*
 * splitData: split bdbag files by replicate so fetch can occur in parallel, and rename files to replicate rid
*/
process splitData {
tag "${bdbag.baseName}"
executor 'local'
publishDir "${logsDir}/splitData", mode: 'symlink', pattern: "${bdbag.baseName}.splitData.err"
input:
file bdbag
......@@ -29,23 +32,24 @@ process splitData {
file("${bdbag.baseName}/data/File.csv") into fileMeta
file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta
file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta
file ("${bdbag.baseName}.splitData.err")
script:
"""
hostname
ulimit -a
ln -sf `readlink -e cookies.txt` ~/.bdbag/deriva-cookies.txt
echo "LOG: deriva cookie linked"
study=`echo "${bdbag}" | cut -d '.' -f1`
echo "LOG: \${study}"
unzip ${bdbag}
echo "LOG: bdgag unzipped"
python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
echo "LOG: fetch file filtered for only .fastq.gz"
python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study}
echo "LOG: fetch file split by replicates"
sh ${baseDir}/scripts/splitBag.sh \${study}
echo "LOG: bag recreated with replicate split fetch file"
hostname >> ${bdbag.baseName}.splitData.err
ulimit -a >> ${bdbag.baseName}.splitData.err
ln -sf `readlink -e cookies.txt` ~/.bdbag/deriva-cookies.txt 2>>${bdbag.baseName}.splitData.err
echo "LOG: deriva cookie linked" >> ${bdbag.baseName}.splitData.err
study=`echo "${bdbag}" | cut -d '.' -f1` 2>>${bdbag.baseName}.splitData.err
echo "LOG: \${study}" >> ${bdbag.baseName}.splitData.err
unzip ${bdbag} 2>>${bdbag.baseName}.splitData.err
echo "LOG: bdgag unzipped" >> ${bdbag.baseName}.splitData.err
python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study} 2>>${bdbag.baseName}.splitData.err
echo "LOG: fetch file filtered for only .fastq.gz" >> ${bdbag.baseName}.splitData.err
python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study} 2>>${bdbag.baseName}.splitData.err
echo "LOG: fetch file split by replicates" >> ${bdbag.baseName}.splitData.err
sh ${baseDir}/scripts/splitBag.sh \${study} 2>>${bdbag.baseName}.splitData.err
echo "LOG: bag recreated with replicate split fetch file" >> ${bdbag.baseName}.splitData.err
"""
}
......@@ -54,24 +58,45 @@ process splitData {
*/
/*
 * getData: fetch the data files for one replicate bdbag produced upstream.
 * NOTE(review): this span is a rendered merge diff interleaving old and new
 * lines — e.g. two output declarations and duplicated hostname/ulimit/echo
 * commands (the `>>*.getData.err` variants appear to be the merged result).
 * Confirm against the post-merge main.nf before treating this as runnable.
 */
process getData {
tag "${rep.baseName}"
// Publish fetched fastqs and the per-replicate stderr log as symlinks.
publishDir "${outDir}/tempOut/fastqs", mode: "symlink"
publishDir "${logsDir}/getData", mode: 'symlink', pattern: "${rep.baseName}.getData.err"
input:
each rep from bdbagSplit
output:
// NOTE(review): old-side bare fastq path vs new-side (repID, reads) tuple
// feeding the trimming channel — presumably only the tuple survives the merge.
path ("*.R*.fastq.gz", type: 'file', maxDepth: '0') into fastq
set val ("${rep.baseName}"), file ("*.R{1,2}.fastq.gz") into trimming
// Shell script: unzip the replicate bdbag and fetch its payload via
// scripts/bdbagFetch.sh, appending progress/errors to <rep>.getData.err.
script:
"""
hostname
ulimit -a
hostname >>${rep.baseName}.getData.err
ulimit -a >>${rep.baseName}.getData.err
export https_proxy=\${http_proxy}
replicate=\$(basename "${rep}" | cut -d '.' -f1)
echo "LOG: \${replicate}"
unzip ${rep}
echo "LOG: replicate bdbag unzipped"
sh ${baseDir}/scripts/bdbagFetch.sh \${replicate}
echo "LOG: replicate bdbag fetched"
echo "LOG: \${replicate}" >>${rep.baseName}.getData.err
unzip ${rep} 2>>${rep.baseName}.getData.err
echo "LOG: replicate bdbag unzipped" >>${rep.baseName}.getData.err
sh ${baseDir}/scripts/bdbagFetch.sh \${replicate} 2>>${rep.baseName}.getData.err
echo "LOG: replicate bdbag fetched" >>${rep.baseName}.getData.err
"""
}
}
/*
 * trimData: trims adapters from the paired-end reads of one replicate using
 * trim_galore; emits gzipped *_val_{1,2}.fq.gz files.
 */
process trimData {
tag "trim-${repID}"
// Trimmed fastqs go to tempOut/trimmed; logs are published separately.
publishDir "${outDir}/tempOut/trimmed", mode: "symlink", pattern: "*_val_{1,2}.fq.gz"
// NOTE(review): the escaped \${rep} here is a shell variable, not a Groovy
// one, so this publishDir pattern is the literal glob "${rep}.trimData.*";
// also the .log/.err files are not declared in `output:` — verify these log
// files actually get published.
publishDir "${logsDir}/trimData", mode: 'symlink', pattern: "\${rep}.trimData.*"
input:
// (repID, paired fastq files) tuple from the getData trimming channel.
set repID, reads from trimming
output:
path ("*_val_{1,2}.fq.gz", type: 'file', maxDepth: '0')
// Shell: strip the leading token from repID (text before the first '_') to
// name outputs, then run trim_galore in paired mode on reads[0]/reads[1],
// capturing stdout/stderr to <rep>.trimData.log/.err.
script:
"""
rep=`echo ${repID} | cut -f2- -d '_'`;
trim_galore --gzip --max_n 1 --paired --basename \${rep} -j `nproc` ${reads[0]} ${reads[1]} 1>>\${rep}.trimData.log 2>>\${rep}.trimData.err;
"""
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment