diff --git a/.gitignore b/.gitignore
index 8b4b1eadf6253fc94cefe75b485a051ed8f3d71e..9b75a201f875574a221bd0e4bf073e5a0d0db406 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,32 +1,6 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-
-# PyInstaller
-# Created by https://www.gitignore.io/api/r,perl,macos,linux,python,windows
-# Edit at https://www.gitignore.io/?templates=r,perl,macos,linux,python,windows
+# Created by https://www.gitignore.io/api/r,perl,linux,macos,python,windows,microsoftoffice
+# Edit at https://www.gitignore.io/?templates=r,perl,linux,macos,python,windows,microsoftoffice
 
 ### Linux ###
 *~
 
@@ -71,6 +45,27 @@ Network Trash Folder
 Temporary Items
 .apdisk
 
+### MicrosoftOffice ###
+*.tmp
+
+# Word temporary
+~$*.doc*
+
+# Word Auto Backup File
+Backup of *.doc*
+
+# Excel temporary
+~$*.xls*
+
+# Excel Backup File
+*.xlk
+
+# PowerPoint temporary
+~$*.ppt*
+
+# Visio autosave temporary files
+*.~vsd*
+
 ### Perl ###
 !Build/
 .last_cover_stats
@@ -165,15 +160,6 @@ coverage.xml
 *.mo
 *.pot
 
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-
-# Flask stuff:
-instance/
-.webassets-cache
-
 # Scrapy stuff:
 .scrapy
 
@@ -183,31 +169,22 @@ docs/_build/
 # PyBuilder
 target/
 
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
 # pyenv
 .python-version
 
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
 # celery beat schedule file
 celerybeat-schedule
 
 # SageMath parsed files
 *.sage.py
 
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
 # Spyder project settings
 .spyderproject
 .spyproject
@@ -215,6 +192,11 @@ venv.bak/
 # Rope project settings
 .ropeproject
 
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
 # mkdocs documentation
 /site
 
@@ -226,9 +208,6 @@ dmypy.json
 # Pyre type checker
 .pyre/
 
-### Python Patch ###
-.venv/
-
 ### R ###
 # History files
 .Rhistory
@@ -237,6 +216,9 @@ dmypy.json
 # Session Data files
 .RData
 
+# User-specific files
+.Ruserdata
+
 # Example code in package build process
 *-Ex.R
 
@@ -257,7 +239,7 @@ vignettes/*.pdf
 .httr-oauth
 
 # knitr and R markdown default cache directories
-/*_cache/
+*_cache/
 /cache/
 
 # Temporary files created by R markdown
@@ -271,6 +253,7 @@ vignettes/*.pdf
 ### Windows ###
 # Windows thumbnail cache files
 Thumbs.db
+Thumbs.db:encryptable
 ehthumbs.db
 ehthumbs_vista.db
 
@@ -293,7 +276,7 @@ $RECYCLE.BIN/
 # Windows shortcuts
 *.lnk
 
-# End of https://www.gitignore.io/api/r,perl,macos,linux,python,windows
+# End of https://www.gitignore.io/api/r,perl,linux,macos,python,windows,microsoftoffice
 
 # nextflow analysis folders/files
 /test_data/*
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1181343f1bb3f9085eacb57fabe343921489bf98
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,16 @@
+before_script:
+  - module add python/3.6.1-2-anaconda
+  - pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1
+  - module load singularity/3.0.2
+  - ln -sfn /project/BICF/BICF_Core/shared/gudmap/cookies/deriva-cookies.txt ./test_data/
+  - ln -sfn /project/BICF/BICF_Core/shared/gudmap/test_data/File_Q-Y53P.zip ./test_data/
+
+stages:
+  - unit
+
+unit:
+  stage: unit
+  script:
+    - singularity run 'docker://bicf/gudmaprbkfilexfer:1.1' bdbag --materialize ./test_data/File_Q-Y53P.zip
+    - singularity run 'docker://bicf/gudmaprbkfilexfer:1.1' bdbag --validate full ./test_data/File_Q-Y53P/
+    - if [[ $(md5sum test_data/File_Q-Y53P/data/assets/Study/Q-Y4H0/Experiment/Q-Y4BY/Replicate/Q-Y5F8/hMARIS_SIX2+_RiboDep#1.gene.rpkm.txt | awk '{ print $1 }') != $(cat test_data/File_Q-Y53P/data/File.csv | cut -d ',' -f10 | tail -1) ]]; then exit 21; fi;
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8ff0911406a181f08e86e495c7530f87d6f43dae..2dec9e320556503056f41874817d032ad618f3af 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,5 +2,6 @@
 **User Facing**
 
 **Background**
+* Implementation of CI
 
 *Known Bugs*
diff --git a/workflow/conf/biohpc.config b/workflow/conf/biohpc.config
index 0cd366baa86dc8991645466f58773455d2d2c24d..13368ad2ec3ba4a6696dac0731de81f00bb3c479 100755
--- a/workflow/conf/biohpc.config
+++ b/workflow/conf/biohpc.config
@@ -5,10 +5,14 @@ process {
 
   // Process specific configuration
   withName:splitData {
-    container = 'docker://bicf/bdbag:1.0'
+    container = 'docker://bicf/gudmaprbkfilexfer:1.1'
   }
   withName:getData {
-    container = 'docker://bicf/bdbag:1.0'
+    container = 'docker://bicf/gudmaprbkfilexfer:1.1'
+  }
+  withName:trimData {
+    container = 'docker://bicf/trimgalore:1.1'
+    queue = '256GB,256GBv1,384GB'
   }
 }
 
diff --git a/workflow/conf/conda.env.bdbag.yml b/workflow/conf/conda.env.bdbag.yml
deleted file mode 100644
index 33361d301b3fac561fa39807e3c740583e57d28b..0000000000000000000000000000000000000000
--- a/workflow/conf/conda.env.bdbag.yml
+++ /dev/null
@@ -1,5 +0,0 @@
-name: bdbag
-dependencies:
-  - pandas=0.23.3=py36_0
-  - pip:
-    - bdbag==1.5.5
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 4f1fd5f249b3bf7e25388a7389675fd100c9b18c..035faa8ee8b3f2b95f298e4edc8f074a0b695587 100755
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -13,12 +13,15 @@ bdbag = Channel
   .ifEmpty { exit 1, "bdbag zip file not found: ${params.bdbag}" }
 
 outDir = params.outDir
+logsDir = "${outDir}/Logs"
 
 /*
  * splitData: split bdbag files by replicate so fetch can occure in parallel, and rename files to replicate rid
  */
 process splitData {
   tag "${bdbag.baseName}"
+  executor 'local'
+  publishDir "${logsDir}/splitData", mode: 'symlink', pattern: "${bdbag.baseName}.splitData.err"
 
   input:
     file bdbag
@@ -29,23 +32,24 @@ process splitData {
     file("${bdbag.baseName}/data/File.csv") into fileMeta
     file("${bdbag.baseName}/data/Experiment Settings.csv") into experimentSettingsMeta
     file("${bdbag.baseName}/data/Experiment.csv") into experimentMeta
+    file ("${bdbag.baseName}.splitData.err")
 
   script:
     """
-    hostname
-    ulimit -a
-    ln -sf `readlink -e cookies.txt` ~/.bdbag/deriva-cookies.txt
-    echo "LOG: deriva cookie linked"
-    study=`echo "${bdbag}" | cut -d '.' -f1`
-    echo "LOG: \${study}"
-    unzip ${bdbag}
-    echo "LOG: bdgag unzipped"
-    python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study}
-    echo "LOG: fetch file filtered for only .fastq.gz"
-    python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study}
-    echo "LOG: fetch file split by replicates"
-    sh ${baseDir}/scripts/splitBag.sh \${study}
-    echo "LOG: bag recreated with replicate split fetch file"
+    hostname >> ${bdbag.baseName}.splitData.err
+    ulimit -a >> ${bdbag.baseName}.splitData.err
+    ln -sf `readlink -e cookies.txt` ~/.bdbag/deriva-cookies.txt 2>>${bdbag.baseName}.splitData.err
+    echo "LOG: deriva cookie linked" >> ${bdbag.baseName}.splitData.err
+    study=`echo "${bdbag}" | cut -d '.' -f1` 2>>${bdbag.baseName}.splitData.err
+    echo "LOG: \${study}" >> ${bdbag.baseName}.splitData.err
+    unzip ${bdbag} 2>>${bdbag.baseName}.splitData.err
+    echo "LOG: bdgag unzipped" >> ${bdbag.baseName}.splitData.err
+    python3 ${baseDir}/scripts/modifyFetch.py --fetchFile \${study} 2>>${bdbag.baseName}.splitData.err
+    echo "LOG: fetch file filtered for only .fastq.gz" >> ${bdbag.baseName}.splitData.err
+    python3 ${baseDir}/scripts/splitFetch.py --fetchFile \${study} 2>>${bdbag.baseName}.splitData.err
+    echo "LOG: fetch file split by replicates" >> ${bdbag.baseName}.splitData.err
+    sh ${baseDir}/scripts/splitBag.sh \${study} 2>>${bdbag.baseName}.splitData.err
+    echo "LOG: bag recreated with replicate split fetch file" >> ${bdbag.baseName}.splitData.err
     """
 }
 
@@ -54,24 +58,50 @@ process splitData {
  */
 process getData {
   tag "${rep.baseName}"
-  publishDir "${outDir}/tempOut/fastqs", mode: "symlink"
+  publishDir "${logsDir}/getData", mode: 'symlink', pattern: "${rep.baseName}.getData.err"
 
   input:
     each rep from bdbagSplit
 
   output:
-    path ("*.R*.fastq.gz", type: 'file', maxDepth: '0') into fastq
+    set val ("${rep.baseName}"), file ("*.R{1,2}.fastq.gz") into trimming
+    // the log must be a declared output, otherwise the publishDir pattern above has nothing to publish
+    file ("${rep.baseName}.getData.err")
 
   script:
     """
-    hostname
-    ulimit -a
+    hostname >>${rep.baseName}.getData.err
+    ulimit -a >>${rep.baseName}.getData.err
     export https_proxy=\${http_proxy}
     replicate=\$(basename "${rep}" | cut -d '.' -f1)
-    echo "LOG: \${replicate}"
-    unzip ${rep}
-    echo "LOG: replicate bdbag unzipped"
-    sh ${baseDir}/scripts/bdbagFetch.sh \${replicate}
-    echo "LOG: replicate bdbag fetched"
+    echo "LOG: \${replicate}" >>${rep.baseName}.getData.err
+    unzip ${rep} 2>>${rep.baseName}.getData.err
+    echo "LOG: replicate bdbag unzipped" >>${rep.baseName}.getData.err
+    sh ${baseDir}/scripts/bdbagFetch.sh \${replicate} 2>>${rep.baseName}.getData.err
+    echo "LOG: replicate bdbag fetched" >>${rep.baseName}.getData.err
     """
-  }
+}
+
+/*
+ * trimData: trims any adapter or non-host sequences from the data
+*/
+process trimData {
+  tag "trim-${repID}"
+  publishDir "${outDir}/tempOut/trimmed", mode: "symlink", pattern: "*_val_{1,2}.fq.gz"
+  // log names are built from the shell variable \${rep}, which only exists inside the script, so match by suffix
+  publishDir "${logsDir}/trimData", mode: 'symlink', pattern: "*.trimData.{log,err}"
+
+  input:
+    set repID, reads from trimming
+
+  output:
+    path ("*_val_{1,2}.fq.gz", type: 'file', maxDepth: '0')
+    // the logs must be declared outputs, otherwise the publishDir pattern above has nothing to publish
+    file ("*.trimData.{log,err}")
+
+  script:
+    """
+    rep=`echo ${repID} | cut -f2- -d '_'`;
+    trim_galore --gzip --max_n 1 --paired --basename \${rep} -j `nproc` ${reads[0]} ${reads[1]} 1>>\${rep}.trimData.log 2>>\${rep}.trimData.err;
+    """
+}