Skip to content
Snippets Groups Projects
Commit 5677b2e2 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Major cleanup update

parent 4da781e7
2 merge requests: !73 Develop, !71 De containerize
Pipeline #7378 passed with stages
in 9 minutes and 5 seconds
......@@ -3,7 +3,7 @@ before_script:
- module load python/3.6.1-2-anaconda
- pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1
- module load singularity/3.0.2
- module load nextflow/19.09.0
- module load nextflow/20.01.0
- mkdir -p test_data/hu.v2s1r500
- mkdir -p test_data/hu.v3s1r500
- mkdir -p test_data/mu.v3s1r500
......@@ -30,7 +30,7 @@ astrocyte_cli:
artifacts:
expire_in: 2 days
retry:
max: 1
max: 0
when:
- always
......@@ -52,7 +52,7 @@ astrocyte_cli:
- test/outs/web_summary.html
expire_in: 2 days
retry:
max: 1
max: 0
when:
- always
......@@ -74,7 +74,7 @@ astrocyte_cli:
- test/outs/web_summary.html
expire_in: 2 days
retry:
max: 1
max: 0
when:
- always
......@@ -96,7 +96,7 @@ astrocyte_cli:
- test/outs/web_summary.html
expire_in: 2 days
retry:
max: 1
max: 0
when:
- always
......@@ -120,7 +120,7 @@ astrocyte_cli:
- test/outs/web_summary.html
expire_in: 2 days
retry:
max: 1
max: 0
when:
- always
......@@ -144,7 +144,7 @@ GRCh38-3.0.0:
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days
retry:
max: 1
max: 0
when:
- always
......@@ -168,7 +168,7 @@ mm10-3.0.0:
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days
retry:
max: 1
max: 0
when:
- always
......@@ -191,6 +191,6 @@ mm10-3.0.0:
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days
retry:
max: 1
max: 0
when:
- always
......@@ -111,8 +111,8 @@ To Run:
```
* Design example:
| Sample | fastq_R1 | fastq_R2 |
|---------|------------------------------------|------------------------------------|
| Sample | fastq_R1 | fastq_R2 |
|--------|----------|----------|
| sample1 | pbmc_1k_v2_S1_L001_R1_001.fastq.gz | pbmc_1k_v2_S1_L001_R2_001.fastq.gz |
| sample2 | pbmc_1k_v2_S2_L001_R1_001.fastq.gz | pbmc_1k_v2_S2_L001_R2_001.fastq.gz |
| sample2 | pbmc_1k_v2_S2_L002_R1_001.fastq.gz | pbmc_1k_v2_S2_L002_R2_001.fastq.gz |
......
### References
1. **python**:
* Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com))
1. **Nextflow**:
* Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
2. **cellranger**
* Cellranger count [https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/count)
3. **MultiQc**:
* Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
3. **python**:
* Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com))
4. **Nextflow**:
* Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
4. **MultiQc**:
* Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
workDir = 's3://'
workDir = 's3://gudmap.rbk/work'
aws.client.storageEncryption = 'AES256'
aws {
region = ''
......@@ -9,7 +9,6 @@ aws {
process {
executor = 'awsbatch'
queue = 'default-'
cpus = 1
memory = '1 GB'
......
process {
queue = 'highpriority-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc'
}
process {
queue = 'default-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc'
}
......@@ -8,6 +8,15 @@ main.nf
*
*/
// ######## #### ###### ########
// ## ## ## ## ## ##
// ## ## ## ## ##
// ######## ## ## ######
// ## ## ## ## ##
// ## ## ## ## ## ##
// ######## #### ###### ##
// Define Input variables
params.name = "run"
params.fastq = "test_data/mu.v3s1r500/*.fastq.gz"
......@@ -55,6 +64,7 @@ if (params.astrocyte) {
params.genomeLocationFull = params.genomeLocation+params.genome
// Define variables from input
pipelineVersion = "2.x.x-indev"
name = params.name
designLocation = Channel
.fromPath(params.designFile)
......@@ -81,6 +91,7 @@ references = "${baseDir}/../docs/references.md"
* trackStart: track start of pipeline
*/
params.ci = false
params.dev = false
process trackStart {
script:
"""
......@@ -91,11 +102,13 @@ process trackStart {
curl -H 'Content-Type: application/json' -X PUT -d '{ \
"sessionId": "${workflow.sessionId}", \
"pipeline": "cellranger_count", \
"pipelineVersion": "${pipelineVersion}", \
"start": "${workflow.start}", \
"astrocyte": ${params.astrocyte}, \
"status": "started", \
"nextflowVersion": "${workflow.nextflow.version}",
"ci": ${params.ci}}' \
"ci": ${params.ci},
"dev": ${params.dev}}' \
"https://xku43pcwnf.execute-api.us-east-1.amazonaws.com/ProdDeploy/pipeline-tracking"
"""
}
......@@ -105,7 +118,6 @@ process trackStart {
*/
process checkDesignFile {
tag "${name}"
container = 'bicf/python3:2.0.0'
input:
file designLocation
......@@ -164,10 +176,9 @@ chemistryParam310 = chemistryParam
* count211: run cellranger count version 2.1.1
*/
process count211 {
queue '128GB,256GB,256GBv1,384GB'
tag "${sample}"
publishDir "${outDir}/${task.process}", mode: 'copy'
container 'bicf/cellranger2.1.1:2.0.0'
queue '128GB,256GB,256GBv1,384GB'
input:
set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples211
......@@ -213,7 +224,6 @@ process count220 {
queue '128GB,256GB,256GBv1,384GB'
tag "${sample}"
publishDir "${outDir}/${task.process}", mode: 'copy'
container 'bicf/cellranger2.2.0:2.0.0'
input:
set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples220
......@@ -256,10 +266,9 @@ process count220 {
* count302: run cellranger count version 3.0.2
*/
process count302 {
queue '128GB,256GB,256GBv1,384GB'
tag "${sample}"
publishDir "${outDir}/${task.process}", mode: 'copy'
container 'bicf/cellranger3.0.2:2.0.0'
queue '128GB,256GB,256GBv1,384GB'
input:
set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples302
......@@ -302,10 +311,9 @@ process count302 {
* count310: run cellranger count version 3.1.0
*/
process count310 {
queue '128GB,256GB,256GBv1,384GB'
tag "${sample}"
publishDir "${outDir}/${task.process}", mode: 'copy'
container 'bicf/cellranger3.1.0:2.0.0'
queue '128GB,256GB,256GBv1,384GB'
input:
set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples310
......@@ -345,13 +353,13 @@ process count310 {
}
/*
* versions: collect too versions into a single yml
* versions: collect all versions into a single yml
*/
process versions {
tag "${name}"
container 'bicf/python3:2.0.0'
input:
file versions_pythonScript
output:
file("*.yaml") into yamlPaths
......@@ -359,10 +367,12 @@ process versions {
script:
"""
hostname
ulimit -u 16384
ulimit -a
echo ${workflow.nextflow.version} > version_nextflow.txt
echo ${version} > version_cellranger.txt
multiqc --version | tr -d 'multiqc, version ' > version_multiqc.txt
echo "${workflow.nextflow.version}" > version_nextflow.txt
echo "${pipelineVersion}" > version_pipeline.txt
echo "${version}" > version_cellranger.txt
bash versions_python.sh > version_python.txt
python3 "${baseDir}/scripts/generate_versions.py" -f version_*.txt -o versions
python3 "${baseDir}/scripts/generate_references.py" -r "${references}" -o references
"""
......
profiles {
standard {
includeConfig 'configs/biohpc.config'
}
biohpc {
includeConfig 'conf/biohpc.config'
includeConfig 'configs/biohpc.config'
}
local {
includeConfig 'conf/local.config'
includeConfig 'configs/local.config'
}
cluster {
includeConfig 'conf/cluster.config'
includeConfig 'configs/cluster.config'
}
aws {
includeConfig 'conf/aws.config'
includeConfig 'configs/aws.config'
}
ondemand {
includeConfig 'configs/ondemand.config'
}
spot {
includeConfig 'configs/spot.config'
}
}
......@@ -62,6 +71,6 @@ manifest {
homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count'
description = 'This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis.'
mainScript = 'main.nf'
version = 'publish_2.0.4'
version = '2.x.x-indev'
nextflowVersion = '>=0.31.0'
}
......@@ -24,9 +24,10 @@ logger.propagate = False
logger.setLevel(logging.INFO)
SOFTWARE_REGEX = {
'Pipeline': ['version_pipeline.txt', r"(\S+)"],
'Nextflow': ['version_nextflow.txt', r"(\S+)"],
'Cellranger Count': ['version_cellranger.txt', r"(\S+)"],
'MultiQC': ['version_multiqc.txt', r"(\S+)"],
'cellranger count': ['version_cellranger.txt', r"(\S+)"],
'python': ['version_python.txt', r"(\S+)"],
}
......@@ -72,9 +73,10 @@ def main():
out_filename = output + '_mqc.yaml'
results = OrderedDict()
results['Pipeline'] = '<span style="color:#999999;\">N/A</span>'
results['Nextflow'] = '<span style="color:#999999;\">N/A</span>'
results['Cellranger Count'] = '<span style="color:#999999;\">N/A</span>'
results['MultiQC'] = '<span style="color:#999999;\">N/A</span>'
results['cellranger count'] = '<span style="color:#999999;\">N/A</span>'
results['python'] = '<span style="color:#999999;\">N/A</span>'
# Check for version files:
check_files(files)
......@@ -106,4 +108,4 @@ def main():
if __name__ == '__main__':
main()
\ No newline at end of file
main()
#!/bin/bash
#versions_python.sh
#*
#* --------------------------------------------------------------------------
#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE)
#* --------------------------------------------------------------------------
#*
# Emit only the version text that follows the "Python " prefix of the banner.
# stderr is merged into stdout so the banner is captured whichever stream
# python writes it to; sed -n prints nothing except lines that start with
# "Python ", with that prefix removed.
python --version 2>&1 | sed -n 's/^Python //p'
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment