Skip to content
Snippets Groups Projects
Commit 0f50ff0a authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch 'develop' into 'master'

Develop

See merge request !63
parents caecad18 e634ce8e
Branches
Tags
2 merge requests!64Master,!63Develop
Pipeline #6565 passed with stages
in 47 minutes and 46 seconds
Showing with 366 additions and 269 deletions
......@@ -2,12 +2,15 @@ before_script:
- module load astrocyte
- module load python/3.6.1-2-anaconda
- pip install --user pytest-pythonpath==0.7.1 pytest-cov==2.5.1
- module load nextflow/0.31.1_Ignite
- mkdir test_data/hu.v3s1r500
- mkdir test_data/mu.v3s1r500
- mkdir test_data/hu.v3s2r10k
- mkdir test_data/mu.v3s2r10k
- mkdir test_data/hu.v2s2r10k
- module load singularity/3.0.2
- module load nextflow/19.09.0
- mkdir -p test_data/hu.v2s1r500
- mkdir -p test_data/hu.v3s1r500
- mkdir -p test_data/mu.v3s1r500
- mkdir -p test_data/hu.v3s2r10k
- mkdir -p test_data/mu.v3s2r10k
- mkdir -p test_data/hu.v2s2r10k
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s1r500/* test_data/hu.v2s1r500/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s1r500/* test_data/hu.v3s1r500/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/mu.v3s1r500/* test_data/mu.v3s1r500/
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/* test_data/hu.v3s2r10k/
......@@ -15,14 +18,15 @@ before_script:
- ln -sfn /project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v2s2r10k/* test_data/hu.v2s2r10k/
stages:
- astrocyte
- simple
- detailed
- astrocyte_test
- container_test
- reference_test
- multiSample_test
astrocyte_check:
stage: astrocyte
astrocyte_cli:
stage: astrocyte_test
script:
- astrocyte_cli check ../cellranger_count
- astrocyte_cli check .
artifacts:
expire_in: 2 days
retry:
......@@ -30,33 +34,30 @@ astrocyte_check:
when:
- always
simple_1:
stage: simple
2.1.1_test:
stage: container_test
only:
- branches
- tags
except:
refs:
- develop
- master
- tags
script:
- nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v3s1r500/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v3s1r500/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'three' --version '3.1.0'
- pytest -m count310
- singularity run 'docker://bicf/cellranger2.1.1:2.0.0' cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- .nextflow.log
- workflow/output/count310/sample1/outs/web_summary.html
- workflow/output/multiqc/run/multiqc_report.html
- test/outs/web_summary.html
expire_in: 2 days
retry:
max: 1
when:
- always
simple_2:
stage: simple
2.2.2_test:
stage: container_test
only:
- branches
except:
......@@ -65,63 +66,81 @@ simple_2:
- master
- tags
script:
- nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/mu.v3s1r500/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/mu.v3s1r500/design.csv" --genome 'mm10-3.0.0' --kitVersion 'three' --version '3.1.0'
- pytest -m count310
- singularity run 'docker://bicf/cellranger2.2.0:2.0.0' cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- .nextflow.log
- workflow/output/count310/sample1/outs/web_summary.html
- workflow/output/multiqc/run/multiqc_report.html
- test/outs/web_summary.html
expire_in: 2 days
retry:
max: 1
when:
- always
detailed_1:
stage: detailed
3.0.2_test:
stage: container_test
only:
- develop
- master
- branches
except:
refs:
- develop
- master
- tags
script:
- nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v3s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'auto' --version '3.1.0'
- pytest -m count310
- singularity run 'docker://bicf/cellranger3.0.2:2.0.0' cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- .nextflow.log
- workflow/output/count310/sample1/outs/web_summary.html
- workflow/output/multiqc/run/multiqc_report.html
- test/outs/web_summary.html
expire_in: 2 days
retry:
max: 1
when:
- always
detailed_2:
stage: detailed
3.1.0_test:
stage: container_test
only:
- develop
- master
- branches
except:
refs:
refs:
- develop
- master
- tags
script:
- nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v3s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'auto' --version '3.0.2'
- pytest -m count302
- singularity run 'docker://bicf/cellranger3.1.0:2.0.0' cellranger count --id=test --transcriptome=/project/apps_database/cellranger/refdata-cellranger-GRCh38-3.0.0 --fastqs=./test_data/hu.v2s1r500 --sample=pbmc_1k_v2 --chemistry=SC3Pv2
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- develop
- master
- test/outs/web_summary.html
expire_in: 2 days
retry:
max: 1
when:
- always
GRCh38-3.0.0:
stage: reference_test
only:
refs:
- develop
- master
except:
- tags
script:
- nextflow -q run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s1r500/*.fastq.gz" --designFile "test_data/hu.v3s1r500/design.csv" --genome 'GRCh38-3.0.0' --kitVersion '3GEXv3' --version '3.1.0'
- pytest -m count310
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- .nextflow.log
- workflow/output/count302/sample1/outs/web_summary.html
- workflow/output/count310/sample1/outs/web_summary.html
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days
retry:
......@@ -129,23 +148,23 @@ detailed_2:
when:
- always
detailed_3:
stage: detailed
mm10-3.0.0:
stage: reference_test
only:
- develop
- master
except:
refs:
- tags
- develop
- master
except:
- tags
script:
- nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/mu.v3s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/mu.v3s2r10k/design.csv" --genome 'mm10-3.0.0' --kitVersion 'three' --version '3.0.1'
- pytest -m count301
- nextflow -q run workflow/main.nf -profile biohpc,cluster --fastq "test_data/mu.v3s1r500/*.fastq.gz" --designFile "test_data/mu.v3s1r500/design.csv" --genome 'mm10-3.0.0' --kitVersion '3GEXv3' --version '3.1.0'
- pytest -m count310
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- .nextflow.log
- workflow/output/count301/sample1/outs/web_summary.html
- workflow/output/count310/sample1/outs/web_summary.html
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days
retry:
......@@ -153,23 +172,22 @@ detailed_3:
when:
- always
detailed_4:
stage: detailed
2Samples:
stage: multiSample_test
only:
- develop
- master
except:
refs:
refs:
- master
- tags
script:
- nextflow run workflow/main.nf --fastq "$CI_PROJECT_DIR/test_data/hu.v2s2r10k/*.fastq.gz" --designFile "$CI_PROJECT_DIR/test_data/hu.v2s2r10k/design.csv" --genome 'GRCh38-1.2.0' --kitVersion 'two' --version '2.1.1'
- pytest -m count211
- nextflow -q run workflow/main.nf -profile biohpc,cluster --fastq "test_data/hu.v3s2r10k/*.fastq.gz" --designFile "test_data/hu.v3s2r10k/design.csv" --genome 'GRCh38-3.0.0' --kitVersion 'auto' --version '3.1.0'
- pytest -m count310
artifacts:
name: "$CI_JOB_NAME"
when: always
paths:
- .nextflow.log
- workflow/output/count211/sample1/outs/web_summary.html
- workflow/output/count310/sample1/outs/web_summary.html
- workflow/output/count310/sample2/outs/web_summary.html
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days
retry:
......
# Summary
# Steps to reproduce
# Observed bug behavior
# Expected behavior
# Relevant logs and/or screenshots
# Potential fixes
/label ~bug ~"To Do"
/cc @ghenry
Please fill in the appropriate checklist below (delete those which are not relevant).
These are the most common things requested on pull requests.
## PR checklist
- [ ] This comment contains a description of changes (with reason)
- [ ] If you've fixed a bug or added code that should be tested, add tests!
- [ ] Documentation in `docs` is updated
- [ ] `CHANGELOG.md` is updated
- [ ] `README.md` is updated
- [ ] `LICENSE.md` is updated with new contributors
* [ ] **Close issue**\
Closes #
/cc @ghenry
/assign @ghenry
# v2.0.0 (in development)
**User Facing**
* Check Design File for spaces in name and file contents
* Attempt to prevent threading error (which appears to only happen on 256GBv1 nodes)
* Add option for 5' GEX chemistry
* Remove cellranger 3.0.1 as an option
* Add cellranger 2.2.0 as an option
**Background**
* Add Nextflow Tower integration into CI (GHH's profile)
* Add new layered config folders, including prepare for awsifying
* Update param to new standard
* Use docker containers
* Update CI
*Known Bugs*
* Vizapp does not yet work for Astrocyte
* Running in CLI: the --fastq path to the file(s) needs to be in quotes
# v1.2.0
**User Facing**
* Add Cellranger Version 3.1.0
......
|*master*|*develop*|
|:-:|:-:|
|[![Build Status](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/badges/master/build.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/commits/master)|[![Build Status](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/badges/develop/build.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/commits/develop)|
|[![pipeline status](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/badges/master/pipeline.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/commits/master)|[![pipeline status](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/badges/develop/pipeline.svg)](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/commits/develop)|
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2652622.svg)](https://doi.org/10.5281/zenodo.2652622)
......@@ -77,16 +77,17 @@ To Run:
* --version (cellranger version) 2.1.1 can only read --kitVersion of two (2)
* options:
* *'auto'*
* *'three'*
* *'two'*
* eg: **--kitVersion 'three'**
* *'3GEXv3'*
* *'3GEXv2'*
* *'5GEX'*
* eg: **--kitVersion '3GEXv3'**
* **--version**
* cellranger version
* --version (cellranger version) 2.1.1 can only read --kitVersion of two (2)
* --version (cellranger version) 2.1.1 and 2.2.0 can only read --kitVersion of 3GEXv2
* options:
* *'3.1.0'*
* *'3.0.2'*
* *'3.0.1'*
* *'2.2.0'*
* *'2.1.1'*
* eg: **--version '3.1.0'**
* **--outDir**
......@@ -94,7 +95,7 @@ To Run:
* eg: **--outDir 'test'**
* FULL EXAMPLE:
```
nextflow run workflow/main.nf --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion 'three' --version '3.1.0' --outDir 'test'
nextflow run workflow/main.nf --fastq '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/*.fastq.gz' --designFile '/project/shared/bicf_workflow_ref/workflow_testdata/cellranger/cellranger_count/hu.v3s2r10k/design.csv' --genome 'GRCh38-3.0.0' --kitVersion '3GEXv3' --version '3.1.0' --outDir 'test'
```
* Design example:
......
......@@ -136,8 +136,9 @@ workflow_parameters:
default: 'auto'
choices:
- ['auto', 'Auto Detect']
- ['three', '3']
- ['two', '2']
- ['3GEXv3', '3prime GEX v3 (3prime Gene Expression)']
- ['3GEXv2', '3prime GEX v2 (3prime Gene Expression)']
- ['5GEX', '5prime GEX Auto (5prime Gene Expression)']
required: true
description: |
10x single cell gene expression chemistry version (only used in cellranger version 3.x).
......
rm *.out
rm pipeline_trace*.txt*
rm report*.html*
rm timeline*.html*
rm .nextflow*.log*
rm -r .nextflow/
rm -r work/
......@@ -25,7 +25,7 @@ To Run:
* column 1 = "Sample"
* column 2 = "fastq_R1"
* column 3 = "fastq_R2"
* can have repeated "Sample" if there are multiole fastq R1/R2 pairs for the samples
* can have repeated "Sample" if there are multiple fastq R1/R2 pairs for the samples
* eg: can be downloaded [HERE](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/master/docs/design.csv)
* **genome**
* Reference species and genome used for alignment and subsequent analysis.
......@@ -41,22 +41,22 @@ To Run:
* *'hg19_and_mm10-1.2.0'* = Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84
* *'ercc92-1.2.0'* = ERCC.92 Spike-In
* **expect cells**
* Expected number of recovered cells.
* guides cellranger in its cutoff for background/low quality cells
* as a guide it doesn't have to be exact
* 0-10000
* if --expectCells is used then --forceCells is not necessary
* only used if force cells is not entered or set to 0
* **force cells**
* Force pipeline to use this number of cells, bypassing the cell detection algorithm. Use this if the number of cells estimated by Cell Ranger is not consistent with the barcode rank plot. A value of 0 ignores this option. Any value other than 0 overrides expect-cells.
* Expected number of recovered cells.
* guides cellranger in its cutoff for background/low quality cells
* as a guide it doesn't have to be exact
* 0-10000
* if force cells is used then expected cells is not necessary and is ignored
* if --expectCells is used then --forceCells is not necessary
* only used if force cells is not entered or set to 0
* **force cells**
* Force pipeline to use this number of cells, bypassing the cell detection algorithm. Use this if the number of cells estimated by Cell Ranger is not consistent with the barcode rank plot. A value of 0 ignores this option. Any value other than 0 overrides expect-cells.
* 0-10000
* if force cells is used then expected cells is not necessary and is ignored
* **chemistry version**
* 10x single cell gene expression chemistry version (only used in cellranger version 3.x).
* setting to auto will attempt to autodetect from the detected cycle strategy in the fastq's
* chemistry version is only used if cellranger version is > 2.x
* cellranger version 2.1.1 can only read chemistry version less than or equal to two (2)
* **cellranger version**
* **cellranger version**
* 10x cellranger version.
* cellranger version 2.1.1 can only read chemistry version less than or equal to two (2)
......
// Profile definitions: the 'standard' profile pulls in the BioHPC settings file.
profiles {
standard {
includeConfig 'workflow/conf/biohpc.config'
}
}
// Work directory on S3.
// NOTE(review): the value is only the 's3://' scheme — looks like a placeholder that must be completed before use.
workDir = 's3://'
// Storage encryption setting handed to the AWS client.
aws.client.storageEncryption = 'AES256'
aws {
// NOTE(review): region is an empty string — presumably to be filled in per deployment.
region = ''
batch {
// Path to the AWS CLI executable used for AWS Batch.
cliPath = '/home/ec2-user/miniconda/bin/aws'
}
}
// Defaults applied to every process under this configuration.
process {
executor = 'awsbatch'
// NOTE(review): queue name ends in '-' — appears to be an unfinished placeholder; confirm.
queue = 'default-'
cpus = 1
memory = '10 GB'
}
process {
executor = 'slurm'
queue='super'
withLabel: checkDesignFile {
module = ['python/3.6.1-2-anaconda']
executor = 'local'
}
withLabel: count211 {
module = ['cellranger/2.1.1']
queue = '128GB,256GB,256GBv1,384GB'
}
withLabel: count301 {
module = ['cellranger/3.0.1']
queue = '128GB,256GB,256GBv1,384GB'
}
withLabel: count302 {
module = ['cellranger/3.0.2']
queue = '128GB,256GB,256GBv1,384GB'
}
withLabel: count310 {
module = ['cellranger/3.1.0']
queue = '128GB,256GB,256GBv1,384GB'
}
withLabel: versions {
module = ['python/3.6.1-2-anaconda','pandoc/2.7','multiqc/1.7']
executor = 'local'
}
withLabel: multiqc {
module = ['multiqc/1.7']
executor = 'local'
}
}
params {
// Reference file paths on BioHPC
genomes {
......@@ -71,30 +37,28 @@ params {
'auto' {
param = 'auto'
}
'one' {
'3GEXv1' {
param = 'SC3Pv1'
}
'two' {
'3GEXv2' {
param = 'SC3Pv2'
}
'three' {
'3GEXv3' {
param = 'SC3Pv3'
}
'5GEX' {
param = 'fiveprime'
}
}
}
trace {
singularity {
enabled = true
file = 'pipeline_trace.txt'
fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
cacheDir = '/project/BICF/BICF_Core/shared/gudmap/singularity_cache/'
}
timeline {
enabled = true
file = 'timeline.html'
}
report {
enabled = true
file = 'report.html'
env {
http_proxy = 'http://proxy.swmed.edu:3128'
https_proxy = 'http://proxy.swmed.edu:3128'
all_proxy = 'http://proxy.swmed.edu:3128'
}
// Cluster execution settings: processes are submitted through SLURM unless a
// label-specific selector below overrides the executor or queue.
process {
executor = 'slurm'
// Default partition for processes that have no label-specific queue.
queue = '32GB'
// Extra native option passed to the scheduler on submission.
// NOTE(review): '--hold' presumably submits jobs in a held state — confirm this is intended.
clusterOptions = '--hold'
// Design-file check runs on the local executor instead of being submitted.
withLabel: checkDesignFile {
executor = 'local'
}
// All cellranger count variants share the same list of larger partitions.
withLabel: count211 {
queue = '128GB,256GB,256GBv1,384GB'
}
withLabel: count220 {
queue = '128GB,256GB,256GBv1,384GB'
}
withLabel: count302 {
queue = '128GB,256GB,256GBv1,384GB'
}
withLabel: count310 {
queue = '128GB,256GB,256GBv1,384GB'
}
// Version collection and MultiQC also run on the local executor.
withLabel: versions {
executor = 'local'
}
withLabel: multiqc {
executor = 'local'
}
}
// Local configuration: run every process with the local executor.
process {
executor = 'local'
}
......@@ -13,30 +13,36 @@ params.name = "run"
params.fastq = "${baseDir}/../test_data/*.fastq.gz"
params.designFile = "${baseDir}/../test_data/design.csv"
params.genome = 'GRCh38-3.0.0'
params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-'
params.expectCells = 10000
params.forceCells = 0
params.kitVersion = 'three'
params.kitVersion = '3GEXv3'
params.version = '3.1.0'
params.astrocyte = false
params.outDir = "${baseDir}/output"
params.multiqcConf = "${baseDir}/conf/multiqc_config.yaml"
params.references = "${baseDir}/../docs/references.md"
if (params.kitVersion == "three" && params.version == '2.1.1') {
// Variable error test
if (params.kitVersion == "3GEXv3" && params.version == '2.1.1') {
  // Abort early on an unsupported kit/version combination; the message names
  // the option value the user must pass now that kit versions are '3GEXv2'/'3GEXv3'.
  print("Cellranger Version 2.1.1 requires kitVersion 3GEXv2")
  System.exit(32)
}
if (params.kitVersion == "3GEXv3" && params.version == '2.2.0') {
  // Abort early on an unsupported kit/version combination; the message names
  // the option value the user must pass now that kit versions are '3GEXv2'/'3GEXv3'.
  print("Cellranger Version 2.2.0 requires kitVersion 3GEXv2")
  System.exit(32)
}
// Assign variables if astrocyte
// Define variables if astrocyte (or from config)
if (params.astrocyte) {
print("Running under astrocyte")
params.genomeLocation = '/project/apps_database/cellranger/refdata-cellranger-'
if (params.kitVersion == "one") {
if (params.kitVersion == "3GEXv1") {
params.chemistryParam ='SC3Pv1'
} else if (params.kitVersion == "two") {
} else if (params.kitVersion == "3GEXv2") {
params.chemistryParam ='SC3Pv2'
} else if (params.kitVersion == "three") {
} else if (params.kitVersion == "3GEXv3") {
params.chemistryParam ='SC3Pv3'
} else if (params.kitVersion == "5GEX") {
params.chemistryParam ='fiveprime'
} else {
params.chemistryParam = 'auto'
}
......@@ -48,7 +54,7 @@ if (params.astrocyte) {
}
params.genomeLocationFull = params.genomeLocation+params.genome
// Define regular variables
// Define variables from input
name = params.name
designLocation = Channel
.fromPath(params.designFile)
......@@ -66,15 +72,17 @@ forceCells = params.forceCells
chemistryParam = params.chemistryParam
version = params.version
outDir = params.outDir
multiqcConf = params.multiqcConf
references = params.references
// Define constant variables
multiqcConf = "${baseDir}/conf/multiqc_config.yaml"
references = "${baseDir}/../docs/references.md"
/*
* checkDesignFile: check design file for errors
*/
process checkDesignFile {
tag "${name}"
publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'
module 'python/3.6.1-2-anaconda'
container 'bicf/python3:2.0.0'
input:
file designLocation
......@@ -87,9 +95,12 @@ process checkDesignFile {
"""
hostname
ulimit -a
python3 ${baseDir}/scripts/check_design.py -d ${designLocation} -f ${fastqList}
noSpaceDesign=\$(echo "${designLocation}" | tr -d ' ')
if [[ "\${noSpaceDesign}" != "${designLocation}" ]]; then
mv "${designLocation}" "\${noSpaceDesign}"
fi
python3 ${baseDir}/scripts/check_design.py -d \${noSpaceDesign} -f ${fastqList}
"""
}
......@@ -100,45 +111,47 @@ samples = designPaths
.groupTuple()
//.subscribe { println it }
// Duplicate variables
samples.into {
samples211
samples301
samples220
samples302
samples310
}
refLocation.into {
refLocation211
refLocation301
refLocation220
refLocation302
refLocation310
}
expectCells211 = expectCells
expectCells301 = expectCells
expectCells220 = expectCells
expectCells302 = expectCells
expectCells310 = expectCells
forceCells211 = forceCells
forceCells301 = forceCells
forceCells220 = forceCells
forceCells302 = forceCells
forceCells310 = forceCells
chemistryParam301 = chemistryParam
chemistryParam211 = chemistryParam
chemistryParam220 = chemistryParam
chemistryParam302 = chemistryParam
chemistryParam310 = chemistryParam
/*
* count211: run cellranger count version 2.1.1
*/
process count211 {
queue '128GB,256GB,256GBv1,384GB'
tag "${sample}"
publishDir "${outDir}/${task.process}", mode: 'copy'
module 'cellranger/2.1.1'
container 'bicf/cellranger2.1.1:2.0.0'
input:
set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples211
file ref from refLocation211.first()
expectCells211
forceCells211
chemistryParam211
output:
file("**/outs/**") into outPaths211
......@@ -153,7 +166,7 @@ process count211 {
hostname
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells211}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells211} --chemistry=${chemistryParam211}
sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
......@@ -162,42 +175,42 @@ process count211 {
hostname
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells211}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells211} --chemistry=${chemistryParam211}
sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
}
process count301 {
/*
* count220: run cellranger count version 2.2.0
*/
process count220 {
queue '128GB,256GB,256GBv1,384GB'
tag "${sample}"
publishDir "${outDir}/${task.process}", mode: 'copy'
module 'cellranger/3.0.1'
container 'bicf/cellranger2.2.0:2.0.0'
input:
set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples301
file ref from refLocation301.first()
expectCells301
forceCells301
chemistryParam301
set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples220
file ref from refLocation220.first()
expectCells220
forceCells220
chemistryParam220
output:
file("**/outs/**") into outPaths301
file("*_metrics_summary.tsv") into metricsSummary301
file("**/outs/**") into outPaths220
file("*_metrics_summary.tsv") into metricsSummary220
when:
version == '3.0.1'
version == '2.2.0'
script:
if (forceCells301 == 0) {
if (forceCells220 == 0) {
"""
hostname
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells301} --chemistry=${chemistryParam301}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells220} --chemistry=${chemistryParam220}
sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
......@@ -206,20 +219,20 @@ process count301 {
hostname
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells301} --chemistry=${chemistryParam301}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells220} --chemistry=${chemistryParam220}
sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
}
/*
* count302: run cellranger count version 3.0.2
*/
process count302 {
queue '128GB,256GB,256GBv1,384GB'
tag "${sample}"
publishDir "${outDir}/${task.process}", mode: 'copy'
module 'cellranger/3.0.2'
container 'bicf/cellranger3.0.2:2.0.0'
input:
set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples302
......@@ -239,6 +252,7 @@ process count302 {
if (forceCells302 == 0) {
"""
hostname
ulimit -u 16384
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells302} --chemistry=${chemistryParam302}
......@@ -248,22 +262,23 @@ process count302 {
else {
"""
hostname
ulimit -u 16384
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells302} --chemistry=${chemistryParam302}
sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
}
/*
* count310: run cellranger count version 3.1.0
*/
process count310 {
queue '128GB,256GB,256GBv1,384GB'
tag "${sample}"
publishDir "${outDir}/${task.process}", mode: 'copy'
module 'cellranger/3.1.0'
container 'bicf/cellranger3.1.0:2.0.0'
input:
set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples310
......@@ -292,21 +307,21 @@ process count310 {
else {
"""
hostname
ulimit -u 16384
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells310} --chemistry=${chemistryParam310}
sed -E 's/("([^"]*)")?(,|\$)/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
}
/*
* versions: collect tool versions into a single yml
*/
process versions {
tag "${name}"
publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'
module 'python/3.6.1-2-anaconda:pandoc/2.7:multiqc/1.7'
container 'bicf/python3:2.0.0'
input:
......@@ -323,17 +338,16 @@ process versions {
python3 "${baseDir}/scripts/generate_versions.py" -f version_*.txt -o versions
python3 "${baseDir}/scripts/generate_references.py" -r "${references}" -o references
"""
}
// Collect all metrics summaries regardless of cellranger version
metricsSummary = metricsSummary211.mix(metricsSummary220, metricsSummary302, metricsSummary310)
metricsSummary = metricsSummary211.mix(metricsSummary301, metricsSummary302, metricsSummary310)
/*
* multiqc: create multiqc report
*/
process multiqc {
tag "${name}"
queue 'super'
publishDir "${outDir}/${task.process}/${name}", mode: 'copy'
module 'multiqc/1.7'
......@@ -352,5 +366,4 @@ process multiqc {
sed -i '1s/^.*\tE/Sample\tE/' metrics_summary_mqc.tsv
multiqc -c ${multiqcConf} .
"""
}
\ No newline at end of file
}
profiles {
standard {
biohpc {
includeConfig 'conf/biohpc.config'
}
local {
includeConfig 'conf/local.config'
}
cluster {
includeConfig 'conf/cluster.config'
}
aws {
includeConfig 'conf/aws.config'
}
}
// Container image assigned to each workflow process, selected by process name.
process {
// Python image for the design-file check.
withName:checkDesignFile {
container = 'bicf/python3:2.0.0'
}
// One cellranger image per supported cellranger version.
withName:count211 {
container = 'bicf/cellranger2.1.1:2.0.0'
}
withName:count220 {
container = 'bicf/cellranger2.2.0:2.0.0'
}
withName:count302 {
container = 'bicf/cellranger3.0.2:2.0.0'
}
withName:count310 {
container = 'bicf/cellranger3.1.0:2.0.0'
}
// Python image for version/reference collection.
withName:versions {
container = 'bicf/python3:2.0.0'
}
// MultiQC image for the final report.
withName:multiqc {
container = 'bicf/multiqc:2.0.0'
}
}
// Execution trace: written to pipeline_trace.txt with the listed columns.
trace {
enabled = true
file = 'pipeline_trace.txt'
fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
}
// Timeline output, written to timeline.html.
timeline {
enabled = true
file = 'timeline.html'
}
// Execution report, written to report.html.
report {
enabled = true
file = 'report.html'
}
// Nextflow Tower integration.
// The access token is a personal credential and must never be committed to the
// repository: it is read from the TOWER_ACCESS_TOKEN environment variable.
// Any token that was previously stored in this file should be revoked.
tower {
  accessToken = System.getenv('TOWER_ACCESS_TOKEN')
  // Only report to Tower when a token has actually been supplied, so runs
  // without the variable set do not fail on a missing credential.
  enabled = (System.getenv('TOWER_ACCESS_TOKEN') != null)
}
// Pipeline metadata: project home page, description, entry script and versions.
manifest {
homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count'
description = 'This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It takes fastq files from 10x Genomics Single Cell Gene Expression libraries, performs alignment, filtering, barcode counting, and UMI counting. It uses the Chromium cellular barcodes to generate gene-barcode matrices, determine clusters, and perform gene expression analysis.'
mainScript = 'main.nf'
version = 'v2.0.0_indev'
// NOTE(review): the CI loads nextflow/19.09.0 — confirm that 0.31.0 is still a valid lower bound for this pipeline.
nextflowVersion = '>=0.31.0'
}
......@@ -97,7 +97,7 @@ def main():
logger.addHandler(handler)
# Read files as dataframes
design_df = pd.read_csv(args.design, sep=',')
design_df = pd.read_csv(args.design, sep=',', converters={'Sample': str.strip, 'fastq_R1': str.strip, 'fastq_R2': str.strip})
fastq_df = pd.read_csv(args.fastq, sep='\t', names=['name', 'path'])
# Check design file
......@@ -107,4 +107,4 @@ def main():
new_design_df.to_csv('design.checked.csv', header=True, sep=',', index=False)
if __name__ == '__main__':
main()
\ No newline at end of file
main()
#!/usr/bin/env python3
#test_check_design.py
#*
#* --------------------------------------------------------------------------
#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE)
#* --------------------------------------------------------------------------
#*
import pytest
import pandas as pd
from io import StringIO
import os
# Directory holding the checkDesignFile outputs, built relative to the
# location of this test file.
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../output/misc/checkDesignFile/run/'
@pytest.mark.count211
def test_count211_design():
    """Check that design.checked.csv exists under test_output_path (marker: count211)."""
    checked_design = os.path.join(test_output_path, 'design.checked.csv')
    assert os.path.exists(checked_design)
@pytest.mark.count301
def test_count301_design():
    """Check that design.checked.csv exists under test_output_path (marker: count301)."""
    checked_design = os.path.join(test_output_path, 'design.checked.csv')
    assert os.path.exists(checked_design)
@pytest.mark.count302
def test_count302_design():
    """Check that design.checked.csv exists under test_output_path (marker: count302)."""
    checked_design = os.path.join(test_output_path, 'design.checked.csv')
    assert os.path.exists(checked_design)
@pytest.mark.count310
def test_count310_design():
    """Check that design.checked.csv exists under test_output_path (marker: count310)."""
    checked_design = os.path.join(test_output_path, 'design.checked.csv')
    assert os.path.exists(checked_design)
\ No newline at end of file
#!/usr/bin/env python3
#test_versions.py
#*
#* --------------------------------------------------------------------------
#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/LICENSE)
#* --------------------------------------------------------------------------
#*
import pytest
import pandas as pd
from io import StringIO
import os
# Directory holding the versions outputs, built relative to the location of
# this test file.
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../output/misc/versions/run/'
@pytest.mark.count211
def test_count211_versions():
    """Check that both *_mqc.yaml files exist under test_output_path (marker: count211)."""
    # Same order as before: versions first, then references.
    for report_name in ('versions_mqc.yaml', 'references_mqc.yaml'):
        assert os.path.exists(os.path.join(test_output_path, report_name))
@pytest.mark.count301
def test_count301_versions():
    """Check that both *_mqc.yaml files exist under test_output_path (marker: count301)."""
    # Same order as before: versions first, then references.
    for report_name in ('versions_mqc.yaml', 'references_mqc.yaml'):
        assert os.path.exists(os.path.join(test_output_path, report_name))
@pytest.mark.count302
def test_count302_versions():
    """Check that both *_mqc.yaml files exist under test_output_path (marker: count302)."""
    # Same order as before: versions first, then references.
    for report_name in ('versions_mqc.yaml', 'references_mqc.yaml'):
        assert os.path.exists(os.path.join(test_output_path, report_name))
@pytest.mark.count310
def test_count310_versions():
    """Check that both *_mqc.yaml files exist under test_output_path (marker: count310)."""
    # Same order as before: versions first, then references.
    for report_name in ('versions_mqc.yaml', 'references_mqc.yaml'):
        assert os.path.exists(os.path.join(test_output_path, report_name))
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment