Skip to content
Snippets Groups Projects
Commit 026b2a52 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch '27-CheckStyle' into 'develop'

fixed style

Closes #27

See merge request !48
parents 302083f1 1fe100af
Branches
Tags
2 merge requests!53Develop,!48fixed style
Pipeline #4325 passed with stages
in 23 minutes and 30 seconds
......@@ -103,3 +103,10 @@ To Run:
| sample2 | pbmc_1k_v2_S2_L002_R1_001.fastq.gz | pbmc_1k_v2_S2_L002_R2_001.fastq.gz |
[**CHANGELOG**](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/blob/develop/CHANGELOG.md)
Credits
-------
This workflow was developed jointly with the [Bioinformatics Core Facility (BICF), Department of Bioinformatics](http://www.utsouthwestern.edu/labs/bioinformatics/)
Please cite in publications: Pipeline was developed by BICF from funding provided by **Cancer Prevention and Research Institute of Texas (RP150596)**.
......@@ -98,15 +98,15 @@ workflow_parameters:
- id: genome
type: select
choices:
- [ 'GRCh38-3.0.0', 'Human GRCh38 release 93']
- [ 'GRCh38-1.2.0', 'Human GRCh38 release 84']
- [ 'hg19-3.0.0', 'Human GRCh37 (hg19) release 87']
- [ 'hg19-1.2.0', 'Human GRCh37 (hg19) release 84']
- [ 'mm10-3.0.0', 'Mouse GRCm38 (mm10) release 93']
- [ 'mm10-1.2.0', 'Mouse GRCm38 (mm10) release 84']
- [ 'hg19_and_mm10-3.0.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm19) release 93']
- [ 'hg19_and_mm10-1.2.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm19) release 84']
- [ 'ercc92-1.2.0', 'ERCC.92 Spike-In']
- ['GRCh38-3.0.0', 'Human GRCh38 release 93']
- ['GRCh38-1.2.0', 'Human GRCh38 release 84']
- ['hg19-3.0.0', 'Human GRCh37 (hg19) release 87']
- ['hg19-1.2.0', 'Human GRCh37 (hg19) release 84']
- ['mm10-3.0.0', 'Mouse GRCm38 (mm10) release 93']
- ['mm10-1.2.0', 'Mouse GRCm38 (mm10) release 84']
- ['hg19_and_mm10-3.0.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 93']
- ['hg19_and_mm10-1.2.0', 'Human GRCh37 (hg19) + Mouse GRCm38 (mm10) release 84']
- ['ercc92-1.2.0', 'ERCC.92 Spike-In']
required: true
description: |
Reference species and genome used for alignment and subsequent analysis.
......@@ -133,9 +133,9 @@ workflow_parameters:
type: select
default: 'auto'
choices:
- [ 'auto', 'Auto Detect']
- [ 'three', '3']
- [ 'two', '2']
- ['auto', 'Auto Detect']
- ['three', '3']
- ['two', '2']
required: true
description: |
10x single cell gene expression chemistry version (only used in cellranger version 3.x).
......@@ -144,9 +144,9 @@ workflow_parameters:
type: select
default: '3.0.2'
choices:
- [ '3.0.2', '3.0.2']
- [ '3.0.1', '3.0.1']
- [ '2.1.1', '2.1.1']
- ['3.0.2', '3.0.2']
- ['3.0.1', '3.0.1']
- ['2.1.1', '2.1.1']
required: true
description: |
10x cellranger version.
......
......@@ -8,6 +8,10 @@ This pipeline is a wrapper for the cellranger count tool from 10x Genomics. It t
The pipeline uses Nextflow, a bioinformatics workflow tool.
This pipeline is primarily used with a SLURM cluster on the BioHPC Cluster. However, the pipeline should be able to run on any system that Nextflow supports.
Additionally, the pipeline is designed to work with the Astrocyte Workflow System using a simple web interface.
To Run:
-------
......
......@@ -8,3 +8,6 @@
3. **MultiQC**:
* Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
4. **Nextflow**:
* Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
......@@ -5,17 +5,17 @@
// Define Input variables
params.name = "run"
params.fastq = "$baseDir/../test_data/*.fastq.gz"
params.designFile = "$baseDir/../test_data/design.csv"
params.fastq = "${baseDir}/../test_data/*.fastq.gz"
params.designFile = "${baseDir}/../test_data/design.csv"
params.genome = 'GRCh38-3.0.0'
params.expectCells = 10000
params.forceCells = 0
params.kitVersion = 'three'
params.version = '3.0.2'
params.astrocyte = false
params.outDir = "$baseDir/output"
params.multiqcConf = "$baseDir/conf/multiqc_config.yaml"
params.references = "$baseDir/../docs/references.md"
params.outDir = "${baseDir}/output"
params.multiqcConf = "${baseDir}/conf/multiqc_config.yaml"
params.references = "${baseDir}/../docs/references.md"
// Assign variables if astrocyte
if (params.astrocyte) {
......@@ -61,23 +61,25 @@ references = params.references
process checkDesignFile {
tag "$name"
publishDir "$outDir/misc/${task.process}/$name", mode: 'copy'
tag "${name}"
publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'
module 'python/3.6.1-2-anaconda'
input:
file designLocation
file fastqList
file designLocation
file fastqList
output:
file("design.checked.csv") into designPaths
file("design.checked.csv") into designPaths
script:
"""
hostname
ulimit -a
python3 $baseDir/scripts/check_design.py -d $designLocation -f $fastqList
"""
"""
hostname
ulimit -a
python3 ${baseDir}/scripts/check_design.py -d ${designLocation} -f ${fastqList}
"""
}
......@@ -88,6 +90,7 @@ samples = designPaths
.groupTuple()
//.subscribe { println it }
// Duplicate variables
samples.into {
samples211
......@@ -110,171 +113,186 @@ chemistryParam302 = chemistryParam
process count211 {
queue '128GB,256GB,256GBv1,384GB'
tag "$sample"
publishDir "$outDir/${task.process}", mode: 'copy'
tag "${sample}"
publishDir "${outDir}/${task.process}", mode: 'copy'
module 'cellranger/2.1.1'
input:
set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples211
file ref from refLocation211.first()
expectCells211
forceCells211
set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples211
file ref from refLocation211.first()
expectCells211
forceCells211
output:
file("**/outs/**") into outPaths211
file("*_metrics_summary.tsv") into metricsSummary211
file("**/outs/**") into outPaths211
file("*_metrics_summary.tsv") into metricsSummary211
when:
version == '2.1.1'
script:
if (forceCells211 == 0){
"""
hostname
ulimit -a
bash "$baseDir/scripts/filename_check.sh" -r "$ref"
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells211
sed -E 's/("([^"]*)")?,/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
} else {
"""
hostname
ulimit -a
bash "$baseDir/scripts/filename_check.sh" -r "$ref"
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells211
sed -E 's/("([^"]*)")?,/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
if (forceCells211 == 0) {
"""
hostname
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells211}
sed -E 's/("([^"]*)")?,/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
else {
"""
hostname
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells211}
sed -E 's/("([^"]*)")?,/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
}
process count301 {
queue '128GB,256GB,256GBv1,384GB'
tag "$sample"
publishDir "$outDir/${task.process}", mode: 'copy'
tag "${sample}"
publishDir "${outDir}/${task.process}", mode: 'copy'
module 'cellranger/3.0.1'
input:
set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples301
file ref from refLocation301.first()
expectCells301
forceCells301
chemistryParam301
set sample, file("${sample}_S1_L00?_R1_001.fastq.gz"), file("${sample}_S1_L00?_R2_001.fastq.gz") from samples301
file ref from refLocation301.first()
expectCells301
forceCells301
chemistryParam301
output:
file("**/outs/**") into outPaths301
file("*_metrics_summary.tsv") into metricsSummary301
file("**/outs/**") into outPaths301
file("*_metrics_summary.tsv") into metricsSummary301
when:
version == '3.0.1'
version == '3.0.1'
script:
if (forceCells301 == 0){
"""
hostname
ulimit -a
bash "$baseDir/scripts/filename_check.sh" -r "$ref"
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells301 --chemistry="$chemistryParam301"
sed -E 's/("([^"]*)")?,/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
} else {
"""
hostname
ulimit -a
bash "$baseDir/scripts/filename_check.sh" -r "$ref"
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells301 --chemistry="$chemistryParam301"
sed -E 's/("([^"]*)")?,/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
if (forceCells301 == 0) {
"""
hostname
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells301} --chemistry=${chemistryParam301}
sed -E 's/("([^"]*)")?,/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
else {
"""
hostname
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells301} --chemistry=${chemistryParam301}
sed -E 's/("([^"]*)")?,/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
}
process count302 {
queue '128GB,256GB,256GBv1,384GB'
tag "$sample"
publishDir "$outDir/${task.process}", mode: 'copy'
tag "${sample}"
publishDir "${outDir}/${task.process}", mode: 'copy'
module 'cellranger/3.0.2'
input:
set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples302
file ref from refLocation302.first()
expectCells302
forceCells302
chemistryParam302
set sample, file("${sample}_S?_L001_R1_001.fastq.gz"), file("${sample}_S?_L001_R2_001.fastq.gz") from samples302
file ref from refLocation302.first()
expectCells302
forceCells302
chemistryParam302
output:
file("**/outs/**") into outPaths302
file("*_metrics_summary.tsv") into metricsSummary302
file("**/outs/**") into outPaths302
file("*_metrics_summary.tsv") into metricsSummary302
when:
version == '3.0.2'
version == '3.0.2'
script:
if (forceCells302 == 0){
"""
hostname
ulimit -a
bash "$baseDir/scripts/filename_check.sh" -r "$ref"
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --expect-cells=$expectCells302 --chemistry="$chemistryParam302"
sed -E 's/("([^"]*)")?,/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
} else {
"""
hostname
ulimit -a
bash "$baseDir/scripts/filename_check.sh" -r "$ref"
cellranger count --id="$sample" --transcriptome="./$ref" --fastqs=. --sample="$sample" --force-cells=$forceCells302 --chemistry="$chemistryParam302"
sed -E 's/("([^"]*)")?,/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
if (forceCells302 == 0) {
"""
hostname
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --expect-cells=${expectCells302} --chemistry=${chemistryParam302}
sed -E 's/("([^"]*)")?,/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
else {
"""
hostname
ulimit -a
bash ${baseDir}/scripts/filename_check.sh -r ${ref}
cellranger count --id=${sample} --transcriptome=./${ref} --fastqs=. --sample=${sample} --force-cells=${forceCells302} --chemistry=${chemistryParam302}
sed -E 's/("([^"]*)")?,/\\2\t/g' ${sample}/outs/metrics_summary.csv | tr -d "," | sed "s/^/${sample}\t/" > ${sample}_metrics_summary.tsv
"""
}
}
process versions {
tag "$name"
publishDir "$outDir/misc/${task.process}/$name", mode: 'copy'
tag "${name}"
publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'
module 'python/3.6.1-2-anaconda:pandoc/2.7:multiqc/1.7'
input:
output:
file("*.yaml") into yamlPaths
file("*.yaml") into yamlPaths
script:
"""
hostname
ulimit -a
echo $workflow.nextflow.version > version_nextflow.txt
echo $version > version_cellranger.txt
multiqc --version | tr -d 'multiqc, version ' > version_multiqc.txt
python3 "$baseDir/scripts/generate_versions.py" -f version_*.txt -o versions
python3 "$baseDir/scripts/generate_references.py" -r "$references" -o references
"""
"""
hostname
ulimit -a
echo ${workflow.nextflow.version} > version_nextflow.txt
echo ${version} > version_cellranger.txt
multiqc --version | tr -d 'multiqc, version ' > version_multiqc.txt
python3 "${baseDir}/scripts/generate_versions.py" -f version_*.txt -o versions
python3 "${baseDir}/scripts/generate_references.py" -r "${references}" -o references
"""
}
metricsSummary = metricsSummary211.mix(metricsSummary301, metricsSummary302)
// Generate MultiQC Report
process multiqc {
tag "$name"
tag "${name}"
queue 'super'
publishDir "$outDir/${task.process}/$name", mode: 'copy'
publishDir "${outDir}/${task.process}/${name}", mode: 'copy'
module 'multiqc/1.7'
input:
file ('*') from metricsSummary.collect()
file yamlPaths
file ('*') from metricsSummary.collect()
file yamlPaths
output:
file "multiqc_report.html" into mqcPaths
file "multiqc_report.html" into mqcPaths
script:
"""
hostname
ulimit -a
awk 'FNR==1 && NR!=1{next;}{print}' *.tsv > metrics_summary_mqc.tsv
sed -i '1s/^.*\tE/Sample\tE/' metrics_summary_mqc.tsv
multiqc -c $multiqcConf .
"""
"""
hostname
ulimit -a
awk 'FNR==1 && NR!=1{next;}{print}' *.tsv > metrics_summary_mqc.tsv
sed -i '1s/^.*\tE/Sample\tE/' metrics_summary_mqc.tsv
multiqc -c ${multiqcConf} .
"""
}
......@@ -39,13 +39,14 @@ def get_args():
def check_design_headers(design):
'''Check if design file conforms to sequencing type.'''
'''Check if design file contains correct headers.'''
# Default headers
design_template = [
'Sample',
'fastq_R1',
'fastq_R2']
'fastq_R1',
'fastq_R2',
]
design_headers = list(design.columns.values)
......
......@@ -5,28 +5,27 @@ usage() {
echo "-r --ref file"
exit 1
}
OPTIND=1
while getopts :r: opt
do
case $opt in
r) ref=$OPTARG;;
esac
case ${opt} in
r) ref=${OPTARG};;
esac
done
shift $(($OPTIND -1));
shift $((${OPTIND} -1))
name=`readlink -e $ref`
name=$(readlink -e ${ref})
if [ `find $name -name "* *" | wc -l` -gt 0 ];
then
echo "Error: Spaces found in Reference Files";
echo `find $name -name "* *"`;
exit 21;
fi;
if [ $(find $name -name "* *" | wc -l) -gt 0 ]; then
echo "Error: Spaces found in Reference Files"
echo $(find $name -name "* *")
exit 21
fi
if [ $(echo "$ref" | tr -d ' ') != "$ref" ];
then
echo "Error: Spaces found in Reference Files";
echo "$ref";
exit 21;
fi;
if [ $(echo "${ref}" | tr -d ' ') != "${ref}" ]; then
echo "Error: Spaces found in Reference Files"
echo ${ref}
exit 21
fi
#
# * --------------------------------------------------------------------------
# * Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_count/LICENSE.md)
# * --------------------------------------------------------------------------
#
#!/usr/bin/env python3
'''Make header for HTML of references.'''
......
......@@ -16,7 +16,6 @@ For more details:
'''
# SETTINGS
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
logger.propagate = False
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment