Commit 428d3688 authored by Gervaise Henry's avatar Gervaise Henry 🤠
Browse files

Squash many small commits to get working

parent 7ecda2fb
Pipeline #5482 passed with stages
in 7 minutes and 4 seconds
......@@ -34,10 +34,10 @@ simple_1FC:
when: always
paths:
- .nextflow.log
##- workflow/output/multiqc/run/multiqc_report.html
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days
retry:
max: 1
max: 0
when:
- always
......@@ -51,9 +51,9 @@ simple_2FC:
when: always
paths:
- .nextflow.log
##- workflow/output/multiqc/run/multiqc_report.html
- workflow/output/multiqc/run/multiqc_report.html
expire_in: 2 days
retry:
max: 1
max: 0
when:
- always
\ No newline at end of file
### References
1. **python**:
1. **Nextflow**:
* Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
2. **python**:
* Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com))
2. **pigz**:
3. **pigz**:
* Parallel implementation of gzip [https://zlib.net/pigz/](https://zlib.net/pigz/)
3. **bcl2fastq**:
* Illumina's bcl2fastq [https://support.illumina.com/sequencing/sequencing_software/bcl2fastq-conversion-software.html](https://support.illumina.com/sequencing/sequencing_software/bcl2fastq-conversion-software.html)
3. **cellranger**:
4. **cellranger**:
* 10x Genomics cellranger mkfastq [https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/mkfastq](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/mkfastq)
4. **fastqc**:
* fastqc [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
5. **bcl2fastq**:
* Illumina's bcl2fastq [https://support.illumina.com/sequencing/sequencing_software/bcl2fastq-conversion-software.html](https://support.illumina.com/sequencing/sequencing_software/bcl2fastq-conversion-software.html)
5. **MultiQC**:
* Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
6. **fastqc**:
* fastqc [https://www.bioinformatics.babraham.ac.uk/projects/fastqc/](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
6. **Nextflow**:
* Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
7. **MultiQC**:
* Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
\ No newline at end of file
......@@ -10,26 +10,28 @@ aws {
process {
executor = 'awsbatch'
queue = 'highpriority-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc'
container = 'docker://bicf/bicfbase:1.4'
container = 'bicf/bicfbase:1.4'
cpus = 1
memory = '1 GB'
time = '1h'
withName:checkDesignFile {
container = 'docker://bicf/python3:1.3'
container = 'bicf/python3:1.3'
cpus = 4
}
withName:mkfastq {
container = 'docker://bicf/cellranger3.1.0:1.0'
container = 'bicf/cellranger3.1.0:1.0'
cpus = 2
memory = '2 GB'
}
withName:fastqc {
container = 'docker://bicf/fastqc:1.5'
container = 'bicf/fastqc:1.5'
cpus = 4
memory = '2 GB'
}
withName:versions {
container = 'bicf/python3:1.3'
}
withName:multiqc {
container = 'docker://bicf/multiqc:1.4'
container = 'bicf/multiqc:1.4'
}
}
\ No newline at end of file
......@@ -10,26 +10,28 @@ aws {
process {
executor = 'awsbatch'
queue = 'default-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc'
container = 'docker://bicf/bicfbase:1.4'
container = 'bicf/bicfbase:1.4'
cpus = 1
memory = '1 GB'
time = '1h'
withName:checkDesignFile {
container = 'docker://bicf/python3:1.3'
container = 'bicf/python3:1.3'
cpus = 4
}
withName:mkfastq {
container = 'docker://bicf/cellranger3.1.0:1.0'
container = 'bicf/cellranger3.1.0:1.0'
cpus = 2
memory = '2 GB'
}
withName:fastqc {
container = 'docker://bicf/fastqc:1.5'
container = 'bicf/fastqc:1.5'
cpus = 4
memory = '2 GB'
}
withName:versions {
container = 'bicf/python3:1.3'
}
withName:multiqc {
container = 'docker://bicf/multiqc:1.4'
container = 'bicf/multiqc:1.4'
}
}
\ No newline at end of file
......@@ -22,6 +22,10 @@ process {
container = 'docker://bicf/fastqc:1.5'
queue = 'super'
}
withName:versions {
container = 'docker://bicf/python3:1.3'
executor = 'local'
}
withName:multiqc {
container = 'docker://bicf/multiqc:1.4'
executor = 'local'
......
......@@ -15,6 +15,9 @@ process {
withName:fastqc {
container = 'docker://bicf/fastqc:1.5'
}
withName:versions {
container = 'docker://bicf/python3:1.3'
}
withName:multiqc {
container = 'docker://bicf/multiqc:1.4'
}
......
......@@ -13,8 +13,6 @@ params.name = "run"
params.bcl = "${baseDir}/../test_data/*.tar.gz"
params.designFile = "${baseDir}/../test_data/design.csv"
params.outDir = "${baseDir}/output"
params.multiqcConf = "${baseDir}/conf/multiqc_config.yaml"
params.references = "${baseDir}/../docs/references.md"
// Define list of files
tarList = Channel
......@@ -29,14 +27,24 @@ designLocation = Channel
.fromPath(params.designFile)
.ifEmpty { exit 1, "design file not found: ${params.designFile}" }
outDir = params.outDir
multiqcConf = params.multiqcConf
references = params.references
// Define script files
check_designScript = Channel.fromPath("$baseDir/scripts/check_design.py")
untarBCLScript = Channel.fromPath("$baseDir/scripts/untarBCL.sh")
countDesignScript = Channel.fromPath("$baseDir/scripts/countDesign.sh")
fastqcScript = Channel.fromPath("$baseDir/scripts/fastqc.sh")
versionsScript = Channel.fromPath("$baseDir/scripts/generate_versions.py")
referencesScript = Channel.fromPath("$baseDir/scripts/generate_references.py")
versions_pythonScript = Channel.fromPath("$baseDir/scripts/versions_python.sh")
versions_pigzScript = Channel.fromPath("$baseDir/scripts/versions_pigz.sh")
versions_cellrangerScript = Channel.fromPath("$baseDir/scripts/versions_cellranger.sh")
versions_bcl2fastqScript = Channel.fromPath("$baseDir/scripts/versions_bcl2fastq.sh")
versions_fastqcScript = Channel.fromPath("$baseDir/scripts/versions_fastqc.sh")
// Define report files
multiqcConf = Channel.fromPath("${baseDir}/conf/multiqc_config.yaml")
references = Channel.fromPath("${baseDir}/../docs/references.md")
process checkDesignFile {
......@@ -44,11 +52,15 @@ process checkDesignFile {
input:
file check_designScript
file versions_pythonScript
file designLocation
output:
file("design.checked.csv") into designPaths
file("design.checked.csv") into designCount
file("version_pipeline.txt") into version_pipeline
file("version_nextflow.txt") into version_nextflow
file("version_python.txt") into version_python
script:
"""
......@@ -59,27 +71,33 @@ process checkDesignFile {
mv "${designLocation}" "\${noSpaceDesign}"
fi
python3 check_design.py -d \${noSpaceDesign}
echo "${workflow.manifest.version}" > version_pipeline.txt
echo "${workflow.nextflow.version}"> version_nextflow.txt
bash versions_python.sh > version_python.txt
"""
}
process untarBCL {
process untarBCL {
tag "${tar}"
tag "${tar.simpleName}"
input:
file untarBCLScript
file tar from tarList
file versions_pigzScript
each file(tar) from tarList
output:
file("*") into bclPaths mode flatten
file("*[!version_pigz.txt]") into bclPaths mode flatten
file("version_pigz.txt") into version_pigz
script:
"""
hostname
ulimit -a
bash ${baseDir}/scripts/untarBCL.sh -t ${tar}
bash untarBCL.sh -t ${tar}
bash versions_pigz.sh > version_pigz.txt
"""
}
......@@ -87,24 +105,33 @@ process untarBCL {
process mkfastq {
tag "${bcl.baseName}"
tag "${bcl.simpleName}"
publishDir "${outDir}/${task.process}", mode: 'copy', pattern: "{*/outs/**/*.fastq.gz}"
input:
each bcl from bclPaths.collect()
file versions_cellrangerScript
file versions_bcl2fastqScript
each path(bcl) from bclPaths.collect()
file design from designPaths
output:
file("**/outs/**/*.fastq.gz") into fastqPaths
file("**/outs/**/*.fastq.gz") into cellrangerCount
file("fq/${bcl.simpleName}/*.fastq.gz") into fastqPaths
val "${bcl.simpleName}" into bclName
file("**/outs/**/*.fastq.gz") into cellrangerCount mode flatten
file("**/outs/fastq_path/Stats/*") into bqcPaths
val "${bcl.baseName}" into bclName
file("version_cellranger.txt") into version_cellranger
file("version_bcl2fastq.txt") into version_bcl2fastq
script:
"""
hostname
ulimit -a
cellranger mkfastq --id=${bcl.baseName} --run=${bcl} --csv=${design}
cellranger mkfastq --id=mkfastq_${bcl.simpleName} --run=${bcl} --csv=${design}
mkdir fq
mkdir "fq/${bcl.simpleName}"
find . -name "*.fastq.gz" -exec cp {} fq/${bcl.simpleName}/ \\;
bash versions_cellranger.sh > version_cellranger.txt
bash versions_bcl2fastq.sh > version_bcl2fastq.txt
"""
}
......@@ -137,32 +164,45 @@ if (bclCount.value == 1) {
process fastqc {
tag "${bclName}"
tag "${bcl}"
input:
file fastqcScript
file fastqPaths
val bclName
file fastqcScript
file versions_fastqcScript
file(fastq) from fastqPaths
each bcl from bclName
output:
file("*fastqc.zip") into fqcPaths
file("*fastqc.zip") into fqcPaths mode flatten
file("version_fastqc.txt") into version_fastqc
script:
"""
hostname
ulimit -a
find *.fastq.gz -exec mv {} ${bclName}.{} \\;
find *.fastq.gz -exec mv {} ${bcl}.{} \\;
bash fastqc.sh
bash versions_fastqc.sh > version_fastqc.txt
"""
}
/*
process versions {
tag "${name}"
input:
file versionsScript
file referencesScript
file version_pipeline
file version_nextflow
file version_python
file version_pigz
file version_cellranger
file version_bcl2fastq
file version_fastqc
file references
output:
file("*.yaml") into yamlPaths
......@@ -171,11 +211,8 @@ process versions {
"""
hostname
ulimit -a
echo ${workflow.nextflow.version} > version_nextflow.txt
bash ${baseDir}/scripts/versions_mkfastq.sh
bash ${baseDir}/scripts/versions_fastqc.sh
python3 ${baseDir}/scripts/generate_versions.py -f version_*.txt -o versions
python3 ${baseDir}/scripts/generate_references.py -r ${references} -o references
python3 generate_versions.py -f version_*.txt -o versions
python3 generate_references.py -r ${references} -o references
"""
}
......@@ -187,8 +224,9 @@ process multiqc {
publishDir "${outDir}/${task.process}/${name}", mode: 'copy', pattern: "{multiqc*}"
input:
file multiqcConf
file bqc name "bqc/?/*" from bqcPaths.collect()
file fqc name "fqc/*" from fqcPaths.collect()
file fqc name "fqc/?/*" from fqcPaths.collect()
file yamlPaths
output:
......@@ -198,8 +236,9 @@ process multiqc {
"""
hostname
ulimit -a
export LC_ALL=C.UTF-8
export LANG=C.UTF-8
multiqc -c ${multiqcConf} .
"""
}
*/
\ No newline at end of file
}
\ No newline at end of file
......@@ -38,6 +38,6 @@ manifest {
homePage = 'https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_mkfastq'
description = 'This pipeline is a wrapper for the cellranger mkfastq tool from 10x Genomics (which uses Illuminas bcl2fastq). It takes demultiplexes samples from 10x Genomics Single Cell Gene Expression libraries into fastqs.'
mainScript = 'main.nf'
version = 'v1.4.0_indev'
version = '1.4.0_indev'
nextflowVersion = '>=0.31.0'
}
\ No newline at end of file
......@@ -6,4 +6,4 @@
#* --------------------------------------------------------------------------
#*
find . -name '*.fastq.gz' | awk '{printf("fastqc \"%s\"\n", $0)}' | parallel -j $(grep -c ^processor /proc/cpuinfo) --verbose
\ No newline at end of file
find -name '*.fastq.gz' | awk '{printf("fastqc \"%s\"\n", $0)}' | parallel -j $(grep -c ^processor /proc/cpuinfo) --verbose
\ No newline at end of file
......@@ -26,8 +26,11 @@ logger.propagate = False
logger.setLevel(logging.INFO)
SOFTWARE_REGEX = {
'Pipeline': ['version_pipeline.txt', r"(\S+)"],
'Nextflow': ['version_nextflow.txt', r"(\S+)"],
'cellranger mkfastq': ['version_cellranger.mkfastq.txt', r"(\S+)"],
'python': ['version_python.txt', r"(\S+)"],
'pigz': ['version_pigz.txt', r"(\S+)"],
'cellranger': ['version_cellranger.txt', r"(\S+)"],
'bcl2fastq': ['version_bcl2fastq.txt', r"(\S+)"],
'fastqc': ['version_fastqc.txt', r"(\S+)"],
}
......@@ -75,8 +78,11 @@ def main():
out_filename = output + '_mqc.yaml'
results = OrderedDict()
results['Pipeline'] = '<span style="color:#999999;\">N/A</span>'
results['Nextflow'] = '<span style="color:#999999;\">N/A</span>'
results['cellranger mkfastq'] = '<span style="color:#999999;\">N/A</span>'
results['python'] = '<span style="color:#999999;\">N/A</span>'
results['pigz'] = '<span style="color:#999999;\">N/A</span>'
results['cellranger'] = '<span style="color:#999999;\">N/A</span>'
results['bcl2fastq'] = '<span style="color:#999999;\">N/A</span>'
results['fastqc'] = '<span style="color:#999999;\">N/A</span>'
......
#!/bin/bash
#versions_mkfastq.sh
#versions_bcl2fastq.sh
#*
#* --------------------------------------------------------------------------
#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_mkfastq/blob/develop/LICENSE)
#* --------------------------------------------------------------------------
#*
cellranger mkfastq --version | grep 'cellranger mkfastq ' | sed 's/.*(\(.*\))/\1/' > version_cellranger.mkfastq.txt
bcl2fastq --version |& grep 'bcl2fastq v' | sed -n -e 's/^bcl2fastq v//p' > version_bcl2fastq.txt
\ No newline at end of file
bcl2fastq --version |& grep 'bcl2fastq v' | sed -n -e 's/^bcl2fastq v//p'
\ No newline at end of file
#!/bin/bash
#versions_cellranger.sh
#*
#* --------------------------------------------------------------------------
#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_mkfastq/blob/develop/LICENSE)
#* --------------------------------------------------------------------------
#*

# Print the cellranger mkfastq version on stdout.
# Only the line(s) of the --version output that contain 'cellranger mkfastq '
# are kept; on those, everything up to and including the last '(' and the
# trailing ')' is stripped, leaving the text that was inside the parentheses.
# The caller is expected to redirect stdout into its own version file.
cellranger mkfastq --version \
  | grep 'cellranger mkfastq ' \
  | sed -e 's/.*(\(.*\))/\1/'
\ No newline at end of file
......@@ -6,4 +6,4 @@
#* --------------------------------------------------------------------------
#*
fastqc --version | grep 'FastQC v' | sed -n -e 's/^FastQC v//p' > version_fastqc.txt
\ No newline at end of file
fastqc --version | grep 'FastQC v' | sed -n -e 's/^FastQC v//p'
\ No newline at end of file
#!/bin/bash
#versions_pigz.sh
#*
#* --------------------------------------------------------------------------
#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_mkfastq/blob/develop/LICENSE)
#* --------------------------------------------------------------------------
#*

# Print the pigz version number on stdout.
# stdout and stderr of `pigz --version` are merged so the version line is
# captured regardless of which stream it is written to. Lines containing
# 'pigz ' are kept and the leading "pigz " prefix is removed; lines where the
# prefix does not match are suppressed (sed -n ... p).
# The caller is expected to redirect stdout into its own version file.
pigz --version 2>&1 \
  | grep 'pigz ' \
  | sed -n -e 's/^pigz //p'
\ No newline at end of file
#!/bin/bash
#versions_python.sh
#*
#* --------------------------------------------------------------------------
#* Licensed under MIT (https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_mkfastq/blob/develop/LICENSE)
#* --------------------------------------------------------------------------
#*

# Print the Python interpreter version number on stdout.
# The pipeline runs its Python scripts with `python3`, so the version is
# queried from `python3` as well; a bare `python` may resolve to a different
# interpreter (or to nothing at all) inside the container, which would make
# the reported version wrong or empty.
# stdout and stderr are merged (|&) so the version line is captured regardless
# of which stream the interpreter writes it to. Lines containing 'Python ' are
# kept and the leading "Python " prefix is removed.
# The caller is expected to redirect stdout into its own version file.
python3 --version |& grep 'Python ' | sed -n -e 's/^Python //p'
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment