Skip to content
Snippets Groups Projects
Commit ec49d090 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Merge branch '24-CheckStyle' into 'develop'

fixed style

Closes #24

See merge request !39
parents 5a111ac2 a830a0c9
Branches
Tags
2 merge requests!41Develop,!39fixed style
Pipeline #4326 passed with stages
in 5 minutes and 16 seconds
......@@ -54,3 +54,10 @@ To Run:
[**CHANGELOG**](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_mkfastq/blob/develop/CHANGELOG.md)
Credits
-------
This workflow was developed jointly with the [Bioinformatics Core Facility (BICF), Department of Bioinformatics](http://www.utsouthwestern.edu/labs/bioinformatics/).
Please cite in publications: Pipeline was developed by BICF from funding provided by **Cancer Prevention and Research Institute of Texas (RP150596)**.
......@@ -9,7 +9,7 @@
# A unique identifier for the workflow package, text/underscores only
name: 'cellranger_mkfastq'
# Who wrote this?
author: 'Gervaise H. Henr, Jon Gesell, Jeremy Mathews, and Venkat Malladi'
author: 'Gervaise H. Henry, Jon Gesell, Jeremy Mathews, and Venkat Malladi'
# A contact email address for questions
email: 'bicf@utsouthwestern.edu'
# A more informative title for the workflow package
......
......@@ -4,7 +4,7 @@
* Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com))
2. **pigz**:
* parallel implementation of gzip [https://zlib.net/pigz/](https://zlib.net/pigz/)
* Parallel implementation of gzip [https://zlib.net/pigz/](https://zlib.net/pigz/)
3. **bcl2fastq**:
* Illumina's bcl2fastq [https://support.illumina.com/sequencing/sequencing_software/bcl2fastq-conversion-software.html](https://support.illumina.com/sequencing/sequencing_software/bcl2fastq-conversion-software.html)
......@@ -17,3 +17,6 @@
5. **MultiQC**:
* Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
6. **Nextflow**:
* Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
......@@ -3,13 +3,15 @@
// Path to an input file, or a pattern for multiple inputs
// Note - $baseDir is the location of this workflow file main.nf
// Define Input variables
params.name = "run"
params.bcl = "$baseDir/../test_data/*.tar.gz"
params.designFile = "$baseDir/../test_data/design.csv"
params.outDir = "$baseDir/output"
params.multiqcConf = "$baseDir/conf/multiqc_config.yaml"
params.references = "$baseDir/../docs/references.md"
params.bcl = "${baseDir}/../test_data/*.tar.gz"
params.designFile = "${baseDir}/../test_data/design.csv"
params.outDir = "${baseDir}/output"
params.multiqcConf = "${baseDir}/conf/multiqc_config.yaml"
params.references = "${baseDir}/../docs/references.md"
// Define List of Files
tarList = Channel
......@@ -18,6 +20,7 @@ bclCount = Channel
.fromPath( params.bcl )
.count()
// Define regular variables
name = params.name
designLocation = Channel
......@@ -29,155 +32,171 @@ references = params.references
process checkDesignFile {
tag "$name"
publishDir "$outDir/misc/${task.process}/$name", mode: 'copy'
tag "${name}"
publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'
module 'python/3.6.1-2-anaconda'
input:
file designLocation
file designLocation
output:
file("design.checked.csv") into designPaths
file("design.checked.csv") into designCount
file("design.checked.csv") into designPaths
file("design.checked.csv") into designCount
script:
"""
hostname
ulimit -a
python3 "$baseDir/scripts/check_design.py" -d "$designLocation"
"""
// Task script: log the host and resource limits, then run the design-file checker.
// Interpolated paths are double-quoted so that a baseDir or staged file name
// containing whitespace reaches python3 as a single argument.
"""
hostname
ulimit -a
python3 "${baseDir}/scripts/check_design.py" -d "${designLocation}"
"""
}
process untarBCL {
tag "$tar"
publishDir "$outDir/${task.process}", mode: 'copy'
tag "${tar}"
publishDir "${outDir}/${task.process}", mode: 'copy'
module 'pigz/2.4'
input:
file tar from tarList
file tar from tarList
output:
file("*") into bclPaths mode flatten
file("*") into bclPaths mode flatten
script:
"""
hostname
ulimit -a
bash "$baseDir/scripts/untarBCL.sh" -t "$tar"
"""
// Task script: log the host and resource limits, then unpack one BCL tarball.
// Both the helper script path and the tarball name are double-quoted so that
// whitespace in either does not split them into several shell words.
"""
hostname
ulimit -a
bash "${baseDir}/scripts/untarBCL.sh" -t "${tar}"
"""
}
process mkfastq {
tag "${bcl.baseName}"
queue '128GB,256GB,256GBv1,384GB'
publishDir "$outDir/${task.process}", mode: 'copy', pattern: "{*/outs/**/*.fastq.gz}"
publishDir "${outDir}/${task.process}", mode: 'copy', pattern: "{*/outs/**/*.fastq.gz}"
module 'cellranger/3.0.2:bcl2fastq/2.19.1'
input:
each bcl from bclPaths.collect()
file design from designPaths
each bcl from bclPaths.collect()
file design from designPaths
output:
file("**/outs/**/*.fastq.gz") into fastqPaths
file("**/outs/**/*.fastq.gz") into cellrangerCount
file("**/outs/fastq_path/Stats/Stats.json") into bqcPaths
val "${bcl.baseName}" into bclName
file("**/outs/**/*.fastq.gz") into fastqPaths
file("**/outs/**/*.fastq.gz") into cellrangerCount
file("**/outs/fastq_path/Stats/Stats.json") into bqcPaths
val "${bcl.baseName}" into bclName
script:
"""
hostname
ulimit -a
cellranger mkfastq --id="${bcl.baseName}" --run="$bcl" --csv=$design -r \$SLURM_CPUS_ON_NODE -p \$SLURM_CPUS_ON_NODE -w \$SLURM_CPUS_ON_NODE
"""
// Task script: log the host and resource limits, then demultiplex one BCL directory.
// The --id/--run/--csv values are double-quoted so whitespace or glob characters in
// the staged names are not re-interpreted by the shell.
// \$SLURM_CPUS_ON_NODE is escaped so it is expanded by the shell at run time,
// not by Nextflow when the script is rendered.
"""
hostname
ulimit -a
cellranger mkfastq --id="${bcl.baseName}" --run="${bcl}" --csv="${design}" -r \$SLURM_CPUS_ON_NODE -p \$SLURM_CPUS_ON_NODE -w \$SLURM_CPUS_ON_NODE
"""
}
if (bclCount.value == 1) {
process countDesign {
tag "$name"
publishDir "$outDir/misc/${task.process}/$name", mode: 'copy'
tag "${name}"
publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'
input:
file fastqs from cellrangerCount.collect()
file design from designCount
file fastqs from cellrangerCount.collect()
file design from designCount
output:
file("Cellranger_Count_Design.csv") into CountDesign
file("Cellranger_Count_Design.csv") into CountDesign
script:
"""
bash "$baseDir/scripts/countDesign.sh"
"""
// Task script: build the cellranger count design file from the staged fastqs.
// The script path is double-quoted so a baseDir containing whitespace still
// resolves to a single argument for bash.
"""
bash "${baseDir}/scripts/countDesign.sh"
"""
}
}
process fastqc {
tag "$bclName"
tag "${bclName}"
queue 'super'
publishDir "$outDir/misc/${task.process}/$name/$bclName", mode: 'copy', pattern: "{*fastqc.zip}"
publishDir "${outDir}/misc/${task.process}/${name}/${bclName}", mode: 'copy', pattern: "{*fastqc.zip}"
module 'fastqc/0.11.5:parallel'
input:
file fastqPaths
val bclName
file fastqPaths
val bclName
output:
file("*fastqc.zip") into fqcPaths
file("*fastqc.zip") into fqcPaths
script:
"""
hostname
ulimit -a
find *.fastq.gz -exec mv {} $bclName.{} \\;
bash "$baseDir/scripts/fastqc.sh"
"""
// Task script: log the host and resource limits, prefix every staged fastq with the
// BCL name, then run the FastQC helper.
// The rename target and the helper path are double-quoted so whitespace in bclName
// or baseDir cannot split them; `\\;` renders as `\;` to terminate find's -exec.
"""
hostname
ulimit -a
find *.fastq.gz -exec mv {} "${bclName}.{}" \\;
bash "${baseDir}/scripts/fastqc.sh"
"""
}
process versions {
tag "$name"
publishDir "$outDir/misc/${task.process}/$name", mode: 'copy'
tag "${name}"
publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'
module 'python/3.6.1-2-anaconda:cellranger/3.0.2:bcl2fastq/2.19.1:fastqc/0.11.5:pandoc/2.7'
input:
output:
file("*.yaml") into yamlPaths
file("*.yaml") into yamlPaths
script:
"""
hostname
ulimit -a
echo $workflow.nextflow.version > version_nextflow.txt
bash "$baseDir/scripts/versions_mkfastq.sh"
bash "$baseDir/scripts/versions_fastqc.sh"
python3 "$baseDir/scripts/generate_versions.py" -f version_*.txt -o versions
python3 "$baseDir/scripts/generate_references.py" -r "$references" -o references
"""
// Task script: log the host and resource limits, record the Nextflow version, run the
// per-tool version scripts, then run the versions and references generators.
// Every interpolated path is double-quoted so whitespace in baseDir or in the
// references path does not split the argument. `version_*.txt` is left unquoted
// on purpose: the shell must expand the glob.
"""
hostname
ulimit -a
echo ${workflow.nextflow.version} > version_nextflow.txt
bash "${baseDir}/scripts/versions_mkfastq.sh"
bash "${baseDir}/scripts/versions_fastqc.sh"
python3 "${baseDir}/scripts/generate_versions.py" -f version_*.txt -o versions
python3 "${baseDir}/scripts/generate_references.py" -r "${references}" -o references
"""
}
process multiqc {
tag "$name"
tag "${name}"
queue 'super'
publishDir "$outDir/${task.process}/$name", mode: 'copy', pattern: "{multiqc*}"
publishDir "${outDir}/${task.process}/${name}", mode: 'copy', pattern: "{multiqc*}"
module 'multiqc/1.7'
input:
file bqc name "bqc/?/*" from bqcPaths.collect()
file fqc name "fqc/*" from fqcPaths.collect()
file yamlPaths
file bqc name "bqc/?/*" from bqcPaths.collect()
file fqc name "fqc/*" from fqcPaths.collect()
file yamlPaths
output:
file("multiqc_report.html") into mqcPaths
file("multiqc_report.html") into mqcPaths
script:
"""
hostname
ulimit -a
multiqc -c $multiqcConf .
"""
// Task script: log the host and resource limits, then run MultiQC over the task
// directory. The config path is double-quoted so a path containing whitespace
// is passed to -c as one argument.
"""
hostname
ulimit -a
multiqc -c "${multiqcConf}" .
"""
}
......@@ -35,7 +35,7 @@ def get_args():
def check_design_headers(design):
'''Check if design file conforms to sequencing type.'''
'''Check if design file has correct headers.'''
# Default headers
design_template = [
......
#!/bin/bash
#countDesign.sh
fastqs=`ls *.fastq.gz`;
design=`ls *.csv`;
sample=`cat $design | tail -n +2 | cut -d ',' -f2`;
fastqs=$(ls *.fastq.gz)
design=$(ls *.csv)
sample=$(cat ${design} | tail -n +2 | cut -d ',' -f2)
for i in ${fastqs};
do
if [[ ${i} == *_S0_* ]];
then
continue;
elif [[ ${i} == *_I* ]];
then
continue;
else
good=( "${good[@]}" "${i}" );
fi;
# Skip files whose name contains _S0_ or _I; every other fastq is kept.
if [[ ${i} == *_S0_* || ${i} == *_I* ]]; then
  continue
fi
# Quoted append: the names already collected are not word-split or glob-expanded
# again on every iteration, which the unquoted array rebuild did.
good+=("${i}")
done
echo "Sample,fastq_R1,fastq_R2" > Cellranger_Count_Design.csv;
......
#!/bin/bash
find . -name '*.fastq.gz' | awk '{printf("fastqc \"%s\"\n", $0)}' | parallel -j `grep -c ^processor /proc/cpuinfo` --verbose
find . -name '*.fastq.gz' | awk '{printf("fastqc \"%s\"\n", $0)}' | parallel -j $(grep -c ^processor /proc/cpuinfo) --verbose
#find . -name '*fastqc.*' | xargs -I '{}' mv '{}' ./
#for i in `ls *.fastq.gz`;
#do echo "fastqc ${i}";
......
......@@ -57,7 +57,7 @@ def check_files(files):
software_files = np.array(list(SOFTWARE_REGEX.values()))[:,0]
extra_files = set(files) - set(software_files)
extra_files = set(files) - set(software_files)
if len(extra_files) > 0:
logger.error('Missing regex: %s', list(extra_files))
......
......@@ -8,26 +8,25 @@ usage() {
OPTIND=1
while getopts :t: opt
do
case $opt in
t) tar=$OPTARG;;
esac
case ${opt} in
t) tar=${OPTARG};;
esac
done
shift $(($OPTIND -1));
shift $((${OPTIND} -1))
folder=$(tar -tf $tar | grep -o "^[^/]*/\$");
folder1=`echo "$folder" | tr -d ' '`;
# Archive entries that are exactly "<name>/", i.e. top-level directories.
# ${tar} is quoted so an archive path with whitespace is passed to tar as one argument.
folder=$(tar -tf "${tar}" | grep -o "^[^/]*/\$")
# Same text with every space removed; any difference means a directory name has spaces.
folder1=$(echo "${folder}" | tr -d ' ')
if [ "$folder" != "$folder1" ];
then
echo "Error: Spaces found in BCL Directory Path";
echo "$folder";
exit 21;
fi;
# Abort with exit code 21 when the top-level BCL directory name contains spaces.
if [ "${folder}" != "${folder1}" ]; then
  echo "Error: Spaces found in BCL Directory Path"
  # Quoted so the offending name is printed verbatim; unquoted, the shell would
  # word-split it and collapse the very spaces being reported.
  echo "${folder}"
  exit 21
fi
name=`echo "${tar}" | rev | cut -f1 -d '.' | rev`;
# Text after the last '.' in the archive name (the whole name when there is no '.').
# Parameter expansion replaces the unquoted `echo | rev | cut | rev` pipeline, which
# was subject to word splitting and glob expansion of ${tar}.
name="${tar##*.}"
if [ "${name}" == "gz" ];
then tar -xvf "$tar" -I pigz;
else tar -xvf "$tar";
fi;
# Archives ending in .gz are decompressed through pigz; anything else is handed to tar as is.
# ${tar} is quoted so an archive path with whitespace stays a single argument.
if [ "${name}" == "gz" ]; then
  tar -xvf "${tar}" -I pigz
else
  tar -xvf "${tar}"
fi
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment