Merge branch '24-CheckStyle' into 'develop'

fixed style Closes #24 See merge request !39

Merge branch '24-CheckStyle' into 'develop'
fixed style Closes #24 See merge request !39
ec49d090 · Gervaise Henry · 5a111ac2 · a830a0c9 · ec49d090 · ec49d090
Commit ec49d090 authored 5 years ago by Gervaise Henry
--- a/README.md
+++ b/README.md
@@ -54,3 +54,10 @@ To Run:


 [**CHANGELOG**](https://git.biohpc.swmed.edu/BICF/Astrocyte/cellranger_mkfastq/blob/develop/CHANGELOG.md)
+
+Credits
+-------
+This workflow was developed jointly with the [Bioinformatics Core Facility (BICF), Department of Bioinformatics](http://www.utsouthwestern.edu/labs/bioinformatics/).
+
+
+Please cite in publications: Pipeline was developed by BICF from funding provided by **Cancer Prevention and Research Institute of Texas (RP150596)**.
--- a/astrocyte_pkg.yml
+++ b/astrocyte_pkg.yml
@@ -9,7 +9,7 @@
 # A unique identifier for the workflow package, text/underscores only
 name: 'cellranger_mkfastq'
 # Who wrote this?
-author: 'Gervaise H. Henr, Jon Gesell, Jeremy Mathews, and Venkat Malladi'
+author: 'Gervaise H. Henry, Jon Gesell, Jeremy Mathews, and Venkat Malladi'
 # A contact email address for questions
 email: 'bicf@utsouthwestern.edu'
 # A more informative title for the workflow package

--- a/docs/references.md
+++ b/docs/references.md
@@ -4,7 +4,7 @@
  * Anaconda (Anaconda Software Distribution, [https://anaconda.com](https://anaconda.com))

 2. **pigz**:
-  * parallel implementation of gzip [https://zlib.net/pigz/](https://zlib.net/pigz/)
+  * Parallel implementation of gzip [https://zlib.net/pigz/](https://zlib.net/pigz/)

 3. **bcl2fastq**:
  * Illumina's bcl2fastq [https://support.illumina.com/sequencing/sequencing_software/bcl2fastq-conversion-software.html](https://support.illumina.com/sequencing/sequencing_software/bcl2fastq-conversion-software.html)
@@ -17,3 +17,6 @@

 5. **MultiQC**:
  * Ewels P., Magnusson M., Lundin S. and Käller M. 2016. MultiQC: Summarize analysis results for multiple tools and samples in a single report. Bioinformatics 32(19): 3047–3048. doi:[10.1093/bioinformatics/btw354](https://dx.doi.org/10.1093/bioinformatics/btw354)
+
+6. **Nextflow**:
+  * Di Tommaso P., Chatzou M., Floden E. W., Barja P. P., Palumbo E., and Notredame C. 2017. Nextflow enables reproducible computational workflows. Nature Biotechnology 35(4): 316. doi:[10.1038/nbt.3820](https://doi.org/10.1038/nbt.3820)
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -3,13 +3,15 @@
 // Path to an input file, or a pattern for multiple inputs
 // Note - $baseDir is the location of this workflow file main.nf

+
 // Define Input variables
 params.name = "run"
-params.bcl = "$baseDir/../test_data/*.tar.gz"
-params.designFile = "$baseDir/../test_data/design.csv"
-params.outDir = "$baseDir/output"
-params.multiqcConf = "$baseDir/conf/multiqc_config.yaml"
-params.references = "$baseDir/../docs/references.md"
+params.bcl = "${baseDir}/../test_data/*.tar.gz"
+params.designFile = "${baseDir}/../test_data/design.csv"
+params.outDir = "${baseDir}/output"
+params.multiqcConf = "${baseDir}/conf/multiqc_config.yaml"
+params.references = "${baseDir}/../docs/references.md"
+

 // Define List of Files
 tarList = Channel
@@ -18,6 +20,7 @@ bclCount = Channel
  .fromPath( params.bcl )
  .count()

+
 // Define regular variables
 name = params.name
 designLocation = Channel
@@ -29,155 +32,171 @@ references = params.references


 process checkDesignFile {
-  tag "$name"
-  publishDir "$outDir/misc/${task.process}/$name", mode: 'copy'
+
+  tag "${name}"
+  publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'
  module 'python/3.6.1-2-anaconda'

  input:
-  file designLocation
+    file designLocation

  output:
-  file("design.checked.csv") into designPaths
-  file("design.checked.csv") into designCount
+    file("design.checked.csv") into designPaths
+    file("design.checked.csv") into designCount

  script:
-  """
-  hostname
-  ulimit -a
-  python3 "$baseDir/scripts/check_design.py" -d "$designLocation"
-  """
+    """
+    hostname
+    ulimit -a
+    python3 ${baseDir}/scripts/check_design.py -d ${designLocation}
+    """
+
 }


 process untarBCL {
-  tag "$tar"
-  publishDir "$outDir/${task.process}", mode: 'copy'
+
+  tag "${tar}"
+  publishDir "${outDir}/${task.process}", mode: 'copy'
  module 'pigz/2.4'

  input:
-  file tar from tarList
+    file tar from tarList

  output:
-  file("*") into bclPaths mode flatten
+    file("*") into bclPaths mode flatten

  script:
-  """
-  hostname
-  ulimit -a
-  bash "$baseDir/scripts/untarBCL.sh" -t "$tar"
-  """
+    """
+    hostname
+    ulimit -a
+    bash ${baseDir}/scripts/untarBCL.sh -t ${tar}
+    """
+
 }


 process mkfastq {
+
  tag "${bcl.baseName}"
  queue '128GB,256GB,256GBv1,384GB'
-  publishDir "$outDir/${task.process}", mode: 'copy', pattern: "{*/outs/**/*.fastq.gz}"
+  publishDir "${outDir}/${task.process}", mode: 'copy', pattern: "{*/outs/**/*.fastq.gz}"
  module 'cellranger/3.0.2:bcl2fastq/2.19.1'

  input:
-  each bcl from bclPaths.collect()
-  file design from designPaths
+    each bcl from bclPaths.collect()
+    file design from designPaths

  output:
-  file("**/outs/**/*.fastq.gz") into fastqPaths
-  file("**/outs/**/*.fastq.gz") into cellrangerCount
-  file("**/outs/fastq_path/Stats/Stats.json") into bqcPaths
-  val "${bcl.baseName}" into bclName
+    file("**/outs/**/*.fastq.gz") into fastqPaths
+    file("**/outs/**/*.fastq.gz") into cellrangerCount
+    file("**/outs/fastq_path/Stats/Stats.json") into bqcPaths
+    val "${bcl.baseName}" into bclName

  script:
-  """
-  hostname
-  ulimit -a  
-  cellranger mkfastq --id="${bcl.baseName}" --run="$bcl" --csv=$design -r \$SLURM_CPUS_ON_NODE  -p \$SLURM_CPUS_ON_NODE  -w \$SLURM_CPUS_ON_NODE 
-  """
+    """
+    hostname
+    ulimit -a  
+    cellranger mkfastq --id=${bcl.baseName} --run=${bcl} --csv=${design} -r \$SLURM_CPUS_ON_NODE  -p \$SLURM_CPUS_ON_NODE  -w \$SLURM_CPUS_ON_NODE 
+    """
+
 }


 if (bclCount.value == 1) {
+
  process countDesign {
-    tag "$name"
-    publishDir "$outDir/misc/${task.process}/$name", mode: 'copy'
+
+    tag "${name}"
+    publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'

    input:
-    file fastqs from cellrangerCount.collect()
-    file design from designCount
+      file fastqs from cellrangerCount.collect()
+      file design from designCount

    output:
-    file("Cellranger_Count_Design.csv") into CountDesign
+      file("Cellranger_Count_Design.csv") into CountDesign

    script:
-    """
-    bash "$baseDir/scripts/countDesign.sh"
-    """
+      """
+      bash ${baseDir}/scripts/countDesign.sh
+      """
+
  }
+
 }


 process fastqc {
-  tag "$bclName"
+
+  tag "${bclName}"
  queue 'super'
-  publishDir "$outDir/misc/${task.process}/$name/$bclName", mode: 'copy', pattern: "{*fastqc.zip}"
+  publishDir "${outDir}/misc/${task.process}/${name}/${bclName}", mode: 'copy', pattern: "{*fastqc.zip}"
  module 'fastqc/0.11.5:parallel'

  input:
-  file fastqPaths
-  val bclName
+    file fastqPaths
+    val bclName

  output:
-  file("*fastqc.zip") into fqcPaths
+    file("*fastqc.zip") into fqcPaths

  script:
-  """
-  hostname
-  ulimit -a
-  find *.fastq.gz -exec mv {} $bclName.{} \\;
-  bash "$baseDir/scripts/fastqc.sh"
-  """
+    """
+    hostname
+    ulimit -a
+    find *.fastq.gz -exec mv {} ${bclName}.{} \\;
+    bash ${baseDir}/scripts/fastqc.sh
+    """
+
 }


 process versions {
-  tag "$name"
-  publishDir "$outDir/misc/${task.process}/$name", mode: 'copy'
+
+  tag "${name}"
+  publishDir "${outDir}/misc/${task.process}/${name}", mode: 'copy'
  module 'python/3.6.1-2-anaconda:cellranger/3.0.2:bcl2fastq/2.19.1:fastqc/0.11.5:pandoc/2.7'

  input:

  output:
-  file("*.yaml") into yamlPaths
+    file("*.yaml") into yamlPaths

  script:
-  """
-  hostname
-  ulimit -a
-  echo $workflow.nextflow.version > version_nextflow.txt
-  bash "$baseDir/scripts/versions_mkfastq.sh"
-  bash "$baseDir/scripts/versions_fastqc.sh"
-  python3 "$baseDir/scripts/generate_versions.py" -f version_*.txt -o versions
-  python3 "$baseDir/scripts/generate_references.py" -r "$references" -o references
-  """
+    """
+    hostname
+    ulimit -a
+    echo ${workflow.nextflow.version} > version_nextflow.txt
+    bash ${baseDir}/scripts/versions_mkfastq.sh
+    bash ${baseDir}/scripts/versions_fastqc.sh
+    python3 ${baseDir}/scripts/generate_versions.py -f version_*.txt -o versions
+    python3 ${baseDir}/scripts/generate_references.py -r ${references} -o references
+    """
+
 }


 process multiqc {
-  tag "$name"
+
+  tag "${name}"
  queue 'super'
-  publishDir "$outDir/${task.process}/$name", mode: 'copy', pattern: "{multiqc*}"
+  publishDir "${outDir}/${task.process}/${name}", mode: 'copy', pattern: "{multiqc*}"
  module 'multiqc/1.7'

  input:
-  file bqc name "bqc/?/*" from bqcPaths.collect()
-  file fqc name "fqc/*" from fqcPaths.collect()
-  file yamlPaths
+    file bqc name "bqc/?/*" from bqcPaths.collect()
+    file fqc name "fqc/*" from fqcPaths.collect()
+    file yamlPaths

  output:
-  file("multiqc_report.html") into mqcPaths
+    file("multiqc_report.html") into mqcPaths

  script:
-  """
-  hostname
-  ulimit -a
-  multiqc -c $multiqcConf .
-  """
+    """
+    hostname
+    ulimit -a
+    multiqc -c ${multiqcConf} .
+    """
+
 }
--- a/workflow/scripts/check_design.py
+++ b/workflow/scripts/check_design.py
@@ -35,7 +35,7 @@ def get_args():


 def check_design_headers(design):
-    '''Check if design file conforms to sequencing type.'''
+    '''Check if design file has correct headers.'''

    # Default headers
    design_template = [

--- a/workflow/scripts/countDesign.sh
+++ b/workflow/scripts/countDesign.sh
 #!/bin/bash
 #countDesign.sh

-fastqs=`ls *.fastq.gz`;
-design=`ls *.csv`;
-sample=`cat $design | tail -n +2 | cut -d ',' -f2`;
+fastqs=$(ls *.fastq.gz)
+design=$(ls *.csv)
+sample=$(cat ${design} | tail -n +2 | cut -d ',' -f2)

 for i in ${fastqs};
 do
-   if [[ ${i} == *_S0_* ]];
-   then
-	continue;
-   elif [[ ${i} == *_I* ]];
-   then
-	continue;
-   else
-	good=( "${good[@]}" "${i}" );
-   fi;
+  if [[ ${i} == *_S0_* ]]; then
+    continue
+  elif [[ ${i} == *_I* ]]; then
+    continue
+  else
+    good=(${good[@]} ${i})
+  fi
 done

 echo "Sample,fastq_R1,fastq_R2" > Cellranger_Count_Design.csv;

--- a/workflow/scripts/fastqc.sh
+++ b/workflow/scripts/fastqc.sh
 #!/bin/bash

-find . -name '*.fastq.gz' | awk '{printf("fastqc \"%s\"\n", $0)}' | parallel -j `grep -c ^processor /proc/cpuinfo` --verbose
+find . -name '*.fastq.gz' | awk '{printf("fastqc \"%s\"\n", $0)}' | parallel -j $(grep -c ^processor /proc/cpuinfo) --verbose
 #find . -name '*fastqc.*' | xargs -I '{}' mv '{}' ./ 
 #for i in `ls *.fastq.gz`;
 #do echo "fastqc ${i}";

--- a/workflow/scripts/generate_versions.py
+++ b/workflow/scripts/generate_versions.py
@@ -57,7 +57,7 @@ def check_files(files):

    software_files = np.array(list(SOFTWARE_REGEX.values()))[:,0]

-    extra_files =  set(files) - set(software_files)
+    extra_files = set(files) - set(software_files)

    if len(extra_files) > 0:
            logger.error('Missing regex: %s', list(extra_files))

--- a/workflow/scripts/untarBCL.sh
+++ b/workflow/scripts/untarBCL.sh
@@ -8,26 +8,25 @@ usage() {
 OPTIND=1
 while getopts :t: opt
 do
-   case $opt in
-	t) tar=$OPTARG;;
-   esac
+  case ${opt} in
+	t) tar=${OPTARG};;
+  esac
 done

-shift $(($OPTIND -1));
+shift $((${OPTIND} -1))

-folder=$(tar -tf $tar | grep -o "^[^/]*/\$");
-folder1=`echo "$folder" | tr -d ' '`;
+folder=$(tar -tf ${tar} | grep -o "^[^/]*/\$")
+folder1=$(echo "$folder" | tr -d ' ')

-if [ "$folder" != "$folder1" ];
-then
-   echo "Error: Spaces found in BCL Directory Path";
-   echo "$folder";
-   exit 21;
-fi;
+if [ "${folder}" != "${folder1}" ]; then
+  echo "Error: Spaces found in BCL Directory Path"
+  echo ${folder}
+  exit 21
+fi

-name=`echo "${tar}" | rev | cut -f1 -d '.' | rev`;
+name=$(echo ${tar} | rev | cut -f1 -d '.' | rev)

-if [ "${name}" == "gz" ];
-   then tar -xvf "$tar" -I pigz;
-   else tar -xvf "$tar";
-fi;
+if [ "${name}" == "gz" ]; then 
+  tar -xvf ${tar} -I pigz
+  else tar -xvf ${tar}
+fi