main.nf 3.9 KB
Newer Older
Gervaise Henry's avatar
Gervaise Henry committed
1
2
3
4
5
6
#!/usr/bin/env nextflow

// Path to an input file, or a pattern for multiple inputs
// Note - $baseDir is the location of this workflow file main.nf

// Define Input variables
// Default values for the pipeline parameters.

// Run label; used in process tags and in publishDir paths.
params.name = "run"
// Glob of input .tar.gz archives; feeds the tarList and bclCount channels.
params.bcl = "$baseDir/../test_data/*.tar.gz"
// Design CSV handed to scripts/check_design.py by checkDesignFile.
params.designFile = "$baseDir/../test_data/design.csv"
// Root directory for every publishDir in this workflow.
params.outDir = "$baseDir/output"
// Config file passed to `multiqc -c`.
params.multiqcConf = "$baseDir/conf/multiqc_config.yaml"
// Markdown file passed to scripts/generate_references.py via -r.
params.references = "$baseDir/../docs/references.md"
Gervaise Henry's avatar
Gervaise Henry committed
13
14

// Define List of Files

// Channel emitting every path matched by params.bcl; consumed by untarBCL.
tarList = Channel
  .fromPath( params.bcl )

// Number of paths matched by params.bcl; read once at script level to decide
// whether the countDesign process is defined.
bclCount = Channel
  .fromPath( params.bcl )
  .count()
Gervaise Henry's avatar
Gervaise Henry committed
20
21

// Define regular variables
// Plain copies of the parameters, referenced by the processes below.
name = params.name

// Design file as a channel; the run exits with status 1 and an error message
// when params.designFile matches nothing.
designLocation = Channel
  .fromPath(params.designFile)
  .ifEmpty { exit 1, "design file not found: ${params.designFile}" }

outDir = params.outDir
multiqcConf = params.multiqcConf
references = params.references
Gervaise Henry's avatar
Gervaise Henry committed
29

Gervaise Henry's avatar
Gervaise Henry committed
30

Gervaise Henry's avatar
Gervaise Henry committed
31
// Runs scripts/check_design.py on the design file. The task is expected to
// leave design.checked.csv in its work directory; that file is sent on two
// channels: designPaths (read by mkfastq) and designCount (read by countDesign).
process checkDesignFile {
  tag "$name"
  publishDir "$outDir/misc/${task.process}/$name", mode: 'copy'
  module 'python/3.6.1-2-anaconda'

  input:
  file designLocation

  output:
  file("design.checked.csv") into designPaths
  file("design.checked.csv") into designCount

  script:
  // hostname / ulimit -a only record the execution environment in the task log.
  """
  hostname
  ulimit -a
  python3 "$baseDir/scripts/check_design.py" -d "$designLocation"
  """
}


// Runs scripts/untarBCL.sh once per tarball from tarList. Everything left in
// the work directory ("*") is emitted on bclPaths, flattened so that each
// entry becomes its own channel item.
process untarBCL {
  tag "$tar"
  publishDir "$outDir/${task.process}", mode: 'copy'
  module 'pigz/2.4'

  input:
  file tar from tarList

  output:
  file("*") into bclPaths mode flatten

  script:
  // hostname / ulimit -a only record the execution environment in the task log.
  """
  hostname
  ulimit -a
  bash "$baseDir/scripts/untarBCL.sh" -t "$tar"
  """
}

Gervaise Henry's avatar
Gervaise Henry committed
71

Gervaise Henry's avatar
Gervaise Henry committed
72
73
// Runs `cellranger mkfastq` once for each entry collected from bclPaths,
// using the checked design file as the --csv sample sheet.
//
// Outputs:
//   fastqPaths      - FASTQ files under */outs/ (read by fastqc)
//   cellrangerCount - the same FASTQ files (read by countDesign)
//   bqcPaths        - Stats.json under outs/fastq_path/Stats (read by multiqc)
//   bclName         - base name of the processed BCL entry (read by fastqc)
process mkfastq {
  tag "${bcl.baseName}"
  queue '128GB,256GB,256GBv1,384GB'
  publishDir "$outDir/${task.process}", mode: 'copy', pattern: "{*/outs/**/*.fastq.gz}"
  module 'cellranger/3.0.2:bcl2fastq/2.19.1'

  input:
  each bcl from bclPaths.collect()
  file design from designPaths

  output:
  file("**/outs/**/*.fastq.gz") into fastqPaths
  file("**/outs/**/*.fastq.gz") into cellrangerCount
  file("**/outs/fastq_path/Stats/Stats.json") into bqcPaths
  val "${bcl.baseName}" into bclName

  script:
  // \$SLURM_CPUS_ON_NODE is escaped so bash, not Nextflow, expands it at run
  // time; the same value is passed to -r, -p and -w.
  // --csv is quoted like --id and --run so a path with spaces is not split.
  """
  hostname
  ulimit -a
  cellranger mkfastq --id="${bcl.baseName}" --run="$bcl" --csv="$design" -r \$SLURM_CPUS_ON_NODE -p \$SLURM_CPUS_ON_NODE -w \$SLURM_CPUS_ON_NODE
  """
}
Gervaise Henry's avatar
Gervaise Henry committed
95

Gervaise Henry's avatar
Gervaise Henry committed
96

97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
// countDesign is only defined when exactly one path matched params.bcl.
// NOTE(review): bclCount comes from Channel.fromPath(...).count(); confirm that
// reading `.value` here yields the resolved count when this `if` is evaluated.
if (bclCount.value == 1) {
  // Stages every FASTQ file emitted by mkfastq (cellrangerCount, collected into
  // a single item) together with the checked design file, then runs
  // scripts/countDesign.sh. The task is expected to leave
  // Cellranger_Count_Design.csv in its work directory.
  process countDesign {
    tag "$name"
    publishDir "$outDir/misc/${task.process}/$name", mode: 'copy'

    input:
    file fastqs from cellrangerCount.collect()
    file design from designCount

    output:
    file("Cellranger_Count_Design.csv") into CountDesign

    script:
    // The script takes no arguments; presumably it reads the staged files
    // from the work directory - verify against countDesign.sh.
    """
    bash "$baseDir/scripts/countDesign.sh"
    """
  }
}

Gervaise Henry's avatar
Gervaise Henry committed
116

117
// Prefixes each staged FASTQ file with the BCL name, then runs
// scripts/fastqc.sh. Resulting *fastqc.zip archives are emitted on fqcPaths
// (read by multiqc) and are the only files published.
process fastqc {
  tag "$bclName"
  queue 'super'
  publishDir "$outDir/misc/${task.process}/$name/$bclName", mode: 'copy', pattern: "{*fastqc.zip}"
  module 'fastqc/0.11.5:parallel'

  input:
  file fastqPaths
  val bclName

  output:
  file("*fastqc.zip") into fqcPaths

  script:
  // ${bclName} is braced so the interpolation clearly ends before ".{}".
  // \\; renders as \; in the generated bash script, terminating -exec.
  """
  hostname
  ulimit -a
  find *.fastq.gz -exec mv {} ${bclName}.{} \\;
  bash "$baseDir/scripts/fastqc.sh"
  """
}


140
// Records tool versions and references for the report. Writes the Nextflow
// version to version_nextflow.txt, runs the versions_*.sh helper scripts,
// then calls generate_versions.py and generate_references.py. Any *.yaml
// files left in the work directory are emitted on yamlPaths (read by multiqc).
process versions {
  tag "$name"
  publishDir "$outDir/misc/${task.process}/$name", mode: 'copy'
  module 'python/3.6.1-2-anaconda:cellranger/3.0.2:bcl2fastq/2.19.1:fastqc/0.11.5:pandoc/2.7'

  input:

  output:
  file("*.yaml") into yamlPaths

  script:
  // version_*.txt is left unquoted on purpose so bash expands the glob.
  // NOTE(review): presumably the versions_*.sh scripts write the other
  // version_*.txt files - confirm against those scripts.
  """
  hostname
  ulimit -a
  echo $workflow.nextflow.version > version_nextflow.txt
  bash "$baseDir/scripts/versions_mkfastq.sh"
  bash "$baseDir/scripts/versions_fastqc.sh"
  python3 "$baseDir/scripts/generate_versions.py" -f version_*.txt -o versions
  python3 "$baseDir/scripts/generate_references.py" -r "$references" -o references
  """
}

Gervaise Henry's avatar
Gervaise Henry committed
162

Gervaise Henry's avatar
Gervaise Henry committed
163
// Builds the MultiQC report from everything staged in the work directory.
//
// Inputs:
//   bqcPaths  - all Stats.json files from mkfastq, staged as bqc/<n>/<file>
//   fqcPaths  - all FastQC archives from fastqc, staged under fqc/
//   yamlPaths - YAML files from the versions process
// Output:
//   multiqc_report.html on mqcPaths; files matching multiqc* are published.
process multiqc {
  tag "$name"
  queue 'super'
  publishDir "$outDir/${task.process}/$name", mode: 'copy', pattern: "{multiqc*}"
  module 'multiqc/1.7'

  input:
  file bqc name "bqc/?/*" from bqcPaths.collect()
  file fqc name "fqc/*" from fqcPaths.collect()
  file yamlPaths

  output:
  file("multiqc_report.html") into mqcPaths

  script:
  // The config path is quoted so a path containing spaces is not word-split.
  """
  hostname
  ulimit -a
  multiqc -c "$multiqcConf" .
  """
}