Commit e61e9861 authored by Brandi Cantarel's avatar Brandi Cantarel

update process_scripts

parent 1ac0c0fc
......@@ -168,12 +168,12 @@ workflow_parameters:
- id: geneset
type: select
choices:
- ['h.all.v5.1.symbols.gmt','Hallmark Gene Sets']
- ['h.all.v6.2.symbols.gmt','Hallmark Gene Sets']
- ['c2.all.v5.1.symbols.gmt','Curated Gene Sets']
- ['c3.all.v5.1.symbols.gmt','Motif Gene Sets']
- ['c5.all.v5.1.entrez.gmt','Gene Ontology Gene Sets']
- ['c5.all.v6.2.symbols.gmt','Gene Ontology Gene Sets']
- ['c6.all.v5.1.symbols.gmt','Oncogenic Signatures']
- ['c7.all.v5.1.entrez.gmt','Immunological Signatures']
- ['c7.all.v5.1.symbols.gmt','Immunological Signatures']
required: true
description: |
......
......@@ -59,34 +59,31 @@ fastqs
if (params.pairs == 'pe') {
spltnames
.splitCsv()
.filter { fileMap.get(it[1]) != null & fileMap.get(it[2]) != null }
.map { it -> tuple(it[0], fileMap.get(it[1]), fileMap.get(it[2])) }
.set { read }
.splitCsv()
.filter { fileMap.get(it[1]) != null & fileMap.get(it[2]) != null }
.map { it -> tuple(it[0], fileMap.get(it[1]), fileMap.get(it[2])) }
.set { read }
} else {
spltnames
.splitCsv()
.filter { fileMap.get(it[1]) != null }
.map { it -> tuple(it[0], fileMap.get(it[1]),'') }
.set { read }
.splitCsv()
.filter { fileMap.get(it[1]) != null }
.map { it -> tuple(it[0], fileMap.get(it[1]),'') }
.set { read }
}
if( ! read ) { error "Didn't match any input files with entries in the design file" }
// Trim raw reads using trimgalore
process trim {
errorStrategy 'ignore'
input:
set pair_id, file(read1), file(read2) from read
output:
set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into trimread
set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into fusionfq
script:
"""
bash $baseDir/process_scripts/preproc_fastq/trimgalore.sh -p ${pair_id} -a ${read1} -b ${read2}
"""
errorStrategy 'ignore'
input:
set pair_id, file(read1), file(read2) from read
output:
set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into trimread
set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into fusionfq
script:
"""
bash $baseDir/process_scripts/preproc_fastq/trimgalore.sh -p ${pair_id} -a ${read1} -b ${read2}
"""
}
// Align trimmed reads to genome indes with hisat2
......@@ -95,157 +92,116 @@ process trim {
// Alignment stats with samtools
process starfusion {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(fq1), file(fq2) from fusionfq
output:
file("${pair_id}.starfusion.txt") into fusionout
when:
params.fusion == 'detect' && params.pairs == 'pe'
script:
"""
bash $baseDir/process_scripts/alignment/starfusion.sh -p ${pair_id} -r ${index_path} -a ${fq1} -b ${fq2} -m trinity -f
"""
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(fq1), file(fq2) from fusionfq
output:
file("${pair_id}.starfusion.txt") into fusionout
when:
params.fusion == 'detect' && params.pairs == 'pe'
script:
"""
bash $baseDir/process_scripts/alignment/starfusion.sh -p ${pair_id} -r ${index_path} -a ${fq1} -b ${fq2} -m trinity -f
"""
}
process align {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(fq1), file(fq2) from trimread
output:
set pair_id, file("${pair_id}.bam") into aligned
set pair_id, file("${pair_id}.bam") into aligned2
file("${pair_id}.alignerout.txt") into hsatout
script:
"""
bash $baseDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} -x ${fq1} -y ${fq2}
"""
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(fq1), file(fq2) from trimread
output:
set pair_id, file("${pair_id}.bam") into aligned
set pair_id, file("${pair_id}.bam") into aligned2
file("${pair_id}.alignerout.txt") into hsatout
script:
"""
bash $baseDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} -x ${fq1} -y ${fq2}
"""
}
process alignqc {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(bam) from aligned2
output:
file("${pair_id}.flagstat.txt") into alignstats
set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqc
script:
"""
bash $baseDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna
"""
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(bam) from aligned2
output:
file("${pair_id}.flagstat.txt") into alignstats
set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqc
script:
"""
bash $baseDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna
"""
}
// Summarize all flagstat output
process parse_alignstat {
publishDir "$params.output", mode: 'copy'
input:
file(txt) from alignstats.toList()
file(txt) from hsatout.toList()
output:
file('alignment.summary.txt')
script:
"""
perl $baseDir/scripts/parse_flagstat.pl *.flagstat.txt
"""
publishDir "$params.output", mode: 'copy'
input:
file(txt) from alignstats.toList()
file(txt) from hsatout.toList()
output:
file('alignment.summary.txt')
script:
"""
perl $baseDir/scripts/parse_flagstat.pl *.flagstat.txt
"""
}
// Identify duplicate reads with Picard
process markdups {
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(sbam) from aligned
output:
set pair_id, file("${pair_id}.dedup.bam") into deduped1
set pair_id, file("${pair_id}.dedup.bam") into deduped2
script:
"""
bash $baseDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id
"""
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(sbam) from aligned
output:
set pair_id, file("${pair_id}.dedup.bam") into deduped1
set pair_id, file("${pair_id}.dedup.bam") into deduped2
script:
"""
bash $baseDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id
"""
}
// Read summarization with subread
// Assemble transcripts with stringtie
process geneabund {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(sbam) from deduped1
output:
file("${pair_id}.cts") into counts
file("${pair_id}.cts.summary") into ctsum
file("${pair_id}_stringtie") into strcts
file("${pair_id}.fpkm.txt") into fpkm
script:
"""
bash $baseDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam}
"""
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(sbam) from deduped1
output:
file("${pair_id}.cts") into counts
file("${pair_id}.cts.summary") into ctsum
file("${pair_id}_stringtie") into strcts
file("${pair_id}.fpkm.txt") into fpkm
script:
"""
bash $baseDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam}
"""
}
process statanal {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
file count_file from counts.toList()
file count_sum from ctsum.toList()
file newdesign name 'design.txt'
file genenames
file geneset name 'geneset.gmt'
file fpkm_file from fpkm.toList()
file stringtie_dir from strcts.toList()
output:
file "*.txt" into txtfiles
file "*.png" into psfiles
file("*.rda") into rdafiles
file("geneset.shiny.gmt") into gmtfile
script:
"""
bash $baseDir/process_scripts/genect_rnaseq/statanal.sh -d $params.dea
"""
}
process gatkbam {
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
set pair_id, file(rbam) from deduped2
output:
set file("${pair_id}.final.bam"),file("${pair_id}.final.bai") into gatkbam
when:
params.align == 'hisat' && $index_path == '/project/shared/bicf_workflow_ref/GRCh38/'
script:
"""
bash $baseDir/process_scripts/variants/gatkrunner.sh -a gatkbam_rna -b $rbam -r ${index_path}/hisat_index -p $pair_id
"""
errorStrategy 'ignore'
publishDir "$params.output", mode: 'copy'
input:
file count_file from counts.toList()
file count_sum from ctsum.toList()
file newdesign name 'design.txt'
file genenames
file geneset name 'geneset.gmt'
file fpkm_file from fpkm.toList()
file stringtie_dir from strcts.toList()
output:
file "*.txt" into txtfiles
file "*.png" into psfiles
file("*.rda") into rdafiles
file("geneset.shiny.gmt") into gmtfile
script:
"""
bash $baseDir/process_scripts/genect_rnaseq/statanal.sh -d $params.dea
"""
}
Subproject commit 12ef61633f4a6008cba307bb4371845163987371
Subproject commit c2e0f1fac2b0a90fd2d87d2aba9d885855d22272
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment