From e61e9861322989830ea05819e08d866c4cfaa4ac Mon Sep 17 00:00:00 2001 From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu> Date: Fri, 22 May 2020 14:00:03 -0500 Subject: [PATCH] update process_scripts --- astrocyte_pkg.yml | 6 +- workflow/main.nf | 254 ++++++++++++++++----------------------- workflow/process_scripts | 2 +- 3 files changed, 109 insertions(+), 153 deletions(-) diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml index afd00cb..1b88f58 100644 --- a/astrocyte_pkg.yml +++ b/astrocyte_pkg.yml @@ -168,12 +168,12 @@ workflow_parameters: - id: geneset type: select choices: - - ['h.all.v5.1.symbols.gmt','Hallmark Gene Sets'] + - ['h.all.v6.2.symbols.gmt','Hallmark Gene Sets'] - ['c2.all.v5.1.symbols.gmt','Curated Gene Sets'] - ['c3.all.v5.1.symbols.gmt','Motif Gene Sets'] - - ['c5.all.v5.1.entrez.gmt','Gene Ontology Gene Sets'] + - ['c5.all.v6.2.symbols.gmt','Gene Ontology Gene Sets'] - ['c6.all.v5.1.symbols.gmt','Oncogenic Signatures'] - - ['c7.all.v5.1.entrez.gmt','Immunological Signatures'] + - ['c7.all.v5.1.symbols.gmt','Immunological Signatures'] required: true description: | diff --git a/workflow/main.nf b/workflow/main.nf index 1d4104c..c276a1e 100644 --- a/workflow/main.nf +++ b/workflow/main.nf @@ -59,34 +59,31 @@ fastqs if (params.pairs == 'pe') { spltnames - .splitCsv() - .filter { fileMap.get(it[1]) != null & fileMap.get(it[2]) != null } - .map { it -> tuple(it[0], fileMap.get(it[1]), fileMap.get(it[2])) } - .set { read } + .splitCsv() + .filter { fileMap.get(it[1]) != null & fileMap.get(it[2]) != null } + .map { it -> tuple(it[0], fileMap.get(it[1]), fileMap.get(it[2])) } + .set { read } } else { spltnames - .splitCsv() - .filter { fileMap.get(it[1]) != null } - .map { it -> tuple(it[0], fileMap.get(it[1]),'') } - .set { read } + .splitCsv() + .filter { fileMap.get(it[1]) != null } + .map { it -> tuple(it[0], fileMap.get(it[1]),'') } + .set { read } } if( ! read ) { error "Didn't match any input files with entries in the design file" } // Trim raw reads using trimgalore process trim { - errorStrategy 'ignore' - - input: - set pair_id, file(read1), file(read2) from read - - output: - set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into trimread - set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into fusionfq - - script: - """ - bash $baseDir/process_scripts/preproc_fastq/trimgalore.sh -p ${pair_id} -a ${read1} -b ${read2} - """ + errorStrategy 'ignore' + input: + set pair_id, file(read1), file(read2) from read + output: + set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into trimread + set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into fusionfq + script: + """ + bash $baseDir/process_scripts/preproc_fastq/trimgalore.sh -p ${pair_id} -a ${read1} -b ${read2} + """ } // Align trimmed reads to genome indes with hisat2 @@ -95,157 +92,116 @@ process trim { // Alignment stats with samtools process starfusion { - errorStrategy 'ignore' - publishDir "$params.output", mode: 'copy' - - input: - set pair_id, file(fq1), file(fq2) from fusionfq - - output: - file("${pair_id}.starfusion.txt") into fusionout - - when: - params.fusion == 'detect' && params.pairs == 'pe' - - script: - """ - bash $baseDir/process_scripts/alignment/starfusion.sh -p ${pair_id} -r ${index_path} -a ${fq1} -b ${fq2} -m trinity -f - """ + errorStrategy 'ignore' + publishDir "$params.output", mode: 'copy' + input: + set pair_id, file(fq1), file(fq2) from fusionfq + output: + file("${pair_id}.starfusion.txt") into fusionout + when: + params.fusion == 'detect' && params.pairs == 'pe' + script: + """ + bash $baseDir/process_scripts/alignment/starfusion.sh -p ${pair_id} -r ${index_path} -a ${fq1} -b ${fq2} -m trinity -f + """ } process align { - errorStrategy 'ignore' - publishDir "$params.output", mode: 'copy' - - input: - set pair_id, file(fq1), file(fq2) from trimread - - output: - set pair_id, file("${pair_id}.bam") into aligned - set pair_id, file("${pair_id}.bam") into aligned2 - file("${pair_id}.alignerout.txt") into hsatout - - script: - """ - bash $baseDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} -x ${fq1} -y ${fq2} - """ + errorStrategy 'ignore' + publishDir "$params.output", mode: 'copy' + input: + set pair_id, file(fq1), file(fq2) from trimread + output: + set pair_id, file("${pair_id}.bam") into aligned + set pair_id, file("${pair_id}.bam") into aligned2 + file("${pair_id}.alignerout.txt") into hsatout + script: + """ + bash $baseDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} -x ${fq1} -y ${fq2} + """ } process alignqc { - errorStrategy 'ignore' - publishDir "$params.output", mode: 'copy' - - input: - set pair_id, file(bam) from aligned2 - - output: - file("${pair_id}.flagstat.txt") into alignstats - set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqc - - script: - """ - bash $baseDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna - """ + errorStrategy 'ignore' + publishDir "$params.output", mode: 'copy' + input: + set pair_id, file(bam) from aligned2 + output: + file("${pair_id}.flagstat.txt") into alignstats + set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqc + script: + """ + bash $baseDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna + """ } // Summarize all flagstat output process parse_alignstat { - publishDir "$params.output", mode: 'copy' - - input: - file(txt) from alignstats.toList() - file(txt) from hsatout.toList() - - output: - file('alignment.summary.txt') - - script: - """ - perl $baseDir/scripts/parse_flagstat.pl *.flagstat.txt - """ + publishDir "$params.output", mode: 'copy' + input: + file(txt) from alignstats.toList() + file(txt) from hsatout.toList() + output: + file('alignment.summary.txt') + script: + """ + perl $baseDir/scripts/parse_flagstat.pl *.flagstat.txt + """ } // Identify duplicate reads with Picard process markdups { - publishDir "$params.output", mode: 'copy' - - input: - set pair_id, file(sbam) from aligned - - output: - set pair_id, file("${pair_id}.dedup.bam") into deduped1 - set pair_id, file("${pair_id}.dedup.bam") into deduped2 - - script: - """ - bash $baseDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id - """ + publishDir "$params.output", mode: 'copy' + input: + set pair_id, file(sbam) from aligned + output: + set pair_id, file("${pair_id}.dedup.bam") into deduped1 + set pair_id, file("${pair_id}.dedup.bam") into deduped2 + script: + """ + bash $baseDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id + """ } // Read summarization with subread // Assemble transcripts with stringtie process geneabund { - errorStrategy 'ignore' - publishDir "$params.output", mode: 'copy' - - input: - set pair_id, file(sbam) from deduped1 - - output: - file("${pair_id}.cts") into counts - file("${pair_id}.cts.summary") into ctsum - file("${pair_id}_stringtie") into strcts - file("${pair_id}.fpkm.txt") into fpkm - - script: - """ - bash $baseDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam} - """ + errorStrategy 'ignore' + publishDir "$params.output", mode: 'copy' + input: + set pair_id, file(sbam) from deduped1 + output: + file("${pair_id}.cts") into counts + file("${pair_id}.cts.summary") into ctsum + file("${pair_id}_stringtie") into strcts + file("${pair_id}.fpkm.txt") into fpkm + script: + """ + bash $baseDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam} + """ } process statanal { - errorStrategy 'ignore' - publishDir "$params.output", mode: 'copy' - - input: - file count_file from counts.toList() - file count_sum from ctsum.toList() - file newdesign name 'design.txt' - file genenames - file geneset name 'geneset.gmt' - file fpkm_file from fpkm.toList() - file stringtie_dir from strcts.toList() - - output: - file "*.txt" into txtfiles - file "*.png" into psfiles - file("*.rda") into rdafiles - file("geneset.shiny.gmt") into gmtfile - - script: - """ - bash $baseDir/process_scripts/genect_rnaseq/statanal.sh -d $params.dea - """ -} - -process gatkbam { - errorStrategy 'ignore' - publishDir "$params.output", mode: 'copy' - - input: - set pair_id, file(rbam) from deduped2 - - output: - set file("${pair_id}.final.bam"),file("${pair_id}.final.bai") into gatkbam - - when: - params.align == 'hisat' && $index_path == '/project/shared/bicf_workflow_ref/GRCh38/' - - script: - """ - bash $baseDir/process_scripts/variants/gatkrunner.sh -a gatkbam_rna -b $rbam -r ${index_path}/hisat_index -p $pair_id - """ + errorStrategy 'ignore' + publishDir "$params.output", mode: 'copy' + input: + file count_file from counts.toList() + file count_sum from ctsum.toList() + file newdesign name 'design.txt' + file genenames + file geneset name 'geneset.gmt' + file fpkm_file from fpkm.toList() + file stringtie_dir from strcts.toList() + output: + file "*.txt" into txtfiles + file "*.png" into psfiles + file("*.rda") into rdafiles + file("geneset.shiny.gmt") into gmtfile + script: + """ + bash $baseDir/process_scripts/genect_rnaseq/statanal.sh -d $params.dea + """ } diff --git a/workflow/process_scripts b/workflow/process_scripts index 12ef616..c2e0f1f 160000 --- a/workflow/process_scripts +++ b/workflow/process_scripts @@ -1 +1 @@ -Subproject commit 12ef61633f4a6008cba307bb4371845163987371 +Subproject commit c2e0f1fac2b0a90fd2d87d2aba9d885855d22272 -- GitLab