From e61e9861322989830ea05819e08d866c4cfaa4ac Mon Sep 17 00:00:00 2001
From: Brandi Cantarel <brandi.cantarel@utsouthwestern.edu>
Date: Fri, 22 May 2020 14:00:03 -0500
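Subject: [PATCH] Update process_scripts, reformat main.nf, refresh gene sets

* Bump the workflow/process_scripts submodule from 12ef616 to c2e0f1f.
* Re-indent workflow/main.nf with two spaces and drop the blank lines
  inside the process definitions; the process bodies are otherwise unchanged.
* Remove the gatkbam process from workflow/main.nf.
* astrocyte_pkg.yml: point the Hallmark and Gene Ontology choices at the
  v6.2 files, and switch the Gene Ontology and Immunological choices from
  the entrez files to the symbols files.
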
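---
Reviewer notes (not part of the commit message):
* With gatkbam removed, the deduped2 channel emitted by markdups has no
  consumer in the lines shown in this patch; confirm whether it is still
  needed.
* The geneset choices now mix v6.2 files (Hallmark, Gene Ontology) with
  v5.1 files (Curated, Motif, Oncogenic, Immunological); confirm that this
  is intended and that each listed file exists in the reference directory.
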
 astrocyte_pkg.yml        |   6 +-
 workflow/main.nf         | 254 ++++++++++++++++-----------------------
 workflow/process_scripts |   2 +-
 3 files changed, 109 insertions(+), 153 deletions(-)

diff --git a/astrocyte_pkg.yml b/astrocyte_pkg.yml
index afd00cb..1b88f58 100644
--- a/astrocyte_pkg.yml
+++ b/astrocyte_pkg.yml
@@ -168,12 +168,12 @@ workflow_parameters:
   - id: geneset
     type: select
     choices:
-      - ['h.all.v5.1.symbols.gmt','Hallmark Gene Sets']
+      - ['h.all.v6.2.symbols.gmt','Hallmark Gene Sets']
       - ['c2.all.v5.1.symbols.gmt','Curated Gene Sets']
       - ['c3.all.v5.1.symbols.gmt','Motif Gene Sets']
-      - ['c5.all.v5.1.entrez.gmt','Gene Ontology Gene Sets']
+      - ['c5.all.v6.2.symbols.gmt','Gene Ontology Gene Sets']
       - ['c6.all.v5.1.symbols.gmt','Oncogenic Signatures']
-      - ['c7.all.v5.1.entrez.gmt','Immunological Signatures']
+      - ['c7.all.v5.1.symbols.gmt','Immunological Signatures']
 
     required: true
     description: |
diff --git a/workflow/main.nf b/workflow/main.nf
index 1d4104c..c276a1e 100644
--- a/workflow/main.nf
+++ b/workflow/main.nf
@@ -59,34 +59,31 @@ fastqs
 
 if (params.pairs == 'pe') {
   spltnames
-	.splitCsv()
-	.filter { fileMap.get(it[1]) != null & fileMap.get(it[2]) != null }
-	.map { it -> tuple(it[0], fileMap.get(it[1]), fileMap.get(it[2])) }
-	.set { read }
+  .splitCsv()
+  .filter { fileMap.get(it[1]) != null & fileMap.get(it[2]) != null }
+  .map { it -> tuple(it[0], fileMap.get(it[1]), fileMap.get(it[2])) }
+  .set { read }
 } else {
   spltnames
-	.splitCsv()
-	.filter { fileMap.get(it[1]) != null }
-	.map { it -> tuple(it[0], fileMap.get(it[1]),'') }
-	.set { read }
+  .splitCsv()
+  .filter { fileMap.get(it[1]) != null }
+  .map { it -> tuple(it[0], fileMap.get(it[1]),'') }
+  .set { read }
 }
 if( ! read ) { error "Didn't match any input files with entries in the design file" }
 
 // Trim raw reads using trimgalore
 process trim {
-        errorStrategy 'ignore'
-
-	input:
-	set pair_id, file(read1), file(read2) from read
-
-	output:
-	set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into trimread
-	set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into fusionfq
-
-	script:
-	"""
-	bash $baseDir/process_scripts/preproc_fastq/trimgalore.sh -p ${pair_id} -a ${read1} -b ${read2}
-	"""
+  errorStrategy 'ignore'
+  input:
+  set pair_id, file(read1), file(read2) from read
+  output:
+  set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into trimread
+  set pair_id, file("${pair_id}.trim.R1.fastq.gz"),file("${pair_id}.trim.R2.fastq.gz") into fusionfq
+  script:
+  """
+  bash $baseDir/process_scripts/preproc_fastq/trimgalore.sh -p ${pair_id} -a ${read1} -b ${read2}
+  """
 }
 
 // Align trimmed reads to genome indes with hisat2
@@ -95,157 +92,116 @@ process trim {
 // Alignment stats with samtools
 
 process starfusion {
-        errorStrategy 'ignore'
-        publishDir "$params.output", mode: 'copy'
-
-        input:
-        set pair_id, file(fq1), file(fq2) from fusionfq
-
-        output:
-        file("${pair_id}.starfusion.txt") into fusionout
-
-        when:
-        params.fusion == 'detect' && params.pairs == 'pe'
-
-        script:
-        """
-        bash $baseDir/process_scripts/alignment/starfusion.sh -p ${pair_id} -r ${index_path} -a ${fq1} -b ${fq2} -m trinity -f
-        """
+  errorStrategy 'ignore'
+  publishDir "$params.output", mode: 'copy'
+  input:
+  set pair_id, file(fq1), file(fq2) from fusionfq
+  output:
+  file("${pair_id}.starfusion.txt") into fusionout
+  when:
+  params.fusion == 'detect' && params.pairs == 'pe'
+  script:
+  """
+  bash $baseDir/process_scripts/alignment/starfusion.sh -p ${pair_id} -r ${index_path} -a ${fq1} -b ${fq2} -m trinity -f
+  """
 }
 
 process align {
-        errorStrategy 'ignore'
-        publishDir "$params.output", mode: 'copy'
-
-        input:
-        set pair_id, file(fq1), file(fq2) from trimread
-
-        output:
-        set pair_id, file("${pair_id}.bam") into aligned
-        set pair_id, file("${pair_id}.bam") into aligned2
-        file("${pair_id}.alignerout.txt") into hsatout
-
-        script:
-        """
-        bash $baseDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} -x ${fq1} -y ${fq2}
-        """
+  errorStrategy 'ignore'
+  publishDir "$params.output", mode: 'copy'
+  input:
+  set pair_id, file(fq1), file(fq2) from trimread
+  output:
+  set pair_id, file("${pair_id}.bam") into aligned
+  set pair_id, file("${pair_id}.bam") into aligned2
+  file("${pair_id}.alignerout.txt") into hsatout
+  script:
+  """
+  bash $baseDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} -x ${fq1} -y ${fq2}
+  """
 }
 
 process alignqc {
-        errorStrategy 'ignore'
-        publishDir "$params.output", mode: 'copy'
-
-        input:
-        set pair_id, file(bam) from aligned2
-
-        output:
-        file("${pair_id}.flagstat.txt") into alignstats
-        set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqc
-
-        script:
-        """
-        bash $baseDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna
-        """
+  errorStrategy 'ignore'
+  publishDir "$params.output", mode: 'copy'
+  input:
+  set pair_id, file(bam) from aligned2
+  output:
+  file("${pair_id}.flagstat.txt") into alignstats
+  set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqc
+  script:
+  """
+  bash $baseDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna
+  """
 }
 
 // Summarize all flagstat output
 
 process parse_alignstat {
-        publishDir "$params.output", mode: 'copy'
-
-        input:
-        file(txt) from alignstats.toList()
-        file(txt) from  hsatout.toList()
-
-        output:
-        file('alignment.summary.txt')
-
-        script:
-        """
-        perl $baseDir/scripts/parse_flagstat.pl *.flagstat.txt
-        """
+  publishDir "$params.output", mode: 'copy'
+  input:
+  file(txt) from alignstats.toList()
+  file(txt) from  hsatout.toList()
+  output:
+  file('alignment.summary.txt')
+  script:
+  """
+  perl $baseDir/scripts/parse_flagstat.pl *.flagstat.txt
+  """
 }
 
 // Identify duplicate reads with Picard
 
 process markdups {
-        publishDir "$params.output", mode: 'copy'
-
-        input:
-        set pair_id, file(sbam) from aligned
-
-        output:
-        set pair_id, file("${pair_id}.dedup.bam") into deduped1
-        set pair_id, file("${pair_id}.dedup.bam") into deduped2
-
-        script:
-        """
-        bash $baseDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id
-        """
+  publishDir "$params.output", mode: 'copy'
+  input:
+  set pair_id, file(sbam) from aligned
+  output:
+  set pair_id, file("${pair_id}.dedup.bam") into deduped1
+  set pair_id, file("${pair_id}.dedup.bam") into deduped2
+  script:
+  """
+  bash $baseDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id
+  """
 }
 
 // Read summarization with subread
 // Assemble transcripts with stringtie
 
 process geneabund {
-        errorStrategy 'ignore'
-        publishDir "$params.output", mode: 'copy'
-
-        input:
-        set pair_id, file(sbam) from deduped1
-
-        output:
-        file("${pair_id}.cts") into counts
-        file("${pair_id}.cts.summary") into ctsum
-        file("${pair_id}_stringtie") into strcts
-        file("${pair_id}.fpkm.txt") into fpkm
-
-        script:
-        """
-        bash $baseDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam}
-        """
+  errorStrategy 'ignore'
+  publishDir "$params.output", mode: 'copy'
+  input:
+  set pair_id, file(sbam) from deduped1
+  output:
+  file("${pair_id}.cts") into counts
+  file("${pair_id}.cts.summary") into ctsum
+  file("${pair_id}_stringtie") into strcts
+  file("${pair_id}.fpkm.txt") into fpkm
+  script:
+  """
+  bash $baseDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam}
+  """
 }
 
 process statanal {
-        errorStrategy 'ignore'
-        publishDir "$params.output", mode: 'copy'
-
-        input:
-        file count_file from counts.toList()
-        file count_sum from ctsum.toList()
-        file newdesign name 'design.txt'
-        file genenames
-        file geneset name 'geneset.gmt'
-        file fpkm_file from fpkm.toList()
-        file stringtie_dir from strcts.toList()
-
-        output:
-        file "*.txt" into txtfiles
-        file "*.png" into psfiles
-        file("*.rda") into rdafiles
-        file("geneset.shiny.gmt") into gmtfile
-
-        script:
-        """
-        bash $baseDir/process_scripts/genect_rnaseq/statanal.sh -d $params.dea
-        """
-}
-
-process gatkbam {
-        errorStrategy 'ignore'
-        publishDir "$params.output", mode: 'copy'
-
-        input:
-        set pair_id, file(rbam) from deduped2
-
-        output:
-        set file("${pair_id}.final.bam"),file("${pair_id}.final.bai") into gatkbam
-
-        when:
-        params.align == 'hisat' && $index_path == '/project/shared/bicf_workflow_ref/GRCh38/'
-
-        script:
-        """
-        bash $baseDir/process_scripts/variants/gatkrunner.sh -a gatkbam_rna -b $rbam -r ${index_path}/hisat_index -p $pair_id
-        """
+  errorStrategy 'ignore'
+  publishDir "$params.output", mode: 'copy'
+  input:
+  file count_file from counts.toList()
+  file count_sum from ctsum.toList()
+  file newdesign name 'design.txt'
+  file genenames
+  file geneset name 'geneset.gmt'
+  file fpkm_file from fpkm.toList()
+  file stringtie_dir from strcts.toList()
+  output:
+  file "*.txt" into txtfiles
+  file "*.png" into psfiles
+  file("*.rda") into rdafiles
+  file("geneset.shiny.gmt") into gmtfile
+  script:
+  """
+  bash $baseDir/process_scripts/genect_rnaseq/statanal.sh -d $params.dea
+  """
 }
diff --git a/workflow/process_scripts b/workflow/process_scripts
index 12ef616..c2e0f1f 160000
--- a/workflow/process_scripts
+++ b/workflow/process_scripts
@@ -1 +1 @@
-Subproject commit 12ef61633f4a6008cba307bb4371845163987371
+Subproject commit c2e0f1fac2b0a90fd2d87d2aba9d885855d22272
-- 
GitLab