Skip to content
Snippets Groups Projects
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
main.nf 4.69 KiB
#!/usr/bin/env nextflow

// Default parameter values to run tests
params.input = "$baseDir"
params.output= "$baseDir/output"
params.design="$params.input/design.txt"
params.genome="/project/shared/bicf_workflow_ref/human/GRCh38/"
params.markdups="picard"
params.stranded="0"
params.pairs="pe"
params.geneset = 'h.all.v5.1.symbols.gmt'
params.align = 'hisat'
params.fusion = 'skip'
params.dea = 'detect'

design_file = file(params.design)
fqs = Channel.fromPath("$params.input/*")
gtf_file = file("$params.genome/gencode.gtf")
genenames = file("$params.genome/genenames.txt")
geneset = file("$params.genome/../gsea_gmt/$params.geneset")
dbsnp="$params.genome/dbSnp.vcf.gz"
indel="$params.genome/GoldIndels.vcf.gz"
knownindel=file(indel)
dbsnp=file(dbsnp)


if (params.repoDir) {
  repoDir=params.repoDir
} else if (workflow.projectDir) {
  repoDir=workflow.projectDir
} else {
  repoDir="$baseDir"
}

// params genome is the directory
// base name for the index is always genome
index_path = file(params.genome)

process checkdesignfile {
	queue 'super'
	label 'trim'
	publishDir "$params.output", mode: 'copy'
	input:
        file design_file name 'design.ori.txt'
	file ("*") from fqs.collect()
	output:
	file("design.valid.txt") into newdesign
	file("*.fastq.gz") into fastqs mode flatten
	stdout spltnames
	script:
	"""
  # convert dos \r to unix format
  sed -i 's/\\x0D$//' design.ori.txt
	bash $repoDir/process_scripts/design_file/checkdesignfile.sh ${params.pairs} design.ori.txt
	"""
}

def fileMap = [:]

fastqs
    .subscribe { 
	def fileName = it.getFileName()
	fileMap."$fileName" = it
    }

if (params.pairs == 'pe') {
  spltnames
  .splitCsv()
  .filter { fileMap.get(it[1]) != null & fileMap.get(it[2]) != null }
  .map { it -> tuple(it[0], ([fileMap.get(it[1]), fileMap.get(it[2])])) } 
  .set { read }
} else {
  spltnames
  .splitCsv()
  .filter { fileMap.get(it[1]) != null }
  .map { it -> tuple(it[0], ([fileMap.get(it[1])])) }
  .set { read }
}
if( ! read ) { error "Didn't match any input files with entries in the design file" }

// Trim raw reads using trimgalore
process trim {
  errorStrategy 'ignore'
  label 'trim'
  input:
  set pair_id, file(fqs) from read
  output:
  set pair_id, file("${pair_id}.trim.R*.fastq.gz") into trimread
  script:
  """
  bash $repoDir/process_scripts/preproc_fastq/trimgalore.sh -f -p ${pair_id} ${fqs}
  """
}

process ralign {
  errorStrategy 'ignore'
  label 'ralign'
  publishDir "$params.output", mode: 'copy'
  
  input:
  set pair_id, file(fqs) from trimread
  
  output:
  set pair_id, file("${pair_id}.bam") into aligned
  set pair_id, file("${pair_id}.bam") into aligned2
  file("${pair_id}.alignerout.txt") into hsatout
  
  script:
  """
  bash $repoDir/process_scripts/alignment/rnaseqalign.sh -a $params.align -p ${pair_id} -r ${index_path} ${fqs}
  """
}

process alignqc {
  errorStrategy 'ignore'
  label 'profiling_qc'
  publishDir "$params.output", mode: 'copy'
  input:
  set pair_id, file(bam) from aligned2
  output:
  file("${pair_id}.flagstat.txt") into alignstats
  set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqc
  script:
  """
  bash $repoDir/process_scripts/alignment/bamqc.sh -p ${pair_id} -b ${bam} -y rna
  """
}

// Identify duplicate reads with Picard
process markdups {
  publishDir "$params.output", mode: 'copy'
  label 'dnaalign'
  input:
  set pair_id, file(sbam) from aligned
  output:
  set pair_id, file("${pair_id}.dedup.bam") into deduped1
  set pair_id, file("${pair_id}.dedup.bam") into deduped2
  script:
  """
  bash $repoDir/process_scripts/alignment/markdups.sh -a $params.markdups -b $sbam -p $pair_id
  """
}

// Read summarization with subread
// Assemble transcripts with stringtie
process geneabund {
  errorStrategy 'ignore'
  label 'geneabund'
  publishDir "$params.output", mode: 'copy'
  input:
  set pair_id, file(sbam) from deduped1
  output:
  file("${pair_id}.cts") into counts
  file("${pair_id}.cts.summary") into ctsum
  file("${pair_id}_stringtie") into strcts
  file("${pair_id}.fpkm.txt") into fpkm
  script:
  """
  bash $repoDir/process_scripts/genect_rnaseq/geneabundance.sh -s $params.stranded -g ${gtf_file} -p ${pair_id} -b ${sbam}
  """
}

process statanal {
  errorStrategy 'ignore'
  publishDir "$params.output", mode: 'copy'
  label 'rnaseqstat'
  input:
  file count_file from counts.toList()
  file count_sum from ctsum.toList()
  file newdesign name 'design.txt'
  file genenames
  file geneset name 'geneset.gmt'
  file fpkm_file from fpkm.toList()
  file stringtie_dir from strcts.toList()
  output:
  file "*.txt" into txtfiles
  file "*.png" into psfiles
  file("*.rda") into rdafiles
  file("geneset.shiny.gmt") into gmtfile
  script:
  """
  bash $repoDir/process_scripts/genect_rnaseq/statanal.sh -d $params.dea
  """
}