Skip to content
Snippets Groups Projects
Commit 4df64710 authored by Beibei Chen's avatar Beibei Chen
Browse files

update main

parent 58edd844
Branches
Tags
1 merge request!1Merge develop into master
#!/usr/bin/env nextflow
// Default parameter values to run tests
params.bams="$baseDir/../test_data/*.fastq.gz"
params.design="$baseDir/../test_data/design.pe.txt"
params.genome="/project/shared/bicf_workflow_ref/GRCh38/"
params.bams="$baseDir/../test/*.bam"
params.design="$baseDir/../test/design.csv"
params.genome="/project/shared/bicf_workflow_ref/GRCh37/"
design_file = file(params.design)
fastqs=file(params.fastqs)
design_file = file(params.design)
bams=file(params.bams)
gtf_file = file("$params.genome/gencode.gtf")
genenames = file("$params.genome/genenames.txt")
geneset = file("$params.genome/gsea_gmt/$params.geneset")
// params genome is the directory
// base name for the index is always genome
index_path = file(params.genome)
index_name = "genome"
star_index = 'star_index/'
// Pair handling, helper function taken from rnatoy
// which is covered by the GNU General Public License v3
......@@ -66,221 +60,34 @@ Channel
.set { read_pe }
}
//
// Trim raw reads using trimgalore
//
process trimpe {
input:
set pair_id, file(read1), file(read2) from read_pe
output:
set pair_id, file("${read1.baseName.split("\\.", 2)[0]}_val_1.fq.gz"), file("${read2.baseName.split("\\.", 2)[0]}_val_2.fq.gz") into trimpe
script:
"""
module load trimgalore/0.4.1 cutadapt/1.9.1
trim_galore --paired -q 25 --illumina --gzip --length 35 ${read1} ${read2}
"""
}
process trimse {
input:
set pair_id, file(read1) from read_se
output:
set pair_id, file("${read1.baseName.split("\\.", 2)[0]}_trimmed.fq.gz") into trimse
script:
"""
module load trimgalore/0.4.1 cutadapt/1.9.1
trim_galore -q 25 --illumina --gzip --length 35 ${read1}
"""
}
//
// Align trimmed reads to genome indes with hisat2
// Sort and index with samtools
// QC aligned reads with fastqc
// Alignment stats with samtools
//
process alignpe {
publishDir "$baseDir/output", mode: 'copy'
cpus 32
input:
set pair_id, file(fq1), file(fq2) from trimpe
output:
set pair_id, file("${pair_id}.bam") into alignpe
file("${pair_id}.flagstat.txt") into alignstats_pe
file("${pair_id}.hisatout.txt") into hsatoutpe
set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqcpe
script:
if (params.align == 'hisat')
"""
module load hisat2/2.0.1-beta-intel samtools/intel/1.3 fastqc/0.11.2 picard/1.127 speedseq/20160506
hisat2 -p 30 --no-unal --dta -x ${index_path}/${index_name} -1 ${fq1} -2 ${fq2} -S out.sam 2> ${pair_id}.hisatout.txt
sambamba view -t 30 -f bam -S -o output.bam out.sam
sambamba sort -t 30 -o ${pair_id}.bam output.bam
sambamba flagstat -t 30 ${pair_id}.bam > ${pair_id}.flagstat.txt
fastqc -f bam ${pair_id}.bam
"""
else
"""
module load star/2.4.2a samtools/intel/1.3 fastqc/0.11.2 picard/1.127 speedseq/20160506
STAR --genomeDir ${index_path}/${star_index} --readFilesIn ${fq1} ${fq2} --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype BAM SortedByCoordinate --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out
mv outLog.final.out ${pair_id}.hisatout.txt
sambamba sort -t 30 -o ${pair_id}.bam outAligned.sortedByCoord.out.bam
sambamba flagstat -t 30 ${pair_id}.bam > ${pair_id}.flagstat.txt
fastqc -f bam ${pair_id}.bam
"""
}
process alignse {
publishDir "$baseDir/output", mode: 'copy'
cpus 32
input:
set pair_id, file(fq1) from trimse
output:
set pair_id, file("${pair_id}.bam") into alignse
file("${pair_id}.flagstat.txt") into alignstats_se
file("${pair_id}.hisatout.txt") into hsatoutse
set file("${pair_id}_fastqc.zip"),file("${pair_id}_fastqc.html") into fastqcse
script:
if (params.align == 'hisat')
"""
module load hisat2/2.0.1-beta-intel samtools/intel/1.3 fastqc/0.11.2 speedseq/20160506 picard/1.127
hisat2 -p 30 --no-unal --dta -x ${index_path}/${index_name} -U ${fq1} -S out.sam 2> ${pair_id}.hisatout.txt
sambamba view -t 30 -f bam -S -o output.bam out.sam
sambamba sort -t 30 -o ${pair_id}.bam output.bam
sambamba flagstat -t 30 ${pair_id}.bam > ${pair_id}.flagstat.txt
fastqc -f bam ${pair_id}.bam
"""
else
"""
module load star/2.4.2a samtools/intel/1.3 fastqc/0.11.2 picard/1.127 speedseq/20160506
STAR --genomeDir ${index_path}/${star_index} --readFilesIn ${fq1} --readFilesCommand zcat --genomeLoad NoSharedMemory --outFilterMismatchNmax 999 --outFilterMismatchNoverReadLmax 0.04 --outFilterMultimapNmax 20 --alignSJoverhangMin 8 --alignSJDBoverhangMin 1 --alignIntronMin 20 --alignIntronMax 1000000 --alignMatesGapMax 1000000 --outSAMheaderCommentFile COfile.txt --outSAMheaderHD @HD VN:1.4 SO:coordinate --outSAMunmapped Within --outFilterType BySJout --outSAMattributes NH HI AS NM MD --outSAMstrandField intronMotif --outSAMtype SAM --quantMode TranscriptomeSAM --sjdbScore 1 --limitBAMsortRAM 60000000000 --outFileNamePrefix out
mv outLog.final.out ${pair_id}.hisatout.txt
sambamba view -t 30 -f bam -S -o output.bam outAligned.out.sam
sambamba sort -t 30 -o ${pair_id}.bam output.bam
sambamba flagstat -t 30 ${pair_id}.bam > ${pair_id}.flagstat.txt
fastqc -f bam ${pair_id}.bam
"""
}
// From here on we are the same for PE and SE, so merge channels and carry on
Channel
.empty()
.mix(alignse, alignpe)
.tap { aligned2 }
.set { aligned }
Channel
.empty()
.mix(alignstats_se, alignstats_pe)
.set { alignstats }
Channel
.empty()
.mix(hsatoutse, hsatoutpe)
.set { hsatout }
//
// Summarize all flagstat output
//
process parse_alignstat {
publishDir "$baseDir/output", mode: 'copy'
input:
file(txt) from alignstats.toList()
file(txt) from hsatout.toList()
output:
file('alignment.summary.txt')
"""
perl $baseDir/scripts/parse_flagstat.pl *.flagstat.txt
"""
}
//
// Identify duplicate reads with Picard
//
process markdups {
memory '4GB'
publishDir "$baseDir/output", mode: 'copy'
input:
set pair_id, file(sbam) from aligned
output:
set pair_id, file("${pair_id}.dedup.bam") into deduped1
set pair_id, file("${pair_id}.dedup.bam") into deduped2
script:
if (params.markdups == 'mark')
"""
module load picard/1.127 speedseq/20160506
sambamba markdup -t 20 -r ${sbam} ${pair_id}.dedup.bam
"""
else
"""
cp ${sbam} ${pair_id}.dedup.bam
"""
}
//
// Read summarization with subread
//
process featurect {
publishDir "$baseDir/output", mode: 'copy'
cpus 32
input:
set pair_id, file(dbam) from deduped1
file gtf_file
output:
file("${pair_id}.cts") into counts
"""
module load module subread/1.5.0-intel
featureCounts -s params.stranded -T 30 -p -g gene_name -a ${gtf_file} -o ${pair_id}.cts ${dbam}
"""
}
//
// Assemble transcripts with stringtie
//
process stringtie {
publishDir "$baseDir/output", mode: 'copy'
cpus 32
input:
set pair_id, file(dbam) from deduped2
file gtf_file
output:
file("${pair_id}_stringtie") into strcts
"""
module load stringtie/1.1.2-intel
mkdir ${pair_id}_stringtie
cd ${pair_id}_stringtie
stringtie ../${dbam} -p 30 -G ../${gtf_file} -B -e -o denovo.gtf -A ../geneabund.stringtie.tab
"""
process peakanno {
publishDir "$baseDir/output", mode: 'copy'
input:
file peak_file from greencenter
file design_file from input
file annotation Tdx
output:
set peak_id, file("${pattern}_annotation.xls"), file("${pattern}_peakTssDistribution.png") into peakanno
"""
module load R/3.2.1-intel
Rscript $baseDir/scripts/runchipseeker.R
"""
}
process statanal {
//Need to do it with more than 1 condition
process diffbind {
publishDir "$baseDir/output", mode: 'copy'
input:
file count_file from counts.toList()
file peak_file from input
file design_file name 'design.txt'
file genenames
file geneset name 'geneset.gmt'
file bam_file from input
output:
file "*.txt" into txtfiles
file "*.png" into psfiles
file "*"
"""
module load R/3.2.1-intel
perl $baseDir/scripts/concat_cts.pl -o ./ *.cts
cp design.txt design.shiny.txt
cp geneset.gmt geneset.shiny.gmt
Rscript $baseDir/scripts/dea.R
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment