Skip to content
Snippets Groups Projects
Commit 91cd945b authored by David Trudgian
Browse files

TMP_DIR for picard. Unixify main.nf EOL

parent e20e7f71
Branches
No related merge requests found
/*
* Copyright (c) 2016. The University of Texas Southwestern Medical Center
*
* This file is part of the BioHPC Workflow Platform
*
* Example ChIP-Seq analysis script, demonstrating the BioHPC Workflow Platform
*
* @authors
* David Trudgian <David.Trudgian@UTSouthwestern.edu>
*
*/
// ---------------------------------------------------------------------------
// Workflow parameters
// ---------------------------------------------------------------------------

// params.fastq - a single input file, or a glob pattern matching several.
//                $baseDir is the location of this workflow file (main.nf).
params.fastq = "$baseDir/../test_data/*.fastq"

// params.index - the BWA index (.fa file) used for the analysis.
params.index = "/project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa"

// ---------------------------------------------------------------------------
// Derived values shared by the processes below
// ---------------------------------------------------------------------------

// Resolve the index location once, then split it into the containing
// directory and the name of the index file inside that directory.
bwa_index  = file(params.index)
index_path = bwa_index.parent
index_name = bwa_index.name

// Channel of input fastq files. When multiple files are selected on the web
// a glob pattern will be passed in via params.fastq.
fastqs = Channel.fromPath(params.fastq)
// bwa_aln
// Run BWA aln on a fastq file, to produce sai output
//
// Input  - fastq_file is taken from the fastqs channel
//        - BWA index at $index_path/$index_name
//
// Output - pair of fastq & generated sai file into the alignments channel
process bwa_aln {
    // Number of cpus requested from Nextflow for BWA. The bwa thread count
    // in the script is read from task.cpus, so changing this directive (or
    // overriding it in configuration) keeps the two values in step.
    cpus 32

    input:
    file fastq_file from fastqs

    output:
    set file(fastq_file), file("${fastq_file.name}.sai") into alignments

    script:
    // Options are given before the positional arguments, and every path is
    // quoted so that file names containing spaces reach bwa as one argument.
    """
    module load BWA/0.7.5
    bwa aln -t ${task.cpus} "$index_path/$index_name" "$fastq_file" > "${fastq_file.name}.sai"
    """
}
// bwa_samse
// Run bwa samse to produce sam.gz from an sai alignment
//
// Input  - pair of fastq file and corresponding sai file, from alignments channel
//        - BWA index at $index_path/$index_name
//
// Output - .sam.gz into the samfiles channel, and baseDir/output
process bwa_samse {
    // bwa samse will use a single cpu core
    cpus 1

    // Publish the outputs we create here into the workflow output directory
    publishDir "$baseDir/output", mode: 'copy'

    input:
    set file(fastq_file), file(sai_file) from alignments

    output:
    file "${fastq_file.name}.sam.gz" into samfiles

    script:
    // The read group uses the fastq file name for ID, LB and SM.
    // "\\t" is written so the shell command contains the two characters \t,
    // which is the escaped form shown in the bwa manual; a bare "\t" here
    // would be turned into a literal tab by Groovy before bwa ever sees it.
    // A space precedes the line-continuation backslash so the index path can
    // never be glued onto the following argument.
    """
    module load BWA/0.7.5
    bwa samse -r "@RG\\tID:${fastq_file.name}\\tLB:${fastq_file.name}\\tSM:${fastq_file.name}\\tPL:ILLUMINA" \
        "$index_path/$index_name" "$sai_file" "$fastq_file" | gzip > "${fastq_file.name}.sam.gz"
    """
}
// sam2bam
// Convert SAM file to BAM file, sorting by co-ordinate and indexing
//
// Input  - a sam file, (possibly gzipped) from the samfiles channel
//
// Output - sorted .bam into the bamfiles channel, and baseDir/output
process sam2bam {
    // Tell Nextflow picard will only use one cpu.
    cpus 1

    // Memory requested from Nextflow for this task. The java heap size in
    // the script is derived from this directive so the two stay in step.
    memory '32GB'

    // Publish the outputs we create here into the workflow output directory
    publishDir "$baseDir/output", mode: 'copy'

    input:
    file sam_file from samfiles

    output:
    // NOTE(review): only the .bam is declared. The index requested with
    // CREATE_INDEX=true is not listed as an output, so it is presumably
    // neither published nor passed downstream - confirm this is intended.
    file "${sam_file.name}.bam" into bamfiles

    script:
    // \$PICARD is escaped so it is expanded by the shell, not by Nextflow.
    // TMP_DIR points picard's temporary files at a directory under the
    // workflow work directory.
    """
    module add picard/1.127
    java -Xmx${task.memory.toGiga()}G -jar \$PICARD/picard.jar SortSam \
        INPUT="${sam_file}" \
        OUTPUT="${sam_file.name}.bam" \
        SORT_ORDER=coordinate \
        VALIDATION_STRINGENCY=LENIENT \
        CREATE_INDEX=true \
        TMP_DIR="${workflow.workDir}/tmp"
    """
}
// macs14
// Peak calling on a bam using MACS 1.4
//
// Input  - a bam file, from the bamfiles channel
//
// Output - various wig and bed into baseDir/output
process macs14 {
    // Publish the outputs we create here into the workflow output directory
    publishDir "$baseDir/output", mode: 'copy'

    input:
    file bam_file from bamfiles

    output:
    // All outputs share the "<bam file>_bwa_nomodel" prefix passed to
    // macs14 via --name in the script below.
    file "${bam_file}_bwa_nomodel_MACS_wiggle"
    file "${bam_file}_bwa_nomodel_peaks.bed"
    file "${bam_file}_bwa_nomodel_peaks.xls"
    file "${bam_file}_bwa_nomodel_summits.bed"

    script:
    // The bam path and the run name are quoted so a file name containing
    // spaces is passed to macs14 as a single argument.
    """
    module add macs/1.4.2
    macs14 -t "${bam_file}" \
        --name "${bam_file}_bwa_nomodel" \
        --nomodel \
        --wig \
        --single-profile \
        -f BAM
    """
}
\ No newline at end of file
/*
* Copyright (c) 2016. The University of Texas Southwestern Medical Center
*
* This file is part of the BioHPC Workflow Platform
*
* Example ChIP-Seq analysis script, demonstrating the BioHPC Workflow Platform
*
* @authors
* David Trudgian <David.Trudgian@UTSouthwestern.edu>
*
*/
// ---------------------------------------------------------------------------
// Workflow parameters
// ---------------------------------------------------------------------------

// params.fastq - a single input file, or a glob pattern matching several.
//                $baseDir is the location of this workflow file (main.nf).
params.fastq = "$baseDir/../test_data/*.fastq"

// params.index - the BWA index (.fa file) used for the analysis.
params.index = "/project/apps_database/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa"

// ---------------------------------------------------------------------------
// Derived values shared by the processes below
// ---------------------------------------------------------------------------

// Resolve the index location once, then split it into the containing
// directory and the name of the index file inside that directory.
bwa_index  = file(params.index)
index_path = bwa_index.parent
index_name = bwa_index.name

// Channel of input fastq files. When multiple files are selected on the web
// a glob pattern will be passed in via params.fastq.
fastqs = Channel.fromPath(params.fastq)
// bwa_aln
// Run BWA aln on a fastq file, to produce sai output
//
// Input  - fastq_file is taken from the fastqs channel
//        - BWA index at $index_path/$index_name
//
// Output - pair of fastq & generated sai file into the alignments channel
process bwa_aln {
    // Number of cpus requested from Nextflow for BWA. The bwa thread count
    // in the script is read from task.cpus, so changing this directive (or
    // overriding it in configuration) keeps the two values in step.
    cpus 32

    input:
    file fastq_file from fastqs

    output:
    set file(fastq_file), file("${fastq_file.name}.sai") into alignments

    script:
    // Options are given before the positional arguments, and every path is
    // quoted so that file names containing spaces reach bwa as one argument.
    """
    module load BWA/0.7.5
    bwa aln -t ${task.cpus} "$index_path/$index_name" "$fastq_file" > "${fastq_file.name}.sai"
    """
}
// bwa_samse
// Run bwa samse to produce sam.gz from an sai alignment
//
// Input  - pair of fastq file and corresponding sai file, from alignments channel
//        - BWA index at $index_path/$index_name
//
// Output - .sam.gz into the samfiles channel, and baseDir/output
process bwa_samse {
    // bwa samse will use a single cpu core
    cpus 1

    // Publish the outputs we create here into the workflow output directory
    publishDir "$baseDir/output", mode: 'copy'

    input:
    set file(fastq_file), file(sai_file) from alignments

    output:
    file "${fastq_file.name}.sam.gz" into samfiles

    script:
    // The read group uses the fastq file name for ID, LB and SM.
    // "\\t" is written so the shell command contains the two characters \t,
    // which is the escaped form shown in the bwa manual; a bare "\t" here
    // would be turned into a literal tab by Groovy before bwa ever sees it.
    // A space precedes the line-continuation backslash so the index path can
    // never be glued onto the following argument.
    """
    module load BWA/0.7.5
    bwa samse -r "@RG\\tID:${fastq_file.name}\\tLB:${fastq_file.name}\\tSM:${fastq_file.name}\\tPL:ILLUMINA" \
        "$index_path/$index_name" "$sai_file" "$fastq_file" | gzip > "${fastq_file.name}.sam.gz"
    """
}
// sam2bam
// Convert SAM file to BAM file, sorting by co-ordinate and indexing
//
// Input  - a sam file, (possibly gzipped) from the samfiles channel
//
// Output - sorted .bam into the bamfiles channel, and baseDir/output
process sam2bam {
    // Tell Nextflow picard will only use one cpu.
    cpus 1

    // Memory requested from Nextflow for this task. The java heap size in
    // the script is derived from this directive so the two stay in step.
    memory '32GB'

    // Publish the outputs we create here into the workflow output directory
    publishDir "$baseDir/output", mode: 'copy'

    input:
    file sam_file from samfiles

    output:
    // NOTE(review): only the .bam is declared. The index requested with
    // CREATE_INDEX=true is not listed as an output, so it is presumably
    // neither published nor passed downstream - confirm this is intended.
    file "${sam_file.name}.bam" into bamfiles

    script:
    // \$PICARD is escaped so it is expanded by the shell, not by Nextflow.
    // TMP_DIR points picard's temporary files at a directory under the
    // workflow work directory.
    """
    module add picard/1.127
    java -Xmx${task.memory.toGiga()}G -jar \$PICARD/picard.jar SortSam \
        INPUT="${sam_file}" \
        OUTPUT="${sam_file.name}.bam" \
        SORT_ORDER=coordinate \
        VALIDATION_STRINGENCY=LENIENT \
        CREATE_INDEX=true \
        TMP_DIR="${workflow.workDir}/tmp"
    """
}
// macs14
// Peak calling on a bam using MACS 1.4
//
// Input  - a bam file, from the bamfiles channel
//
// Output - various wig and bed into baseDir/output
process macs14 {
    // Publish the outputs we create here into the workflow output directory
    publishDir "$baseDir/output", mode: 'copy'

    input:
    file bam_file from bamfiles

    output:
    // All outputs share the "<bam file>_bwa_nomodel" prefix passed to
    // macs14 via --name in the script below.
    file "${bam_file}_bwa_nomodel_MACS_wiggle"
    file "${bam_file}_bwa_nomodel_peaks.bed"
    file "${bam_file}_bwa_nomodel_peaks.xls"
    file "${bam_file}_bwa_nomodel_summits.bed"

    script:
    // The bam path and the run name are quoted so a file name containing
    // spaces is passed to macs14 as a single argument.
    """
    module add macs/1.4.2
    macs14 -t "${bam_file}" \
        --name "${bam_file}_bwa_nomodel" \
        --nomodel \
        --wig \
        --single-profile \
        -f BAM
    """
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment