Skip to content
Snippets Groups Projects
Commit 2b89cc74 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Add align sampled data

parent 0f30eb62
2 merge requests: !37 "v0.0.1", !28 Resolve "Move inference to start of pipeline"
Pipeline #6500 canceled with stages
in 1 hour, 19 minutes, and 1 second
......@@ -12,6 +12,9 @@ process {
withName:parseMetadata {
executor = 'local'
}
withName: getRefInfer {
executor = 'local'
}
withName:getRef {
executor = 'local'
}
......@@ -21,6 +24,9 @@ process {
withName:downsampleData {
executor = 'local'
}
withName:alignSampleData {
queue = 'super'
}
withName:alignData {
queue = '256GB,256GBv1'
}
......
......@@ -20,6 +20,9 @@ process {
withName: parseMetadata {
container = 'bicf/python3:1.3'
}
withName: getRefInfer {
container = 'bicf/awscli:1.1'
}
withName: getRef {
container = 'bicf/awscli:1.1'
}
......@@ -29,6 +32,9 @@ process {
withName: downsampleData {
container = 'bicf/seqtk:2.0.0'
}
withName: alignSampleData {
container = 'bicf/gudmaprbkaligner:2.0.0'
}
withName: alignData {
container = 'bicf/gudmaprbkaligner:2.0.0'
}
......
......@@ -118,7 +118,7 @@ process getData {
"""
}
// Replicate raw fastqs for multiple process inputs
// Replicate raw fastq's for multiple process inputs
fastqs.into {
fastqs_downsampleData
fastqs_trimData
......@@ -195,6 +195,7 @@ metadata.splitCsv(sep: ",", header: false).separate(
// Replicate metadata for multiple process inputs
endsManual.into {
endsManual_downsampleData
endsManual_alignSampleData
endsManual_trimData
endsManual_alignData
endsManual_featureCounts
......@@ -221,7 +222,7 @@ process getRefInfer {
val referenceInfer
output:
tuple val ("${referenceInfer}"), path ("hisat2", type: 'dir'), path ("bed", type: 'dir'), path ("*.fna"), path ("*.gtf") into refInfer
tuple val (referenceInfer), path ("hisat2", type: 'dir'), path ("bed", type: 'dir'), path ("*.fna"), path ("*.gtf") into refInfer
path ("${repRID}.getRefInfer.{out,err}")
script:
......@@ -359,7 +360,7 @@ process trimData {
hostname > ${repRID}.trimData.err
ulimit -a >> ${repRID}.trimData.err
#Trim fastqs using trim_galore
#Trim fastq's using trim_galore
if [ "${endsManual_trimData}" == "se" ]
then
echo "LOG: running trim_galore using single-end settings" >> ${repRID}.trimData.err
......@@ -372,7 +373,7 @@ process trimData {
"""
}
// Replicate trimmed fastqs
// Replicate trimmed fastq's
fastqsTrim.into {
fastqsTrim_downsampleData
fastqsTrim_alignData
......@@ -390,7 +391,8 @@ process downsampleData {
path fastq from fastqsTrim_downsampleData
output:
path ("sampled.{1,2}.fq") into fastqsSample
path ("sampled.1.fq") into fastqs1Sample
path ("sampled.2.fq") optional true into fastqs2Sample
path ("${repRID}.downsampleData.{out,err}")
script:
......@@ -413,6 +415,54 @@ process downsampleData {
"""
}
// Attach the downsampled fastqs and the manual ends value to the inference references.
// The resulting tuple order (ends, reference fields..., fastq1, fastq2) matches the
// input declaration of alignSampleData.
// NOTE(review): fastqs2Sample comes from an `optional true` output of downsampleData;
// confirm that fastqs2Sample.collect() still emits for single-end data, otherwise the
// combine below may produce no tuples — TODO verify.
inferInput = endsManual_alignSampleData.combine(refInfer.combine(fastqs1Sample.collect().combine(fastqs2Sample.collect())))
/*
 * alignSampleData: aligns the downsampled reads to a reference database
 *
 * Input  (from inferInput): ends value ("se" or "pe"), reference name, hisat2 index
 *        directory, bed directory, fna and gtf files, and the sampled fastq file(s).
 * Output: reference name + sorted bam + bam index + bed into sampleBam,
 *         the hisat2 summary file into alignSampleQC,
 *         and the process log files (${repRID}.alignSampleData.{out,err}).
 */
process alignSampleData {
  tag "${ref}"
  publishDir "${logsDir}", mode: "copy", pattern: "${repRID}.alignSampleData.{out,err}"

  input:
    tuple val (ends), val (ref), path (hisat2), path (bed), path (fna), path (gtf), path (fastq1), path (fastq2) from inferInput

  output:
    tuple val (ref), path ("sampled.sorted.bam"), path ("sampled.sorted.bam.bai"), path (bed) into sampleBam
    path ("*.alignSampleSummary.txt") into alignSampleQC
    path ("${repRID}.alignSampleData.{out,err}")

  script:
    """
    hostname > ${repRID}.alignSampleData.err
    ulimit -a >> ${repRID}.alignSampleData.err

    #Align the reads with Hisat 2
    #NOTE: all LOG lines go to ${repRID}.alignSampleData.err so they are captured by
    #the declared output and publishDir pattern (previously two went to ${repRID}.align.err)
    if [ "${ends}" == "se" ]
    then
      echo "LOG: running Hisat2 with single-end settings" >> ${repRID}.alignSampleData.err
      hisat2 -p `nproc` --add-chrname -S sampled.sam -x hisat2/genome -U ${fastq1} --summary-file ${repRID}.alignSampleSummary.txt --new-summary 1>> ${repRID}.alignSampleData.out 2>> ${repRID}.alignSampleData.err
    elif [ "${ends}" == "pe" ]
    then
      echo "LOG: running Hisat2 with paired-end settings" >> ${repRID}.alignSampleData.err
      hisat2 -p `nproc` --add-chrname -S sampled.sam -x hisat2/genome --no-mixed --no-discordant -1 ${fastq1} -2 ${fastq2} --summary-file ${repRID}.alignSampleSummary.txt --new-summary 1>> ${repRID}.alignSampleData.out 2>> ${repRID}.alignSampleData.err
    fi

    #Convert the output sam file to a sorted bam file using Samtools
    echo "LOG: converting from sam to bam" >> ${repRID}.alignSampleData.err
    samtools view -1 -@ `nproc` -F 4 -F 8 -F 256 -o sampled.bam sampled.sam 1>> ${repRID}.alignSampleData.out 2>> ${repRID}.alignSampleData.err;

    #Sort the bam file using Samtools
    echo "LOG: sorting the bam file" >> ${repRID}.alignSampleData.err
    samtools sort -@ `nproc` -O BAM -o sampled.sorted.bam sampled.bam 1>> ${repRID}.alignSampleData.out 2>> ${repRID}.alignSampleData.err;

    #Index the sorted bam using Samtools
    echo "LOG: indexing sorted bam file" >> ${repRID}.alignSampleData.err
    samtools index -@ `nproc` -b sampled.sorted.bam sampled.sorted.bam.bai 1>> ${repRID}.alignSampleData.out 2>> ${repRID}.alignSampleData.err;
    """
}
/*
* alignData: aligns the reads to a reference database
*/
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment