Skip to content
Snippets Groups Projects
Commit eac0a7c5 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Merge branch '9-fastqc' into 'develop'

Resolve "process_fastqc"

Closes #9

See merge request !18
parents a30baa9c 645546fa
2 merge requests!37v0.0.1,!18Resolve "process_fastqc"
Pipeline #6039 failed with stages
in 8 minutes and 39 seconds
...@@ -67,6 +67,12 @@ dedupData: ...@@ -67,6 +67,12 @@ dedupData:
- singularity exec 'docker://bicf/picard2.21.7:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5JA_1M.se.sorted.bam O=Q-Y5JA_1M.se.deduped.bam M=Q-Y5JA_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true - singularity exec 'docker://bicf/picard2.21.7:2.0.0' java -jar /picard/build/libs/picard.jar MarkDuplicates I=./test_data/bam/small/Q-Y5JA_1M.se.sorted.bam O=Q-Y5JA_1M.se.deduped.bam M=Q-Y5JA_1M.se.deduped.Metrics.txt REMOVE_DUPLICATES=true
- pytest -m dedupData - pytest -m dedupData
fastqc:
stage: unit
script:
- singularity run 'docker://bicf/fastqc:2.0.0' ./test_data/fastq/small/Q-Y5JA_1M.R1.fastq.gz -o .
- pytest -m fastqc
integration_se: integration_se:
stage: integration stage: integration
script: script:
......
...@@ -11,7 +11,7 @@ process { ...@@ -11,7 +11,7 @@ process {
executor = 'awsbatch' executor = 'awsbatch'
queue = 'highpriority-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc' queue = 'highpriority-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc'
cpus = 1 cpus = 1
memory = '1 GB' memory = '2 GB'
withName:parseMetadata { withName:parseMetadata {
cpus = 5 cpus = 5
...@@ -25,10 +25,13 @@ process { ...@@ -25,10 +25,13 @@ process {
} }
withName:alignData { withName:alignData {
cpus = 50 cpus = 50
memory = '10 GB' memory = '5 GB'
} }
withName:dedupData { withName:dedupData {
cpus = 2 cpus = 2
memory = '20 GB' memory = '20 GB'
} }
withName:fastqc {
memory = '5 GB'
}
} }
\ No newline at end of file
...@@ -11,7 +11,7 @@ process { ...@@ -11,7 +11,7 @@ process {
executor = 'awsbatch' executor = 'awsbatch'
queue = 'default-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc' queue = 'default-3278a8b0-1fc8-11ea-b1ac-021e2396e2cc'
cpus = 1 cpus = 1
memory = '1 GB' memory = '2 GB'
withName:parseMetadata { withName:parseMetadata {
cpus = 5 cpus = 5
...@@ -25,10 +25,13 @@ process { ...@@ -25,10 +25,13 @@ process {
} }
withName:alignData { withName:alignData {
cpus = 50 cpus = 50
memory = '10 GB' memory = '5 GB'
} }
withName:dedupData { withName:dedupData {
cpus = 2 cpus = 2
memory = '20 GB' memory = '20 GB'
} }
// Memory request for the fastqc process; the selector must equal the process
// name ('fastqc') or the override is silently never applied.
withName:fastqc {
memory = '5 GB'
}
} }
...@@ -24,6 +24,9 @@ process { ...@@ -24,6 +24,9 @@ process {
withName: dedupData { withName: dedupData {
queue = 'super' queue = 'super'
} }
withName: fastqc {
queue = 'super'
}
} }
singularity { singularity {
......
...@@ -32,6 +32,9 @@ process { ...@@ -32,6 +32,9 @@ process {
withName: dedupData { withName: dedupData {
container = 'bicf/picard2.21.7:2.0.0' container = 'bicf/picard2.21.7:2.0.0'
} }
withName: fastqc {
container = 'bicf/fastqc:2.0.0'
}
} }
trace { trace {
......
...@@ -105,6 +105,12 @@ process getData { ...@@ -105,6 +105,12 @@ process getData {
""" """
} }
// Split fastq's
fastqs.into {
fastqs_trimData
fastqs_fastqc
}
/* /*
* parseMetadata: parses metadata to extract experiment parameters * parseMetadata: parses metadata to extract experiment parameters
*/ */
...@@ -239,7 +245,7 @@ process trimData { ...@@ -239,7 +245,7 @@ process trimData {
input: input:
val endsManual_trimData val endsManual_trimData
path (fastq) from fastqs path (fastq) from fastqs_trimData
output: output:
path ("*.fq.gz") into fastqs_trimmed path ("*.fq.gz") into fastqs_trimmed
...@@ -303,7 +309,7 @@ process alignData { ...@@ -303,7 +309,7 @@ process alignData {
} }
/* /*
*dedupReads: mark the duplicate reads, specifically focused on PCR or optical duplicates *dedupData: mark the duplicate reads, specifically focused on PCR or optical duplicates
*/ */
process dedupData { process dedupData {
tag "${repRID}" tag "${repRID}"
...@@ -323,7 +329,31 @@ process dedupData { ...@@ -323,7 +329,31 @@ process dedupData {
hostname >${repRID}.dedup.err hostname >${repRID}.dedup.err
ulimit -a >>${repRID}.dedup.err ulimit -a >>${repRID}.dedup.err
#Remove duplicated reads # remove duplicated reads
java -jar /picard/build/libs/picard.jar MarkDuplicates I=${rawBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err java -jar /picard/build/libs/picard.jar MarkDuplicates I=${rawBam} O=${repRID}.deduped.bam M=${repRID}.deduped.Metrics.txt REMOVE_DUPLICATES=true 1>>${repRID}.dedup.out 2>> ${repRID}.dedup.err
""" """
}
/*
 * fastqc: run fastqc on untrimmed fastq's
 *   input:  fastq file(s) taken from the fastqs_fastqc channel
 *   output: the "*_fastqc.zip" report(s), emitted on the fastqc channel
 */
process fastqc {
  tag "${repRID}"
  publishDir "${outDir}/fastqc", mode: 'copy', pattern: "*_fastqc.zip"
  // Pattern must match the ${repRID}.fastqc.err log written by the script below
  // (it previously read "*.fastq.err", which can never match that file name).
  // NOTE(review): the .err file is not declared under output:, so it may still
  // not be published -- confirm against the publishDir documentation.
  publishDir "${logsDir}", mode: 'copy', pattern: "*.fastqc.err"

  input:
    path (fastq) from fastqs_fastqc

  output:
    path ("*_fastqc.zip") into fastqc

  script:
    """
    hostname >${repRID}.fastqc.err
    ulimit -a >>${repRID}.fastqc.err
    # run fastqc
    fastqc *.fastq.gz -o . >>${repRID}.fastqc.err
    """
}
\ No newline at end of file
#!/usr/bin/env python3
import pytest
import pandas as pd
from io import StringIO
import os
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../../'
@pytest.mark.fastqc
def test_fastqc():
    """Check that the zipped FastQC report exists under the test output path."""
    report_zip = os.path.join(test_output_path, 'Q-Y5JA_1M.R1_fastqc.zip')
    assert os.path.exists(report_zip)
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment