Skip to content
Snippets Groups Projects
Commit 11a742a3 authored by Gervaise Henry :cowboy:
Browse files

Add new metadata extraction to ci

parent 788a539f
Branches
Tags
2 merge requests: !37 v0.0.1, !36 Metadata output update
Pipeline #7789 failed with stages
in 1 minute and 55 seconds
...@@ -67,6 +67,8 @@ trimData: ...@@ -67,6 +67,8 @@ trimData:
script: script:
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --basename Q-Y5F6_1M.se -j 20 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --basename Q-Y5F6_1M.se -j 20 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --paired --basename Q-Y5F6_1M.pe -j 20 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --paired --basename Q-Y5F6_1M.pe -j 20 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
- readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
- readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
- pytest -m trimData - pytest -m trimData
downsampleData: downsampleData:
...@@ -105,6 +107,7 @@ countData: ...@@ -105,6 +107,7 @@ countData:
script: script:
- singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' -o Q-Y5F6_1M.se.featureCounts -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam - singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' -o Q-Y5F6_1M.se.featureCounts -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
- singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.featureCounts - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.featureCounts
- assignedReads=grep -m 1 'Assigned' *.countData.summary | grep -oe '\([0-9.]*\)'
- pytest -m makeFeatureCounts - pytest -m makeFeatureCounts
makeBigWig: makeBigWig:
......
...@@ -298,7 +298,7 @@ process trimData { ...@@ -298,7 +298,7 @@ process trimData {
hostname > ${repRID}.trimData.log hostname > ${repRID}.trimData.log
ulimit -a >> ${repRID}.trimData.log ulimit -a >> ${repRID}.trimData.log
# trim fastq's using trim_galore # trim fastq's using trim_galore and extract median read length
echo -e "LOG: trimming ${ends}" >> ${repRID}.trimData.log echo -e "LOG: trimming ${ends}" >> ${repRID}.trimData.log
if [ "${ends}" == "se" ] if [ "${ends}" == "se" ]
then then
...@@ -875,7 +875,8 @@ process countData { ...@@ -875,7 +875,8 @@ process countData {
featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
fi fi
echo -e "LOG: counted" >> ${repRID}.countData.log echo -e "LOG: counted" >> ${repRID}.countData.log
# extract assigned reads
assignedReads=grep -m 1 'Assigned' *.countData.summary | grep -oe '\([0-9.]*\)' assignedReads=grep -m 1 'Assigned' *.countData.summary | grep -oe '\([0-9.]*\)'
echo -e \${assignedReads} > assignedReads.csv echo -e \${assignedReads} > assignedReads.csv
echo -e "LOG: assigned reads: "\${assignedReads} >> ${repRID}.countData.log echo -e "LOG: assigned reads: "\${assignedReads} >> ${repRID}.countData.log
...@@ -887,6 +888,12 @@ process countData { ...@@ -887,6 +888,12 @@ process countData {
""" """
} }
// Extract number of assigned reads metadata into channel
assignedReadsInfer = Channel.create()
inferMetadata_assignedReads.splitCsv(sep: ",", header: false).separate(
assignedReads
)
/* /*
*fastqc: run fastqc on untrimmed fastq's *fastqc: run fastqc on untrimmed fastq's
*/ */
...@@ -911,12 +918,6 @@ process fastqc { ...@@ -911,12 +918,6 @@ process fastqc {
""" """
} }
// Extract number of assigned reads metadata into channel
assignedReadsInfer = Channel.create()
inferMetadata_assignedReads.splitCsv(sep: ",", header: false).separate(
assignedReads
)
/* /*
*dataQC: calculate transcript integrity numbers (TIN) and bin as well as calculate innerdistance of PE replicates *dataQC: calculate transcript integrity numbers (TIN) and bin as well as calculate innerdistance of PE replicates
*/ */
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment