Skip to content
Snippets Groups Projects
Commit 10cbf01b authored by Venkat Malladi
Browse files

Update consistency checks to be fully in python.

parent c114a629
2 merge requests: !58 Develop, !46 Update consistency checks to be fully in python.
Pipeline #8260 failed with stages in 5 hours, 44 minutes, and 59 seconds
......@@ -130,7 +130,7 @@ dedupData:
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools sort -@ 20 -O BAM -o Q-Y5F6_1M.se.sorted.deduped.bam ./test_data/bam/small/Q-Y5F6_1M.se.deduped.bam
- singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' samtools index -@ 20 -b ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam Q-Y5F6_1M.se.sorted.deduped.bam.bai
- >
for i in {"chr8","chr4","chrY"}; do
for i in {"chr8","chr4","chrY"}; do
echo "samtools view -b Q-Y5F6_1M.se.sorted.deduped.bam ${i} > Q-Y5F6_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5F6_1M.se.sorted.deduped.${i}.bam Q-Y5F6_1M.se.sorted.deduped.${i}.bam.bai;";
done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k
- pytest -m dedupData
......@@ -145,7 +145,7 @@ countData:
script:
- ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/geneID.tsv
- ln -s /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/Entrez.tsv
- singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se.countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
- singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o Q-Y5F6_1M.se.countData -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
- singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.countData
- singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/convertGeneSymbols.R --repRID Q-Y5F6_1M.se
- assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
......@@ -366,7 +366,7 @@ override_fastq:
max: 1
when:
- always
override_species:
stage: integration
only: [merge_requests]
......@@ -388,7 +388,7 @@ override_species:
max: 1
when:
- always
consistency:
stage: consistency
......@@ -397,10 +397,6 @@ consistency:
variables:
- $CI_MERGE_REQUEST_TARGET_BRANCH_NAME =~ /master/
script:
- grep -m 1 \"Assigned\":.[0-9] SE_multiqc_data.json | grep -oe '\([0-9.]*\)' > assignedSE.txt
- grep -m 1 \"Assigned\":.[0-9] PE_multiqc_data.json | grep -oe '\([0-9.]*\)' > assignedPE.txt
- echo 7742416 > assignedExpectSE.txt
- echo 2599140 > assignedExpectPE.txt
- pytest -m consistencySE
- pytest -m consistencyPE
artifacts:
......@@ -409,8 +405,4 @@ consistency:
paths:
- SE_multiqc_data.json
- PE_multiqc_data.json
- assignedSE.txt
- assignedPE.txt
- assignedExpectSE.txt
- assignedExpectPE.txt
expire_in: 7 days
\ No newline at end of file
expire_in: 7 days
......@@ -4,6 +4,7 @@ import pytest
import pandas as pd
from io import StringIO
import os
import json
test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
'/../../'
......@@ -13,23 +14,21 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
def test_consistencySE():
assert os.path.exists(os.path.join(
test_output_path, 'SE_multiqc_data.json'))
assert readAssigned("assignedSE.txt", "assignedExpectSE.txt")
with open(os.path.join(
test_output_path, 'SE_multiqc_data.json')) as f:
assigned_reads_json = json.load(f)
assigned_reads = assigned_reads_json['report_general_stats_data'][0]['16-1ZX4']['Assigned']
assert assigned_reads == 7742416
@pytest.mark.consistencyPE
def test_consistencyPE():
assert os.path.exists(os.path.join(
test_output_path, 'PE_multiqc_data.json'))
assert readAssigned("assignedPE.txt", "assignedExpectPE.txt")
def readAssigned(fileAssigned, fileExpectAssigned):
data = False
assigned = open(fileAssigned, "r")
expect = open(fileExpectAssigned, "r")
lineAssigned = assigned.readline()
lineExpect = expect.readline()
if int(lineAssigned.strip()) < (int(lineExpect.strip())+(int(lineExpect.strip())*0.00001)) and int(lineAssigned.strip()) > (int(lineExpect.strip())-(int(lineExpect.strip())*0.00001)):
data = True
return data
with open(os.path.join(
test_output_path, 'PE_multiqc_data.json')) as f:
assigned_reads_json = json.load(f)
assigned_reads = assigned_reads_json['report_general_stats_data'][0]['Q-Y5JA']['Assigned']
assert assigned_reads == 2599140
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment