From c17b8a62e4275d1c5547b79468d490d6c0b2fb3a Mon Sep 17 00:00:00 2001
From: "Gervaise H. Henry" <gervaise.henry@utsouthwestern.edu>
Date: Wed, 22 Apr 2020 13:45:10 -0500
Subject: [PATCH] Add unit CI test for (new) inferMetadata

---
 .gitlab-ci.yml                       | 33 ++++++++++++++++++++++------
 workflow/rna-seq.nf                  |  2 +-
 workflow/tests/test_dataQC.py        | 14 ++++++++++++
 workflow/tests/test_inferMetadata.py |  2 +-
 4 files changed, 42 insertions(+), 9 deletions(-)
 create mode 100644 workflow/tests/test_dataQC.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a4417f8..ae5a8ed 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -35,6 +35,24 @@ parseMetadata:
   - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded
   - singularity run 'docker://bicf/python3:1.3' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species
 
+inferMetadata:  # unit job: checks the alignment-rate parse and the endness inference, then runs the pytest marker
+  stage: unit
+  script:  # multi-line steps use literal scalars (|) so each shell line keeps its own newline
+  - |
+    align=$(echo $(grep "Overall alignment rate" ./test_data/meta/Q-Y5JA_1M.se.alignSummary.txt | cut -f2 -d ':' | cut -f2 -d ' ' | tr -d '%'))
+    if [[ ${align} == "" ]]
+    then
+      exit 1
+    fi
+  - |
+    singularity run 'docker://bicf/rseqc3.0:2.0.0' infer_experiment.py -r "/project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed" -i "./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam" 1>> Q-Y5JA_1M.se.inferMetadata.log
+    ended=`bash inferMeta.sh endness Q-Y5JA_1M.se.inferMetadata.log`  # NOTE(review): other steps call scripts via ./workflow/scripts/ - confirm this path resolves
+    if [[ ${ended} == "" ]]
+    then
+      exit 1
+    fi
+  - pytest -m inferMetadata
+
 getRef:
   stage: unit
   script:
@@ -73,20 +91,20 @@ dedupData:
       echo "samtools view -b Q-Y5JA_1M.se.sorted.deduped.bam ${i} > Q-Y5JA_1M.se.sorted.deduped.${i}.bam; samtools index -@ 20 -b Q-Y5JA_1M.se.sorted.deduped.${i}.bam Q-Y5JA_1M.se.sorted.deduped.${i}.bam.bai;";
       done | singularity run 'docker://bicf/gudmaprbkdedup:2.0.0' parallel -j 20 -k
   - pytest -m dedupData
-  
-makeBigWig:
-  stage: unit
-  script:
-  - singularity run 'docker://bicf/deeptools3.3:2.0.0' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam -o Q-Y5JA_1M.se.bw
-  - pytest -m makeBigWig
 
-makeFeatureCounts:
+countData:  # unit job: featureCounts on the deduped test BAM, then calculateTPM.R on the resulting counts
   stage: unit
   script:
   - singularity run 'docker://bicf/subread2:2.0.0' featureCounts -R SAM -p -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -T 20 -s 1 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -o Q-Y5JA_1M.se.featureCounts -g 'gene_name' --primary --ignoreDup -B ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam 
   - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count Q-Y5JA_1M.se.featureCounts
   - pytest -m makeFeatureCounts
 
+makeBigWig:  # unit job: bamCoverage writes Q-Y5JA_1M.se.bw from the deduped test BAM, then pytest runs the makeBigWig marker
+  stage: unit
+  script:
+  - singularity run 'docker://bicf/deeptools3.3:2.0.0' bamCoverage -p 20 -b ./test_data/bam/small/Q-Y5JA_1M.se.sorted.deduped.bam -o Q-Y5JA_1M.se.bw
+  - pytest -m makeBigWig
+
 fastqc:
   stage: unit
   script:
@@ -106,6 +124,7 @@ downsampleData:
   - singularity exec 'docker://bicf/seqtk:2.0.0' seqtk sample -s100 ./test_data/fastq/small/Q-Y5JA_1M_trimmed.fq.gz 1000 1> sampled.1.fq
   - pytest -m downsampleData
 
+
 integration_se:
   stage: integration
   script:
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 9412658..03816c1 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -437,7 +437,7 @@ process inferMetadata {
 
     # infer experimental setting from dedup bam
     echo "LOG: infer experimental setting from dedup bam" >> ${repRID}.inferMetadata.err
-    infer_experiment.py -r "\${bed}" -i "\${bam}" > ${repRID}.inferMetadata.log 2>> ${repRID}.inferMetadata.err
+    infer_experiment.py -r "\${bed}" -i "\${bam}" 1>> ${repRID}.inferMetadata.log 2>> ${repRID}.inferMetadata.err
 
     echo "LOG: determining endedness and strandedness from file" >> ${repRID}.inferMetadata.err
     ended=`bash inferMeta.sh endness ${repRID}.inferMetadata.log` 1>> ${repRID}.inferMetadata.out 2>> ${repRID}.inferMetadata.err
diff --git a/workflow/tests/test_dataQC.py b/workflow/tests/test_dataQC.py
new file mode 100644
index 0000000..95eb332
--- /dev/null
+++ b/workflow/tests/test_dataQC.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+
+import pytest
+import pandas as pd
+from io import StringIO
+import os
+
+test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
+                '/../../'  # resolves to the directory two levels above this test file
+
+@pytest.mark.dataQC
+def test_dataQC():  # NOTE(review): no CI step visible here produces the .tin.xls file - confirm a dataQC job generates it
+    assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_1M.se.sorted.deduped.tin.xls'))
+
diff --git a/workflow/tests/test_inferMetadata.py b/workflow/tests/test_inferMetadata.py
index 44ffbc3..f2908be 100644
--- a/workflow/tests/test_inferMetadata.py
+++ b/workflow/tests/test_inferMetadata.py
@@ -10,5 +10,5 @@ test_output_path = os.path.dirname(os.path.abspath(__file__)) + \
 
 @pytest.mark.inferMetadata
 def test_inferMetadata():
-    assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_1M.se.sorted.deduped.tin.xls'))
+    assert os.path.exists(os.path.join(test_output_path, 'Q-Y5JA_1M.se.inferMetadata.log'))  # log the inferMetadata CI job appends infer_experiment.py stdout to
 
-- 
GitLab