Skip to content
Snippets Groups Projects
Commit e6676204 authored by Venkat Malladi's avatar Venkat Malladi
Browse files

Merge branch '62-output.inputBagit' into 'develop'

Resolve "Add input bagit to ouput"

Closes #62

See merge request !38
parents 8a8ce097 a86785e3
2 merge requests!39v0.0.2,!38Resolve "Add input bagit to ouput"
Pipeline #7909 passed with stages
in 1 hour, 40 minutes, and 15 seconds
......@@ -65,8 +65,8 @@ getRef:
trimData:
stage: unit
script:
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --basename Q-Y5F6_1M.se -j 20 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --paired --basename Q-Y5F6_1M.pe -j 20 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --basename Q-Y5F6_1M.se ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
- singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --length 35 --paired --basename Q-Y5F6_1M.pe ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
- readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
- readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
- pytest -m trimData
......@@ -130,6 +130,13 @@ dataQC:
echo "tin.py -i ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.${i}.bam -r /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/bed/genome.bed; cat Q-Y5F6_1M.se.sorted.deduped.${i}.tin.xls | tr -s \"\\w\" \"\\t\" | grep -P \"\\t${i}\\t\";"; done | singularity run 'docker://bicf/rseqc3.0:2.0.1_indev' parallel -j 20 -k >> Q-Y5F6_1M.se.sorted.deduped.tin.xls
- pytest -m dataQC
# CI unit job for the outputBag step: packages a directory into a BDBag
# zip archive (same bdbag invocation the pipeline's outputBag process uses)
# and checks the result with the outputBag-marked pytest.
outputBag:
stage: unit
script:
# Create a dummy directory to bag; bdbag converts it in place and
# --archiver zip produces test.zip alongside it.
- mkdir test
- singularity run 'docker://bicf/gudmaprbkfilexfer:2.0.1_indev' bdbag test --archiver zip
- pytest -m outputBag
integration_se:
stage: integration
......@@ -181,5 +188,4 @@ consistency:
- assignedPE.txt
- assignedExpectSE.txt
- assignedExpectPE.txt
expire_in: 7 days
expire_in: 7 days
\ No newline at end of file
# v0.0.2 (in development)
**User Facing**
* Output:
* inputBag
* outputBag
**Background**
*Known Bugs*
* outputBag does not contain fetch for processed data
* Does not include automatic data upload
<hr>
......
......@@ -62,7 +62,8 @@ To run a set of replicates from study RID:
------------------------------------------
Run in repo root dir:
* `sh workflow/scripts/splitStudy.sh [studyRID]`
It will run in parallel in batches of 5 replicatesRID
It will run in parallel in batches of 25 replicatesRID with 30 second delays between launches.\
NOTE: Nextflow "local" processes for all replicates will run on the node/machine the bash script is launched from... consider running the study script on the BioHPC's SLURM cluster (use `sbatch`).
......@@ -115,4 +116,4 @@ Please cite in publications: Pipeline was developed by BICF from funding provide
Pipeline Directed Acyclic Graph
-------------------------------
![dag](docs/dag.png "DAG")
![dag](docs/dag.png "DAG")
\ No newline at end of file
docs/dag.png

665 KiB | W: | H:

docs/dag.png

673 KiB | W: | H:

docs/dag.png
docs/dag.png
docs/dag.png
docs/dag.png
  • 2-up
  • Swipe
  • Onion skin
......@@ -80,4 +80,8 @@ process {
cpus = 2
memory = '1 GB'
}
withName: outputBag {
cpus = 1
memory = '1 GB'
}
}
......@@ -10,7 +10,7 @@ process {
executor = 'local'
}
withName: getData {
executor = 'local'
queue = 'super'
}
withName: parseMetadata {
executor = 'local'
......@@ -19,7 +19,7 @@ process {
queue = 'super'
}
withName: getRefInfer {
executor = 'local'
queue = 'super'
}
withName: downsampleData {
executor = 'local'
......@@ -31,7 +31,7 @@ process {
queue = 'super'
}
withName: getRef {
executor = 'local'
queue = 'super'
}
withName: alignData {
queue = '256GB,256GBv1'
......@@ -54,6 +54,9 @@ process {
withName: aggrQC {
executor = 'local'
}
withName: outputBag {
executor = 'local'
}
}
singularity {
......
......@@ -67,21 +67,24 @@ process {
withName: aggrQC {
container = 'bicf/multiqc1.8:2.0.1_indev'
}
withName:outputBag {
container = 'bicf/gudmaprbkfilexfer:2.0.1_indev'
}
}
trace {
enabled = true
enabled = false
file = 'pipeline_trace.txt'
fields = 'task_id,native_id,process,name,status,exit,submit,start,complete,duration,realtime,%cpu,%mem,rss'
}
timeline {
enabled = true
enabled = false
file = 'timeline.html'
}
report {
enabled = true
enabled = false
file = 'report.html'
}
......@@ -94,6 +97,6 @@ manifest {
homePage = 'https://git.biohpc.swmed.edu/gudmap_rbk/rna-seq'
description = 'This pipeline was created to be a standard mRNA-sequencing analysis pipeline which integrates with the GUDMAP and RBK consortium data-hub.'
mainScript = 'rna-seq.nf'
version = 'v0.0.1'
version = 'v0.0.2_indev'
nextflowVersion = '>=19.09.0'
}
......@@ -110,6 +110,7 @@ Development : ${params.dev}
*/
process getBag {
tag "${repRID}"
publishDir "${outDir}/inputBag", mode: 'copy', pattern: "Replicate_*.zip"
input:
path credential, stageAs: "credential.json" from deriva
......@@ -303,11 +304,11 @@ process trimData {
echo -e "LOG: trimming ${ends}" >> ${repRID}.trimData.log
if [ "${ends}" == "se" ]
then
trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID} -j `nproc` ${fastq[0]}
trim_galore --gzip -q 25 --length 35 --basename ${repRID} ${fastq[0]}
readLength=\$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
elif [ "${ends}" == "pe" ]
then
trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID} -j `nproc` ${fastq[0]} ${fastq[1]}
trim_galore --gzip -q 25 --length 35 --paired --basename ${repRID} ${fastq[0]} ${fastq[1]}
readLength=\$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
fi
echo -e "LOG: trimmed" >> ${repRID}.trimData.log
......@@ -834,7 +835,7 @@ process makeBigWig {
*/
process countData {
tag "${repRID}"
publishDir "${outDir}/count", mode: 'copy', pattern: "${repRID}*.countTable.csv"
publishDir "${outDir}/count", mode: 'copy', pattern: "${repRID}*.tpmTable.csv"
input:
path script_calculateTPM
......@@ -1057,4 +1058,28 @@ process aggrQC {
multiqc -c ${multiqcConfig} . -n ${repRID}.multiqc.html
cp ${repRID}.multiqc_data/multiqc_data.json ${repRID}.multiqc_data.json
"""
}
\ No newline at end of file
}
/*
 *outputBag: create outputBag
 */
// Bundles the aggregated MultiQC report (HTML + JSON) into a BDBag
// zip archive for the replicate.
process outputBag {
tag "${repRID}"
// Publish only the final zipped bag (not the intermediate bag directory).
publishDir "${outDir}/outputBag", mode: 'copy', pattern: "Replicate_${repRID}.outputBag.zip"
input:
path multiqc
path multiqcJSON
output:
// bdbag --archiver zip emits Replicate_<repRID>.outputBag.zip, which
// this glob captures.
path ("Replicate_*.zip") into outputBag
script:
"""
mkdir Replicate_${repRID}.outputBag
cp ${multiqc} Replicate_${repRID}.outputBag
cp ${multiqcJSON} Replicate_${repRID}.outputBag
bdbag Replicate_${repRID}.outputBag --archiver zip
"""
}
#!/bin/bash
#SBATCH -p super
#SBATCH --job-name GUDMAP-RBK_Study
#SBATCH -t 7-0:0:0
# query GUDMAP/RBK for study RID
echo "curl --location --request GET 'https://www.gudmap.org/ermrest/catalog/2/entity/RNASeq:Replicate/Study_RID="${1}"'" | bash > $1_studyRID.json
......@@ -10,7 +14,7 @@ python3 ./workflow/scripts/splitStudy.py -s $1
# run pipeline on replicate RIDs in parallel
module load nextflow/20.01.0
module load singularity/3.5.3
while read repRID; do echo ${repRID}; done < "$1_studyRID.csv" | xargs -P 5 -I {} nextflow run workflow/rna-seq.nf --repRID {}
while read repRID; do echo ${repRID}; sleep 15; done < "$1_studyRID.csv" | xargs -P 5 -I {} nextflow -q run workflow/rna-seq.nf --repRID {}
# cleanup study RID files
rm $1_studyRID.json
......
#!/usr/bin/env python3
"""Unit test for the CI outputBag job: confirms the bdbag zip archive exists."""

import pytest
import pandas as pd
from io import StringIO
import os

# Repository root: two directory levels above this test module.
test_output_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), '..', '..') + os.sep


@pytest.mark.outputBag
def test_outputBag():
    # The CI job runs `bdbag test --archiver zip` in the repo root,
    # which must leave test.zip behind.
    bag_archive = os.path.join(test_output_path, 'test.zip')
    assert os.path.exists(bag_archive)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment