diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b3749cd9b15f1b7ea5f54f41601a827ec2268d7..6ce68e8016756cb4bea4c6652b7514f292b3e312 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ * Upload mRNA QC * Create and upload output bag * Add optional to not upload +* Update references to use bags +* Update to newer references (GRCh38.p13.v36 and GRCm38.p6.vM25) +* Use production server for data-hub reference call +* Stop pipeline if submitted does not match inferred +* Update execution run with "Success" or "Error" **Background** * Remove (comment out) option to pull references from S3 @@ -17,10 +22,12 @@ * Changed order of steps so that fastqc is done after the trim step * Change docker images to production * Add automated version badges +* Only calculate/report tin values on regular chromosomes (from gtf) *Known Bugs* * Datahub reference pull uses dev.gudmap.org as source until referencencs are placed on production * Override params (inputBag, fastq, species) aren't checked for integrity +* <hr> diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf index 5c548b28b8c92e5a3cb4317621ee7cfb448211c0..567c7204c4f6bd7029156efe5dc44f5b36f8bb2c 100644 --- a/workflow/rna-seq.nf +++ b/workflow/rna-seq.nf @@ -1248,7 +1248,8 @@ process dedupData { samtools index -@ `nproc` -b ${repRID}_sorted.deduped.bam ${repRID}_sorted.deduped.bam.bai # split the deduped BAM file for multi-threaded tin calculation - for i in `samtools view ${repRID}_sorted.deduped.bam | cut -f3 | sort | uniq`; + # take column 3 (reference name) first, then keep only names containing "chr"; filtering before the cut would shift the columns + for i in `samtools view ${repRID}_sorted.deduped.bam | cut -f3 | grep -o 'chr.*' | sort | uniq`; do echo "echo \"LOG: splitting each chromosome into its own BAM and BAI files with Samtools\"; samtools view -b ${repRID}_sorted.deduped.bam \${i} 1>> ${repRID}_sorted.deduped.\${i}.bam; samtools index -@ `nproc` -b ${repRID}_sorted.deduped.\${i}.bam ${repRID}_sorted.deduped.\${i}.bam.bai" done | parallel -j `nproc` -k