diff --git a/.gitignore b/.gitignore
index f500ef73462d50e596722c6cbca6e15bdfe0a741..12288788210fa386427657fa55ab47b9ac14a6aa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -297,6 +297,8 @@ timeline*.html*
 *.tmp
 *.swp
 *.out
+*_studyRID.json
+*_studyRID.csv
 run*.sh
 !.gitkeep
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index b36504f9ea56479147ffeb414ae758b948736fa6..dc2eab106b9fff3a2d3a00dc5bde49c81046f5b2 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -38,7 +38,8 @@ parseMetadata:
   - stranded=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p stranded)
   - spike=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p spike)
   - species=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.csv" -p species)
-  - echo -e "${endsMeta},${endsManual},${stranded},${spike},${species},${exp},${study},${rep}" > design.csv
+  - readLength=$(singularity run 'docker://bicf/python3:2.0.1_indev' python3 ./workflow/scripts/parseMeta.py -r Replicate_RID -m "./test_data/meta/metaTest.stageNew.csv" -p readLength)
+  - echo -e "${endsMeta},${endsManual},${stranded},${spike},${species},${readLength},${exp},${study},${rep}" > design.csv
  - pytest -m parseMetadata

inferMetadata:
@@ -66,6 +67,8 @@ trimData:
  script:
  - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --basename Q-Y5F6_1M.se -j 20 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz
  - singularity run 'docker://bicf/trimgalore:1.1' trim_galore --gzip -q 25 --illumina --length 35 --paired --basename Q-Y5F6_1M.pe -j 20 ./test_data/fastq/small/Q-Y5F6_1M.R1.fastq.gz ./test_data/fastq/small/Q-Y5F6_1M.R2.fastq.gz
+  - readLengthSE=$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
+  - readLengthPE=$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length($1)}' | sort -n | awk '{a[NR]=$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
  - pytest -m trimData

downsampleData:
@@ -104,6 +107,7 @@ countData:
  script:
  - singularity run 'docker://bicf/subread2:2.0.0' featureCounts -T 20 -a /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.gtf -G /project/BICF/BICF_Core/shared/gudmap/references/GRCh38.p12.v31/genome.fna -g 'gene_name' -o Q-Y5F6_1M.se.featureCounts -s 1 -R SAM --primary --ignoreDup ./test_data/bam/small/Q-Y5F6_1M.se.sorted.deduped.bam
  - singularity run 'docker://bicf/subread2:2.0.0' Rscript ./workflow/scripts/calculateTPM.R --count ./test_data/counts/small/Q-Y5F6_1M.se.featureCounts
+  - assignedReads=$(grep -m 1 'Assigned' *.summary | grep -oe '\([0-9.]*\)')
  - pytest -m makeFeatureCounts

makeBigWig:
@@ -164,7 +168,7 @@ consistency:
  - grep -m 1 \"Assigned\":.[0-9] SE_multiqc_data.json | grep -oe '\([0-9.]*\)' > assignedSE.txt
  - grep -m 1 \"Assigned\":.[0-9] PE_multiqc_data.json | grep -oe '\([0-9.]*\)' > assignedPE.txt
  - echo 7742416 > assignedExpectSE.txt
- - echo 2599149 > assignedExpectPE.txt
+ - echo 2599140 > assignedExpectPE.txt
  - pytest -m consistencySE
  - pytest -m consistencyPE
  artifacts:
diff --git a/README.md b/README.md
index bc12d2c3a91db96756ad941444c4b67797afda59..23b24d840c661ec6f8c1694629b9024bf9b07caa 100644
--- a/README.md
+++ b/README.md
@@ -58,11 +58,17 @@ FULL EXAMPLE: nextflow run workflow/rna-seq.nf --deriva ./data/credential.json --bdbag ./data/cookies.txt --repRID Q-Y5JA
 ```
+To run a set of replicates from a study RID:
+------------------------------------------
+Run in the repo root dir:
+* `sh workflow/scripts/splitStudy.sh [studyRID]`
+It will run the replicate RIDs in parallel, in batches of 5
-[**CHANGELOG**](https://git.biohpc.swmed.edu/BICF/gudmap_rbk/rna-seq/blob/develop/CHANGELOG.md)
----
+<hr>
+[**CHANGELOG**](https://git.biohpc.swmed.edu/BICF/gudmap_rbk/rna-seq/blob/develop/CHANGELOG.md)
+<hr>
 Credits
 =======
diff --git a/cleanup.sh b/cleanup.sh
index 9569ff54fd71cd94bddde415af03a101820ab514..aa289201c531fa4f4667a04f80fd015d2200e40c 100644
--- a/cleanup.sh
+++ b/cleanup.sh
@@ -5,3 +5,5 @@ rm timeline*.html*
 rm .nextflow*.log*
 rm -r .nextflow/
 rm -r work/
+rm *_studyRID.json
+rm *_studyRID.csv
diff --git a/docs/dag.png b/docs/dag.png
index fcedac6aed613fa3a90575157459b81394f223ca..8c4896f2f0f6d2c765d6b020cddb4cda23064c97 100644
Binary files a/docs/dag.png and b/docs/dag.png differ
diff --git a/workflow/conf/multiqc_config.yaml b/workflow/conf/multiqc_config.yaml
index 87ce3ba5492d9cb6fb649413a1227c0bd5242883..db6a335a8e5e6f8677dd8c433d391c6fd5224751 100644
--- a/workflow/conf/multiqc_config.yaml
+++ b/workflow/conf/multiqc_config.yaml
@@ -70,16 +70,21 @@ custom_data:
   meta:
     file_format: 'tsv'
     section_name: 'Metadata'
-    description: 'This is the comparison of infered metadata and submitter provided'
+    description: 'This is the comparison of inferred, submitter-provided, and calculated metadata'
     plot_type: 'table'
     pconfig:
       id: 'meta'
+      format: '{:,.0f}'
     headers:
       Source
       Species
       Ends
       Stranded
       Spike-in
+      Raw Reads
+      Assigned Reads
+      Median Read Length
+      Median TIN
   tin:
     file_format: 'tsv'
     section_name: 'TIN'
diff --git a/workflow/rna-seq.nf b/workflow/rna-seq.nf
index 2e1e2b2c5d75cdb95ca856f1f5807a187c046deb..772d076174400bca13110869a797f87617ff5c89 100644
--- a/workflow/rna-seq.nf
+++ b/workflow/rna-seq.nf
@@ -53,6 +53,7 @@ script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh")
 script_parseMeta = Channel.fromPath("${baseDir}/scripts/parseMeta.py")
 script_inferMeta = Channel.fromPath("${baseDir}/scripts/inferMeta.sh")
 script_calculateTPM = Channel.fromPath("${baseDir}/scripts/calculateTPM.R")
+script_convertGeneSymbols = Channel.fromPath("${baseDir}/scripts/convertGeneSymbols.R")
 script_tinHist = Channel.fromPath("${baseDir}/scripts/tinHist.py")

 /*
@@ -237,8 +238,16 @@ process parseMetadata {
     species=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experiment}" -p species)
     echo -e "LOG: species metadata parsed: \${species}" >> ${repRID}.parseMetadata.log

-    # gave design file
-    echo -e "\${endsMeta},\${endsManual},\${stranded},\${spike},\${species},\${exp},\${study}" > design.csv
+    # get read length metadata
+    readLength=\$(python3 ${script_parseMeta} -r ${repRID} -m "${experimentSettings}" -p readLength)
+    if [ "\${readLength}" = "nan" ]
+    then
+      readLength="NA"
+    fi
+    echo -e "LOG: read length metadata parsed: \${readLength}" >> ${repRID}.parseMetadata.log
+
+    # save design file
+    echo -e "\${endsMeta},\${endsManual},\${stranded},\${spike},\${species},\${readLength},\${exp},\${study}" > design.csv
     """
 }

@@ -248,6 +257,7 @@ endsManual = Channel.create()
 strandedMeta = Channel.create()
 spikeMeta = Channel.create()
 speciesMeta = Channel.create()
+readLengthMeta = Channel.create()
 expRID = Channel.create()
 studyRID = Channel.create()
 metadata.splitCsv(sep: ",", header: false).separate(
@@ -256,6 +266,7 @@ metadata.splitCsv(sep: ",", header: false).separate(
   strandedMeta,
   spikeMeta,
   speciesMeta,
+  readLengthMeta,
   expRID,
   studyRID
 )
@@ -281,25 +292,38 @@ process trimData {
   output:
     path ("*.fq.gz") into fastqsTrim
     path ("*_trimming_report.txt") into trimQC
+    path ("readLength.csv") into inferMetadata_readLength

   script:
     """
     hostname > ${repRID}.trimData.log
     ulimit -a >> ${repRID}.trimData.log

-    # trim fastq's using trim_galore
+    # trim fastq's using trim_galore and extract median read length
     echo -e "LOG: trimming ${ends}" >> ${repRID}.trimData.log
     if [ "${ends}" == "se" ]
     then
       trim_galore --gzip -q 25 --illumina --length 35 --basename ${repRID} -j `nproc` ${fastq[0]}
+      readLength=\$(zcat *_trimmed.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
     elif [ "${ends}" == "pe" ]
     then
       trim_galore --gzip -q 25 --illumina --length 35 --paired --basename ${repRID} -j `nproc` ${fastq[0]} ${fastq[1]}
+      readLength=\$(zcat *_1.fq.gz | awk '{if(NR%4==2) print length(\$1)}' | sort -n | awk '{a[NR]=\$0}END{print(NR%2==1)?a[int(NR/2)+1]:(a[NR/2]+a[NR/2+1])/2}')
     fi
     echo -e "LOG: trimmed" >> ${repRID}.trimData.log
+    echo -e "LOG: median trimmed read length: \${readLength}" >> ${repRID}.trimData.log
+
+    # save read length file
+    echo -e "\${readLength}" > readLength.csv
     """
 }

+// Extract calculated read length metadata into channel
+readLengthInfer = Channel.create()
+inferMetadata_readLength.splitCsv(sep: ",", header: false).separate(
+  readLengthInfer
+)
+
 // Replicate trimmed fastq's
 fastqsTrim.into {
   fastqsTrim_alignData
@@ -605,7 +629,7 @@ process getRef {
     val species from speciesInfer_getRef

   output:
-    tuple path ("hisat2", type: 'dir'), path ("bed", type: 'dir'), path ("*.fna"), path ("*.gtf") into reference
+    tuple path ("hisat2", type: 'dir'), path ("bed", type: 'dir'), path ("*.fna"), path ("*.gtf"), path ("geneID.tsv"), path ("Entrez.tsv") into reference

   script:
     """
@@ -641,12 +665,16 @@ process getRef {
       aws s3 cp "\${references}"/bed ./bed --recursive
       aws s3 cp "\${references}"/genome.fna ./
       aws s3 cp "\${references}"/genome.gtf ./
+      aws s3 cp "\${references}"/geneID.tsv ./
+      aws s3 cp "\${references}"/Entrez.tsv ./
     elif [ ${referenceBase} == "/project/BICF/BICF_Core/shared/gudmap/references" ]
     then
       ln -s "\${references}"/hisat2
       ln -s "\${references}"/bed
       ln -s "\${references}"/genome.fna
       ln -s "\${references}"/genome.gtf
+      ln -s "\${references}"/geneID.tsv
+      ln -s "\${references}"/Entrez.tsv
     fi
     echo -e "LOG: fetched" >> ${repRID}.getRef.log
     """
@@ -810,14 +838,16 @@ process countData {
   input:
     path script_calculateTPM
+    path script_convertGeneSymbols
     tuple path (bam), path (bai) from dedupBam_countData
     path ref from reference_countData
     val ends from endsInfer_countData
     val stranded from strandedInfer_countData

   output:
-    path ("*.countTable.csv") into counts
+    path ("*.tpmTable.csv") into counts
     path ("*.countData.summary") into countsQC
+    path ("assignedReads.csv") into inferMetadata_assignedReads

   script:
     """
@@ -837,7 +867,7 @@ process countData {
     elif [ "${stranded}" == "reverse" ]
     then
       stranding=2
-      echo -e "LOG: strandedness set to forward stranded [2]" >> ${repRID}.countData.log
+      echo -e "LOG: strandedness set to reverse stranded [2]" >> ${repRID}.countData.log
     fi

     # run featureCounts
@@ -850,13 +880,26 @@ process countData {
       featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
     fi
     echo -e "LOG: counted" >> ${repRID}.countData.log
+
+    # extract assigned reads
+    grep -m 1 'Assigned' *.countData.summary | grep -oe '\\([0-9.]*\\)' > assignedReads.csv

     # calculate TPM from the resulting countData table
     echo -e "LOG: calculating TPM with R" >> ${repRID}.countData.log
     Rscript calculateTPM.R --count "${repRID}.countData"
+
+    # convert gene symbols to Entrez IDs
+    echo -e "LOG: converting gene symbols to Entrez IDs" >> ${repRID}.countData.log
+    Rscript convertGeneSymbols.R --repRID "${repRID}"
     """
 }

+// Extract number of assigned reads metadata into channel
+assignedReadsInfer = Channel.create()
+inferMetadata_assignedReads.splitCsv(sep: ",", header: false).separate(
+  assignedReadsInfer
+)
+
 /*
  *fastqc: run fastqc on untrimmed fastq's
 */
@@ -868,6 +911,7 @@ process fastqc {
   output:
     path ("*_fastqc.zip") into fastqc
+    path ("rawReads.csv") into inferMetadata_rawReads

   script:
     """
@@ -876,11 +920,19 @@ process fastqc {
     # run fastqc
     echo -e "LOG: running fastq on raw fastqs" >> ${repRID}.fastqc.log
-    #fastqc *.fastq.gz -o .
-    touch test_fastqc.zip
+    fastqc *.fastq.gz -o .
+
+    # count raw reads
+    zcat *.R1.fastq.gz | echo \$((`wc -l`/4)) > rawReads.csv
     """
 }

+// Extract number of raw reads metadata into channel
+rawReadsInfer = Channel.create()
+inferMetadata_rawReads.splitCsv(sep: ",", header: false).separate(
+  rawReadsInfer
+)
+
 /*
  *dataQC: calculate transcript integrity numbers (TIN) and bin as well as calculate innerdistance of PE replicates
 */
@@ -895,7 +947,8 @@ process dataQC {
     val ends from endsInfer_dataQC

   output:
-    path "${repRID}.tin.hist.tsv" into tin
+    path "${repRID}.tin.hist.tsv" into tinHist
+    path "${repRID}.tin.med.csv" into inferMetadata_tinMed
     path "${repRID}.insertSize.inner_distance_freq.txt" into innerDistance

   script:
@@ -928,12 +981,19 @@ process dataQC {
     """
 }

+// Extract median TIN metadata into channel
+tinMedInfer = Channel.create()
+inferMetadata_tinMed.splitCsv(sep: ",", header: false).separate(
+  tinMedInfer
+)
+
 /*
  *aggrQC: aggregate QC from processes as well as metadata and run MultiQC
 */
 process aggrQC {
   tag "${repRID}"
-  publishDir "${outDir}/qc", mode: 'copy', pattern: "${repRID}.multiqc.html"
+  publishDir "${outDir}/report", mode: 'copy', pattern: "${repRID}.multiqc.html"
+  publishDir "${outDir}/qc", mode: 'copy', pattern: "${repRID}.multiqc_data.json"

   input:
     path multiqcConfig
@@ -944,7 +1004,7 @@ process aggrQC {
     path dedupQC
     path countsQC
     path innerDistance
-    path tin
+    path tinHist
     path alignSampleQCs from alignSampleQC_aggrQC.collect()
     path inferExperiment
     val endsManual from endsManual_aggrQC
@@ -956,11 +1016,17 @@ process aggrQC {
     val strandedI from strandedInfer_aggrQC
     val spikeI from spikeInfer_aggrQC
     val speciesI from speciesInfer_aggrQC
+    val readLengthM from readLengthMeta
+    val readLengthI from readLengthInfer
+    val rawReadsI from rawReadsInfer
+    val assignedReadsI from assignedReadsInfer
+    val tinMedI from tinMedInfer
     val expRID
     val studyRID

   output:
     path "${repRID}.multiqc.html" into multiqc
+    path "${repRID}.multiqc_data.json" into multiqcJSON

   script:
     """
@@ -974,14 +1040,14 @@ process aggrQC {
     # make metadata table
     echo -e "LOG: creating metadata table" >> ${repRID}.aggrQC.log
-    echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in" > metadata.tsv
-    echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}" >> metadata.tsv
-    echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}" >> metadata.tsv
-    echo -e "Manual\t-\t${endsManual}\t-\t-" >> metadata.tsv
+    echo -e "Source\tSpecies\tEnds\tStranded\tSpike-in\tRaw Reads\tAssigned Reads\tMedian Read Length\tMedian TIN" > metadata.tsv
+    echo -e "Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t-\t-\t'${readLengthM}'\t-" >> metadata.tsv
"Submitter\t${speciesM}\t${endsM}\t${strandedM}\t${spikeM}\t-\t-\t'${readLengthM}'\t-" >> metadata.tsv + echo -e "Infered\t${speciesI}\t${endsI}\t${strandedI}\t${spikeI}\t-\t-\t-\t-" >> metadata.tsv + echo -e "Measured\t-\t${endsManual}\t-\t-\t'${rawReadsI}'\t'${assignedReadsI}'\t'${readLengthI}'\t'${tinMedI}'" >> metadata.tsv # remove inner distance report if it is empty (SE repRID) echo -e "LOG: removing dummy inner distance file" >> ${repRID}.aggrQC.log - if [ wc -l ${innerDistance} | awk '{print\${1}}' -eq 0 ] + if [ "${endsM}" == "se" ] then rm -f ${innerDistance} fi @@ -989,5 +1055,6 @@ process aggrQC { # run MultiQC echo -e "LOG: running multiqc" >> ${repRID}.aggrQC.log multiqc -c ${multiqcConfig} . -n ${repRID}.multiqc.html + cp ${repRID}.multiqc_data/multiqc_data.json ${repRID}.multiqc_data.json """ -} +} \ No newline at end of file diff --git a/workflow/scripts/convertGeneSymbols.R b/workflow/scripts/convertGeneSymbols.R new file mode 100644 index 0000000000000000000000000000000000000000..7b05b92e5d251050a78e7f546aa8b042d994c623 --- /dev/null +++ b/workflow/scripts/convertGeneSymbols.R @@ -0,0 +1,24 @@ +gc() +library(optparse) + +option_list=list( + make_option("--repRID",action="store",type='character',help="Replicate RID") +) +opt=parse_args(OptionParser(option_list=option_list)) +rm(option_list) + +countTable <- read.csv(paste0(opt$repRID,".countData.countTable.csv"), stringsAsFactors=FALSE) +geneID <- read.delim("geneID.tsv", header=FALSE, stringsAsFactors=FALSE) +Entrez <- read.delim("Entrez.tsv", header=FALSE, stringsAsFactors=FALSE) + +convert <- data.frame(geneID=countTable$Geneid) +convert <- merge(x=convert,y=geneID[,1:2],by.x="geneID",by.y="V2",all.x=TRUE) +convert <- merge(x=convert,y=Entrez,by.x="V1",by.y="V1",all.x=TRUE) +convert[is.na(convert$V2),3] <- "" +convert <- convert[,-1] +colnames(convert) <- c("GeneID","EntrezID") +convert <- unique(convert) + +output <- merge(x=convert,y=countTable,by.x="GeneID",by.y="Geneid",all.x=TRUE) + +write.table(output,file=paste0(opt$repRID,".tpmTable.csv"),sep=",",row.names=FALSE,quote=FALSE) \ No newline at end of file diff --git a/workflow/scripts/parseMeta.py b/workflow/scripts/parseMeta.py index fd73cf34c7084110dfeae52b74e63e2eb96af9a0..500054264f7c8c50310da92d9995f432930318d3 100644 --- a/workflow/scripts/parseMeta.py +++ b/workflow/scripts/parseMeta.py @@ -102,5 +102,10 @@ def main(): exit(1) print(species) + # Get read length metadata from 'Experiment Settings.csv' + if (args.parameter == "readLength"): + readLength = metaFile.Read_Length.unique() + print(str(readLength).strip('[]')) + if __name__ == '__main__': main() diff --git a/workflow/scripts/splitStudy.py b/workflow/scripts/splitStudy.py new file mode 100644 index 0000000000000000000000000000000000000000..82ffc2881857dd5d1d27eee5ea6a381b02d0e9f5 --- /dev/null +++ b/workflow/scripts/splitStudy.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 + +import argparse +import pandas as pd +import warnings +warnings.simplefilter(action='ignore', category=FutureWarning) + +def get_args(): + parser = argparse.ArgumentParser() + parser.add_argument('-s', '--studyRID',help="The study RID.",required=True) + args = parser.parse_args() + return args + +def main(): + args = get_args() + studyRID=pd.read_json(args.studyRID+"_studyRID.json") + if studyRID["RID"].count() > 0: + studyRID["RID"].to_csv(args.studyRID+"_studyRID.csv",header=False,index=False) + else: + raise Exception("No associated replicates found: %s" % + studyRID) + +if __name__ == '__main__': + main() diff --git 
new file mode 100644
index 0000000000000000000000000000000000000000..a64b6d9e4cde818d1c6f91fd84144b821febc536
--- /dev/null
+++ b/workflow/scripts/splitStudy.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# query GUDMAP/RBK for replicates associated with the study RID
+curl --location --request GET "https://www.gudmap.org/ermrest/catalog/2/entity/RNASeq:Replicate/Study_RID=${1}" > $1_studyRID.json
+
+# extract replicate RIDs
+module load python/3.6.4-anaconda
+python3 ./workflow/scripts/splitStudy.py -s $1
+
+# run pipeline on replicate RIDs in parallel, 5 at a time
+module load nextflow/20.01.0
+module load singularity/3.5.3
+xargs -P 5 -I {} nextflow run workflow/rna-seq.nf --repRID {} < "$1_studyRID.csv"
+
+# cleanup study RID files
+rm $1_studyRID.json
+rm $1_studyRID.csv
diff --git a/workflow/scripts/tinHist.py b/workflow/scripts/tinHist.py
index df32a985c165e2b266fb26fc347bd13eb1580999..3d292c2eb8cadb3b16466c6b19d0574184d439d7 100644
--- a/workflow/scripts/tinHist.py
+++ b/workflow/scripts/tinHist.py
@@ -15,6 +15,7 @@ def get_args():

 def main():
     args = get_args()
     tin = pd.read_csv(args.repRID + '.sorted.deduped.tin.xls',sep="\t",header=0)
+    hist = pd.cut(tin['TIN'],bins=pd.interval_range(start=0,freq=10,end=100,closed='right')).value_counts(sort=False)
     labels = ["{0} - {1}".format(i, i + 9) for i in range(1, 100, 10)]
     #labels[0] = '0 - 10'
@@ -34,6 +35,9 @@ def main():
     hist = hist.T.fillna(0.0).astype(int)
     #hist = hist.apply(lambda x: x/x.sum()*100, axis=1)
     hist.to_csv(args.repRID + '.tin.hist.tsv',sep='\t')
+    medFile = open(args.repRID + '.tin.med.csv',"w")
+    medFile.write(str(round(tin['TIN'][(tin['TIN']!=0)].median(),2)))
+    medFile.close()

 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()
diff --git a/workflow/tests/test_consistency.py b/workflow/tests/test_consistency.py
index 0aecc02000f301eaf237a7e6ae6270761e01687e..073b12826b798ac94d16fda4291dfba2c1a42203 100644
--- a/workflow/tests/test_consistency.py
+++ b/workflow/tests/test_consistency.py
@@ -24,7 +24,7 @@ def readAssigned(fileAssigned,fileExpectAssigned):
     expect = open(fileExpectAssigned, "r")
     lineAssigned = assigned.readline()
     lineExpect = expect.readline()
-    if lineAssigned.strip() == lineExpect.strip():
+    if abs(int(lineAssigned.strip()) - int(lineExpect.strip())) <= int(lineExpect.strip())*0.00001:
         data = True

     return data
diff --git a/workflow/tests/test_parseMetadata.py b/workflow/tests/test_parseMetadata.py
index 31a9674be28708b08fa54fb75fde541949b5278a..59677bbba7d40058bdeb78ccceeeeddba4565a14 100644
--- a/workflow/tests/test_parseMetadata.py
+++ b/workflow/tests/test_parseMetadata.py
@@ -17,7 +17,7 @@ def readLine(fileName):
     data = False
     file = open(fileName, "r")
     line = file.readline()
-    if line.strip() == "uk,se,unstranded,no,Homo sapiens,Experiment_RID,Study_RID,Replicate_RID":
+    if line.strip() == "uk,se,unstranded,no,Homo sapiens,75,Experiment_RID,Study_RID,Replicate_RID":
         data = True

     return data
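Reviewer note on the median read-length one-liner added in `trimData` (and mirrored in `.gitlab-ci.yml`): the two-stage `awk | sort -n | awk` pipeline takes the length of every FASTQ sequence line and reports the median, averaging the two middle values when the read count is even. A minimal Python sketch of the same logic, useful as an offline cross-check; the FASTQ filename below is illustrative only (it mimics a trim_galore `--basename` output, not an actual pipeline artifact):

```python
#!/usr/bin/env python3
# Cross-check for the trimData awk median: FASTQ records are 4 lines each,
# with the sequence on the second line. Collect the sequence lengths and
# take the median, averaging the middle pair for an even count -- the same
# result as awk '{if(NR%4==2) print length($1)}' | sort -n | awk '...'.
import gzip
import statistics

def median_read_length(fastq_gz):
    lengths = []
    with gzip.open(fastq_gz, "rt") as fq:
        for i, line in enumerate(fq):
            if i % 4 == 1:  # 0-based index 1 is the sequence line
                lengths.append(len(line.strip()))
    return statistics.median(lengths)  # averages the middle pair when even

if __name__ == "__main__":
    # hypothetical file name matching trim_galore's --basename output
    print(median_read_length("Q-Y5F6_1M.se_trimmed.fq.gz"))
```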