Skip to content
Snippets Groups Projects
Commit 12996d71 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Clean inference, add tin

parent 4a68865f
3 merge requests!37v0.0.1,!24Develop,!23Resolve "process_RSeQC"
Pipeline #6247 failed with stages
in 2 minutes and 21 seconds
......@@ -27,6 +27,9 @@ process {
withName:fastqc {
queue = 'super'
}
withName:inferMetadata {
queue = 'super'
}
}
singularity {
......
......@@ -39,6 +39,7 @@ referenceBase = "/project/BICF/BICF_Core/shared/gudmap/references"
script_bdbagFetch = Channel.fromPath("${baseDir}/scripts/bdbagFetch.sh")
script_parseMeta = Channel.fromPath("${baseDir}/scripts/parseMeta.py")
script_inferMeta = Channel.fromPath("${baseDir}/scripts/inferMeta.sh")
script_aggregateInference = Channel.fromPath("${baseDir}/scripts/aggregateInference.R")
/*
* splitData: split bdbag files by replicate so fetch can occure in parallel, and rename files to replicate rid
......@@ -195,11 +196,9 @@ stranded.into {
}
spike.into{
spike_getRef
spike_rseqc
}
species.into {
species_getRef
species_rseqc
}
/*
......@@ -417,12 +416,13 @@ process inferMetadata {
input:
path script_inferMeta
path script_aggregateInference
path reference_rseqc
set val (repRID), path (inBam), path (inBai) from dedupBam_rseqc
val species_rseqc
val spike_rseqc
output:
path "infer.csv" into inferMetadata
script:
"""
......@@ -434,29 +434,28 @@ process inferMetadata {
endness=`bash inferMeta.sh endness ${repRID}.rseqc.log`
fail=`bash inferMeta.sh fail ${repRID}.rseqc.log`
echo \${endness}
echo \${fail}
if [ \${endness} == "PairEnd" ]
then
percentF=`bash inferMeta.sh pef ${repRID}.rseqc.log`
percentR=`bash inferMeta.sh per ${repRID}.rseqc.log`
inner_distance.py -i "${inBam}" -o ${repRID}.insertSize -r ./bed/genome.bed
junction_saturation.py -i "${inBam}" -r ./bed/genome.bed -o ${repRID}.junctionSaturation
elif [ \${endness} == "SingleEnd" ]
then
percentF=`bash inferMeta.sh sef ${repRID}.rseqc.log`
percentR=`bash inferMeta.sh ser ${repRID}.rseqc.log`
fi
if [ \$percentF > 0.25 ] && [ \$percentR < 0.25 ]
if [ \$percentF -gt 0.25 ] && [ \$percentR -lt 0.25 ]
then
stranded="forward"
if [ \$endness == "PairEnd" ]
then
strategy="1++,1--,2+-,2-+"
else
strategy="++,--"
fi
elif [ \$percentR > 0.25 ] && [ \$percentF < 0.25 ]
elif [ \$percentR -gt 0.25 ] && [ \$percentF -lt 0.25 ]
then
stranded="reverse"
if [ \$endness == "PairEnd" ]
then
strategy="1+-,1-+,2++,2--"
......@@ -464,15 +463,38 @@ process inferMetadata {
strategy="+-,-+"
fi
else
stranded="unstranded"
strategy="us"
fi
if [ \$strategy != "us" ]
then
FPKM_count.py -i "${inBam}" -o ${repRID}.rpkmCount -r ./bed/genome.bed -d \$strategy
RPKM_saturation.py -i "${inBam}" -o ${repRID}.rpkmCount -r ./bed/genome.bed -d \$strategy
else
FPKM_count.py -i "${inBam}" -o ${repRID}.rpkmCount -r ./bed/genome.bed
RPKM_saturation.py -i "${inBam}" -o ${repRID}.rpkmCount -r ./bed/genome.bed
fi
# calcualte TIN values per feature
tin.py -i "${inBam}" -r ./bed/genome.bed
# aggregate infered metadata (including generate TIN stats)
Rscript aggregateInference.R --endness "\${endness}" --stranded "\${stranded}" --strategy "\${strategy}" --percentF \${percentF} --percentR \${percentR} --percentFail \${fail} --tin "${inBam.baseName}.tin.xls"
"""
}
\ No newline at end of file
}
// Split infered metadata into separate channels
endsMetaI = Channel.create()
strandedI = Channel.create()
strategyI = Channel.create()
percentFI = Channel.create()
percentRI = Channel.create()
percentFailI = Channel.create()
tinMinI = Channel.create()
tinMedI = Channel.create()
tinMaxI = Channel.create()
tinSDI = Channel.create()
inferMetadata.splitCsv(sep: ",", header: false).separate(
endsMetaI,
strandedI,
strategyI,
percentFI,
percentRI,
percentFailI,
tinMinI,
tinMedI,
tinMaxI,
tinSDI
)
gc()
library(optparse)
option_list=list(
make_option("--endness",action="store",type='character',help="Infered Endness"),
make_option("--stranded",action="store",type='character',help="Infered Strandedness"),
make_option("--strategy",action="store",type='character',help="Infered Sequencing Strategy"),
make_option("--percentF",action="store",type='double',help="Percent Forward Aligned Reads"),
make_option("--percentR",action="store",type='double',help="Percent Reverse Aligned Reads"),
make_option("--percentFail",action="store",type='double',help="Percent Failed Aligned Reads"),
make_option("--tin",action="store",type='character',help="TIN File")
)
opt=parse_args(OptionParser(option_list=option_list))
rm(option_list)
if (length(setdiff(c("endness","stranded","strategy","percentF","percentR","percentFail","tin","help"),names(opt))) != 0){
stop(paste0("No input missing ",setdiff(c("endness","stranded","strategy","percentF","percentR","percentFail","tin","help"),names(opt)),", exiting."))
} else if (!file.exists(opt$tin)){
stop("No tin file passed, exiting.")
}
if (opt$endness == "PairEnd"){
endness <- "pe"
} else if (opt$endness == "SingleEnd"){
endness <- "se"
}
percentF <- round(opt$percentF,digits=2)
percentR <- round(opt$percentR,digits=2)
percentFail <- round(opt$percentFail,digits=2)
repRID <- basename(gsub(".sorted.deduped.tin.xls","",opt$tin))
tin <- read.delim(opt$tin,sep="\t")
tin.min <- round(min(tin$TIN),digits=2)
tin.med <- round(median(tin$TIN),digits=2)
tin.max <- round(max(tin$TIN),digits=2)
tin.sd <- round(sd(tin$TIN),digits=2)
output <- paste(endness,opt$stranded,opt$strategy,percentF,percentR,percentFail,tin.min,tin.med,tin.max,tin.sd,sep=",")
write(output,file="infer.csv")
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment