Skip to content
Snippets Groups Projects
Commit d5c23e26 authored by Gervaise Henry's avatar Gervaise Henry :cowboy:
Browse files

Update tpm table #73

parent 64c587de
Branches
Tags
2 merge requests!43Develop,!420.0.3
Pipeline #7989 failed with stages
in 1 minute and 48 seconds
......@@ -875,10 +875,10 @@ process countData {
echo -e "LOG: counting ${ends} features" >> ${repRID}.countData.log
if [ "${ends}" == "se" ]
then
featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' -o ${repRID}.countData -s \${stranding} -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}.countData -s \${stranding} -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
elif [ "${ends}" == "pe" ]
then
featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
featureCounts -T `nproc` -a ./genome.gtf -G ./genome.fna -g 'gene_name' --extraAttributes 'gene_id' -o ${repRID}.countData -s \${stranding} -p -B -R SAM --primary --ignoreDup ${repRID}.sorted.deduped.bam
fi
echo -e "LOG: counted" >> ${repRID}.countData.log
......
......@@ -13,10 +13,13 @@ if (!("count" %in% names(opt))){
stop("Count file doesn't exist, exiting.")
}
repRID <- basename(gsub(".featureCounts","",opt$count))
repRID <- basename(gsub(".countData","",opt$count))
count <- read.delim(opt$count, comment.char="#") # if featureCounts file changes structure, be sure to update count and Length columns below
colnames(count)[7] <- "count"
colnames(count)[1] <- "gene_name"
colnames(count)[7] <- "gene_id"
colnames(count)[8] <- "count"
count <- count[,c(1,7,2:6,8)]
rpk <- count$count/count$Length/1000
......
......@@ -7,18 +7,20 @@ option_list=list(
opt=parse_args(OptionParser(option_list=option_list))
rm(option_list)
countTable <- read.csv(paste0(opt$repRID,".countData.countTable.csv"), stringsAsFactors=FALSE)
countTable <- read.csv(paste0(opt$repRID,".countTable.csv"), stringsAsFactors=FALSE)
geneID <- read.delim("geneID.tsv", header=FALSE, stringsAsFactors=FALSE)
Entrez <- read.delim("Entrez.tsv", header=FALSE, stringsAsFactors=FALSE)
convert <- data.frame(geneID=countTable$Geneid)
convert <- merge(x=convert,y=geneID[,1:2],by.x="geneID",by.y="V2",all.x=TRUE)
convert <- data.frame(gene_name=countTable$gene_name)
convert <- merge(x=convert,y=geneID[,1:2],by.x="gene_name",by.y="V2",all.x=TRUE)
convert <- merge(x=convert,y=Entrez,by.x="V1",by.y="V1",all.x=TRUE)
convert[is.na(convert$V2),3] <- ""
convert <- convert[,-1]
colnames(convert) <- c("GeneID","EntrezID")
convert <- unique(convert)
output <- merge(x=convert,y=countTable[,c("Geneid","count","tpm")],by.x="GeneID",by.y="Geneid",all.x=TRUE)
output <- merge(x=convert,y=countTable[,c("gene_name","gene_id","count","tpm")],by.x="GeneID",by.y="gene_name",all.x=TRUE)
colnames(output) <- c("GENCODE_Gene_Symbol","NCBI_GeneID","Ensembl_GeneID","count","tpm")
output <- output[,c(1,3,2,4:5)]
write.table(output,file=paste0(opt$repRID,".tpmTable.csv"),sep=",",row.names=FALSE,quote=FALSE)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment