Commit 547c341d authored by Venkat Malladi

Merge branch 'issue3087-annotation_fix' into 'dev'

Issue3087 annotation fix

Closes #87

See merge request !72
parents 27540804 c78024de
Pipeline #7831 passed with stages
in 1175 minutes and 41 seconds
......@@ -43,13 +43,14 @@ names(files) <- design$Condition
peaks <- lapply(files, readPeakFile, as = "GRanges", header = FALSE)
peakAnnoList <- lapply(peaks, annotatePeak, TxDb=txdb, tssRegion=c(-3000, 3000), verbose=FALSE)
column_names <- c("chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
column_names <- c("geneId","chr", "start", "end", "width", "strand_1", "name", "score", "strand", "signalValue",
"pValue", "qValue", "peak", "annotation", "geneChr", "geneStart", "geneEnd",
"geneLength" ,"geneStrand", "geneId", "transcriptId", "distanceToTSS", "symbol")
"geneLength" ,"geneStrand", "transcriptId", "distanceToTSS", "symbol")
for(index in c(1:length(peakAnnoList))) {
filename <- paste(names(peaks)[index], ".chipseeker_annotation.tsv", sep="")
df <- as.data.frame(peakAnnoList[[index]])
df$geneId <- sapply(strsplit(as.character(df$geneId), split = "\\."), "[[", 1)
df_final <- merge(df, sym, by.x="geneId", by.y="ensembl", all.x=T)
colnames(df_final) <- column_names
write.table(df_final[ , !(names(df_final) %in% c('strand_1'))], filename, sep="\t" ,quote=F, row.names=F)
......
......@@ -25,6 +25,10 @@ def test_annotation_singleend():
annotation_file = test_output_path + 'ENCSR238SGC.chipseeker_annotation.tsv'
assert os.path.exists(annotation_file)
assert utils.count_lines(annotation_file) >= 149284
df = pd.read_csv(annotation_file, sep = "\t", header = 0)
print(df.head())
#assert df['symbol'].notna().all()
assert not(df['symbol'].isnull().values.any())
@pytest.mark.pairedend
......@@ -42,3 +46,7 @@ def test_annotation_pairedend():
annotation_file = test_output_path + 'ENCSR729LGA.chipseeker_annotation.tsv'
assert os.path.exists(annotation_file)
assert utils.count_lines(annotation_file) >= 25367
df = pd.read_csv(annotation_file, sep = "\t", header = 0)
print(df.head())
#assert df['symbol'].notna().all()
assert not(df['symbol'].isnull().values.any())
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment