Commit 66071e02 authored by zhanxw

fix test lib/test to incorporate htslib

parent a3192827
This diff is collapsed.
......@@ -4,19 +4,18 @@
#include "base/RangeList.h"
#include "third/htslib/include/htslib/vcf.h"
// static void write_header(bcf_hdr_t *h);
class BCFReader {
public:
BCFReader(const std::string& fn)
: cannotOpen(false),
hasIndex(false),
readyToRead(false),
bp(0),
b(0),
bout(0) // ,
// off(0) // ,
// str2id(0)
{
hin(0),
idx(0),
iter(0) {
ks = {0, 0, 0};
open(fn);
};
......@@ -87,16 +86,8 @@ class BCFReader {
// destroy range iterator
// close index
bcf_hdr_destroy(hin);
bcf_destroy(b); // bcf_destroy(blast);
hts_close(bp); // close bcf handle for input
hts_close(bout); // close bcf handle for output
/* // resume stdout */
/* stdout = fdopen(this->origStdout, "w"); */
/* assert(stdout); */
// if (str2id) {
// hts_str2id_destroy(str2id);
// }
bcf_destroy(b); // bcf_destroy(blast);
hts_close(bp); // close bcf handle for input
closeIndex();
};
void resetRangeIterator() {
......@@ -119,21 +110,10 @@ class BCFReader {
// bcftools part
htsFile* bp;
bcf1_t* b;
htsFile* bout;
bcf_hdr_t* hin;
bcf_hdr_t* hout;
hts_idx_t* idx;
hts_itr_t* iter;
// int tid, begin, end;
// uint64_t off;
kstring_t ks;
// void* str2id;
/* BCF_t* BCFHandle; */
/* ti_iter_t iter; */
// const char* line;
// int line_len;
// int origStdout;
std::string header;
};
......
......@@ -3,6 +3,7 @@
#include "base/RangeList.h"
#include "third/htslib/include/htslib/hts.h"
#include "third/htslib/include/htslib/kseq.h" // defined KS_SEP_LINE
#include "third/htslib/include/htslib/tbx.h"
#include "third/htslib/include/htslib/vcf.h"
......@@ -32,25 +33,41 @@ class TabixReader {
// check read mode
if (range.empty()) {
// read line by line
if (!iter) {
// iter = ti_query(this->tabixHandle, 0, 0, 0);
iter = tbx_itr_querys(tabixIndex, ".");
if (!iter) return false;
}
// if (!iter) {
// // iter = ti_query(this->tabixHandle, 0, 0, 0);
// iter = tbx_itr_querys(tabixIndex, ".");
// if (!iter) return false;
// }
// if (!this->firstLine.empty()) {
// (*line) = this->firstLine;
// this->firstLine.clear();
// return true;
// }
// // while ((ti_line = ti_read(this->tabixHandle, iter, &ti_line_len)) !=
// 0)
// // {
// while (tbx_itr_next(tabixHandle, tabixIndex, iter, &tabixLine) >= 0) {
// // need to skip header here
// if (tabixLine.l > 0 &&
// (int)(tabixLine.s[0]) == this->tabixIndex->conf.meta_char)
// continue;
// // (*line) = (ti_line);
// return true;
// }
// return false;
if (!this->firstLine.empty()) {
(*line) = this->firstLine;
this->firstLine.clear();
return true;
}
// while ((ti_line = ti_read(this->tabixHandle, iter, &ti_line_len)) != 0)
// {
while (tbx_itr_next(tabixHandle, tabixIndex, iter, &tabixLine) >= 0) {
// need to skip header here
while (hts_getline(this->tabixHandle, KS_SEP_LINE, &tabixLine) >= 0) {
// skip header lines
if (tabixLine.l > 0 &&
(int)(tabixLine.s[0]) == this->tabixIndex->conf.meta_char)
continue;
// (*line) = (ti_line);
(*line) = tabixLine.s;
return true;
}
return false;
......@@ -69,7 +86,7 @@ class TabixReader {
// if (this->ti_line) {
if (tbx_itr_next(tabixHandle, tabixIndex, iter, &tabixLine) > 0) {
// (*line) = ti_line;
(*line) = tabixLine.s;
return true;
}
}
......@@ -87,7 +104,7 @@ class TabixReader {
// continue;
// }
// ti_iter_destroy(iter);
tbx_destroy(this->tabixIndex);
// do not destroy index: tbx_destroy(this->tabixIndex);
// iter = 0;
// this->iter = ti_queryi(this->tabixHandle, tid, beg, end);
this->iter = tbx_itr_querys(this->tabixIndex, rangeBuffer);
......@@ -167,7 +184,7 @@ class TabixReader {
this->hasIndex = true;
return true;
};
}
void closeIndex() {
// fprintf(stderr, "close index...");
......@@ -215,31 +232,40 @@ class TabixReader {
if (!this->hasIndex) {
return -1;
}
// do not attempt to read
// // this->iter = ti_query(this->tabixHandle, 0, 0, 0);
// this->iter = tbx_itr_querys(this->tabixIndex, ".");
// if (!this->iter) {
// return -1;
// }
// this->iter = ti_query(this->tabixHandle, 0, 0, 0);
this->iter = tbx_itr_querys(this->tabixIndex, ".");
if (!this->iter) {
return -1;
}
// while ((ti_line = ti_read(this->tabixHandle, this->iter,
// &this->ti_line_len)) != 0) {
// if ((int)(*ti_line) != idxconf->meta_char) {
// this->firstLine = ti_line;
// // while ((ti_line = ti_read(this->tabixHandle, this->iter,
// // &this->ti_line_len)) != 0) {
// // if ((int)(*ti_line) != idxconf->meta_char) {
// // this->firstLine = ti_line;
// // break;
// // }
// // // fputs(ti_line, stdout); fputc('\n', stdout);
// // this->header += ti_line;
// // this->header += "\n";
// // }
// while (tbx_itr_next(this->tabixHandle, this->tabixHandle, this->iter,
// &this->tabixLine) >= 0) {
// if (tabixLine.s[0] == this->tabixIndex->conf.meta_char) {
// this->firstLine = tabixLine.s;
// break;
// }
// // fputs(ti_line, stdout); fputc('\n', stdout);
// this->header += ti_line;
// this->header += "\n";
// this->header += tabixLine.s;
// this->header += '\n';
// }
while (tbx_itr_next(this->tabixHandle, this->tabixHandle, this->iter,
&this->tabixLine) >= 0) {
if (tabixLine.s[0] == this->tabixIndex->conf.meta_char) {
while (hts_getline(this->tabixHandle, KS_SEP_LINE, &tabixLine) >= 0) {
if (tabixLine.l > 0 &&
(int)(tabixLine.s[0]) != this->tabixIndex->conf.meta_char) {
this->firstLine = tabixLine.s;
break;
}
this->header += tabixLine.s;
this->header += '\n';
this->header += "\n";
}
cannotOpen = false;
......
......@@ -18,13 +18,12 @@ CXX_FLAGS = -O0 -ggdb -fopenmp \
-I../../third/samtools/bcftools \
../../libVcf/lib-dbg-vcf.a \
../../base/lib-dbg-base.a ../../libsrc/lib-dbg-goncalo.a \
../../third/tabix/libtabix.a \
../../third/htslib/lib/libhts.a \
../../third/libdeflate/lib/libdeflate.a \
../../third/pcre/lib/libpcreposix.a ../../third/pcre/lib/libpcre.a \
-L ../../third/samtools/bcftools -lbcf\
../../third/samtools/libbam.a \
-L ../../third/zlib \
-L ../../third/bzip2 \
-lz -lbz2 -lpthread
-lz -lbz2 -lpthread -llzma
LIBS=../lib-dbg-vcf.a
lib:
......
#include "VCFUtil.h"
int main() {
VCFInputFile vin("noindex.bcf.gz");
VCFInputFile vin("noindex.v2.bcf");
vin.setRangeList("1:0");
int lineNo = 0;
while (vin.readRecord()){
lineNo ++;
while (vin.readRecord()) {
lineNo++;
VCFRecord& r = vin.getVCFRecord();
VCFPeople& people = r.getPeople();
VCFIndividual* indv;
......@@ -18,15 +18,17 @@ int main() {
// e.g.: Loop each (selected) people in the same order as in the VCF
for (int i = 0; i < people.size(); i++) {
indv = people[i];
// get GT index. if you are sure the index will not change, call this function only once!
// get GT index. if you are sure the index will not change, call this
// function only once!
int GTidx = r.getFormatIndex("GT");
if (GTidx >= 0)
printf("%s ", indv->justGet(0).toStr()); // [0] meaning the first field of each individual
printf("%s ",
indv->justGet(0)
.toStr()); // [0] meaning the first field of each individual
else
fprintf(stderr, "Cannot find GT field!\n");
}
printf("\n");
};
fprintf(stdout, "Total %d VCF records have converted successfully\n", lineNo);
};
......@@ -5,7 +5,7 @@
int main(int argc, char* argv[]) {
{
std::string fn = "all.anno.filtered.extract.bcf.gz";
std::string fn = "all.anno.filtered.extract.v2.bcf";
std::string r = "1:196621007-196716634";
VCFExtractor vin(fn);
vin.setRangeList(r);
......@@ -34,7 +34,7 @@ int main(int argc, char* argv[]) {
return 0;
#endif
const char* fn = "test.bcf.gz";
const char* fn = "test.v2.bcf";
{
BCFReader tr(fn);
int n = 0;
......@@ -94,7 +94,7 @@ int main(int argc, char* argv[]) {
if (h[i] == '\n') count++;
}
fprintf(stdout, "header has %d lines.\n", count);
assert(count == 60);
assert(count == 84);
}
return 0;
}
......@@ -5,4 +5,4 @@ Read 14 lines
Read 2 lines
Read 3 lines
Read 0 lines
header has 60 lines.
header has 84 lines.
#include "VCFUtil.h"
int main() {
VCFInputFile vin("test.bcf.gz");
VCFInputFile vin("test.v2.bcf");
vin.setRangeList("1:196341364-196341449");
while (vin.readRecord()){
while (vin.readRecord()) {
VCFRecord& r = vin.getVCFRecord();
VCFPeople& people = r.getPeople();
VCFIndividual* indv;
printf("%s:%d\t", r.getChrom(), r.getPos());
bool tagMissing;
VCFInfo& info = r.getVCFInfo();
std::string anno = info.getTag("ANNO", &tagMissing).toStr();
printf("ANNO=%s\t", anno.c_str());
// assert(tagMissing); // all variants have tagMissing == true
bool missingGenotype; // missing indicator
bool missingGenotype; // missing indicator
int GTidx = r.getFormatIndex("GT");
for (int i = 0; i < people.size(); i++){
for (int i = 0; i < people.size(); i++) {
indv = people[i];
const VCFValue& gt = indv->justGet(GTidx);
missingGenotype = gt.isMissingGenotype();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment