Commit 8fd40db3 authored by zhanxw's avatar zhanxw
Browse files

clean vcf parsing codes

parent 21e55ff3
......@@ -39,7 +39,7 @@ THIRD_INC = $(BCF_INC) $(TABIX_INC) $(PCRE_INC) $(BZIP2_INC) $(ZLIB_INC) $(GSL_I
THIRD_INC_FLAGS = $(addprefix -I,$(THIRD_INC))
### put BCF_INC ahead of TABIX, so correct kseq.h can be included
DEFAULT_CXXFLAGS = -std=c++0x -D__STDC_LIMIT_MACROS -I. -I.. -I../base $(THIRD_INC_FLAGS) -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE
DEFAULT_CXXFLAGS = -std=c++0x -D__STDC_LIMIT_MACROS -I. -I.. -I../base $(THIRD_INC_FLAGS) -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE -fopenmp
# 'make release' will: clean current build; build with -O2 flags;
# NOTE: don't use -j flag!
......
......@@ -30,19 +30,18 @@ class VCFIndividual {
return;
}
// this->self = vcfValue;
// this->parsed = vcfValue.toStr();
this->parsed.attach(&vcfValue.line[vcfValue.beg],
vcfValue.end - vcfValue.beg);
size_t idx = 0;
int beg = 0;
int ret;
while (true) {
if (idx == fdLen) {
VCFValue v;
fd.push_back(v);
// VCFValue v;
// fd.push_back(v);
++fdLen;
fd.resize(fdLen);
}
VCFValue& v = fd[idx++];
ret = v.parseTill(this->parsed, beg, ':');
......
#ifndef _VCFRECORD_H_
#define _VCFRECORD_H_
#include <omp.h>
#include "VCFBuffer.h"
#include "VCFFunction.h"
#include "VCFHeader.h"
......@@ -124,21 +126,23 @@ class VCFRecord {
int parseIndividual() {
int ret;
// now comes each individual genotype
unsigned int idx = 0; // peopleIdx
int idx = 0; // peopleIdx
// VCFIndividual* p;
const size_t NumAllIndvSize = allIndv.size();
std::vector<VCFValue> indv(NumAllIndvSize);
const int NumAllIndvSize = allIndv.size();
indv.resize(NumAllIndvSize);
int beg = this->format.end + 1;
while ((ret = indv[idx].parseTill(this->parsed, beg, '\t')) == 0) {
if (idx >= NumAllIndvSize) {
if (idx >= (int)NumAllIndvSize) {
fprintf(stderr,
"Expected %d individual but already have %d individual\n",
(int)NumAllIndvSize, idx);
NumAllIndvSize, idx);
fprintf(stderr, "VCF header have LESS people than VCF content!\n");
return -1;
}
this->parsed[indv[idx].end] = '\0';
// defer parsing individual fields (e.g. GT,DS) later.
// p = this->allIndv[idx];
// p->parse(indv);
......@@ -146,32 +150,34 @@ class VCFRecord {
idx++;
}
#pragma openmp parallel for
for (int i = 0; i < idx; ++i) {
this->allIndv[i]->parse(indv[i]);
}
// if ret == 1 menas reaches end of this->parsed
if (ret != 1) {
fputs("Parsing error in line: ", stderr);
this->parsed.output(stderr);
fputc('\n', stderr);
return -1;
} else {
} else { // reach the end
this->parsed[indv[idx].end] = '\0';
// p = this->allIndv[idx];
// p->parse(indv);
this->allIndv[idx]->parse(indv[idx]);
// this->allIndv[idx]->parse();
idx++;
}
assert(idx == NumAllIndvSize);
#pragma omp parallel for
for (int i = 0; i < idx; ++i) {
this->allIndv[i]->parse(indv[i]);
}
if (idx > NumAllIndvSize) {
fprintf(stderr, "Expected %d individual but already have %d individual\n",
(int)NumAllIndvSize, idx);
NumAllIndvSize, idx);
REPORT("VCF header have MORE people than VCF content!");
} else if (idx < NumAllIndvSize) {
fprintf(stderr, "Expected %d individual but only have %d individual\n",
(int)NumAllIndvSize, idx);
NumAllIndvSize, idx);
REPORT("VCF header have LESS people than VCF content!");
return -1;
}
......@@ -453,6 +459,8 @@ class VCFRecord {
VCFInfo vcfInfo;
std::vector<VCFValue> indv; // store individual fields
// indicates if getPeople() has been called
bool hasAccess;
......
......@@ -12,7 +12,8 @@ EXE = testReadVCF testReadVCFInfo testReadVCFByIndividual testReadVCFByRange \
all: $(EXE)
debug: all
CXX_FLAGS = -O0 -ggdb -I.. -I../.. -I../../libsrc -I../../base -I../../third/tabix/ -I../../third/pcre/include \
CXX_FLAGS = -O0 -ggdb -fopenmp \
-I.. -I../.. -I../../libsrc -I../../base -I../../third/tabix/ -I../../third/pcre/include \
-I../../third/samtools/bcftools \
../../libVcf/lib-dbg-vcf.a \
../../base/lib-dbg-base.a ../../libsrc/lib-dbg-goncalo.a \
......
......@@ -10,6 +10,7 @@
#include "base/Argument.h"
#include "base/ParRegion.h"
#include "regression/BoltLMM.h"
#include "regression/EigenMatrixInterface.h"
#include "regression/FamSkat.h"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment