Commit cdd07dc6 authored by zhanxw's avatar zhanxw
Browse files

fix bugs, add debug function for bolt model

parents e39b01f1 704f4c23
......@@ -11,24 +11,36 @@ before_script:
stages:
- build
- test
- deploy
jobBuild:
- buildExtra
jobBuildRelease:
stage: build
script:
- make
- FN=/dist/rvtests-`date +"%Y%m%d"`-`git rev-parse HEAD|cut -c 1-6`-linux64-static.tar.gz; cd .. && tar zvcf ${FN} rvtests/executable/rvtest rvtests/executable/vcf2kinship rvtests/example rvtests/README.md && echo "Output file name is ${FN}"
jobTest:
stage: test
script:
- make debug
- (for i in base libVcf regression; do echo "Task = ${i}"; make -C ${i}/test; done)
- (for i in base libVcf regression; do echo "Task = ${i}"; make check -C ${i}/test; done)
jobDeploy:
stage: deploy
jobBuildDebug:
stage: buildExtra
script:
- echo "Packing RVTESTS"
- make
- FN=/dist/rvtests-`date +"%Y%m%d"`-`git rev-parse HEAD|cut -c 1-6`-linux64-static.tar.gz; cd .. && tar zvcf ${FN} rvtests/executable/rvtest rvtests/executable/vcf2kinship rvtests/example rvtests/README.md && echo "Output file name is ${FN}"
environment:
name: staging
url: http:staging-rvtests.bunny.swmed.edu
when: "manual"
- make debug
- FN=/dist/rvtests-debug-`date +"%Y%m%d"`-`git rev-parse HEAD|cut -c 1-6`-linux64-static.tar.gz; cd .. && tar zvcf ${FN} rvtests/executable/dbg/rvtest rvtests/executable/dbg/vcf2kinship rvtests/example rvtests/README.md && echo "Output file name is ${FN}"
jobBuildProfile:
stage: buildExtra
script:
- make profile
- FN=/dist/rvtests-profile-`date +"%Y%m%d"`-`git rev-parse HEAD|cut -c 1-6`-linux64-static.tar.gz; cd .. && tar zvcf ${FN} rvtests/executable/dbg/rvtest rvtests/executable/dbg/vcf2kinship rvtests/example rvtests/README.md && echo "Output file name is ${FN}"
allow_failure: true
#jobDeploy:
# stage: deploy
# script:
# - echo "Packing RVTESTS"
# - make
# environment:
# name: staging
# url: http:staging-rvtests.bunny.swmed.edu
# when: "manual"
2017-07-13 Xiaowei Zhan <zhanxw@fantasia.csgstat.sph.umich.edu>
* Fix beta estimation in binary score tests for unrelated individuals
2017-06-13 Xiaowei Zhan <zhanxw@fantasia.csgstat.sph.umich.edu>
* Fix a bug related Wald test outputs (fix #30)
2017-06-09 Xiaowei Zhan <zhanxw@fantasia.csgstat.sph.umich.edu>
* Add "--outputID" to output VCF ID to association results (fix #14, #24)
* Improve error messages when loading decomposed kinship file (fix #29)
* Add "RVTESTS successfully exit" to stdout (fix #31)
* Add FASTLMM_DEBUG environment variable to ease debugging (refer to #32)
* Fix strtol related warning (possibly) (refer to #33)
* Clean codes
2017-04-27 Xiaowei Zhan <zhanxw@fantasia.csgstat.sph.umich.edu>
* Add boltCov model
......
......@@ -2,7 +2,7 @@
// cannot forward declare an typdef anonymous struct
// http://stackoverflow.com/questions/804894/forward-declaration-of-a-typedef-in-c
// so include the header file
#include "bgzf.h"
#include "third/samtools/bgzf.h"
#include <algorithm>
......
......@@ -337,7 +337,11 @@ int KinshipHolder::loadDecomposed() {
s = "u";
s += toString(i + 1);
if (headerMap.count(s) == 0) {
logger->error("Missing '%s' column!", s.c_str());
logger->warn(
"Missing [ %s ] column in the header line of file [ %s ] when we "
"are analyzing [ %d ] "
"samples!",
s.c_str(), this->eigenFileName.c_str(), NumSample);
return -1;
}
columnToExtract.push_back(headerMap[s]);
......
......@@ -216,7 +216,6 @@ int SimpleMatrix::dropRow(const std::vector<std::string>& name) {
int SimpleMatrix::dropRow(const std::vector<int>& index) {
removeByIndex(index, &mat);
removeByIndex(index, &rowName);
removeByIndex(index, &colName);
return 0;
}
......
......@@ -3,6 +3,140 @@
#include <algorithm>
class atoi_func {
public:
atoi_func() : value_() {}
inline int value() const { return value_; }
inline bool operator()(const char* str, size_t len) {
value_ = 0;
int sign = 1;
if (str[0] == '-') { // handle negative
sign = -1;
++str;
--len;
}
switch (len) { // handle up to 10 digits, assume we're 32-bit
case 10:
value_ += (str[len - 10] - '0') * 1000000000;
case 9:
value_ += (str[len - 9] - '0') * 100000000;
case 8:
value_ += (str[len - 8] - '0') * 10000000;
case 7:
value_ += (str[len - 7] - '0') * 1000000;
case 6:
value_ += (str[len - 6] - '0') * 100000;
case 5:
value_ += (str[len - 5] - '0') * 10000;
case 4:
value_ += (str[len - 4] - '0') * 1000;
case 3:
value_ += (str[len - 3] - '0') * 100;
case 2:
value_ += (str[len - 2] - '0') * 10;
case 1:
value_ += (str[len - 1] - '0');
value_ *= sign;
return value_ > 0;
default:
return false;
}
}
private:
int value_;
};
// convert std::string to integer
// @return true if conversion succeed
bool str2int(const char* input, int* output) {
// Limitation: this method only works for 32-bit integer
// We did not use strtol, as it is slower althought it can convert to
// `long` instead of `int`.
// The implementaiton may be more compatible than strtol() on musl
size_t len = 0;
unsigned char neg = 0;
unsigned long value = 0;
if (!input) {
assert(input && "null input string for str2int()");
goto err;
}
// skip spaces
while (*input == ' ') {
input++;
}
// check sign
if (*input == '-') {
neg = 1;
input++;
}
if (*input == '\0') {
goto err;
}
while (*input == ' ') {
input++;
}
while (input[len] >= '0' && input[len] <= '9') {
len++;
}
// unroll the computation to speed up
switch (len) { // handle up to 10 digits, assume we're 32-bit
case 10:
value += (input[len - 10] - '0') * 1000000000;
case 9:
value += (input[len - 9] - '0') * 100000000;
case 8:
value += (input[len - 8] - '0') * 10000000;
case 7:
value += (input[len - 7] - '0') * 1000000;
case 6:
value += (input[len - 6] - '0') * 100000;
case 5:
value += (input[len - 5] - '0') * 10000;
case 4:
value += (input[len - 4] - '0') * 1000;
case 3:
value += (input[len - 3] - '0') * 100;
case 2:
value += (input[len - 2] - '0') * 10;
case 1:
value += (input[len - 1] - '0');
// check range
// valid 32bit int is from [-2147483648, 2147483647]
if (neg) {
if (value > (unsigned long)INT_MAX + 1) {
goto err;
} else {
*output = -value;
return true;
}
} else {
if (value > (unsigned long)INT_MAX) {
goto err;
}
*output = value;
return true;
}
default:
return false;
}
err:
*output = 0;
return false;
}
int chrom2int(const std::string& chrom) {
int b = 0;
if (hasLeadingChr(chrom)) b = 3;
......
......@@ -81,28 +81,7 @@ std::string toStringWithComma(int in);
// convert std::string to integer
// @return true if conversion succeed
inline bool str2int(const char* input, int* output) {
char* endptr;
long val;
errno = 0;
val = strtol(input, &endptr, 10);
if ((errno == ERANGE && (val == LONG_MAX || val == LONG_MIN)) ||
(errno != 0 && val == 0)) {
#ifndef NDEBUG
perror("strtol");
#endif
return false;
}
if (endptr == input) {
// no digits found
return false;
}
*output = val;
return true;
}
bool str2int(const char* input, int* output);
// convert std::string to integer
// @return true if conversion succeed
......@@ -119,12 +98,28 @@ inline bool str2double(const char* input, double* output) {
errno = 0;
val = strtod(input, &endptr);
if ((errno == ERANGE && (val == HUGE_VALF || val == HUGE_VALL)) ||
(errno != 0 && val == 0.)) {
if (errno == ERANGE) {
#ifndef NDEBUG
fprintf(stderr, "Over/under flow happened: %s\n", input);
perror("strtod");
#endif
return false;
}
if (errno == EINVAL) {
// Ignore error here to avoid displaying:
// "strtod: Invalid argument" (issue #32)
// Reason: musl has different implementaiton of strtod,
// musl set errno = 22 in strtod("NA")
// glibc set errno = 0 in strtod("NA")
return false;
}
if (errno != 0 && val == 0.) {
#ifndef NDEBUG
fprintf(stderr, "Unknown conversion error happened: %s\n", input);
perror("strtod");
#endif
return false;
}
if (endptr == input) {
// no digits found
return false;
......
......@@ -23,7 +23,7 @@ echo "Meta-analysis (generate summary statistics)"
../executable/rvtest --pheno pheno --inVcf example.vcf --meta score,cov,dominant,recessive --covar covar --covar-name c1,c2 --useResidualAsPhenotype --inverseNormal --kinship output.kinship --out out8
## binary related individuals are supported, binary phenotypes are automatically recognized (e.g. `y4` is a binary trait)
../executable/rvtest --pheno pheno --inVcf example.vcf --meta score,cov --kinship output.kinship --pheno-name y4 --out out7b
## NOTE RECOMMENDED, just for the demonstration purpuse, binary traits can be analyzed by treating it as quantitative traits using '--qtl'
## NOT RECOMMENDED, this is just to demonstrate traits can be analyzed as quantitative traits using '--qtl'
../executable/rvtest --pheno pheno --inVcf example.vcf --meta score,cov --kinship output.kinship --pheno-name y4 --qtl --out out7q
echo "Rare-variant analysis"
......
#ifndef _BCFREADER_H_
#define _BCFREADER_H_
#include "base/RangeList.h"
#include "third/samtools/bcftools/bcf.h"
#include "RangeList.h"
// static void write_header(bcf_hdr_t *h);
......
#include "PeopleSet.h"
#include "IO.h"
#include "base/IO.h"
// class PeopleSet
void PeopleSet::readID(const char* allPeopleID) {
......
#ifndef _PEOPLESET_H_
#define _PEOPLESET_H_
#include "Utils.h"
#include <string>
#include <set>
#include <string>
#include <vector>
#include "base/Utils.h"
// this class provides which column to filtered (0-based index)
class PeopleSet {
......
#include "PlinkInputFile.h"
#include "SimpleMatrix.h"
#include "base/SimpleMatrix.h"
#ifndef UNUSED
#define UNUSED(x) (void)(x)
......
......@@ -2,6 +2,35 @@
#include "base/SimpleMatrix.h"
#include "libVcf/PlinkInputFile.h"
#include "libVcf/VCFHeader.h"
#include "libVcf/VCFRecord.h"
void PlinkOutputFile::init(const char* fnPrefix) {
std::string prefix = fnPrefix;
this->fpBed = fopen((prefix + ".bed").c_str(), "wb");
this->fpBim = fopen((prefix + ".bim").c_str(), "wt");
this->fpFam = fopen((prefix + ".fam").c_str(), "wt");
if (!this->fpBed || !this->fpBim || !this->fpFam) {
REPORT("Cannot create binary PLINK file!");
abort();
}
// write Bed header
char c;
// magic number
c = 0x6c; // 0b01101100;
fwrite(&c, sizeof(char), 1, this->fpBed);
c = 0x1b; // 0b00011011;
fwrite(&c, sizeof(char), 1, this->fpBed);
// snp major mode
c = 0x01; // 0b00000001;
fwrite(&c, sizeof(char), 1, this->fpBed);
}
void PlinkOutputFile::writeHeader(const VCFHeader* h) {
std::vector<std::string> people;
h->getPeopleName(&people);
this->writeFAM(people);
}
int PlinkOutputFile::isMultiAllelic(const char* r) {
if (strchr(r, ',') == NULL) return 0;
......
#ifndef _PLINKOUTPUTFILE_H_
#define _PLINKOUTPUTFILE_H_
#include "VCFUtil.h"
#include <string>
#include <vector>
class SimpleMatrix;
class PlinkInputFile;
class VCFHeader;
class VCFRecord;
/****************************/
/* Binary PLINK format */
......@@ -32,26 +35,7 @@ class PlinkOutputFile {
public:
PlinkOutputFile(const char* fnPrefix) { init(fnPrefix); }
PlinkOutputFile(const std::string& fnPrefix) { init(fnPrefix.c_str()); }
void init(const char* fnPrefix) {
std::string prefix = fnPrefix;
this->fpBed = fopen((prefix + ".bed").c_str(), "wb");
this->fpBim = fopen((prefix + ".bim").c_str(), "wt");
this->fpFam = fopen((prefix + ".fam").c_str(), "wt");
if (!this->fpBed || !this->fpBim || !this->fpFam) {
REPORT("Cannot create binary PLINK file!");
abort();
}
// write Bed header
char c;
// magic number
c = 0x6c; // 0b01101100;
fwrite(&c, sizeof(char), 1, this->fpBed);
c = 0x1b; // 0b00011011;
fwrite(&c, sizeof(char), 1, this->fpBed);
// snp major mode
c = 0x01; // 0b00000001;
fwrite(&c, sizeof(char), 1, this->fpBed);
}
void init(const char* fnPrefix);
~PlinkOutputFile() { close(); }
void close() {
if (this->fpFam) {
......@@ -67,11 +51,7 @@ class PlinkOutputFile {
this->fpBed = NULL;
}
}
void writeHeader(const VCFHeader* h) {
std::vector<std::string> people;
h->getPeopleName(&people);
this->writeFAM(people);
}
void writeHeader(const VCFHeader* h);
// @pos is from 0 to 3
void setGenotype(unsigned char* c, const int pos, const int geno) {
(*c) |= (geno << (pos << 1));
......
#include "SiteSet.h"
#include "IO.h"
#include "TypeConversion.h"
#include "base/IO.h"
#include "base/TypeConversion.h"
/**
* Load column 1 as chromosome, column 2 as position.
......
#ifndef _TABIXREADER_H_
#define _TABIXREADER_H_
#include "tabix.h"
#include "RangeList.h"
#include "base/RangeList.h"
#include "third/tabix/tabix.h"
class TabixReader {
public:
......
#ifndef _VCFFILTER_H_
#define _VCFFILTER_H_
#include "Regex.h"
#include "base/Regex.h"
class ParRegion;
/**
......
#ifndef _VCFHEADER_H_
#define _VCFHEADER_H_
#include "Utils.h"
#include <vector>
#include "base/Utils.h"
class VCFHeader {
public:
......
......@@ -2,8 +2,8 @@
#define _VCFINFO_H_
#include <string>
#include "OrderedMap.h"
#include "VCFValue.h"
#include "base/OrderedMap.h"
class VCFInfo {
public:
......
#ifndef _VCFRECORD_H_
#define _VCFRECORD_H_
#include "CommonFunction.h"
#include "Exception.h"
#include "IO.h"
#include "IndexMap.h"
#include "RangeList.h"
#include "Utils.h"
#include "VCFBuffer.h"
#include "VCFFunction.h"
#include "VCFHeader.h"
#include "VCFIndividual.h"
#include "VCFInfo.h"
#include "base/CommonFunction.h"
#include "base/Exception.h"
#include "base/IO.h"
#include "base/IndexMap.h"
#include "base/RangeList.h"
#include "base/Utils.h"
typedef IndexMap<VCFIndividual*> VCFPeople;
......@@ -100,15 +100,16 @@ class VCFRecord {
this->parsed[this->format.end] = '\0';
// now comes each individual genotype
unsigned int idx = 0; // peopleIdx
size_t idx = 0; // peopleIdx
const size_t numAllIndv = this->allIndv.size();
VCFIndividual* p;
VCFValue indv;
int beg = this->format.end + 1;
while ((ret = indv.parseTill(this->parsed, beg, '\t')) == 0) {
if (idx >= this->allIndv.size()) {
if (idx >= numAllIndv) {
fprintf(stderr,
"Expected %d individual but already have %d individual\n",
(int)this->allIndv.size(), idx);
(int)this->allIndv.size(), (int)idx);
fprintf(stderr, "VCF header have LESS people than VCF content!\n");
return -1;
}
......@@ -136,11 +137,11 @@ class VCFRecord {
if (idx > this->allIndv.size()) {
fprintf(stderr, "Expected %d individual but already have %d individual\n",
(int)this->allIndv.size(), idx);
(int)this->allIndv.size(), (int)idx);
REPORT("VCF header have MORE people than VCF content!");
} else if (idx < this->allIndv.size()) {
fprintf(stderr, "Expected %d individual but only have %d individual\n",
(int)this->allIndv.size(), idx);
(int)this->allIndv.size(), (int)idx);
REPORT("VCF header have LESS people than VCF content!");
return -1;
}
......@@ -157,6 +158,14 @@ class VCFRecord {
return;
}
for (unsigned int i = 9; i < sa.size(); i++) {
// do not allow empty name
if (sa[i].empty()) {
fprintf(stderr,
"One inddividual (column %d, or extra tab at the line end) has "
"an empty column header, please check file format\n",
(int)i);
exit(1);
}
int idx = i - 9;
VCFIndividual* p = new VCFIndividual;
this->allIndv[idx] = p;
......
#ifndef _VCFUTIL_H_
#define _VCFUTIL_H_
#include "Exception.h"
#include "RangeList.h"
#include "Utils.h"
#include "base/Exception.h"
#include "base/RangeList.h"
#include "base/Utils.h"
#include "VCFConstant.h"
#include "VCFFunction.h"
#include "VCFHeader.h"
#include "VCFInputFile.h"
#include "VCFExtractor.h"
#include "VCFOutputFile.h"
#include "PlinkOutputFile.h"
#include "libVcf/PlinkOutputFile.h"
#include "libVcf/VCFConstant.h"
#include "libVcf/VCFExtractor.h"
#include "libVcf/VCFFunction.h"
#include "libVcf/VCFHeader.h"
#include "libVcf/VCFInputFile.h"
#include "libVcf/VCFOutputFile.h"
#endif /* _VCFUTIL_H_ */
......@@ -3,10 +3,10 @@
#include <cassert>
#include <string>
#include "Exception.h"
#include "TypeConversion.h"
#include "VCFBuffer.h"
#include "VCFConstant.h"
#include "base/Exception.h"
#include "base/TypeConversion.h"
class FileWriter;
......
#pragma GCC diagnostic ignored "-Wint-in-bool-context"
#include "regression/BoltLMM.h"