Commit 48cb7f3e authored by zhanxw's avatar zhanxw
Browse files

clean codes, speed up metacov

parent 8fd40db3
......@@ -55,7 +55,7 @@ GONCALO_LIB_DBG = $(ROOT)/libsrc/lib-dbg-goncalo.a
##################################################
## Common compiler options
CXX ?= g++
DEFAULT_CXXFLAGS = -D__STDC_LIMIT_MACROS -std=c++0x -Wall -Wno-unused-function $(OPENMP_FLAG) -DGIT_VERSION="\"$(GIT_VERSION)\""
DEFAULT_CXXFLAGS = -D__STDC_LIMIT_MACROS -std=c++0x -Wall -Wno-unused-function $(OPENMP_FLAG) -DGIT_VERSION="\"$(GIT_VERSION)\"" -Wno-error=int-in-bool-context
INCLUDE = $(THIRD_INC) $(REGRESSION_INC) $(VCF_INC) $(BASE_INC) $(GONCALO_INC)
LIB = $(REGRESSION_LIB) $(VCF_LIB) $(BASE_LIB) $(GONCALO_LIB) $(THIRD_LIB)
......
#ifndef _RINGMEMORYPOOL_H_
#define _RINGMEMORYPOOL_H_
#include <assert.h>
#include <string.h>
#include <vector>
class RingMemoryPool {
public:
RingMemoryPool() {
capacity_ = 0;
base_ = 0;
head_ = 0;
tail_ = 0;
numElementsInChunk_ = 0;
}
RingMemoryPool(int numElementsInChunk, int numChunk = 64)
: numElementsInChunk_(numElementsInChunk) {
memory_.resize(numElementsInChunk * numChunk);
capacity_ = numChunk;
base_ = 0;
head_ = 0;
tail_ = 0;
}
int allocate() {
if (tail_ == capacity_) {
const size_t oldSize = capacity_;
capacity_ = 2 * oldSize;
memory_.resize(capacity_ * numElementsInChunk_);
if (head_ != 0) { // need to move data from head_ to capacity
// move [head ... oldSize ] to [ (head+oldSize) ... capacity_];
memcpy(memory_.data() + (head_ + oldSize) * numElementsInChunk_,
memory_.data() + head_ * numElementsInChunk_,
sizeof(float) * (oldSize - head_) * numElementsInChunk_);
base_ += oldSize;
}
}
const int idx = tail_;
++tail_;
return idx;
}
void deallocate(int idx) {
assert(idx == head_); // only dealloc the head elment
if (head_ < tail_) {
++head_;
}
assert(head_ <= tail_);
}
float* chunk(int idx) {
if (idx >= tail_ || idx < head_) {
return NULL;
}
const int pos = base_ + idx;
float* ret = NULL;
if (pos < capacity_) {
ret = memory_.data() + pos * numElementsInChunk_;
} else {
ret = memory_.data() + (pos - capacity_) * numElementsInChunk_;
}
return ret;
}
// internal data may be not kept
void setChunkSize(const int s) {
assert(s >= 0);
if (s == numElementsInChunk_) {
return;
}
numElementsInChunk_ = s;
if (memory_.size() == 0) {
capacity_ = 64;
memory_.resize(s * capacity_);
base_ = head_ = tail_ = 0;
} else if (memory_.size() % s == 0) {
capacity_ = memory_.size() / s;
base_ = head_ = tail_ = 0;
} else {
memory_.resize(s * (memory_.size() / s));
capacity_ = memory_.size() / s;
base_ = head_ = tail_ = 0;
}
}
private:
std::vector<float> memory_; // hold some memory
int capacity_; // how many indice can be hold
int base_; // the index for the first element
int head_; // index of chunk
int tail_; // index of chunk
int numElementsInChunk_;
};
#endif /* _RINGMEMORYPOOL_H_ */
EXE = testLogger testIO testIONet testRangeList testUtils testRegex testSimpleMatrix \
testPedigree testKinship testTabixReader testParRegion testKinshipToKinInbcoef \
testCommonFunction testTypeConversion testSimpleTimer testProfiler testVersionChecker \
testSocket testHttp testIndexer testSimpleString \
testSocket testHttp testIndexer testSimpleString testRingMemoryPool\
Argument_Example_1 Argument_Example_2
all: $(EXE) testArgument
debug: all
......@@ -44,7 +44,8 @@ check: kinship
./testTypeConversion > test.TypeConversion.output && diff test.TypeConversion.output test.TypeConversion.correct
./testSimpleTimer
./testIndexer
./testSimpleString
./testSimpleString
./testRingMemoryPool
echo "All tests passed!"
kinship:
......
#include <stdio.h>
#include <stdlib.h>
#include "base/RingMemoryPool.h"
int main() {
float* p;
{
RingMemoryPool mp(100, 64);
for (int i = 0; i < 32; ++i) {
int idx = mp.allocate();
p = mp.chunk(idx);
for (int j = 0; j < 100; ++j) {
p[j] = j;
}
}
for (int i = 0; i < 32; ++i) {
mp.deallocate(i);
}
}
{
RingMemoryPool mp(10, 64);
for (int i = 0; i < 128; ++i) {
int idx = mp.allocate();
p = mp.chunk(idx);
for (int j = 0; j < 10; ++j) {
p[j] = j + 1;
}
}
// expect sum = 128 * (1+2+ ...+10)
float sum = 0;
for (int i = 0; i < 128; ++i) {
p = mp.chunk(i);
float subSum = 0;
for (int j = 0; j < 10; ++j) {
subSum += p[j];
}
assert((int)subSum == 55);
sum += subSum;
}
assert(int(sum) == 128 * 55);
for (int i = 0; i < 128; ++i) {
mp.deallocate(i);
}
}
{
RingMemoryPool mp(4, 2);
for (int i = 0; i < 8; ++i) {
int idx = mp.allocate();
p = mp.chunk(idx);
for (int j = 0; j < 4; ++j) {
p[j] = rand() % 1024;
}
}
for (int i = 0; i < 8; ++i) {
mp.deallocate(i);
}
float expectedSum = 0.0;
for (int i = 0; i < 8; ++i) {
int idx = mp.allocate();
assert(idx == 8 + i);
p = mp.chunk(idx);
for (int j = 0; j < 4; ++j) {
p[j] = rand() % 1024;
expectedSum += p[j];
// printf("set chunk = %d, offset = %d, value = %g, expectedSum = %g
// \n",
// idx, j, p[j], expectedSum);
}
}
// expect sum = 128 * (1+2+ ...+10)
float sum = 0;
for (int i = 0; i < 8; ++i) {
p = mp.chunk(i);
assert(p == NULL);
}
for (int i = 8; i < 8 + 8; ++i) { // since we have allocated 8 times before
p = mp.chunk(i);
for (int j = 0; j < 4; ++j) {
sum += p[j];
// printf("get chunk = %d, offset = %d, value = %g, sum = %g\n", i, j,
// p[j], sum);
}
}
assert(sum == expectedSum);
for (int i = 0; i < 8; ++i) {
mp.deallocate(8 + i);
}
}
}
......@@ -12,6 +12,7 @@
#include "libsrc/Random.h"
#include "regression/EigenMatrix.h"
#include "regression/EigenMatrixInterface.h"
#include "regression/MatrixRef.h"
// 0: no debug info
// 1: some debug info
......@@ -763,6 +764,32 @@ class BoltLMM::BoltLMMImpl {
// scale
(*out) *= xVx_xx_ratio_;
}
void GetCovXX(const FloatMatrixRef& g1, const FloatMatrixRef& g2,
float* out) {
assert(g1.nrow_ == g2.nrow_ && g1.ncol_ == g2.ncol_);
assert(g1.nrow_ == N_);
REF_TO_EIGEN(g1, g1E);
REF_TO_EIGEN(g2, g2E);
Eigen::MatrixXf g(N_ + C_, 2); //
// copy in data
// for (size_t i = 0; i != N_; ++i) {
// g(i, 0) = g1[i];
// g(i, 1) = g2[i];
// }
g.col(0).head(N_) = g1E.col(0);
g.col(1).head(N_) = g2E.col(0);
// project
pl.projectCovariate(&g);
// calculate
(*out) = projDot(g.col(0), g.col(1))(0);
// scale
(*out) *= xVx_xx_ratio_;
}
private:
int EstimateHeritability() {
......@@ -1382,6 +1409,10 @@ void BoltLMM::GetCovXX(const std::vector<double>& g1,
const std::vector<double>& g2, double* out) {
impl_->GetCovXX(g1, g2, out);
}
void BoltLMM::GetCovXX(const FloatMatrixRef& g1, const FloatMatrixRef& g2,
float* out) {
impl_->GetCovXX(g1, g2, out);
}
//////////////////////////////////////////////////
// BoltLMM::BoltLMMImpl class
......
......@@ -5,6 +5,7 @@
#include <vector>
class Matrix;
class FloatMatrixRef;
class BoltLMM {
class BoltLMMImpl;
......@@ -33,6 +34,7 @@ class BoltLMM {
// calculate covariances
void GetCovXX(const std::vector<double>& g1, const std::vector<double>& g2,
double* out);
void GetCovXX(const FloatMatrixRef& g1, const FloatMatrixRef& g2, float* out);
private:
// don't copy
......
......@@ -16,7 +16,7 @@ void G_to_Eigen(Matrix& GM, Eigen::MatrixXf* EigenM);
void Eigen_to_G(const Eigen::MatrixXd& EigenM, Matrix* GM);
void Eigen_to_G(const Eigen::VectorXd& EigenV, Vector* GV);
void G_to_Eigen(Vector& GV,
Eigen::VectorXd* EigenV); // convert to n by 1 matrix
Eigen::VectorXd* EigenV); // convert to n by 1 matrix (double)
void G_to_Eigen(Matrix& GM, Eigen::MatrixXd* EigenM);
// EigenM = cbind( GM1, GM2 )
......
......@@ -535,6 +535,22 @@ class FastLMM::Impl {
.sum() /
this->sigma2;
}
void GetCovXX(FloatMatrixRef& g1, FloatMatrixRef& g2,
const EigenMatrix& kinshipU, const EigenMatrix& kinshipS,
float* out) {
// const int n = g1.size();
// Eigen::Map<const Eigen::MatrixXd> g1D(g1.data(), n, 1);
// Eigen::MatrixXf g1E = g1D.cast<float>();
// Eigen::Map<const Eigen::MatrixXd> g2D(g2.data(), n, 1);
// Eigen::MatrixXf g2E = g2D.cast<float>();
REF_TO_EIGEN(g1, g1E);
REF_TO_EIGEN(g2, g2E);
*out =
(g1E.array() * (this->lambda.array() + delta).inverse() * g2E.array())
.sum() /
this->sigma2;
}
void GetCovXZ(const std::vector<double>& g, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS, std::vector<double>* out) {
// const Eigen::MatrixXf& U = kinshipU.mat;
......@@ -554,6 +570,29 @@ class FastLMM::Impl {
(*out)[i] = (double)res(0, i);
}
}
void GetCovXZ(FloatMatrixRef& g, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS, FloatMatrixRef& out) {
// // const Eigen::MatrixXf& U = kinshipU.mat;
// const int n = g.size();
// Eigen::Map<const Eigen::MatrixXd> gD(g.data(), n, 1);
// Eigen::MatrixXf gE = gD.cast<float>();
REF_TO_EIGEN(g, gE);
REF_TO_EIGEN(out, gOut);
// res: 1 by nCov matrix
// const int nCov = ux.cols();
assert(gOut.rows() == 1 && gOut.cols() == ux.cols());
// Eigen::MatrixXf res =
gOut = gE.transpose() *
(this->lambda.array() + delta).inverse().matrix().asDiagonal() * ux /
sigma2;
// out->resize(nCov);
// for (int i = 0; i < nCov; ++i) {
// (*out)[i] = (double)res(0, i);
// }
}
int TransformCentered(std::vector<double>* geno, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS) {
......@@ -569,6 +608,21 @@ class FastLMM::Impl {
gD = g.cast<double>();
return 0;
}
int TransformCentered(FloatMatrixRef& geno, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS) {
// // cast type double to float
// int n = geno->size();
// Eigen::Map<Eigen::MatrixXd> gD(geno->data(), n, 1);
// Eigen::MatrixXf g = gD.cast<float>();
REF_TO_EIGEN(geno, g);
Eigen::RowVectorXf g_mean = g.colwise().mean();
const Eigen::MatrixXf& U = kinshipU.mat;
g = U.transpose() * (g.rowwise() - g_mean);
// // cast type back
// gD = g.cast<double>();
return 0;
}
int Transform(std::vector<double>* geno, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS) {
// type conversion
......@@ -582,6 +636,20 @@ class FastLMM::Impl {
gD = g.cast<double>();
return 0;
}
int Transform(FloatMatrixRef& geno, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS) {
// // type conversion
// int n = geno->size();
// Eigen::Map<Eigen::MatrixXd> gD(geno->data(), n, 1);
// Eigen::MatrixXf g = gD.cast<float>();
REF_TO_EIGEN(geno, g);
const Eigen::MatrixXf& U = kinshipU.mat;
g = U.transpose() * g;
// // cast type back
// gD = g.cast<double>();
return 0;
}
int GetWeight(Vector* out) const {
const int n = lambda.rows();
out->Dimension(n);
......@@ -705,20 +773,37 @@ void FastLMM::GetCovXX(const std::vector<double>& g1,
double* out) {
return this->impl->GetCovXX(g1, g2, kinshipU, kinshipS, out);
}
void FastLMM::GetCovXX(FloatMatrixRef& g1, FloatMatrixRef& g2,
const EigenMatrix& kinshipU, const EigenMatrix& kinshipS,
float* out) {
return this->impl->GetCovXX(g1, g2, kinshipU, kinshipS, out);
}
void FastLMM::GetCovXZ(const std::vector<double>& g,
const EigenMatrix& kinshipU, const EigenMatrix& kinshipS,
std::vector<double>* out) {
return this->impl->GetCovXZ(g, kinshipU, kinshipS, out);
}
void FastLMM::GetCovXZ(FloatMatrixRef& g, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS, FloatMatrixRef& out) {
return this->impl->GetCovXZ(g, kinshipU, kinshipS, out);
}
int FastLMM::TransformCentered(std::vector<double>* x,
const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS) {
return this->impl->TransformCentered(x, kinshipU, kinshipS);
}
int FastLMM::TransformCentered(FloatMatrixRef& x, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS) {
return this->impl->TransformCentered(x, kinshipU, kinshipS);
}
int FastLMM::Transform(std::vector<double>* x, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS) {
return this->impl->Transform(x, kinshipU, kinshipS);
}
int FastLMM::Transform(FloatMatrixRef& x, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS) {
return this->impl->Transform(x, kinshipU, kinshipS);
}
int FastLMM::GetWeight(Vector* out) const { return this->impl->GetWeight(out); }
void FastLMM::disableCenterGenotype() { this->impl->disableCenterGenotype(); }
// need to negaive the MLE to minize it
......
......@@ -2,6 +2,7 @@
#define _FASTLMM_H_
#include <vector>
#include "regression/MatrixRef.h"
class EigenMatrix;
class Matrix;
......@@ -68,17 +69,27 @@ class FastLMM {
void GetCovXX(const std::vector<double>& g1, const std::vector<double>& g2,
const EigenMatrix& kinshipU, const EigenMatrix& kinshipS,
double* out);
void GetCovXX(FloatMatrixRef& g1, FloatMatrixRef& g2,
const EigenMatrix& kinshipU, const EigenMatrix& kinshipS,
float* out);
// NOTE: here assume @param g is transformed. e.g. U' * g
void GetCovXZ(const std::vector<double>& g, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS, std::vector<double>* out);
void GetCovXZ(FloatMatrixRef& g, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS, FloatMatrixRef& out);
// transform genotype (e.g. in MetaCov)
// x <- U' * ( x - center(x) )
int TransformCentered(std::vector<double>* x, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS);
int TransformCentered(FloatMatrixRef& x, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS);
// transform genotype (e.g. in MetaCov)
// x <- U' * x
int Transform(std::vector<double>* x, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS);
int Transform(FloatMatrixRef& geno, const EigenMatrix& kinshipU,
const EigenMatrix& kinshipS);
// @param out = sigma2_g * (lambda + delta) = sigma2_g * lambda + sigma2_e;
int GetWeight(Vector* out) const;
......
......@@ -16,6 +16,7 @@ EXE = testGSLIntegration \
testFastMultipleTraitScoreTest.NA \
testBoltLMM \
testGSLMinimizer \
testMatrixRef \
all: $(EXE)
debug: $(EXE)
......
......@@ -36,7 +36,7 @@ BASE += SingleDummy
OBJ = $(BASE:%=%.o)
OBJ_DBG = $(BASE:%=%_dbg.o)
DEFAULT_CXXFLAGS ?= -std=c++0x -I. $(addprefix -I, $(THIRD_INC)) -DGIT_VERSION="\"$(GIT_VERSION)\"" -fopenmp
DEFAULT_CXXFLAGS ?= -std=c++0x -I. $(addprefix -I, $(THIRD_INC)) -DGIT_VERSION="\"$(GIT_VERSION)\"" -fopenmp -Wno-error=int-in-bool-context
# enable read over HTTP and FTP
DEFAULT_CXXFLAGS += -D_USE_KNETFILE
......
......@@ -358,6 +358,13 @@ int obtainB(double alpha, double* out) {
return 0;
}
int obtainB(float alpha, float* out) {
double ret;
obtainB((double)alpha, &ret);
*out = ret;
return 0;
}
void makeVariableThreshodlGenotype(
DataConsolidator* dc, Matrix& in, Matrix* out, std::vector<double>* freqOut,
void (*collapseFunc)(DataConsolidator*, Matrix&, const std::vector<int>&,
......@@ -392,12 +399,629 @@ void MetaScoreTest::MetaFamBinary::calculateB() {
// fprintf(stderr, "alpha = %g, b = %g\n", alpha, b);
return;
}
void MetaCovTest::MetaCovFamBinary::calculateB() {
void MetaScoreTest::MetaUnrelatedBinary::calculateB() {
obtainB(this->alpha, &this->b);
return;
}
void MetaScoreTest::MetaUnrelatedBinary::calculateB() {
//////////////////////////////////////////////////
// MetaCov
//////////////////////////////////////////////////
class MetaCovBase {
public:
MetaCovBase() : needToFitNullModel(true) {}
virtual ~MetaCovBase() {}
virtual int FitNullModel(Matrix& genotype, DataConsolidator* dc) = 0;
// virtual int transformGenotype(Genotype* out, DataConsolidator* dc) = 0;
// virtual int calculateXX(const Genotype& x1, const Genotype& x2,
// float* covXX) = 0;
// virtual int calculateXZ(const Genotype& x, std::vector<float>* covXZ) = 0;
virtual int transformGenotype(FloatMatrixRef& out, DataConsolidator* dc) = 0;
virtual int calculateXX(FloatMatrixRef& x1, FloatMatrixRef& x2,
float* covXX) = 0;
virtual int calculateXZ(FloatMatrixRef& inGeno, FloatMatrixRef& outXZ) = 0;
virtual int calculateZZ(Matrix* covZZ) = 0;
bool needToFitNullModel;
bool hemiRegion;
protected:
Matrix cov;
};
class MetaCovFamQtl : public MetaCovBase {
public:
MetaCovFamQtl() : metaCov(FastLMM::SCORE, FastLMM::MLE) {}
int FitNullModel(Matrix& genotype, DataConsolidator* dc) {
Matrix& phenotype = dc->getPhenotype();
Matrix& covariate = dc->getCovariate();
copyCovariateAndIntercept(genotype.rows, covariate, &cov);
bool fitOK;
if (!hemiRegion) {
fitOK = (0 ==
metaCov.FitNullModel(cov, phenotype, *dc->getKinshipUForAuto(),
*dc->getKinshipSForAuto()));
this->U = dc->getKinshipUForAuto();
this->S = dc->getKinshipSForAuto();
} else {
if (!dc->hasKinshipForX()) {
fitOK = false;
return -1;
}
fitOK = (0 ==
metaCov.FitNullModel(cov, phenotype, *dc->getKinshipUForX(),
*dc->getKinshipSForX()));
this->U = dc->getKinshipUForX();
this->S = dc->getKinshipSForX();
}
if (fitOK) {
needToFitNullModel = false;
return 0;
}
return -1;
}
int transformGenotype(FloatMatrixRef& geno, DataConsolidator* dc) {
if (!hemiRegion) {
metaCov.TransformCentered(geno, *dc->getKinshipUForAuto(),
*dc->getKinshipSForAuto());
} else {
metaCov.TransformCentered(geno, *dc->getKinshipUForX(),
*dc->getKinshipSForX());
}
return 0;
}
int calculateXX(FloatMatrixRef& g1, FloatMatrixRef& g2, float* covXX) {
// const int nSample = g1.size();
metaCov.GetCovXX(g1, g2, *U, *S, covXX);
// (*covXX) /= nSample;
return 0;
}
int calculateXZ(FloatMatrixRef& g, FloatMatrixRef& covXZ) {
metaCov.GetCovXZ(g, *U, *S, covXZ);
return 0;
}
int calculateZZ(Matrix* covZZ) {
metaCov.GetCovZZ(covZZ);
return 0;
}
private:
const EigenMatrix* U;
const EigenMatrix* S;
FastLMM metaCov;
}; // class MetaCovFamQtl
class MetaCovUnrelatedQtl : public MetaCovBase {
int FitNullModel(Matrix& genotype, DataConsolidator* dc) {
Matrix& phenotype = dc->getPhenotype();
Matrix& covariate = dc->getCovariate();
copyCovariateAndIntercept(genotype.rows, covariate, &cov);
bool fitOK = linear.FitLinearModel(cov, phenotype);
if (!fitOK) {
return -1;
}