Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
xiaowei zhan
RVTESTS
Commits
31df5d39
Commit
31df5d39
authored
Sep 05, 2017
by
zhanxw
Browse files
support kgg input
parent
c6022595
Changes
7
Hide whitespace changes
Inline
Side-by-side
libBgen/.gitignore
0 → 100644
View file @
31df5d39
*.o
*.a
libVcf/KGGInputFile.cpp
View file @
31df5d39
...
...
@@ -2,6 +2,7 @@
#include "base/Exception.h"
#include "base/IO.h"
#include "base/TypeConversion.h"
#ifndef UNUSED
#define UNUSED(x) (void)(x)
...
...
@@ -209,6 +210,143 @@ bool KGGInputFile::readRecord() {
return
true
;
}
//////////////////////////////////////////////////
// Sample inclusion/exclusion
void
KGGInputFile
::
setPeopleMask
(
const
std
::
string
&
s
,
bool
b
)
{
for
(
size_t
i
=
0
;
i
!=
indv
.
size
();
++
i
)
{
if
(
indv
[
i
]
==
s
)
{
sampleMask
[
i
]
=
b
;
}
}
buildEffectiveIndex
();
}
void
KGGInputFile
::
includePeople
(
const
std
::
string
&
s
)
{
setPeopleMask
(
s
,
false
);
}
void
KGGInputFile
::
includePeople
(
const
std
::
vector
<
std
::
string
>&
v
)
{
for
(
size_t
i
=
0
;
i
!=
v
.
size
();
++
i
)
{
includePeople
(
v
[
i
].
c_str
());
}
}
void
KGGInputFile
::
setPeopleMaskFromFile
(
const
char
*
fn
,
bool
b
)
{
if
(
!
fn
||
strlen
(
fn
)
==
0
)
{
return
;
}
LineReader
lr
(
fn
);
std
::
vector
<
std
::
string
>
fd
;
while
(
lr
.
readLineBySep
(
&
fd
,
"
\t
"
))
{
for
(
unsigned
int
i
=
0
;
i
<
fd
.
size
();
i
++
)
{
setPeopleMask
(
fd
[
i
].
c_str
(),
b
);
}
}
buildEffectiveIndex
();
}
void
KGGInputFile
::
includePeopleFromFile
(
const
char
*
fn
)
{
setPeopleMaskFromFile
(
fn
,
false
);
}
void
KGGInputFile
::
includeAllPeople
()
{
std
::
fill
(
sampleMask
.
begin
(),
sampleMask
.
end
(),
false
);
buildEffectiveIndex
();
}
void
KGGInputFile
::
excludePeople
(
const
std
::
string
&
s
)
{
setPeopleMask
(
s
,
true
);
}
void
KGGInputFile
::
excludePeople
(
const
std
::
vector
<
std
::
string
>&
v
)
{
for
(
size_t
i
=
0
;
i
!=
v
.
size
();
++
i
)
{
excludePeople
(
v
[
i
]);
}
}
void
KGGInputFile
::
excludePeopleFromFile
(
const
char
*
fn
)
{
setPeopleMaskFromFile
(
fn
,
true
);
}
void
KGGInputFile
::
excludeAllPeople
()
{
std
::
fill
(
sampleMask
.
begin
(),
sampleMask
.
end
(),
true
);
buildEffectiveIndex
();
}
//////////////////////////////////////////////////
// Adjust range collections
#if 0
void KGGInputFile::enableAutoMerge() { warnUnsupported("enableAutoMerge"); }
void KGGInputFile::disableAutoMerge() { warnUnsupported("disableAutoMerge"); }
// void clearRange();
void KGGInputFile::setRangeFile(const char* fn) {
warnUnsupported("setRangeFile");
}
// @param l is a string of range(s)
void KGGInputFile::setRange(const char* chrom, int begin, int end) {
warnUnsupported("setRange");
}
void KGGInputFile::setRange(const RangeList& rl) {
warnUnsupported("setRange");
}
void KGGInputFile::setRangeList(const std::string& l) {
warnUnsupported("setRangeList");
}
// this function the entry point for all function add/change region list
void KGGInputFile::setRangeList(const RangeList& rl) {
warnUnsupported("setRangeList");
}
void KGGInputFile::setRangeMode() { warnUnsupported("setRangeMode"); }
#endif
int
KGGInputFile
::
setSiteFile
(
const
std
::
string
&
fn
)
{
if
(
fn
.
empty
())
return
0
;
std
::
vector
<
std
::
string
>
fd
;
LineReader
lr
(
fn
);
int
pos
;
std
::
string
chromPos
;
while
(
lr
.
readLineBySep
(
&
fd
,
"
\t
"
))
{
if
(
fd
.
empty
())
continue
;
if
(
fd
[
0
].
find
(
':'
)
!=
std
::
string
::
npos
)
{
this
->
allowedSite
.
insert
(
fd
[
0
]);
continue
;
}
if
(
fd
.
size
()
>=
2
&&
str2int
(
fd
[
1
],
&
pos
)
&&
pos
>
0
)
{
chromPos
=
fd
[
0
];
chromPos
+=
':'
;
chromPos
+=
fd
[
1
];
this
->
allowedSite
.
insert
(
chromPos
);
continue
;
}
}
return
0
;
}
int
KGGInputFile
::
getNumEffectiveSample
()
const
{
size_t
ret
=
0
;
for
(
size_t
i
=
0
;
i
!=
sampleMask
.
size
();
++
i
)
{
if
(
sampleMask
[
i
])
continue
;
ret
++
;
}
return
ret
;
}
void
KGGInputFile
::
getIncludedSampleName
(
std
::
vector
<
std
::
string
>*
p
)
const
{
if
(
!
p
)
return
;
p
->
clear
();
for
(
size_t
i
=
0
;
i
!=
sampleMask
.
size
();
++
i
)
{
if
(
sampleMask
[
i
])
continue
;
p
->
push_back
(
getSampleName
()[
i
]);
}
}
void
KGGInputFile
::
buildEffectiveIndex
()
{
effectiveIndex
.
resize
(
0
);
const
size_t
N
=
getNumSample
();
for
(
size_t
i
=
0
;
i
!=
N
;
++
i
)
{
if
(
sampleMask
[
i
])
continue
;
effectiveIndex
.
push_back
(
i
);
}
}
int
KGGInputFile
::
getEffectiveIndex
(
int
idx
)
const
{
return
this
->
effectiveIndex
[
idx
];
}
int
KGGInputFile
::
getGenotype
(
int
indvIdx
)
{
const
int
nAllele
=
alt
[
variantIdx
].
size
()
+
1
;
...
...
@@ -266,3 +404,7 @@ void KGGInputFile::buildPhasedTable(int allele) {
m
[
val
].
x
[
0
]
=
-
9
;
m
[
val
].
x
[
1
]
=
-
9
;
}
void
KGGInputFile
::
warnUnsupported
(
const
char
*
tag
)
{
fprintf
(
stderr
,
"Please remove unsupported features related to %s"
,
tag
);
}
libVcf/KGGInputFile.h
View file @
31df5d39
...
...
@@ -2,6 +2,7 @@
#define _KGGINPUTFILE_H_
#include <map>
#include <set>
#include <string>
#include <vector>
...
...
@@ -17,6 +18,20 @@ class KGGInputFile {
// @return false if reached end
bool
readRecord
();
//////////////////////////////////////////////////
// Sample inclusion/exclusion
void
includePeople
(
const
std
::
string
&
s
);
void
includePeople
(
const
std
::
vector
<
std
::
string
>&
v
);
void
includePeopleFromFile
(
const
char
*
fn
);
void
includeAllPeople
();
void
excludePeople
(
const
std
::
string
&
s
);
void
excludePeople
(
const
std
::
vector
<
std
::
string
>&
v
);
void
excludePeopleFromFile
(
const
char
*
fn
);
void
excludeAllPeople
();
// No range related function
int
setSiteFile
(
const
std
::
string
&
fn
);
int
getGenotype
(
int
indvIdx
);
void
getAllele
(
int
indvIdx
,
int
*
a1
,
int
*
a2
);
...
...
@@ -40,9 +55,13 @@ class KGGInputFile {
}
int
getNumIndv
()
const
{
return
this
->
indv
.
size
();
}
int
getNumSample
()
const
{
return
this
->
indv
.
size
();
}
int
getNumEffectiveSample
()
const
;
int
getNumMarker
()
const
{
return
this
->
snp2Idx
.
size
();
}
const
std
::
vector
<
std
::
string
>&
getIndv
()
const
{
return
this
->
indv
;
}
const
std
::
vector
<
std
::
string
>&
getSampleName
()
const
{
return
this
->
indv
;
}
void
getIncludedSampleName
(
std
::
vector
<
std
::
string
>*
p
)
const
;
int
getEffectiveIndex
(
int
idx
)
const
;
const
std
::
vector
<
std
::
string
>&
getIID
()
const
{
return
this
->
indv
;
}
const
std
::
vector
<
std
::
string
>&
getChrom
()
const
{
return
this
->
chrom
;
}
const
std
::
vector
<
std
::
string
>&
getMarkerName
()
const
{
return
this
->
snp
;
}
...
...
@@ -71,6 +90,14 @@ class KGGInputFile {
void
buildUnphasedTable
(
int
numAllele
);
void
buildPhasedTable
(
int
numAllele
);
// sample inclusion/exclusion related
void
setPeopleMask
(
const
std
::
string
&
s
,
bool
b
);
void
setPeopleMaskFromFile
(
const
char
*
fn
,
bool
b
);
void
setRangeMode
();
// range list related
void
buildEffectiveIndex
();
void
warnUnsupported
(
const
char
*
tag
);
private:
typedef
struct
TwoChar
{
unsigned
char
x
[
2
];
}
TwoChar
;
std
::
map
<
std
::
string
,
int
>
snp2Idx
;
...
...
@@ -89,6 +116,11 @@ class KGGInputFile {
bool
phased
;
std
::
map
<
int
,
std
::
map
<
char
,
TwoChar
>
>
unphasedTable
;
std
::
map
<
int
,
std
::
map
<
char
,
TwoChar
>
>
phasedTable
;
std
::
vector
<
bool
>
sampleMask
;
// true means exclusion
std
::
vector
<
int
>
effectiveIndex
;
// allow chromosomal sites
std
::
set
<
std
::
string
>
allowedSite
;
};
#endif
/* _KGGINPUTFILE_H_ */
src/KGGGenotypeExtractor.cpp
0 → 100644
View file @
31df5d39
#include "KGGGenotypeExtractor.h"
#include "GenotypeCounter.h"
#include "Result.h"
#include "base/Argument.h"
#include "base/Logger.h"
#include "libVcf/KGGInputFile.h"
#include "libVcf/VCFConstant.h"
#include "libsrc/MathMatrix.h"
#include "libsrc/MathVector.h"
DECLARE_BOOL_PARAMETER
(
outputID
);
extern
Logger
*
logger
;
KGGGenotypeExtractor
::
KGGGenotypeExtractor
(
const
std
::
string
&
fn
)
:
GenotypeExtractor
(
fn
),
kggIn
(
NULL
),
altAlleleToParse
(
-
1
),
currentVariant
(
0
)
{
this
->
kggIn
=
new
KGGInputFile
(
fn
.
c_str
(),
".gz"
);
}
KGGGenotypeExtractor
::~
KGGGenotypeExtractor
()
{
if
(
this
->
kggIn
)
{
delete
this
->
kggIn
;
this
->
kggIn
=
NULL
;
}
}
int
KGGGenotypeExtractor
::
extractMultipleGenotype
(
Matrix
*
g
)
{
warnUnsupported
(
"extractMultipleGenotype"
);
return
-
1
;
#if 0
assert(g);
assert(g->rows >= 0 && g->cols >= 0);
g->Dimension(0, 0);
int row = 0;
this->genotype.clear();
this->altAlleleToParse = -1;
const bool useDosage = false; // TODO: later may support genotype calls
while (true) {
if (this->altAlleleToParse <= 0) {
if (!this->kggIn->readRecord()) {
// reached the end
break;
} else {
// const char* alt = this->kggIn->getKGGRecord().getAlt();
if (multiAllelicMode) {
// parseAltAllele(alt);
// this->altAlleleToParse = altAllele.size();
this->altAlleleToParse =
var.alleles.size() -
1; // num of alternative alleles to be parsed/processed
} else {
// this->altAllele.resize(1);
// this->altAllele[0] = alt;
this->altAlleleToParse = 1;
}
}
}
assert(this->altAlleleToParse > 0);
// parse alt allele one at a time
this->sampleSize = this->kggIn->getNumEffectiveSample();
row++;
this->variantName.resize(row);
this->counter.resize(row);
this->counter.back().reset();
this->hemiRegion.resize(row);
assert(this->parRegion);
const bool isHemiRegion =
this->parRegion->isHemiRegion(var.chrom.c_str(), var.pos);
// e.g.: Loop each (selected) people in the same order as in the KGG
double geno;
/// const int altAlleleGT = this->altAllele.size() - this->altAlleleToParse;
const int altAlleleGT =
var.alleles.size() -
this->altAlleleToParse; // when allele = K, alleleGT = 1, 2. ..., K-1
for (int i = 0; i < sampleSize; i++) {
// indv = people[i];
const int indvIdx = this->kggIn->getEffectiveIndex(i);
if (multiAllelicMode) {
geno = getGenotypeForAltAllele(var, indvIdx, useDosage, isHemiRegion,
(*sex)[i], altAlleleGT);
} else {
geno = getGenotype(var, indvIdx, useDosage, isHemiRegion, (*sex)[i]);
}
this->genotype.push_back(geno);
this->counter.back().add(geno);
} // end for i
// check frequency cutoffs
const double maf = counter.back().getMAF();
if ((this->freqMin > 0. && this->freqMin > maf) ||
(this->freqMax > 0. && this->freqMax < maf)) {
// undo loaded contents
row--;
this->variantName.resize(row);
this->counter.resize(row);
this->hemiRegion.resize(row);
this->genotype.resize(this->genotype.size() - this->sampleSize);
this->altAlleleToParse--;
continue;
}
this->variantName.back() = var.chrom;
this->variantName.back() += ":";
this->variantName.back() += var.pos;
if (multiAllelicMode) {
this->variantName.back() += var.alleles[0];
this->variantName.back() += "/";
this->variantName.back() +=
var.alleles[altAllele.size() - this->altAlleleToParse];
}
this->hemiRegion.back() = (isHemiRegion);
this->altAlleleToParse--;
} // end while (this->kggIn->readRecord())
// now transpose (marker by people -> people by marker)
if (row > 0) {
assert((int)genotype.size() == this->sampleSize * row);
assign(this->genotype, sampleSize, row, g);
for (int i = 0; i < row; ++i) {
g->SetColumnLabel(i, variantName[i].c_str());
}
}
return SUCCEED;
#endif
}
// end extractMultipleGenotype(Matrix* g)
int
KGGGenotypeExtractor
::
extractSingleGenotype
(
Matrix
*
g
,
Result
*
b
)
{
this
->
genotype
.
clear
();
Result
&
buf
=
*
b
;
const
bool
useDosage
=
true
;
int
numAltAllele
=
this
->
kggIn
->
getAlt
()[
currentVariant
].
size
();
if
(
this
->
altAlleleToParse
<=
0
)
{
if
(
!
this
->
kggIn
->
readRecord
())
{
return
FILE_END
;
}
else
{
if
(
multiAllelicMode
)
{
// parseAltAllele(alt);
this
->
altAlleleToParse
=
numAltAllele
-
1
;
}
else
{
// this->altAllele.resize(1);
// this->altAllele[0] = alt;
this
->
altAlleleToParse
=
1
;
}
}
}
assert
(
this
->
altAlleleToParse
>=
0
);
buf
.
updateValue
(
"CHROM"
,
this
->
kggIn
->
getChrom
()[
currentVariant
]);
buf
.
updateValue
(
"POS"
,
this
->
kggIn
->
getPosition
()[
currentVariant
]);
if
(
FLAG_outputID
)
{
buf
.
updateValue
(
"ID"
,
this
->
kggIn
->
getMarkerName
()[
currentVariant
]);
}
buf
.
updateValue
(
"REF"
,
this
->
kggIn
->
getRef
()[
currentVariant
]);
buf
.
updateValue
(
"ALT"
,
this
->
kggIn
->
getAlt
()[
currentVariant
]
[
numAltAllele
-
this
->
altAlleleToParse
]);
// genotype.Dimension(people.size(), 1);
this
->
sampleSize
=
this
->
kggIn
->
getNumEffectiveSample
();
this
->
variantName
.
resize
(
1
);
this
->
counter
.
resize
(
1
);
this
->
counter
.
back
().
reset
();
this
->
hemiRegion
.
resize
(
1
);
bool
isHemiRegion
=
this
->
parRegion
->
isHemiRegion
(
this
->
kggIn
->
getChrom
()[
currentVariant
].
c_str
(),
this
->
kggIn
->
getPosition
()[
currentVariant
]);
// e.g.: Loop each (selected) people in the same order as in the KGG
double
geno
;
// const int altAlleleGT = this->altAllele.size() - this->altAlleleToParse +
// 1;
const
int
altAlleleGT
=
numAltAllele
-
this
->
altAlleleToParse
;
for
(
int
i
=
0
;
i
<
sampleSize
;
i
++
)
{
// indv = people[i];
const
int
indvIdx
=
this
->
kggIn
->
getEffectiveIndex
(
i
);
if
(
multiAllelicMode
)
{
geno
=
getGenotypeForAltAllele
(
indvIdx
,
useDosage
,
isHemiRegion
,
(
*
sex
)[
i
],
altAlleleGT
);
}
else
{
geno
=
getGenotype
(
indvIdx
,
useDosage
,
isHemiRegion
,
(
*
sex
)[
i
]);
}
genotype
.
push_back
(
geno
);
counter
.
back
().
add
(
geno
);
}
// check frequency cutoffs
const
double
maf
=
counter
[
0
].
getMAF
();
if
((
this
->
freqMin
>
0.
&&
this
->
freqMin
>
maf
)
||
(
this
->
freqMax
>
0.
&&
this
->
freqMax
<
maf
))
{
--
this
->
altAlleleToParse
;
return
FAIL_FILTER
;
}
variantName
.
back
()
=
this
->
kggIn
->
getChrom
()[
currentVariant
];
variantName
.
back
()
+=
':'
;
variantName
.
back
()
+=
toString
(
this
->
kggIn
->
getPosition
()[
currentVariant
]);
hemiRegion
.
back
()
=
isHemiRegion
;
assert
((
int
)
genotype
.
size
()
==
sampleSize
);
assign
(
genotype
,
sampleSize
,
1
,
g
);
g
->
SetColumnLabel
(
0
,
variantName
.
back
().
c_str
());
--
this
->
altAlleleToParse
;
if
(
this
->
altAlleleToParse
==
0
)
{
++
currentVariant
;
}
return
SUCCEED
;
}
// end extractSingleGenotype()
#if 0
int loadMarkerFromKGG(const std::string& fileName, const std::string& marker,
std::vector<std::string>* rowLabel, Matrix* genotype) {
if (!rowLabel || !genotype) {
// invalid parameter
return -1;
}
Matrix& m = *genotype;
int col = 0;
KGGInputFile kggIn(fileName);
kggIn.setRangeList(marker);
while (kggIn.readRecord()) {
KGGRecord& r = kggIn.getKGGRecord();
KGGPeople& people = r.getPeople();
KGGIndividual* indv;
m.Dimension(people.size(), col + 1);
int GTidx = r.getFormatIndex("GT");
for (int i = 0; i < (int)people.size(); i++) {
indv = people[i];
// get GT index. if you are sure the index will not change,
// call this function only once!
if (GTidx >= 0) {
// printf("%s ", indv->justGet(0).toStr()); // [0] meaning the first
// field of each individual
m[i][col] = indv->justGet(GTidx).getGenotype();
} else {
logger->error("Cannot find GT field!");
return -1;
}
}
if (col == 0) {
// set-up names
rowLabel->resize(people.size());
for (size_t i = 0; i < people.size(); ++i) {
(*rowLabel)[i] = people[i]->getName();
}
}
std::string colLabel = r.getChrom();
colLabel += ":";
colLabel += r.getPosStr();
m.SetColumnLabel(col, colLabel.c_str());
++col;
}
return 0;
}
#endif
bool
KGGGenotypeExtractor
::
setSiteFreqMin
(
const
double
f
)
{
if
(
f
<
0.0
||
f
>
1.0
)
{
return
false
;
}
this
->
freqMin
=
f
-
1e-10
;
// allow rounding error
return
true
;
}
bool
KGGGenotypeExtractor
::
setSiteFreqMax
(
const
double
f
)
{
if
(
f
<
0.0
||
f
>
1.0
)
{
return
false
;
}
this
->
freqMax
=
f
+
1e-10
;
// allow rounding error
return
true
;
}
void
KGGGenotypeExtractor
::
setSiteDepthMin
(
int
d
)
{
warnUnsupported
(
"site depth"
);
}
void
KGGGenotypeExtractor
::
setSiteDepthMax
(
int
d
)
{
warnUnsupported
(
"site depth"
);
}
// // @return true if GD is valid
// // if GD is missing, we will take GD = 0
// bool KGGGenotypeExtractor::checkGD(KGGIndividual& indv, int gdIdx) {
// warnUnsupported("GD");
// return true;
// }
// bool KGGGenotypeExtractor::checkGQ(KGGIndividual& indv, int gqIdx) {
// warnUnsupported("GQ");
// return true;
// }
void
KGGGenotypeExtractor
::
setGDmin
(
int
m
)
{
warnUnsupported
(
"GD"
);
}
void
KGGGenotypeExtractor
::
setGDmax
(
int
m
)
{
warnUnsupported
(
"GD"
);
}
void
KGGGenotypeExtractor
::
setGQmin
(
int
m
)
{
warnUnsupported
(
"GQ"
);
}
void
KGGGenotypeExtractor
::
setGQmax
(
int
m
)
{
warnUnsupported
(
"GQ"
);
}
void
KGGGenotypeExtractor
::
setSiteFile
(
const
std
::
string
&
fn
)
{
this
->
kggIn
->
setSiteFile
(
fn
);
}
void
KGGGenotypeExtractor
::
setSiteQualMin
(
int
q
)
{
warnUnsupported
(
"SiteQual"
);
}
void
KGGGenotypeExtractor
::
setSiteMACMin
(
int
n
)
{
warnUnsupported
(
"SiteMAC"
);
}
int
KGGGenotypeExtractor
::
setAnnoType
(
const
std
::
string
&
s
)
{
warnUnsupported
(
"anno"
);
return
-
1
;
}
void
KGGGenotypeExtractor
::
setRange
(
const
RangeList
&
l
)
{
warnUnsupported
(
"setRange"
);
}
void
KGGGenotypeExtractor
::
setRangeList
(
const
std
::
string
&
l
)
{
warnUnsupported
(
"setRangeList"
);
}
void
KGGGenotypeExtractor
::
setRangeFile
(
const
std
::
string
&
fn
)
{
warnUnsupported
(
"setRangeFile"
);
}
void
KGGGenotypeExtractor
::
includePeople
(
const
std
::
string
&
v
)
{
this
->
kggIn
->
includePeople
(
v
.
c_str
());
}
void
KGGGenotypeExtractor
::
includePeople
(
const
std
::
vector
<
std
::
string
>&
v
)
{
this
->
kggIn
->
includePeople
(
v
);
}
void
KGGGenotypeExtractor
::
includePeopleFromFile
(
const
std
::
string
&
fn
)
{
this
->
kggIn
->
includePeopleFromFile
(
fn
.
c_str
());
}
void
KGGGenotypeExtractor
::
excludePeople
(
const
std
::
string
&
v
)
{
this
->
kggIn
->
excludePeople
(
v
.
c_str
());
}
void
KGGGenotypeExtractor
::
excludePeople
(
const
std
::
vector
<
std
::
string
>&
v
)
{
this
->
kggIn
->
excludePeople
(
v
);
}
void
KGGGenotypeExtractor
::
excludePeopleFromFile
(
const
std
::
string
&
fn
)
{
this
->
kggIn
->
excludePeopleFromFile
(
fn
.
c_str
());
}
void
KGGGenotypeExtractor
::
excludePeople
(
const
std
::
vector
<
std
::
string
>&
sample
,
const
std
::
vector
<
int
>&
index
)
{
for
(
size_t
i
=
0
;
i
<
index
.
size
();
++
i
)
{
this
->
excludePeople
(
sample
[
i
]);
}
}
void
KGGGenotypeExtractor
::
excludeAllPeople
()
{
this
->
kggIn
->
excludeAllPeople
();
}
void
KGGGenotypeExtractor
::
enableAutoMerge
()
{
warnUnsupported
(
"enableAutoMerge"
);
}
void
KGGGenotypeExtractor
::
getPeopleName
(
std
::
vector
<
std
::
string
>*
p
)
{
*
p
=
this
->
kggIn
->
getSampleName
();
return
;
}
void
KGGGenotypeExtractor
::
getIncludedPeopleName
(
std
::
vector
<
std
::
string
>*
p
)
const
{
this
->
kggIn
->
getIncludedSampleName
(
p
);
return
;
}
void
KGGGenotypeExtractor
::
parseAltAllele
(
const
char
*
s
)
{
stringTokenize
(
s
,
","
,
&
altAllele
);
}
double
KGGGenotypeExtractor
::
getGenotype
(
int
indvIdx
,
const
bool
useDosage
,
const
bool
hemiRegion
,
const
int
sex
)
{
const
int
idx
=
this
->
kggIn
->
getEffectiveIndex
(
indvIdx
);
double
ret
;
assert
(
!
useDosage
);
int
a1
,
a2
;
this
->
kggIn
->
getAllele
(
idx
,
&
a1
,
&
a2
);
if
(
a1
<
0
||
a2
<
0
)
{
return
MISSING_GENOTYPE
;
}
if
(
hemiRegion
&&
sex
==
PLINK_MALE
&&
a1
!=
a2
)
{
logger
->
error
(
"In hemizygous region, male have different alleles, treated as missing "