Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
xiaowei zhan
RVTESTS
Commits
96be9b56
Commit
96be9b56
authored
Aug 30, 2017
by
zhanxw
Browse files
add VCFGenotypeExtractor
parent
2fb11df8
Changes
2
Hide whitespace changes
Inline
Side-by-side
src/VCFGenotypeExtractor.cpp
0 → 100644
View file @
96be9b56
#include "VCFGenotypeExtractor.h"
#include "GenotypeCounter.h"
#include "Result.h"
#include "base/Argument.h"
#include "base/Logger.h"
#include "libVcf/VCFUtil.h"
#include "libsrc/MathMatrix.h"
#include "libsrc/MathVector.h"
DECLARE_BOOL_PARAMETER
(
outputID
);
extern
Logger
*
logger
;
VCFGenotypeExtractor
::
VCFGenotypeExtractor
(
const
std
::
string
&
fn
)
:
GenotypeExtractor
(
fn
),
vin
(
NULL
),
altAlleleToParse
(
-
1
)
{
this
->
vin
=
new
VCFExtractor
(
fn
.
c_str
());
}
VCFGenotypeExtractor
::~
VCFGenotypeExtractor
()
{
if
(
this
->
vin
)
{
delete
this
->
vin
;
this
->
vin
=
NULL
;
}
}
int
VCFGenotypeExtractor
::
extractMultipleGenotype
(
Matrix
*
g
)
{
assert
(
g
);
assert
(
g
->
rows
>=
0
&&
g
->
cols
>=
0
);
g
->
Dimension
(
0
,
0
);
int
row
=
0
;
this
->
genotype
.
clear
();
this
->
altAlleleToParse
=
-
1
;
while
(
true
)
{
if
(
this
->
altAlleleToParse
<=
0
)
{
if
(
!
this
->
vin
->
readRecord
())
{
// reached the end
break
;
}
else
{
const
char
*
alt
=
this
->
vin
->
getVCFRecord
().
getAlt
();
if
(
multiAllelicMode
)
{
parseAltAllele
(
alt
);
this
->
altAlleleToParse
=
altAllele
.
size
();
}
else
{
this
->
altAllele
.
resize
(
1
);
this
->
altAllele
[
0
]
=
alt
;
this
->
altAlleleToParse
=
1
;
}
}
}
assert
(
this
->
altAlleleToParse
>
0
);
// parse alt allele one at a time
VCFRecord
&
r
=
this
->
vin
->
getVCFRecord
();
VCFPeople
&
people
=
r
.
getPeople
();
VCFIndividual
*
indv
;
this
->
sampleSize
=
people
.
size
();
row
++
;
this
->
variantName
.
resize
(
row
);
this
->
counter
.
resize
(
row
);
this
->
counter
.
back
().
reset
();
this
->
hemiRegion
.
resize
(
row
);
// get GT index and cannot assume it is a constant across variants
int
genoIdx
;
const
bool
useDosage
=
(
!
this
->
dosageTag
.
empty
());
if
(
useDosage
)
{
genoIdx
=
r
.
getFormatIndex
(
dosageTag
.
c_str
());
}
else
{
genoIdx
=
r
.
getFormatIndex
(
"GT"
);
}
if
(
useDosage
&&
multiAllelicMode
)
{
logger
->
error
(
"Unsupported scenario: multiple mode and use dosage as "
"genotypes! - we will only use dosage"
);
this
->
altAlleleToParse
=
1
;
}
const
int
GDidx
=
r
.
getFormatIndex
(
"GD"
);
const
int
GQidx
=
r
.
getFormatIndex
(
"GQ"
);
assert
(
this
->
parRegion
);
const
bool
isHemiRegion
=
this
->
parRegion
->
isHemiRegion
(
r
.
getChrom
(),
r
.
getPos
());
// e.g.: Loop each (selected) people in the same order as in the VCF
double
geno
;
const
int
altAlleleGT
=
this
->
altAllele
.
size
()
-
this
->
altAlleleToParse
+
1
;
for
(
int
i
=
0
;
i
<
sampleSize
;
i
++
)
{
indv
=
people
[
i
];
if
(
multiAllelicMode
)
{
geno
=
getGenotypeForAltAllele
(
*
indv
,
useDosage
,
isHemiRegion
,
(
*
sex
)[
i
],
genoIdx
,
GDidx
,
GQidx
,
altAlleleGT
);
}
else
{
geno
=
getGenotype
(
*
indv
,
useDosage
,
isHemiRegion
,
(
*
sex
)[
i
],
genoIdx
,
GDidx
,
GQidx
);
}
this
->
genotype
.
push_back
(
geno
);
this
->
counter
.
back
().
add
(
geno
);
}
// end for i
// check frequency cutoffs
const
double
maf
=
counter
.
back
().
getMAF
();
if
((
this
->
freqMin
>
0.
&&
this
->
freqMin
>
maf
)
||
(
this
->
freqMax
>
0.
&&
this
->
freqMax
<
maf
))
{
// undo loaded contents
row
--
;
this
->
variantName
.
resize
(
row
);
this
->
counter
.
resize
(
row
);
this
->
hemiRegion
.
resize
(
row
);
this
->
genotype
.
resize
(
this
->
genotype
.
size
()
-
this
->
sampleSize
);
this
->
altAlleleToParse
--
;
continue
;
}
this
->
variantName
.
back
()
=
r
.
getChrom
();
this
->
variantName
.
back
()
+=
":"
;
this
->
variantName
.
back
()
+=
r
.
getPosStr
();
if
(
multiAllelicMode
)
{
this
->
variantName
.
back
()
+=
r
.
getRef
();
this
->
variantName
.
back
()
+=
"/"
;
this
->
variantName
.
back
()
+=
altAllele
[
altAllele
.
size
()
-
this
->
altAlleleToParse
];
}
this
->
hemiRegion
.
back
()
=
(
isHemiRegion
);
this
->
altAlleleToParse
--
;
}
// end while (this->vin->readRecord())
// now transpose (marker by people -> people by marker)
if
(
row
>
0
)
{
assert
((
int
)
genotype
.
size
()
==
this
->
sampleSize
*
row
);
assign
(
this
->
genotype
,
sampleSize
,
row
,
g
);
for
(
int
i
=
0
;
i
<
row
;
++
i
)
{
g
->
SetColumnLabel
(
i
,
variantName
[
i
].
c_str
());
}
}
return
SUCCEED
;
}
// end VCFGenotypeExtractor
int
VCFGenotypeExtractor
::
extractSingleGenotype
(
Matrix
*
g
,
Result
*
b
)
{
this
->
genotype
.
clear
();
Result
&
buf
=
*
b
;
if
(
this
->
altAlleleToParse
<=
0
)
{
if
(
!
this
->
vin
->
readRecord
())
{
return
FILE_END
;
}
else
{
const
char
*
alt
=
this
->
vin
->
getVCFRecord
().
getAlt
();
if
(
multiAllelicMode
)
{
parseAltAllele
(
alt
);
this
->
altAlleleToParse
=
altAllele
.
size
();
}
else
{
this
->
altAllele
.
resize
(
1
);
this
->
altAllele
[
0
]
=
alt
;
this
->
altAlleleToParse
=
1
;
}
}
}
assert
(
this
->
altAlleleToParse
>=
0
);
VCFRecord
&
r
=
this
->
vin
->
getVCFRecord
();
VCFPeople
&
people
=
r
.
getPeople
();
VCFIndividual
*
indv
;
buf
.
updateValue
(
"CHROM"
,
r
.
getChrom
());
buf
.
updateValue
(
"POS"
,
r
.
getPosStr
());
if
(
FLAG_outputID
)
{
buf
.
updateValue
(
"ID"
,
r
.
getID
());
}
buf
.
updateValue
(
"REF"
,
r
.
getRef
());
buf
.
updateValue
(
"ALT"
,
altAllele
[
altAllele
.
size
()
-
this
->
altAlleleToParse
]);
// genotype.Dimension(people.size(), 1);
this
->
sampleSize
=
people
.
size
();
this
->
variantName
.
resize
(
1
);
this
->
counter
.
resize
(
1
);
this
->
counter
.
back
().
reset
();
this
->
hemiRegion
.
resize
(
1
);
// get GT index. if you are sure the index will not change, call this
// function only once!
const
bool
useDosage
=
(
!
this
->
dosageTag
.
empty
());
int
genoIdx
;
if
(
useDosage
)
{
genoIdx
=
r
.
getFormatIndex
(
dosageTag
.
c_str
());
}
else
{
genoIdx
=
r
.
getFormatIndex
(
"GT"
);
}
const
int
GDidx
=
r
.
getFormatIndex
(
"GD"
);
const
int
GQidx
=
r
.
getFormatIndex
(
"GQ"
);
bool
isHemiRegion
=
this
->
parRegion
->
isHemiRegion
(
r
.
getChrom
(),
r
.
getPos
());
// e.g.: Loop each (selected) people in the same order as in the VCF
double
geno
;
const
int
altAlleleGT
=
this
->
altAllele
.
size
()
-
this
->
altAlleleToParse
+
1
;
for
(
int
i
=
0
;
i
<
sampleSize
;
i
++
)
{
indv
=
people
[
i
];
if
(
multiAllelicMode
)
{
geno
=
getGenotypeForAltAllele
(
*
indv
,
useDosage
,
isHemiRegion
,
(
*
sex
)[
i
],
genoIdx
,
GDidx
,
GQidx
,
altAlleleGT
);
}
else
{
geno
=
getGenotype
(
*
indv
,
useDosage
,
isHemiRegion
,
(
*
sex
)[
i
],
genoIdx
,
GDidx
,
GQidx
);
}
genotype
.
push_back
(
geno
);
counter
.
back
().
add
(
geno
);
}
// check frequency cutoffs
const
double
maf
=
counter
[
0
].
getMAF
();
if
((
this
->
freqMin
>
0.
&&
this
->
freqMin
>
maf
)
||
(
this
->
freqMax
>
0.
&&
this
->
freqMax
<
maf
))
{
--
this
->
altAlleleToParse
;
return
FAIL_FILTER
;
}
variantName
.
back
()
=
r
.
getChrom
();
variantName
.
back
()
+=
':'
;
variantName
.
back
()
+=
r
.
getPosStr
();
hemiRegion
.
back
()
=
isHemiRegion
;
assert
((
int
)
genotype
.
size
()
==
sampleSize
);
assign
(
genotype
,
sampleSize
,
1
,
g
);
g
->
SetColumnLabel
(
0
,
variantName
.
back
().
c_str
());
--
this
->
altAlleleToParse
;
return
SUCCEED
;
}
// end extractSingleGenotype()
int
loadMarkerFromVCF
(
const
std
::
string
&
fileName
,
const
std
::
string
&
marker
,
std
::
vector
<
std
::
string
>*
rowLabel
,
Matrix
*
genotype
)
{
if
(
!
rowLabel
||
!
genotype
)
{
// invalid parameter
return
-
1
;
}
Matrix
&
m
=
*
genotype
;
int
col
=
0
;
VCFInputFile
vin
(
fileName
);
vin
.
setRangeList
(
marker
);
while
(
vin
.
readRecord
())
{
VCFRecord
&
r
=
vin
.
getVCFRecord
();
VCFPeople
&
people
=
r
.
getPeople
();
VCFIndividual
*
indv
;
m
.
Dimension
(
people
.
size
(),
col
+
1
);
int
GTidx
=
r
.
getFormatIndex
(
"GT"
);
for
(
int
i
=
0
;
i
<
(
int
)
people
.
size
();
i
++
)
{
indv
=
people
[
i
];
// get GT index. if you are sure the index will not change,
// call this function only once!
if
(
GTidx
>=
0
)
{
// printf("%s ", indv->justGet(0).toStr()); // [0] meaning the first
// field of each individual
m
[
i
][
col
]
=
indv
->
justGet
(
GTidx
).
getGenotype
();
}
else
{
logger
->
error
(
"Cannot find GT field!"
);
return
-
1
;
}
}
if
(
col
==
0
)
{
// set-up names
rowLabel
->
resize
(
people
.
size
());
for
(
size_t
i
=
0
;
i
<
people
.
size
();
++
i
)
{
(
*
rowLabel
)[
i
]
=
people
[
i
]
->
getName
();
}
}
std
::
string
colLabel
=
r
.
getChrom
();
colLabel
+=
":"
;
colLabel
+=
r
.
getPosStr
();
m
.
SetColumnLabel
(
col
,
colLabel
.
c_str
());
++
col
;
}
return
0
;
}
bool
VCFGenotypeExtractor
::
setSiteFreqMin
(
const
double
f
)
{
if
(
f
<
0.0
||
f
>
1.0
)
{
return
false
;
}
this
->
freqMin
=
f
-
1e-10
;
// allow rounding error
return
true
;
}
bool
VCFGenotypeExtractor
::
setSiteFreqMax
(
const
double
f
)
{
if
(
f
<
0.0
||
f
>
1.0
)
{
return
false
;
}
this
->
freqMax
=
f
+
1e-10
;
// allow rounding error
return
true
;
}
void
VCFGenotypeExtractor
::
setSiteDepthMin
(
int
d
)
{
this
->
vin
->
setSiteDepthMin
(
d
);
}
void
VCFGenotypeExtractor
::
setSiteDepthMax
(
int
d
)
{
this
->
vin
->
setSiteDepthMax
(
d
);
}
// @return true if GD is valid
// if GD is missing, we will take GD = 0
bool
VCFGenotypeExtractor
::
checkGD
(
VCFIndividual
&
indv
,
int
gdIdx
)
{
if
(
!
needGD
)
return
true
;
int
gd
=
indv
.
justGet
(
gdIdx
).
toInt
();
if
(
this
->
GDmin
>
0
&&
gd
<
this
->
GDmin
)
return
false
;
if
(
this
->
GDmax
>
0
&&
gd
>
this
->
GDmax
)
return
false
;
return
true
;
}
bool
VCFGenotypeExtractor
::
checkGQ
(
VCFIndividual
&
indv
,
int
gqIdx
)
{
if
(
!
needGQ
)
return
true
;
int
gq
=
indv
.
justGet
(
gqIdx
).
toInt
();
if
(
this
->
GQmin
>
0
&&
gq
<
this
->
GQmin
)
return
false
;
if
(
this
->
GQmax
>
0
&&
gq
>
this
->
GQmax
)
return
false
;
return
true
;
}
void
VCFGenotypeExtractor
::
setGDmin
(
int
m
)
{
this
->
needGD
=
true
;
this
->
GDmin
=
m
;
}
void
VCFGenotypeExtractor
::
setGDmax
(
int
m
)
{
this
->
needGD
=
true
;
this
->
GDmax
=
m
;
}
void
VCFGenotypeExtractor
::
setGQmin
(
int
m
)
{
this
->
needGQ
=
true
;
this
->
GQmin
=
m
;
}
void
VCFGenotypeExtractor
::
setGQmax
(
int
m
)
{
this
->
needGQ
=
true
;
this
->
GQmax
=
m
;
}
void
VCFGenotypeExtractor
::
setSiteFile
(
const
std
::
string
&
fn
)
{
this
->
vin
->
setSiteFile
(
fn
);
}
void
VCFGenotypeExtractor
::
setSiteQualMin
(
int
q
)
{
this
->
vin
->
setSiteQualMin
(
q
);
}
void
VCFGenotypeExtractor
::
setSiteMACMin
(
int
n
)
{
this
->
vin
->
setSiteMACMin
(
n
);
}
int
VCFGenotypeExtractor
::
setAnnoType
(
const
std
::
string
&
s
)
{
return
this
->
vin
->
setAnnoType
(
s
.
c_str
());
}
void
VCFGenotypeExtractor
::
setRange
(
const
RangeList
&
l
)
{
this
->
vin
->
setRange
(
l
);
}
void
VCFGenotypeExtractor
::
setRangeList
(
const
std
::
string
&
l
)
{
this
->
vin
->
setRangeList
(
l
);
}
void
VCFGenotypeExtractor
::
setRangeFile
(
const
std
::
string
&
fn
)
{
this
->
vin
->
setRangeFile
(
fn
.
c_str
());
}
void
VCFGenotypeExtractor
::
includePeople
(
const
std
::
string
&
v
)
{
this
->
vin
->
includePeople
(
v
.
c_str
());
}
void
VCFGenotypeExtractor
::
includePeople
(
const
std
::
vector
<
std
::
string
>&
v
)
{
this
->
vin
->
includePeople
(
v
);
}
void
VCFGenotypeExtractor
::
includePeopleFromFile
(
const
std
::
string
&
fn
)
{
this
->
vin
->
includePeopleFromFile
(
fn
.
c_str
());
}
void
VCFGenotypeExtractor
::
excludePeople
(
const
std
::
string
&
v
)
{
this
->
vin
->
excludePeople
(
v
.
c_str
());
}
void
VCFGenotypeExtractor
::
excludePeople
(
const
std
::
vector
<
std
::
string
>&
v
)
{
this
->
vin
->
excludePeople
(
v
);
}
void
VCFGenotypeExtractor
::
excludePeopleFromFile
(
const
std
::
string
&
fn
)
{
this
->
vin
->
excludePeopleFromFile
(
fn
.
c_str
());
}
void
VCFGenotypeExtractor
::
excludePeople
(
const
std
::
vector
<
std
::
string
>&
sample
,
const
std
::
vector
<
int
>&
index
)
{
for
(
size_t
i
=
0
;
i
<
index
.
size
();
++
i
)
{
this
->
excludePeople
(
sample
[
i
]);
}
}
void
VCFGenotypeExtractor
::
excludeAllPeople
()
{
this
->
vin
->
excludeAllPeople
();
}
void
VCFGenotypeExtractor
::
enableAutoMerge
()
{
this
->
vin
->
enableAutoMerge
();
}
void
VCFGenotypeExtractor
::
getPeopleName
(
std
::
vector
<
std
::
string
>*
p
)
{
return
this
->
vin
->
getVCFHeader
()
->
getPeopleName
(
p
);
}
void
VCFGenotypeExtractor
::
getIncludedPeopleName
(
std
::
vector
<
std
::
string
>*
p
)
const
{
this
->
vin
->
getIncludedPeopleName
(
p
);
return
;
}
void
VCFGenotypeExtractor
::
parseAltAllele
(
const
char
*
s
)
{
stringTokenize
(
s
,
","
,
&
altAllele
);
}
double
VCFGenotypeExtractor
::
getGenotype
(
VCFIndividual
&
indv
,
const
bool
useDosage
,
const
bool
hemiRegion
,
const
int
sex
,
const
int
genoIdx
,
const
int
GDidx
,
const
int
GQidx
)
{
double
ret
;
if
(
genoIdx
>=
0
)
{
if
(
useDosage
)
{
if
(
!
hemiRegion
)
{
ret
=
(
indv
.
justGet
(
genoIdx
).
toDouble
());
}
else
{
// for male hemi region, imputated dosage is usually between 0 and 1
// need to multiply by 2.0
if
(
sex
==
PLINK_MALE
)
{
ret
=
(
indv
.
justGet
(
genoIdx
).
toDouble
()
*
2.0
);
}
else
{
ret
=
(
indv
.
justGet
(
genoIdx
).
toDouble
());
}
}
}
else
{
// use hard-coded genotypes
if
(
!
hemiRegion
)
{
ret
=
(
indv
.
justGet
(
genoIdx
).
getGenotype
());
}
else
{
if
(
sex
==
PLINK_MALE
)
{
ret
=
(
indv
.
justGet
(
genoIdx
).
getMaleNonParGenotype02
());
}
else
if
(
sex
==
PLINK_FEMALE
)
{
ret
=
(
indv
.
justGet
(
genoIdx
).
getGenotype
());
}
else
{
ret
=
(
MISSING_GENOTYPE
);
}
}
}
if
(
!
checkGD
(
indv
,
GDidx
)
||
!
checkGQ
(
indv
,
GQidx
))
{
return
MISSING_GENOTYPE
;
}
else
{
return
ret
;
}
}
else
{
logger
->
error
(
"Cannot find %s field!"
,
this
->
dosageTag
.
empty
()
?
"GT"
:
dosageTag
.
c_str
());
return
MISSING_GENOTYPE
;
}
}
double
VCFGenotypeExtractor
::
getGenotypeForAltAllele
(
VCFIndividual
&
indv
,
const
bool
useDosage
,
const
bool
hemiRegion
,
const
int
sex
,
const
int
genoIdx
,
const
int
GDidx
,
const
int
GQidx
,
const
int
alt
)
{
double
ret
;
if
(
genoIdx
>=
0
)
{
if
(
useDosage
)
{
if
(
!
hemiRegion
)
{
ret
=
(
indv
.
justGet
(
genoIdx
).
toDouble
());
}
else
{
// for male hemi region, imputated dosage is usually between 0 and 1
// need to multiply by 2.0
if
(
sex
==
PLINK_MALE
)
{
ret
=
(
indv
.
justGet
(
genoIdx
).
toDouble
()
*
2.0
);
}
else
{
ret
=
(
indv
.
justGet
(
genoIdx
).
toDouble
());
}
}
}
else
{
// use hard-coded genotypes
if
(
!
hemiRegion
)
{
ret
=
indv
.
justGet
(
genoIdx
).
countAltAllele
(
alt
);
}
else
{
if
(
sex
==
PLINK_MALE
)
{
// ret = (indv.justGet(genoIdx).getMaleNonParGenotype02());
ret
=
(
indv
.
justGet
(
genoIdx
).
countMaleNonParAltAllele2
(
alt
));
}
else
if
(
sex
==
PLINK_FEMALE
)
{
// ret = (indv.justGet(genoIdx).getGenotype());
ret
=
indv
.
justGet
(
genoIdx
).
countAltAllele
(
alt
);
}
else
{
ret
=
(
MISSING_GENOTYPE
);
}
}
}
if
(
!
checkGD
(
indv
,
GDidx
)
||
!
checkGQ
(
indv
,
GQidx
))
{
return
MISSING_GENOTYPE
;
}
else
{
return
ret
;
}
}
else
{
logger
->
error
(
"Cannot find %s field!"
,
this
->
dosageTag
.
empty
()
?
"GT"
:
dosageTag
.
c_str
());
return
MISSING_GENOTYPE
;
}
}
void
VCFGenotypeExtractor
::
assign
(
const
std
::
vector
<
double
>&
from
,
int
nrow
,
int
ncol
,
Matrix
*
to
)
{
assert
(
to
);
Matrix
&
out
=
*
to
;
out
.
Dimension
(
nrow
,
ncol
);
assert
((
int
)
from
.
size
()
==
nrow
*
ncol
);
for
(
int
i
=
0
;
i
<
nrow
;
++
i
)
{
for
(
int
j
=
0
;
j
<
ncol
;
++
j
)
{
out
[
i
][
j
]
=
from
[
nrow
*
j
+
i
];
}
}
}
src/VCFGenotypeExtractor.h
0 → 100644
View file @
96be9b56
#ifndef VCFGENOTYPEEXTRACTOR_H
#define VCFGENOTYPEEXTRACTOR_H
#include <string>
#include <vector>
#include "src/GenotypeExtractor.h"
// #include "base/ParRegion.h"
// #include "libsrc/MathMatrix.h"
// #include "libsrc/MathVector.h"
// class Matrix;
// class RangeList;
// class Result;
// class VCFExtractor;
// class VCFIndividual;
// class GenotypeCounter;
/**
* Extract genotype from file @param fileName at the marker @param marker,
* and store sample names in @param rowLabel and genotypes in @param genotype
* (dimension is numSample x 1)
* @return 0 if succeed
*/
int
loadMarkerFromVCF
(
const
std
::
string
&
fileName
,
const
std
::
string
&
marker
,
std
::
vector
<
std
::
string
>*
rowLabel
,
Matrix
*
genotype
);
class
VCFGenotypeExtractor
:
public
GenotypeExtractor
{
public:
explicit
VCFGenotypeExtractor
(
const
std
::
string
&
fn
);
virtual
~
VCFGenotypeExtractor
();
private:
VCFGenotypeExtractor
(
const
VCFGenotypeExtractor
&
);
VCFGenotypeExtractor
&
operator
=
(
const
VCFGenotypeExtractor
&
);
public:
/**
* @param g, store people by marker matrix
* @return 0 for success
*/
int
extractMultipleGenotype
(
Matrix
*
g
);
/**
* @return 0 for success
* @return -2 for reach end.
* @param g: people by 1 matrix, where column name is like "chr:pos"
* @param b: extract information, e.g. "1\t100\tA\tC"
*/
int
extractSingleGenotype
(
Matrix
*
g
,
Result
*
b
);
/* Site filters */
bool
setSiteFreqMin
(
const
double
f
);
bool
setSiteFreqMax
(
const
double
f
);
void
setSiteDepthMin
(
int
d
);
void
setSiteDepthMax
(
int
d
);
// @return true if GD is valid
// if GD is missing, we will take GD = 0
bool
checkGD
(
VCFIndividual
&
indv
,
int
gdIdx
);
bool
checkGQ
(
VCFIndividual
&
indv
,
int
gqIdx
);
void
setGDmin
(
int
m
);
void
setGDmax
(
int
m
);
void
setGQmin
(
int
m
);
void
setGQmax
(
int
m
);
void
setSiteFile
(
const
std
::
string
&
fn
);
void
setSiteQualMin
(
int
q
);
void
setSiteMACMin
(
int
n
);
int
setAnnoType
(
const
std
::
string
&
s
);
void
setRange
(
const
RangeList
&
l
);
void
setRangeList
(
const
std
::
string
&
l
);
void
setRangeFile
(
const
std
::
string
&
fn
);
void
includePeople
(
const
std
::
string
&
v
);
void
includePeople
(
const
std
::
vector
<
std
::
string
>&
v
);
void
includePeopleFromFile
(
const
std
::
string
&
fn
);
void
excludePeople
(
const
std
::
string
&
v
);
void
excludePeopleFromFile
(
const
std
::
string
&
fn
);
void
excludePeople
(
const
std
::
vector
<
std
::
string
>&
sample
);
void
excludePeople
(
const
std
::
vector
<
std
::
string
>&
sample
,
const
std
::
vector
<
int
>&
index
);
void
excludeAllPeople
();
void
enableAutoMerge
();
void
getPeopleName
(
std
::
vector
<
std
::
string
>*
p
);
void
getIncludedPeopleName
(
std
::
vector
<
std
::
string
>*
p
)
const
;
const
std
::
vector
<
GenotypeCounter
>&
getGenotypeCounter
()
const
{
return
this
->
counter
;
}
/**
* @return weigth, its length equals to # of markers
*/
// std::vector<double>& getWeight() { return this->weight; };
void
setDosageTag
(
const
std
::
string
&
tag
)
{
if
(
tag
.
empty
())
return
;
this
->
dosageTag
=
tag
;
}
void
unsetDosageTag
()
{
this
->
dosageTag
.
clear
();
}
bool
isDosage
()
const
{
return
!
this
->
dosageTag
.
empty
();
}
void
setParRegion
(
ParRegion
*
p
)
{
this
->
parRegion
=
p
;
}
// Sex (1=male; 2=female; other=unknown)
void
setSex
(
const
std
::
vector
<
int
>*
sex
)
{
this
->
sex
=
sex
;
}
// coding male chromX as 0/2 instead of 0/1
// similarly, for dosage, just multiply 2.0 from original dosage
// void enableClaytonCoding() { this->claytonCoding = true; }
// void disableClaytonCoding() { this->claytonCoding = false; }