Commit 98ae3b65 authored by Raquel Bromberg's avatar Raquel Bromberg
Browse files

cm.cpp and mctr.h sped up: In mctr, get_aa(...) replaced wtih 2 static arrays,...

cm.cpp and mctr.h sped up: In mctr, get_aa(...) replaced wtih 2 static arrays, vector .at() calls replaced by [], increment_bits_it0(...) rewritten with get_score(...) function called only once per sequence.
parent 85b8053f
......@@ -48,7 +48,7 @@ int main(int argc, char* argv[])
cout<<"Calling run on real data"<<endl;
mctr m(full_path,"",fs,fo,partial_run);
m.run_it0();
cout<<"Calling run on scrambled data"<<endl;
mctr mscr(full_path,"scr",fs,fo,partial_run);
mscr.run_it0();
......
......@@ -23,6 +23,7 @@ Writes the histograms for each pair to DATA/
using namespace std;
const int TAGLENGTH=20;
const bool MCTR_DEBUG=false;
const bool GENEIDRUN=true;
const int CON_MAP_CUTOFF=3; //"cmc" variable.
......@@ -67,6 +68,11 @@ class mctr
int offset;
int recursion_counter;
ofstream logstream;
char array_c[][TAGLENGTH]; //20 is the tag length.
int *gnums_arr;
int *gene_ids_arr;
vector<string> array;
vector<int> gnums;
vector<int> gene_ids;
......@@ -78,6 +84,8 @@ class mctr
int run_type; //0=normal run, 1=ref_set_run, 2=final pair run
bool partial_run;
char partial_run_lead;
int get_aa_int[30];
char get_aa_char[20];
//Variables necessary for HGT correction on subsequent passes through mctr.
int prob1; //ordinal of one of the 2 problematic proteomes in the whole run.
......@@ -120,19 +128,23 @@ class mctr
int calculate_max_bit_sum();
void open_info_file(ifstream& infostream);
void create_directories();
char get_aa_char(int i);
char get_aa_char_fn(int i);
void set_bitsv();
void set_get_aa_arrays();
void set_array_it0(int i);
void set_array_it1(int i);
void set_array_it2(int i);
void find_matches_it0(int col, int first_row, int last_row, string seq);//, ofstream& alalstream, ofstream& alstream);
void find_matches_it1(int col, int first_row, int last_row, string seq);//, ofstream& alalstream, ofstream& alstream);
void find_matches_it2(int col, int first_row, int last_row, string seq);//, ofstream& alalstream, ofstream& alstream);
void increment_bits_it0(int first_row, int last_row,string seq);//,ofstream& alalstream, ofstream& alstream);
void increment_bits_it1(int first_row, int last_row,string seq);//,ofstream& alalstream, ofstream& alstream);
void increment_bits_it2(int first_row, int last_row,string seq);//,ofstream& alalstream, ofstream& alstream);
int set_gh(bool gh[], vector<int>& gnums, int first_row, int last_row);
void set_score_array(double * score_array,string seq,int count,int gen_ord);
void find_matches_it0(int col, int first_row, int last_row, string seq);
void find_matches_it1(int col, int first_row, int last_row, string seq);
void find_matches_it2(int col, int first_row, int last_row, string seq);
void increment_bits_it0(int first_row, int last_row,string seq);
void increment_bits_it1(int first_row, int last_row,string seq);
void increment_bits_it2(int first_row, int last_row,string seq);
int get_aa(char aa);
void write_out_data_it0();
void write_out_data_it1();
......@@ -140,7 +152,6 @@ class mctr
void read_in_files();
double get_score(int i1,int i2, string seq);
double dither(double d);
void populate_bitterv();
int get_bitterv_index(int i1, int i2);
int get_sos_cutoff();
......@@ -169,6 +180,7 @@ mctr::mctr(string full_path_par, string dir_par, int filtering_steps_par, int fi
cout<<"mctr::mctr(string,string,int,int)"<<endl;
run_type=0;
partial_run=partial_run_par;
set_get_aa_arrays();
//main variables.
u.extract_paths(full_path_par,path,bfn);
......@@ -226,6 +238,7 @@ mctr::mctr(string full_path_par, string dir_par, int filtering_steps_par, int fi
//Constructor 2. Pair run against reference.
mctr::mctr(string full_path_par, string dir_par, int filtering_steps_par, int filtering_option_par, int prob1_par, int prob2_par,vector<string>& ref_tags_par)
{
set_get_aa_arrays();
run_type=1;
u.extract_paths(full_path_par,path,bfn);
u.get_rss(path+"/"+bfn+"/"+bfn+"_info.txt",refsetsize);
......@@ -301,6 +314,8 @@ mctr::mctr(string full_path_par, string dir_par, int filtering_steps_par, int fi
//Constructor 3. Pair run against just itself.
mctr::mctr(string full_path_par, string dir_par, int filtering_steps_par, int filtering_option_par, int prob1_par, int prob2_par)
{
set_get_aa_arrays();
if(MCTR_DEBUG)
{
cout<<"mctr::mctr(string full_path_par, string dir_par, int filtering_steps_par, int filtering_option_par, int prob1_par, int prob2_par)"<<endl;
......@@ -352,7 +367,6 @@ mctr::mctr(string full_path_par, string dir_par, int filtering_steps_par, int fi
recursion_counter=0;
}
void mctr::run_it0()
{
if(MCTR_DEBUG)
......@@ -369,15 +383,18 @@ void mctr::run_it0()
gene_ids.clear();
set_array_it0(i);
cout<<"First 3 rows of array: "<<endl;
cout<<array.at(0)<<" "<<gnums.at(0)<<" "<<gene_ids.at(0)<<endl;
cout<<array.at(1)<<" "<<gnums.at(1)<<" "<<gene_ids.at(1)<<endl;
cout<<array.at(2)<<" "<<gnums.at(2)<<" "<<gene_ids.at(2)<<endl;
//cout<<array.at(0)<<" "<<gnums.at(0)<<" "<<gene_ids.at(0)<<endl;
//cout<<array.at(1)<<" "<<gnums.at(1)<<" "<<gene_ids.at(1)<<endl;
//cout<<array.at(2)<<" "<<gnums.at(2)<<" "<<gene_ids.at(2)<<endl;
cout<<array[0]<<" "<<gnums[0]<<" "<<gene_ids[0]<<endl;
cout<<array[1]<<" "<<gnums[1]<<" "<<gene_ids[1]<<endl;
cout<<array[2]<<" "<<gnums[2]<<" "<<gene_ids[2]<<endl;
cout<<"Last row of array: "<<endl;
cout<<array.at(array.size()-1)<<" "<<gnums.at(gnums.size()-1)<<" "<<gene_ids.at(gene_ids.size()-1)<<endl;
cout<<array[array.size()-1]<<" "<<gnums[gnums.size()-1]<<" "<<gene_ids[gene_ids.size()-1]<<endl;
cout<<"Second to last row of array: "<<endl;
cout<<array.at(array.size()-2)<<endl;
cout<<array[array.size()-2]<<endl;
cout<<"Last row of array: "<<endl;
cout<<array.at(array.size()-1)<<endl;
cout<<array[array.size()-1]<<endl;
cout<<"Entering recursive function find_matches_it0(...)..."<<endl;
find_matches_it0(0,0,array.size()-1,"");//, aastr, astr);
cout<<"Escaped recursive function find_matches(...)!"<<endl;
......@@ -388,17 +405,18 @@ void mctr::run_it0()
array.clear();
gnums.clear();
gene_ids.clear();
set_array_it0(aas.get_aa(partial_run_lead));
// set_array_it0(aas.get_aa(partial_run_lead));
set_array_it0(get_aa_int[(int)partial_run_lead-65]);
cout<<"First 3 rows of array: "<<endl;
cout<<array.at(0)<<" "<<gnums.at(0)<<" "<<gene_ids.at(0)<<endl;
cout<<array.at(1)<<" "<<gnums.at(1)<<" "<<gene_ids.at(1)<<endl;
cout<<array.at(2)<<" "<<gnums.at(2)<<" "<<gene_ids.at(2)<<endl;
cout<<array[0]<<" "<<gnums[0]<<" "<<gene_ids[0]<<endl;
cout<<array[1]<<" "<<gnums[1]<<" "<<gene_ids[1]<<endl;
cout<<array[2]<<" "<<gnums[2]<<" "<<gene_ids[2]<<endl;
cout<<"Last row of array: "<<endl;
cout<<array.at(array.size()-1)<<" "<<gnums.at(gnums.size()-1)<<" "<<gene_ids.at(gene_ids.size()-1)<<endl;
cout<<array[array.size()-1]<<" "<<gnums[gnums.size()-1]<<" "<<gene_ids[gene_ids.size()-1]<<endl;
cout<<"Second to last row of array: "<<endl;
cout<<array.at(array.size()-2)<<endl;
cout<<array[array.size()-2]<<endl;
cout<<"Last row of array: "<<endl;
cout<<array.at(array.size()-1)<<endl;
cout<<array[array.size()-1]<<endl;
cout<<"Entering recursive function find_matches_it0(...)..."<<endl;
find_matches_it0(0,0,array.size()-1,"");//, aastr, astr);
cout<<"Escaped recursive function find_matches(...)!"<<endl;
......@@ -485,9 +503,9 @@ void mctr::run_it2()
if(MCTR_DEBUG)
{
cout<<prob1<<" "<<prob2<<" Last row of array: "<<endl;
cout<<array.at(array.size()-1)<<" "<<gnums.at(gnums.size()-1)<<" "<<gene_ids.at(gene_ids.size()-1)<<endl;
cout<<array[array.size()-1]<<" "<<gnums[gnums.size()-1]<<" "<<gene_ids[gene_ids.size()-1]<<endl;
cout<<"Second to last row of array: "<<endl;
cout<<array.at(array.size()-2)<<" "<<gnums.at(gnums.size()-2)<<" "<<gene_ids.at(gene_ids.size()-2)<<endl;
cout<<array[array.size()-2]<<" "<<gnums[gnums.size()-2]<<" "<<gene_ids[gene_ids.size()-2]<<endl;
cout<<prob1<<" "<<prob2<<" "<<msc<<endl;
}
......@@ -793,10 +811,11 @@ void mctr::set_array_it1(int i)
logstream<<"void mctr::set_array_it1(int i)"<<endl;
logstream<<"i="<<i<<endl;
char caa = aas.get_aa(i);
// char caa = aas.get_aa(i);
char caa = get_aa_char[i];
string rs=ref_tags.at(ref_index); //set to 0 in constructor.
string ps=problem_tags.at(ppv_index);
string rs=ref_tags[ref_index]; //set to 0 in constructor.
string ps=problem_tags[ppv_index];
if(MCTR_DEBUG)
{
......@@ -847,7 +866,8 @@ void mctr::set_array_it1(int i)
ref_index++;
if(ref_index<ref_tags.size())
{
rs=ref_tags.at(ref_index);
// rs=ref_tags.at(ref_index);
rs=ref_tags[ref_index];
}
}
else
......@@ -863,7 +883,8 @@ void mctr::set_array_it1(int i)
ppv_index++;
if(ppv_index<problem_tags.size())
{
ps=problem_tags.at(ppv_index);
// ps=problem_tags.at(ppv_index);
ps=problem_tags[ppv_index];
}
}
}
......@@ -906,7 +927,8 @@ void mctr::set_array_it1(int i)
ref_index++;
if(ref_index<ref_tags.size())
{
rs=ref_tags.at(ref_index);
// rs=ref_tags.at(ref_index);
rs=ref_tags[ref_index];
}
}
while(ps[0]==caa && ppv_index<problem_tags.size())
......@@ -922,7 +944,8 @@ void mctr::set_array_it1(int i)
ppv_index++;
if(ppv_index<problem_tags.size())
{
ps=problem_tags.at(ppv_index);
// ps=problem_tags.at(ppv_index);
ps=problem_tags[ppv_index];
}
}
......@@ -947,8 +970,12 @@ set_array_2()
void mctr::set_array_it2(int i)
{
logstream<<"In mctr_optim1::set_array(int i). int i = "<<i<<endl;
char caa = aas.get_aa(i);
string ps=problem_tags.at(ppv_index);
// char caa = aas.get_aa(i);
char caa=get_aa_char[i];
// string ps=problem_tags.at(ppv_index);
string ps=problem_tags[ppv_index];
while(ps[0]==caa && ppv_index<problem_tags.size())
{
......@@ -963,7 +990,8 @@ void mctr::set_array_it2(int i)
ppv_index++;
if(ppv_index<problem_tags.size())
{
ps=problem_tags.at(ppv_index);
// ps=problem_tags.at(ppv_index);
ps=problem_tags[ppv_index];
}
}
......@@ -982,15 +1010,18 @@ void mctr::find_matches_it0(int col, int first_row, int last_row, string seq)
if(MCTR_DEBUG)
{
cout<<"find_matches_it0(...): col = "<<col<<" first_row = "<<first_row<<" last_row = "<<last_row<<" seq = "<<seq<<endl;
cout<<"array.at(first_row) = " <<array.at(first_row)<<endl;
cout<<"array.at(last_row) = "<<array.at(last_row)<<endl;
//cout<<"array.at(first_row) = " <<array.at(first_row)<<endl;
cout<<"array.at(first_row) = " <<array[first_row]<<endl;
// cout<<"array.at(last_row) = "<<array.at(last_row)<<endl;
cout<<"array.at(last_row) = "<<array[last_row]<<endl;
}
recursion_counter++;
if(recursion_counter%100000==0)
{
cout<<"first_row = "<<first_row<<endl;
cout<<array.at(first_row)<<" "<<gnums.at(first_row)<<" "<<gene_ids.at(first_row)<<endl;
// cout<<array.at(first_row)<<" "<<gnums.at(first_row)<<" "<<gene_ids.at(first_row)<<endl;
cout<<array[first_row]<<" "<<gnums[first_row]<<" "<<gene_ids[first_row]<<endl;
}
//IF WE'VE ONLY GOT ONE SEQUENCE
......@@ -1009,15 +1040,18 @@ void mctr::find_matches_it0(int col, int first_row, int last_row, string seq)
else
{
//SHORTCUT: IF THE ENTIRE BLOCK HAS THE SAME LEADING CHARACTER
if(array.at(last_row)[col]==array.at(first_row)[col])
// if(array.at(last_row)[col]==array.at(first_row)[col])
if(array[last_row][col]==array[first_row][col])
{
int aa_index=get_aa(array.at(first_row)[col]);
// int aa_index=get_aa(array.at(first_row)[col]);
// int aa_index=get_aa_int[(int)(array.at(first_row)[col])-65];
int aa_index=get_aa_int[(int)(array[first_row][col])-65];
//IF THE SINGLE-CHARACTER COL IS NOT A ^ COLUMN
if(aa_index>=0 && aa_index<20)
{
find_matches_it0(col+1,first_row,last_row,seq+array.at(first_row)[col]);
increment_bits_it0(first_row,last_row,seq+array.at(first_row)[col]);
find_matches_it0(col+1,first_row,last_row,seq+array[first_row][col]);
increment_bits_it0(first_row,last_row,seq+array[first_row][col]);
}
}
......@@ -1029,14 +1063,16 @@ void mctr::find_matches_it0(int col, int first_row, int last_row, string seq)
while(index_start<last_row)
{
char leader=array.at(index_start)[col];
// char leader=array.at(index_start)[col];
char leader=array[index_start][col];
while(index_end<=last_row && array.at(index_end)[col]==leader)
while(index_end<=last_row && array[index_end][col]==leader)
{
index_end++;
}
int aa_index = get_aa(leader);
// int aa_index = get_aa(leader);
int aa_index = get_aa_int[(int)leader-65];
//IF THEY'RE THE SAME VALUE (SHOULD NEVER HAPPEN)
if(index_end==index_start)
......@@ -1073,8 +1109,10 @@ void mctr::find_matches_it1(int col, int first_row, int last_row, string seq)
if(MCTR_DEBUG)
{
cout<<"col = "<<col<<" first_row = "<<first_row<<" last_row = "<<last_row<<" seq = "<<seq<<endl;
cout<<"array.at(first_row) = " <<array.at(first_row)<<endl;
cout<<"array.at(last_row) = "<<array.at(last_row)<<endl;
// cout<<"array.at(first_row) = " <<array.at(first_row)<<endl;
// cout<<"array.at(last_row) = "<<array.at(last_row)<<endl;
cout<<"array.at(first_row) = " <<array[first_row]<<endl;
cout<<"array.at(last_row) = "<<array[last_row]<<endl;
}
recursion_counter++;
......@@ -1095,15 +1133,20 @@ void mctr::find_matches_it1(int col, int first_row, int last_row, string seq)
else
{
//SHORTCUT: IF THE ENTIRE BLOCK HAS THE SAME LEADING CHARACTER
if(array.at(last_row)[col]==array.at(first_row)[col])
// if(array.at(last_row)[col]==array.at(first_row)[col])
if(array[last_row][col]==array[first_row][col])
{
int aa_index=get_aa(array.at(first_row)[col]);
// int aa_index=get_aa(array.at(first_row)[col]);
// int aa_index=get_aa_int[(int)(array.at(first_row)[col])-65];
int aa_index=get_aa_int[(int)(array[first_row][col])-65];
//IF THE SINGLE-CHARACTER COL IS NOT A ^ COLUMN
if(aa_index>=0 && aa_index<20)
{
find_matches_it1(col+1,first_row,last_row,seq+array.at(first_row)[col]);
increment_bits_it1(first_row,last_row,seq+array.at(first_row)[col]);
// find_matches_it1(col+1,first_row,last_row,seq+array.at(first_row)[col]);
// increment_bits_it1(first_row,last_row,seq+array.at(first_row)[col]);
find_matches_it1(col+1,first_row,last_row,seq+array[first_row][col]);
increment_bits_it1(first_row,last_row,seq+array[first_row][col]);
}
}//end of shortcut
......@@ -1115,14 +1158,16 @@ void mctr::find_matches_it1(int col, int first_row, int last_row, string seq)
while(index_start<last_row)
{
char leader=array.at(index_start)[col];
// char leader=array.at(index_start)[col];
char leader=array[index_start][col];
while(index_end<=last_row && array.at(index_end)[col]==leader)
while(index_end<=last_row && array[index_end][col]==leader)
{
index_end++;
}
int aa_index = get_aa(leader);
// int aa_index = get_aa(leader);
int aa_index = get_aa_int[(int)leader-65];
//IF THEY'RE THE SAME VALUE (SHOULD NEVER HAPPEN)
if(index_end==index_start)
......@@ -1152,15 +1197,18 @@ void mctr::find_matches_it2(int col, int first_row, int last_row, string seq)//,
if(MCTR_DEBUG)
{
cout<<"col = "<<col<<" first_row = "<<first_row<<" last_row = "<<last_row<<" seq = "<<seq<<endl;
cout<<"array.at(first_row) = " <<array.at(first_row)<<endl;
cout<<"array.at(last_row) = "<<array.at(last_row)<<endl;
// cout<<"array.at(first_row) = " <<array.at(first_row)<<endl;
// cout<<"array.at(last_row) = "<<array.at(last_row)<<endl;
cout<<"array.at(first_row) = " <<array[first_row]<<endl;
cout<<"array.at(last_row) = "<<array[last_row]<<endl;
}
recursion_counter++;
if(recursion_counter%100000==0)
{
cout<<"first_row = "<<first_row<<" for prob1="<<prob1<<" prob2="<<prob2<<endl;
cout<<array.at(first_row)<<" "<<gnums.at(first_row)<<" "<<gene_ids.at(first_row)<<endl;
// cout<<array.at(first_row)<<" "<<gnums.at(first_row)<<" "<<gene_ids.at(first_row)<<endl;
cout<<array[first_row]<<" "<<gnums[first_row]<<" "<<gene_ids[first_row]<<endl;
}
//IF WE'VE ONLY GOT ONE SEQUENCE
......@@ -1179,15 +1227,20 @@ void mctr::find_matches_it2(int col, int first_row, int last_row, string seq)//,
else
{
//SHORTCUT: IF THE ENTIRE BLOCK HAS THE SAME LEADING CHARACTER
if(array.at(last_row)[col]==array.at(first_row)[col])
// if(array.at(last_row)[col]==array.at(first_row)[col])
if(array[last_row][col]==array[first_row][col])
{
int aa_index=get_aa(array.at(first_row)[col]);
// int aa_index=get_aa(array.at(first_row)[col]);
// int aa_index=get_aa_int[(int)array.at(first_row)[col]-65];
int aa_index=get_aa_int[(int)array[first_row][col]-65];
//IF THE SINGLE-CHARACTER COL IS NOT A ^ COLUMN
if(aa_index>=0 && aa_index<20)
{
find_matches_it2(col+1,first_row,last_row,seq+array.at(first_row)[col]);
increment_bits_it2(first_row,last_row,seq+array.at(first_row)[col]);
// find_matches_it2(col+1,first_row,last_row,seq+array.at(first_row)[col]);
// increment_bits_it2(first_row,last_row,seq+array.at(first_row)[col]);
find_matches_it2(col+1,first_row,last_row,seq+array[first_row][col]);
increment_bits_it2(first_row,last_row,seq+array[first_row][col]);
}
}//end of shortcut
......@@ -1199,14 +1252,15 @@ void mctr::find_matches_it2(int col, int first_row, int last_row, string seq)//,
while(index_start<last_row)
{
char leader=array.at(index_start)[col];
char leader=array[index_start][col];
while(index_end<=last_row && array.at(index_end)[col]==leader)
while(index_end<=last_row && array[index_end][col]==leader)
{
index_end++;
}
int aa_index = get_aa(leader);
// int aa_index = get_aa(leader);
int aa_index = get_aa_int[(int)leader-65];
//IF THEY'RE THE SAME VALUE (SHOULD NEVER HAPPEN)
if(index_end==index_start)
......@@ -1241,17 +1295,38 @@ void mctr::increment_bits_it0(int first_row, int last_row,string seq)//,ofstream
cout<<"first_row="<<first_row<<" last_row="<<last_row<<" seq="<<seq<<endl;
}
set<int> gnum_holder; //using a set to deal with repeats within the same organism (sets are non-redundant)
bool store_gene_ids=false; //Used for storing gene numbers in bitterv because their score >= match_score_cutoff. Even when msc is true,
bool * gh; //replacing gnum_holder.
gh = new bool[number_of_genomes];
// cout<<"number of genomes="<<number_of_genomes<<endl;
for(int i=0; i<number_of_genomes; i++)
{
gh[i]=false;
}
bool full=false;
set<int> gnum_diag_hit;
pair<set<int>::iterator,bool> ret;
int num_uniq_genomes = set_gh(gh,gnums,first_row,last_row);
// cout<<"Top of increment_bits_it0..."<<endl;
// cout<<"size of gh = number_of_genomes = "<<number_of_genomes<<endl;
// cout<<"num_uniq_genomes = "<<num_uniq_genomes<<endl;
// cout<<"Contents of gh:"<<endl;
// for(int i=0; i<number_of_genomes; i++)
// {
/// cout<<i<<" "<<gh[i]<<endl;
// }
// set<int> gnum_holder; //using a set to deal with repeats within the same organism (sets are non-redundant)
//bool store_gene_ids=false; //Used for storing gene numbers in bitterv because their score >= match_score_cutoff.
// bool full=false;
//set<int> gnum_diag_hit;
//pair<set<int>::iterator,bool> ret;
//Store all ordinal IDs in a set.
for(int i=first_row; i<=last_row && !full; i++)
/* for(int i=first_row; i<=last_row && !full; i++)
{
int gnum=gnums.at(i);
// int gnum=gnums.at(i);
int gnum=gnums[i];
ret = gnum_holder.insert(gnum);
if(gnum_holder.size()>=number_of_genomes && gnum_diag_hit.size()>=number_of_genomes)
{
......@@ -1262,69 +1337,202 @@ void mctr::increment_bits_it0(int first_row, int last_row,string seq)//,ofstream
gnum_diag_hit.insert(gnum);
}
}
*/
if(MCTR_DEBUG)
{
cout<<"In increment_bits_it0(...)"<<endl;
cout<<"first_row = "<<first_row<<" last_row = "<<last_row<<" seq = "<<seq<<endl;
cout<<"ret = ";
set<int>::iterator it;
for(it=gnum_holder.begin(); it!=gnum_holder.end(); it++)
// cout<<"ret = ";
//set<int>::iterator it;
/* for(it=gnum_holder.begin(); it!=gnum_holder.end(); it++)
{
cout<<*it<<" ";
}
cout<<endl;
*/
//cout<<endl;
}
//Case 1: No matches between organisms or within organisms.
if(gnum_holder.size()<=1 && gnum_diag_hit.size()==0)
// if(gnum_holder.size()<=1 && gnum_diag_hit.size()==0)
if(num_uniq_genomes<=1)
{
// cout<<"Doing nothing"<<endl;
//do nothing
}
//Case 2: A match within an organism and not between organisms
else if(gnum_holder.size()<=1 && gnum_diag_hit.size()>=1)
/* else if(gnum_holder.size()<=1 && gnum_diag_hit.size()>=1)
{
set<int>::iterator it3;
for(it3=gnum_diag_hit.begin(); it3!=gnum_diag_hit.end(); it3++)
{
int it3_int = *it3;
double score = dither(get_score(it3_int,it3_int,seq));
double score = get_score(it3_int,it3_int,seq);
corr_array->increment_element(it3_int,it3_int,(int)(score+.5));
}
}
}*/
//Case 3: A match between ALL organisms
else if(gnum_holder.size()>=number_of_genomes) //shortcut
// else if(gnum_holder.size()>=number_of_genomes) //shortcut
else if(num_uniq_genomes==number_of_genomes)
{
// cout<<"full sized"<<endl;
double * score_array;
score_array = new double[seq.length()*num_uniq_genomes];
//set a vector of scores
int count=0;
for(int i=0; i<number_of_genomes; i++)
{
if(gh[i])
{
set_score_array(score_array,seq,count,i);
count++;
}
}
for(int i=0; i<number_of_genomes-1; i++)
{
for(int j=i+1; j<number_of_genomes; j++)
{
double score = dither(get_score(i,j,seq));
double score=0;
for(int k=0; k<seq.length(); k++)
{
score+=score_array[(i*seq.length())+k]+score_array[(j*seq.length())+k];
}
corr_array->increment_element(i,j,(int)(score+.5));
if(seq.length()>=mbkl)
{
bitterv.at(get_bitterv_index(i,j)).add_new_score(score);
bitterv[get_bitterv_index(i,j)].add_new_score(score);
}
}
}
if(gnum_diag_hit.size()>0)
/* for(int i=0; i<number_of_genomes-1; i++)
{
for(int j=i+1; j<number_of_genomes; j++)
{
double score = get_score(i,j,seq);
corr_array->increment_element(i,j,(int)(score+.5));
if(seq.length()>=mbkl)
{
// bitterv.at(get_bitterv_index(i,j)).add_new_score(score);
bitterv[get_bitterv_index(i,j)].add_new_score(score);
}
}
}
*/
/* if(gnum_diag_hit.size()>0)
{
set<int>::iterator it1;
for(it1=gnum_diag_hit.begin(); it1!=gnum_diag_hit.end(); it1++)
{
int it1_int = *it1;
double score = dither(get_score(it1_int,it1_int,seq));
double score = get_score(it1_int,it1_int,seq);
corr_array->increment_element(it1_int,it1_int,(int)(score+.5));
}
}
}*/
delete[] score_array;
}
//Case 4: A match between SOME non-identical organisms
else
{
set<int>::iterator it1;
/* cout<<"Case 4: Some but not all have matches."<<endl;
cout<<"num_uniq_genomes = "<<num_uniq_genomes<<endl;
cout<<"number_of_genomes = "<<number_of_genomes<<endl;
cout<<"seq="<<seq<<endl;
*/
//new code
double * score_array;
score_array = new double[seq.length()*num_uniq_genomes];
// cout<<"Size of score_array = "<<seq.length()*num_uniq_genomes<<endl;
//set a vector of scores
// for(int i=0; i<number_of_genomes; i++)
// cout<<"In Case 4: going through set_score_array loop..."<<endl;
int count=0;
for(int i=0; i<number_of_genomes; i++)
{