Commit 9db65c61 authored by Raquel Bromberg's avatar Raquel Bromberg

mdist.h now only writes out 2 matrices

parent 6b722927
......@@ -365,7 +365,7 @@ void mdist::shear_data()
*/
void mdist::calculate_all()
{
{
for(int i=0; i<plots.size(); i++)
{
plots.at(i).reset_all();
......@@ -380,7 +380,7 @@ void mdist::calculate_all()
#pragma omp parallel for
for(int i=0; i<plots.size(); i++)
{
// cout<<"Calculating slope for "<<i<<"/"<<plots.size()<<endl;
cout<<"Calculating slope for "<<i<<"/"<<plots.size()<<endl;
if(count%1000==0)
{
#pragma omp critical
......@@ -391,7 +391,7 @@ void mdist::calculate_all()
plots.at(i).calculate_slope(0, 1, WEIGHT,"raw");
}
write_out_distance_matrix("raw");
// write_out_distance_matrix("raw");
// cout<<"finished writing out distance matrices 1"<<endl;
//cout<<"here 1.1"<<endl;
......@@ -454,7 +454,7 @@ void mdist::calculate_all()
}
cout<<"Completed double exponential fit calculations."<<endl;
write_out_exp_dm(false);
// write_out_exp_dm(false);
write_out_fits_vector(false);
// write_out_msc(false);
}
......@@ -807,8 +807,8 @@ void mdist::write_out_distance_matrix(string details)
//string edmfile = path+"/"+bfn+"/DISTANCE_MATRICES"+addon+"/dm_linpolycomb_"+details+"_entropy.txt";
//string dmfile2 = path+"/"+bfn+"/DISTANCE_MATRICES"+addon+"/dm_linear_"+details+".txt";
//string edmfile2 = path+"/"+bfn+"/DISTANCE_MATRICES"+addon+"/dm_linear_"+details+"_entropy.txt";
string dmfile3 = path+"/"+bfn+"/DISTANCE_MATRICES"+addon+"/dm_polynomial_"+details+".txt";
string edmfile3 = path+"/"+bfn+"/DISTANCE_MATRICES"+addon+"/dm_polynomial_"+details+"_entropy.txt";
// string dmfile3 = path+"/"+bfn+"/DISTANCE_MATRICES"+addon+"/dm_polynomial_"+details+".txt";
// string edmfile3 = path+"/"+bfn+"/DISTANCE_MATRICES"+addon+"/dm_polynomial_"+details+"_entropy.txt";
string enlfile = path+"/"+bfn+"/DISTANCE_MATRICES"+addon+"/dm_polynomial_"+details+"_entropy_nonlinearity.txt";
string enlfiler = path+"/"+bfn+"/DISTANCE_MATRICES"+addon+"/dm_polynomial_"+details+"_entropy_nonlinearity_taxonomy.txt";
......@@ -816,33 +816,33 @@ void mdist::write_out_distance_matrix(string details)
//open_file(entoutstream,edmfile);
//open_file(linstream,dmfile2);
//open_file(elinstream,edmfile2);
open_file(polystream,dmfile3);
cout<<"dmfile3 = "<<dmfile3<<endl;
open_file(epolystream,edmfile3);
// open_file(polystream,dmfile3);
// cout<<"dmfile3 = "<<dmfile3<<endl;
// open_file(epolystream,edmfile3);
open_file(enlpolystream,enlfile);
open_file(enlpolystreamr,enlfiler);
logstream<<"Successfully opened streams for 8 distance matrices:"<<endl;
logstream<<"Successfully opened streams for 2 distance matrices:"<<endl;
//outstream<<num_orgs<<endl;
//linstream<<num_orgs<<endl;
//entoutstream<<num_orgs<<endl;
//elinstream<<num_orgs<<endl;
polystream<<num_orgs<<endl;
cout<<"just wrote num_orgs to polystream - please check. num_orgs = "<<num_orgs<<endl;
epolystream<<num_orgs<<endl;
// polystream<<num_orgs<<endl;
// cout<<"just wrote num_orgs to polystream - please check. num_orgs = "<<num_orgs<<endl;
// epolystream<<num_orgs<<endl;
enlpolystream<<num_orgs<<endl;
enlpolystreamr<<num_orgs<<endl;
min_slope=-1;
max_slope=-1;
// double w = 1.0-1.0/2.8;
double w = 1.0-1.0/2.8;
// double w = 1.0-1.0/3.0;
// double w = 1.0-1.0/10.0;
// double w = 1.0-1.0/20.0;
// double w = 1.0-1.0/15.0;
double w = 1.0-1.0/12.0;
//double w = 1.0-1.0/12.0;
cout<<"Attempting to get into the for loop"<<endl;
for(int i=0; i<organisms.size(); i++)
......@@ -852,8 +852,8 @@ void mdist::write_out_distance_matrix(string details)
//entoutstream<<organisms.at(i).get_directory();
//elinstream<<organisms.at(i).get_directory();
polystream<<organisms.at(i).get_directory();
epolystream<<organisms.at(i).get_directory();
// polystream<<organisms.at(i).get_directory();
// epolystream<<organisms.at(i).get_directory();
enlpolystream<<organisms.at(i).get_directory();
enlpolystreamr<<genome_mapped.at(i);
......@@ -864,8 +864,8 @@ void mdist::write_out_distance_matrix(string details)
//entoutstream<<"_ref";
//elinstream<<"_ref";
polystream<<"_ref";
epolystream<<"_ref";
// polystream<<"_ref";
// epolystream<<"_ref";
enlpolystream<<"_ref";
enlpolystreamr<<"_ref";
}
......@@ -873,8 +873,8 @@ void mdist::write_out_distance_matrix(string details)
//linstream<<" \t";
//entoutstream<<" \t";
//elinstream<<" \t";
polystream<<" \t";
epolystream<<" \t";
// polystream<<" \t";
// epolystream<<" \t";
enlpolystream<<" \t";
enlpolystreamr<<" \t";
......@@ -887,8 +887,8 @@ void mdist::write_out_distance_matrix(string details)
//linstream<<0<<" ";
//entoutstream<<0<<" ";
//elinstream<<0<<" ";
polystream<<0<<" ";
epolystream<<0<<" ";
// polystream<<0<<" ";
// epolystream<<0<<" ";
enlpolystream<<0<<" ";
enlpolystreamr<<0<<" ";
}
......@@ -909,7 +909,7 @@ void mdist::write_out_distance_matrix(string details)
fits_vector.at(index).quadratic_fit_b=d3;
fits_vector.at(index).quadratic_fit_a=plots.at(index).get_a();
fits_vector.at(index).qf_rmsdwt=plots.at(index).get_wtrmsd_quadratic();
polystream<<d3<<" ";
// polystream<<d3<<" ";
if(min_slope==-1 || min_slope>d3)
{
......@@ -922,7 +922,7 @@ void mdist::write_out_distance_matrix(string details)
//entoutstream<<d1*plots.at(index).get_entropy()<<" ";
//elinstream<<d2*plots.at(index).get_entropy()<<" ";
epolystream<<d3*plots.at(index).get_entropy()<<" ";
// epolystream<<d3*plots.at(index).get_entropy()<<" ";
double d4=-1.0*(2.0*P*plots.at(index).get_a()+(-1.0*d3));//slope of the quadratic function at P
enlpolystream<<-w*log( (w-d4*plots.at(index).get_entropy())/w)<<" ";
enlpolystreamr<<-w*log( (w-d4*plots.at(index).get_entropy())/w)<<" ";
......@@ -933,8 +933,8 @@ void mdist::write_out_distance_matrix(string details)
//linstream<<endl;
//entoutstream<<endl;
//elinstream<<endl;
polystream<<endl;
epolystream<<endl;
// polystream<<endl;
// epolystream<<endl;
enlpolystream<<endl;
enlpolystreamr<<endl;
}
......@@ -943,12 +943,12 @@ void mdist::write_out_distance_matrix(string details)
//linstream.close();
//entoutstream.close();
//elinstream.close();
polystream.close();
epolystream.close();
// polystream.close();
// epolystream.close();
enlpolystream.close();
enlpolystreamr.close();
logstream<<"8 distance matrices successfully written and streams closed."<<endl<<endl;
logstream<<"2 distance matrices successfully written and streams closed."<<endl<<endl;
/*
int system_return = system ( ("./rapidnj "+dmfile+" > "+dmfile+"_tree").c_str());
......@@ -1646,13 +1646,15 @@ void open_file(ofstream& outstream, string outfile)
void mdist::do_igmstream(vector<string>& genome_mapped)
{
cout<<"Size of organisms = "<<organisms.size()<<endl;
ifstream igmstream;
string igmfile=path+"/"+bfn+"/genome_mapped.txt";
igmstream.open(igmfile.c_str());
if(igmstream.fail())
{
cout<<"Failed to open genome_mapped.txt. Please create it. Creating genome_names.txt file in main run directory for you now. Move it to Taxonomy and run the script, then move genome_mapped.txt to the main directory please."<<endl;
/* ofstream gmstream;
/* cout<<"Failed to open genome_mapped.txt. Please create it. Creating genome_names.txt file in main run directory for you now. Move it to Taxonomy and run the script, then move genome_mapped.txt to the main directory please."<<endl;
ofstream gmstream;
string gmfile=path+"/"+bfn+"/genome_names.txt";
open_file(gmstream,gmfile);
for(int i=0; i<organisms.size(); i++)
......@@ -1822,179 +1824,186 @@ void mdist::read_in_distances(int num_bins_hgt)
void mdist::read_in_orgs()
{
ifstream instream;
string orgfile = path+"/"+bfn+"/"+bfn+"_saved_orgs.txt";
u.open_ifile(instream,orgfile,"orgfile");
cout << orgfile << endl;
string eat;
int ord;
cout<<"In read_in_orgs"<<endl;
int total_size = u.get_ts(path+"/"+bfn+"/");
cout<<"total_size = "<<total_size<<endl;
instream>>eat>>ord;
while(!instream.eof())
for(int numg=0; numg<total_size; numg++)
{
cout<<"numg="<<numg<<endl;
ifstream instream;
string orgfile = path+"/"+bfn+"/SAVEDORGS/"+bfn+"_"+u.int_to_string(numg)+"_saved_orgs.txt";
u.open_ifile(instream,orgfile,"orgfile");
cout << "orgfile: "<<orgfile << endl;
string eat;
int ord;
instream>>eat>>ord;
cout<<"ord = "<<ord<<endl;
org new_org;
new_org.set_filtering_steps(filtering_steps);
string s;
//Set the ordinal.
new_org.set_ordinal(ord);
cout << "Organism: " << ord << endl;
//Set the path.
instream>>eat>>s;
new_org.set_path(s);
//Set the directory.
instream>>eat>>s;
new_org.set_directory(s);
//Set the total number of amino acids.
int i;
instream>>eat>>i;
new_org.set_total_num_aa(i);
//Set the tag length.
instream>>eat>>i;
new_org.set_tag_length(i);
bool b;
instream>>eat>>b;
instream>>eat>>b;
new_org.set_conserved(b);
//Set the files.
instream>>eat>>i;
cout << "numfiles: " << i << endl;
cout << "numfiles: " << i << endl;
for(int q=0; q<i; q++)
{
instream>>s;
new_org.add_file(s);
}
//Set the file sizes.
instream>>eat>>i;
cout << "numfilesizes: " << i << endl;
cout << "numfilesizes: " << i << endl;
for(int q=0; q<i; q++)
{
int fs;
instream>>fs;
new_org.push_back_file_size(fs);
}
//Set the amino acid counts.
instream>>eat>>i;
cout << "numaacounts: " << i << endl;
cout << "numaacounts: " << i << endl;
for(int q=0; q<i; q++)
{
int aacount;
instream>>aacount;
new_org.add_aa_count(q,aacount);
}
instream>>eat>>eat;//"proteome data: ";
instream>>eat>>i; //"size_real_tags <int>"
new_org.setp_size_real_tags(i);
double d;
instream>>eat>>d; //"discrepancy <double>"
new_org.setp_discrepancy(d);
cout << "discrepancy: " << d << endl;
cout << "discrepancy: " << d << endl;
instream>>eat>>i; //"conserved <int>"
new_org.setp_conserved(i);
instream>>eat>>i; //"chosen_column <int>"
new_org.setp_chosen_column(i);
instream>>eat>>b;
new_org.set_ref(b);
instream>>eat>>i; //"proteins.size() <integer which should be 0>"
cout << "proteins.size: " << i << endl;
cout << "proteins.size: " << i << endl;
instream>>eat>>i; //"total_num_proteins <integer which should be greater than 0>"
cout << "total_num_proteins: " << i << endl;
new_org.set_total_num_proteins(i);
instream>>eat>>i; //"pv.size() <integer which should not be 0>"
new_org.setp_set_pv_size(i);
cout << "ppv_size: " << i << endl;
for(int q=0; q<i; q++)
{
int keep;
instream>>eat>>keep;
new_org.setp_set_pv_val(q,0,0,keep,0,0);
}
int fs; //filtering_steps
instream>>eat>>fs;
cout << "filtering_steps: " << fs << endl;
int pvv_size;
instream>>eat>>pvv_size; //"pvv.size() <integer which should match the size above>"
new_org.setp_set_pvv_size(pvv_size);
cout << "pvvsize: " << pvv_size << endl;
for(int q=0; q<pvv_size; q++)
{
instream>>eat>>i; //keeper
new_org.setp_set_keeper(q,i);
new_org.setp_set_deleter(q,0);
new_org.setp_set_internal_repeats(q,0);
vector<int> vals;
for(int k=0; k<=filtering_steps; k++)
{
vals.push_back(0);
}
new_org.setp_set_pvv_gnums_val(q,vals);
new_org.setp_set_pvv_gis_val(q,vals);
new_org.setp_set_pvv_gis_val(q,vals);
}
instream>>eat>>i; //"sosv.size() <int like 401>"
cout << "sosv_size: " << pvv_size << endl;
new_org.setp_set_sosv_size(i);
vector<int> sosv_vals;
for(int q=0; q<i; q++)
{
int sosi;
instream>>eat>>sosi;
// cout<<eat<<" "<<sosi<<endl;
sosv_vals.push_back(sosi);
}
new_org.setp_set_sosv_vals(sosv_vals);
new_org.set_filtering_steps(filtering_steps);
cout << "total_num_proteins: " << i << endl;
organisms.push_back(new_org);
instream>>eat>>ord;
}
new_org.set_total_num_proteins(i);
// instream>>eat>>i; //"pv.size() <integer which should not be 0>"
// new_org.setp_set_pv_size(i);
/* cout << "ppv_size: " << i << endl;
for(int q=0; q<i; q++)
{
int keep;
instream>>eat>>keep;
new_org.setp_set_pv_val(q,0,0,keep,0,0);
}
*/
int fs; //filtering_steps
instream>>eat>>fs;
cout << "filtering_steps: " << fs << endl;
int pvv_size;
instream>>eat>>pvv_size; //"pvv.size() <integer which should match the size above>"
new_org.setp_set_pvv_size(pvv_size);
cout << "pvvsize: " << pvv_size << endl;
for(int q=0; q<pvv_size; q++)
{
instream>>eat>>i; //keeper
new_org.setp_set_keeper(q,i);
new_org.setp_set_deleter(q,0);
new_org.setp_set_internal_repeats(q,0);
vector<int> vals;
for(int k=0; k<=filtering_steps; k++)
{
vals.push_back(0);
}
new_org.setp_set_pvv_gnums_val(q,vals);
new_org.setp_set_pvv_gis_val(q,vals);
new_org.setp_set_pvv_gis_val(q,vals);
}
instream>>eat>>i; //"sosv.size() <int like 401>"
cout << "sosv_size: " << pvv_size << endl;
new_org.setp_set_sosv_size(i);
vector<int> sosv_vals;
for(int q=0; q<i; q++)
{
int sosi;
instream>>eat>>sosi;
// cout<<eat<<" "<<sosi<<endl;
sosv_vals.push_back(sosi);
}
new_org.setp_set_sosv_vals(sosv_vals);
new_org.set_filtering_steps(filtering_steps);
organisms.push_back(new_org);
// instream>>eat>>ord;
instream.close();
}
cout<<"Bottom of mdist::read_in_orgs()"<<endl;
cout<<"organisms.size()="<<organisms.size()<<endl;
......@@ -2371,7 +2380,7 @@ void mdist::edit_distances()
cout<<"Done writing distances matrices."<<endl;
// write_out_sorted_as("restrained_hgt_fixed");
cout<<"Writing out double exponential distance matrix..."<<endl;
write_out_exp_dm(true);
// write_out_exp_dm(true);
cout<<"Done writing double exponential distance matrix."<<endl;
cout<<"Writing out fits vector..."<<endl;
write_out_fits_vector(true);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment