Commit 1cadca46 authored by Raquel Bromberg's avatar Raquel Bromberg

Code cleaned up (commented code deleted)

parent 99830c1f
......@@ -22,7 +22,6 @@ class Coptim1_layer
Coptim1_layer::Coptim1_layer(int number_of_genomes)
{
//array_size = (((number_of_genomes*number_of_genomes)-number_of_genomes)/2.0);
array_size = get_index(number_of_genomes-2,number_of_genomes-1)+1;
diagonal_size = number_of_genomes;
array = new int[array_size];
......
......@@ -22,7 +22,6 @@ amino_acids::amino_acids()
{
}
char amino_acids::get_aa(int i)
{
if(i==0)
......
#include <iostream>
#include <fstream>
#include <cstdlib>
//#include <omp.h>
#include <getopt.h>
#include "mctr.h"
......@@ -22,11 +21,6 @@ int main(int argc, char* argv[])
int fs=-1;
int fo=-1;
/* if(argc<2)
{
cout<<"Wrong input to cm.cpp: ./cm -p <full path> -f <filtering steps, =0 for unfiltered (e.g. all) data> -o <filtering option, =0 for unfiltered (e.g. all) data>"<<endl;
}*/
cout<<"Values stored in argv:"<<endl;
for(int i=0; i<argc; i++)
{
......@@ -40,9 +34,6 @@ int main(int argc, char* argv[])
case 'o':
fo=atoi(optarg);
break;
/* case 'p':
full_path = optarg;
break;*/
case 'f':
fs=atoi(optarg);
break;
......@@ -54,16 +45,12 @@ int main(int argc, char* argv[])
}
}
// cout<<"fo="<<fo<<endl;
// cout<<"p="<<full_path<<endl;
// cout<<"fs="<<fs<<endl;
cout<<"Calling run on real data"<<endl;
mctr m(full_path,"",fs,fo,partial_run);
m.run_it0();
cout<<"Calling run on scrambled data"<<endl;
mctr mscr(full_path,"scr",fs,fo,partial_run);//scrambled
mctr mscr(full_path,"scr",fs,fo,partial_run);
mscr.run_it0();
return 0;
......
......@@ -21,7 +21,6 @@ struct avg
struct Mproteome
{
vector<string> proteins;
//vector<double> bit_scores;
double bit_scores[20];
vector<string> files;
int ordinal;
......@@ -47,7 +46,6 @@ struct v
};
void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array, int& max, int& min,string& most_unlikely, int& uscore,int SOS_CUTOFF,bool partial_run,char partial_run_char);
avg calc_running_avgs(int array[], int max, int min);
bool check_composition(string s,int tag_length,amino_acids& aas,int SOSCUTOFF);
int get_num_bins(string path,string bfn);
int calc_sos_cutoff(string path,string bfn);
......@@ -93,11 +91,6 @@ int main(int argc, char* argv[])
}
}
//string FM = argv[3]; //always set to true
// int num_bins=atoi(argv[4]); //set it from within
// string PS = argv[5]; //always set to P
//int SOS_CUTOFF=atoi(argv[6]); //set it from within
amino_acids aas;
if(partial_run)
{
......@@ -136,16 +129,11 @@ int main(int argc, char* argv[])
if(ordinal==proteomes.at(i).ordinal && org_name==proteomes.at(i).org_name)
{
proteomes.at(i).files.push_back(faa_path);
//cout<<"Already there. ordinal="<<ordinal<<" org_name="<<org_name<<endl;
//cout<<"pushing back "<<faa_path<<endl;
already_there=true;
}
}
if(!already_there)
{
//cout<<"Not already there."<<endl;
//cout<<"ordinal = "<<ordinal<<" org_name = "<<org_name<<endl;
//cout<<"adding file "<<faa_path<<endl;
Mproteome mp_temp;
mp_temp.ordinal=ordinal;
mp_temp.org_name=org_name;
......@@ -191,11 +179,9 @@ int main(int argc, char* argv[])
instream>>eater;
}
proteomes.at(i).proteins.push_back(protein);
//cout<<"info_line = "<<info_line<<endl<<protein<<endl;
}
}
instream.close();
// cout<<"Size of "<<i<<" proteins = "<<proteomes.at(i).proteins.size()<<endl;
}
for(int j=0; j<20; j++)
......@@ -229,26 +215,19 @@ int main(int argc, char* argv[])
for(int i=0; i<proteomes.size(); i++)
{
outstream2<<i<<" "<<proteomes.at(i).total<<endl;
//cout<<"Doing bitting for proteome i="<<i<<endl;
for(int j=0; j<20; j++)
{
//cout<<j<<" "<<proteomes.at(i).bit_scores[j]<<" ";
//outstream2<<aas.get_aa(j)<<" "<<proteomes.at(i).bit_scores[j]<<" ";
proteomes.at(i).bit_scores[j]/=proteomes.at(i).total; //now frequencies.
//cout<<proteomes.at(i).bit_scores[j]<<" ";
//outstream2<<proteomes.at(i).bit_scores[j]<<" ";
proteomes.at(i).bit_scores[j]=abs(log(proteomes.at(i).bit_scores[j]));
//outstream2<<proteomes.at(i).bit_scores[j]<<" ";
proteomes.at(i).bit_scores[j]/=2.0;
outstream2<<proteomes.at(i).bit_scores[j]<<endl;
//cout<<proteomes.at(i).bit_scores[j]<<endl;
}
outstream2<<endl;
}
ofstream outstream;
string outfile = path+"/"+bfn+"/bin_corrections.txt";
outstream.open(outfile.c_str());//, ios::out | ios::app);
outstream.open(outfile.c_str());
if(outstream.fail())
{
cout<<"Failed to open outstream to "<<outfile<<endl;
......@@ -266,13 +245,9 @@ int main(int argc, char* argv[])
outstream<<num_bins<<endl<<endl;
// int i_starter=32;
//int j_starter=1687;
//bool start_writing=false;
if(PS=="P")
{
cout<<"Using omp...presumably..."<<endl;
cout<<"Using omp..."<<endl;
vector<v> stored_counts;
......@@ -294,8 +269,6 @@ int main(int argc, char* argv[])
int uscore=0;
do_counts(proteomes.at(i),proteomes.at(j),tag_length,array,max,min,most_unlikely,uscore,SOS_CUTOFF,partial_run,partial_run_lead);
do_counts(proteomes.at(j),proteomes.at(i),tag_length,array,max,min,most_unlikely,uscore,SOS_CUTOFF,partial_run,partial_run_lead);
//avg result;
//result = calc_running_avgs(array,max,min);
v vtemp;
for(int f=0; f<array.size(); f++)
......@@ -349,13 +322,8 @@ int main(int argc, char* argv[])
int max(0),min(0);
string most_unlikely="";
int uscore=0;
//do_counts(proteomes.at(i),proteomes.at(j), tag_length, array,max,min,most_unlikely,uscore);
//do_counts(proteomes.at(j),proteomes.at(i), tag_length, array,max,min,most_unlikely,uscore);
//avg result;
//result = calc_running_avgs(array,max,min);
outstream<<i<<" to "<<j<<endl;
//outstream<<result.index<<endl<<result.avg<<endl;
for(int f=0; f<100; f++)
{
outstream<<array[f]<<endl;
......@@ -368,22 +336,14 @@ int main(int argc, char* argv[])
}
}
}
//b. read them in from file
else if(FM=="F")
{
vector<Fproteome> proteomes;
}
else
{
cout<<"Invalid option for File/Memory (FM)."<<endl;
exit(1);
}
//3. do all comparisons and write out.
}
//void do_counts(Mproteome p1, Mproteome p2, int tag_length, int array[],int& max, int& min,string& most_unlikely,int& uscore)
void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,int& max, int& min,string& most_unlikely,int& uscore,int SOS_CUTOFF,bool partial_run,char partial_run_lead)
{
int sum=0;
......@@ -408,9 +368,6 @@ void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,in
double score=0.0;
for(int k=0; k<tag_length && (j+k)<p1.proteins.at(i).length(); k++)
{
// bool composition_ok=check_composition(p1.proteins.at(i).substr(k,tag_length),tag_length,aas,SOS_CUTOFF);
// if(composition_ok)
// {
s+=p1.proteins.at(i)[j+k];
int index=aas.get_aa(p1.proteins.at(i)[j+k]);
......@@ -421,12 +378,10 @@ void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,in
if( (score+.5)<min || min==0)
{
min=score+.5;
//cout<<"min score = "<<min<<" sequence = "<<s<<endl;
}
if( (score+.5)>max)
{
max=score+.5;
//cout<<"max score = "<<max<<" sequence = "<<s<<endl;
uscore=max;
most_unlikely=s;
}
......@@ -436,12 +391,6 @@ void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,in
k=tag_length;
j+=k-1;
}
// }
// else
// {
// k=tag_length;
// j+=k-1;
// }
}
}
}
......@@ -458,9 +407,6 @@ void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,in
double score=0.0;
for(int k=0; k<tag_length && (j+k)<p1.proteins.at(i).length(); k++)
{
// bool composition_ok=check_composition(p1.proteins.at(i).substr(k,tag_length),tag_length,aas,SOS_CUTOFF);
// if(composition_ok)
// {
s+=p1.proteins.at(i)[j+k];
int index=aas.get_aa(p1.proteins.at(i)[j+k]);
......@@ -471,12 +417,10 @@ void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,in
if( (score+.5)<min || min==0)
{
min=score+.5;
//cout<<"min score = "<<min<<" sequence = "<<s<<endl;
}
if( (score+.5)>max)
{
max=score+.5;
//cout<<"max score = "<<max<<" sequence = "<<s<<endl;
uscore=max;
most_unlikely=s;
}
......@@ -486,12 +430,6 @@ void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,in
k=tag_length;
j+=k-1;
}
// }
// else
// {
// k=tag_length;
// j+=k-1;
// }
}
}
}
......@@ -539,56 +477,6 @@ bool check_composition(string s,int tag_length,amino_acids& aas,int SOSCUTOFF)
return true;
}
avg calc_running_avgs(int array[],int max, int min)
{
/*
avg chosen_avg;
chosen_avg.avg=0;
chosen_avg.index=0;
cout<<"In calc_running_avgs"<<endl;
for(int i=0; i<1000; i++)
{
cout<<i<<" "<<array[i]<<endl;
}
vector<avg> rolling_avg;
//go forwards
double denom=0;
double sum=0;
double cur_avg=0;
for(int i=min; i<max; i++)
{
if(array[i]>0)
{
sum+=array[i];
denom+=1;
cur_avg=sum/denom;
avg atemp;
atemp.avg=log(cur_avg);
atemp.index=i;
rolling_avg.push_back(atemp);
}
}
for(int i=rolling_avg.size()-1; i>=0; i--)
{
double diff=log(array[rolling_avg.at(i).index])-rolling_avg.at(i).avg;
//double diff=log(counts.at(rolling_avg.at(i).index))-rolling_avg.at(i).avg;
//cout<<i<<" "<<rolling_avg.at(i).index<<" "<<rolling_avg.at(i).avg<<" "<<diff<<endl;
if(abs(diff)<0.08)
{
chosen_avg=rolling_avg.at(i);
//cout<<"Choosing the avg. This better only appear once."<<endl;
break;
}
}
//cout<<"chosen avg = "<<chosen_avg.index<<" "<<chosen_avg.avg<<endl;
return chosen_avg;
*/
}
int get_num_bins(string path,string bfn)
{
ifstream instream;
......
......@@ -50,8 +50,8 @@ int main(int argc, char* argv[])
for(int j=i+1; j<ref_set_size+main_set_size; j++)
{
int total_size_j;// = organisms.at(j).get_total_num_aa();
vector<int> aa_array_j;// = organisms.at(j).get_aa_array();
int total_size_j;
vector<int> aa_array_j;
get_hist_data(u.int_to_string(j),total_size_j,aa_array_j,path_frag);
double entropy=0;
......
......@@ -43,7 +43,6 @@ vector<string> directory_reader::load_names(string dir)
}
else
{
//cout<<"Rejecting directory named "<<temp<<endl;
}
}
closedir(dp);
......
......@@ -63,14 +63,6 @@ void exp_grid::get_exps(double min_slope_par, double max_slope_par, vector<float
yvals = y_fit;
wvals = w_fit;
/* //for debugging
for(int i=0; i<x_fit.size(); i++)
{
cout<<i<<" "<<xvals.at(i)<<" "<<yvals.at(i)<<" "<<wvals.at(i)<<endl;
}
exit(1);
*/
min_slope_par=0;
max_slope_par=0.3;
......@@ -88,15 +80,6 @@ void exp_grid::get_exps(double min_slope_par, double max_slope_par, vector<float
cout<<"wvals.size(0 = "<<wvals.size()<<endl;
exit(1);
}
/* else
{
cout<<"Writing out values for xvals, yvals, and wvals"<<endl;
cout<<"Size of xvals = "<<xvals.size()<<endl;
for(int i=0; i<xvals.size(); i++)
{
cout<<i<<" "<<xvals.at(i)<<" "<<yvals.at(i)<<" "<<wvals.at(i)<<endl;
}
}*/
bool done=false;
int previous_val=-1;
......@@ -166,27 +149,17 @@ void exp_grid::get_exps(double min_slope_par, double max_slope_par, vector<float
}
}
//cout<<"Calculating match_score_cutoff: ";
//cout<<"xvals.at(0) = "<<xvals.at(0)<<endl;
double n1=0.1*F/G;
//cout<<"n1="<<n1<<endl;
double n2=b1-b2;
//cout<<"n2="<<n2<<endl;
if(n1<=0 || n2==0)
{
//cout<<"setting match_score_cutoff to -1"<<endl;
match_score_cutoff=-1;
}
else
{
// cout<<"n1 and n2 acceptable."<<endl;
match_score_cutoff = log(n1)/(n2)+xvals.at(0);
//cout<<"match_score_cutoff="<<match_score_cutoff<<endl;
}
msc_par=match_score_cutoff;
/* int staller;
cout<<"Stalling: ";
cin>>staller;*/
}
void exp_grid::assign_array(double min_b1, double max_b1, double min_b2, double max_b2, double stepper)
......@@ -226,14 +199,6 @@ void exp_grid::calc_constants(double& alpha,double& alpha1,double& beta,double&
gamma+=wvals.at(i)*yvals.at(i)*exp(-1.0*b1*xvals.at(i));
gamma1+=wvals.at(i)*yvals.at(i)*exp(-1.0*b2*xvals.at(i));
}
/*
alpha*=2.0;
alpha1*=2.0;
beta*=2.0;
gamma*=2.0;
gamma1*=2.0;
*/
}
double exp_grid::get_genomic_distance()
......@@ -296,7 +261,7 @@ double exp_grid::get_wt_rmsd(vector<float> rmsd_wts)
}
else
{
val = -2; //according to Zbyszek.
val = -2;
}
if(yvals.at(i)>0)
......
......@@ -17,14 +17,10 @@ class file_info
~file_info();
void read_line();
void set_file_name(string file_name_par);
// char get_caa();
// void delete_pFile();
string get_current_line();
// void open(string path_par);
void set_path(string path_par);
void open();
void set_position();
// string file_name;
string path;
int total_tags;
int strings_read_in;
......@@ -49,9 +45,7 @@ file_info::file_info()
file_info::~file_info()
{
cout<<"Top of file_info destructor."<<endl;
// fclose(pFile);
cout<<"Closed the file."<<endl;
//delete pFile;
cout<<"Bottom of file_info destructor."<<endl;
}
......@@ -98,8 +92,6 @@ void file_info::set_path(string path_par)
fgetpos(pFile,&position); //point to the first sequence in the file.
// read_line();
if(DEBUG_FI || verbose)
{
cout<<"total_tags in current TAGS file = "<<total_tags<<endl;
......@@ -127,8 +119,6 @@ void file_info::open()
fsetpos(pFile,&position); //set the position to the last place we read from.
}
// strings_read_in=0;
// fgetpos(pFile,&position); //point to the first sequence in the file.
read_line();
if(s==last_read)
......@@ -184,13 +174,3 @@ string file_info::get_current_line()
{
return s;
}
/*char file_info::get_caa()
{
cout<<"in get_caa(). last_read_sequence = "<<s<<endl;
return s[0];
}
*/
//void file_info::delete_pFile()
//{
//}
......@@ -171,7 +171,5 @@ int main(int argc, char* argv[])
otagstream.close();
}
return 0;
}
......@@ -27,9 +27,6 @@ const int SIZE_CUTOFF=140000; //SIZE_CUTOFF: A constant that ~identifies orga
//Any proteome with < SIZE_CUTOFF amino acids considered a reduced organism
//and flagged as potentially problematic in problematic_orgs.txt file.
const int TAGS_MAX = 5000000; //TAGS_MAX: Maximum size of the vector holding/sorting the groups of tags.
//Can be increased when code is run on machines with more memory.
const bool DO_SCRAMBLING = true; //DO_SCRAMBLING: Can be set to false when user wants to maintain a previously generated background.
//Scrambling introduces a random element into the code, with each scramble
//generating very slightly different results; keeping this random background
......@@ -88,8 +85,6 @@ class sttagger
void do_tags();
void do_tags_v2();
// void do_scr_tags();
void write_out_aux_files();
void write_out_aux_files_v2();
void write_out_aahistogram();
......@@ -149,6 +144,7 @@ sttagger::sttagger(string path_par, string species_par, bool filtering_par, bool
keepers.push_back("IDTPGHVDFTIEVERSMRVLDGAIVV");
keepers.push_back("IDTPGHVDFTIEVERSMRVLDGAVMV");
}
//If we are filtering Archaea, "" "" "" (same as above)
else if(filtering && species=="A")
{
......@@ -189,15 +185,13 @@ sttagger::sttagger(string path_par, string species_par, bool filtering_par, bool
keepers.push_back("IDTPGHVDFTIEVERSMRVLDGAIVV");
keepers.push_back("IDTPGHVDFTIEVERSMRVLDGAVMV");
}
//If we are filtering Archaea, "" "" "" (same as above)
else if(filtering && species=="A")
{
keepers.push_back("YFTIIDAPGHRDFVKNMITGASQADAALLVV");
}
//string kfile = path+"/"+bfn+"/"+bfn+"_keepers_for_filtering.txt";
//u.open_ofile(keeperstream,kfile,"kfile");
complete_setup_v2();
}
......@@ -241,15 +235,6 @@ void sttagger::complete_setup_v2()
logstream.close();
}
/*
problem_stream.open( (path+"/"+bfn+"/problematic_inputs.txt").c_str(), ios::out | ios::app);
if(problem_stream.fail())
{
cout<<"Failed to open problem_stream."<<endl;
exit(1);
}
*/
parse_if_v2();
}
......@@ -481,7 +466,6 @@ void sttagger::do_tags()
#pragma omp parallel for
for(int i=0; i<organisms.size(); i++) {
cout << "sttagger::do_tags(): i=" << i << " " << organisms.at(i).get_directory() << endl;
// organisms.at(i).generate_tags(!filtering); //Only generate the scrambled tags if this is not a filtering run.
organisms.at(i).generate_tags(true,partial_run); //Only generate the scrambled tags if this is not a filtering run.
//Check the total number of amino acids.
......@@ -528,7 +512,6 @@ void sttagger::do_tags_v2()
int num_tags=0;
cout << "sttagger::do_tags_v2(): ordinal=" << ordinal << " " << organisms.at(0).get_directory() << endl;
// organisms.at(i).generate_tags(!filtering); //Only generate the scrambled tags if this is not a filtering run.
organisms.at(0).generate_tags(true,partial_run); //Only generate the scrambled tags if this is not a filtering run.
//Check the total number of amino acids.
......@@ -633,20 +616,6 @@ void sttagger::write_out_tags_v2()
organism_tags.clear();
organisms.at(0).clear_tags();
outstream.close();
/*
ofstream histstream;
histstream.open( (path+"/"+bfn+"/"+bfn+"_hist.txt").c_str());
if(histstream.fail())
{
cout<<"Failed to open histstream "<<endl;
exit(1);
}
histstream<<ordinal<<endl<<organisms.at(0).get_total_num_aa()<<endl;
organisms.at(0).write_out_aa_array(outstream);
histstream<<endl;
histstream.close();*/
}
void sttagger::write_out_scr_tags(int organism)
......@@ -728,7 +697,6 @@ void sttagger::write_out_aux_files()
void sttagger::write_out_aux_files_v2()
{
write_out_aahistogram_v2();
// write_out_aaentropies_v2();
if(filtering)
{
write_out_keepers_log_v2();
......@@ -827,12 +795,6 @@ void sttagger::write_out_aaentropies()
outstream.close();
}
/*vector<org> sttagger::return_organisms()
{
return organisms;
}
*/
void sttagger::close_pstream()
{
problem_stream.close();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment