Commit f84d0ba9 authored by Raquel Bromberg

wip mefilter.cpp added

parent b06e34d3
...@@ -18,6 +18,7 @@ int main(int argc, char* argv[]) ...@@ -18,6 +18,7 @@ int main(int argc, char* argv[])
string full_path = argv[1]; string full_path = argv[1];
char c; char c;
bool partial_run=false;
int fs=-1; int fs=-1;
int fo=-1; int fo=-1;
...@@ -32,7 +33,7 @@ int main(int argc, char* argv[]) ...@@ -32,7 +33,7 @@ int main(int argc, char* argv[])
cout<<argv[i]<<endl; cout<<argv[i]<<endl;
} }
while((c = getopt(argc,argv,"p:o:f:")) != -1) while((c = getopt(argc,argv,"p:o:f:C")) != -1)
{ {
switch(c) switch(c)
{ {
...@@ -45,6 +46,9 @@ int main(int argc, char* argv[]) ...@@ -45,6 +46,9 @@ int main(int argc, char* argv[])
case 'f': case 'f':
fs=atoi(optarg); fs=atoi(optarg);
break; break;
case 'C':
partial_run=true;
break;
default: default:
abort(); abort();
} }
...@@ -54,10 +58,10 @@ int main(int argc, char* argv[]) ...@@ -54,10 +58,10 @@ int main(int argc, char* argv[])
// cout<<"p="<<full_path<<endl; // cout<<"p="<<full_path<<endl;
// cout<<"fs="<<fs<<endl; // cout<<"fs="<<fs<<endl;
mctr m(full_path,"",fs,fo); mctr m(full_path,"",fs,fo,partial_run);
cout<<"Calling run()"<<endl; cout<<"Calling run()"<<endl;
m.run_it0(); m.run_it0();
mctr mscr(full_path,"scr",fs,fo);//scrambled mctr mscr(full_path,"scr",fs,fo,partial_run);//scrambled
mscr.run_it0(); mscr.run_it0();
return 0; return 0;
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <cstdlib> #include <cstdlib>
#include <cmath> #include <cmath>
#include <omp.h> #include <omp.h>
#include <getopt.h>
#include "org.h" #include "org.h"
#include "amino_acids.h" #include "amino_acids.h"
...@@ -45,11 +46,12 @@ struct v ...@@ -45,11 +46,12 @@ struct v
int uscore; int uscore;
}; };
void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array, int& max, int& min,string& most_unlikely, int& uscore,int SOS_CUTOFF); void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array, int& max, int& min,string& most_unlikely, int& uscore,int SOS_CUTOFF,bool partial_run,char partial_run_char);
avg calc_running_avgs(int array[], int max, int min); avg calc_running_avgs(int array[], int max, int min);
bool check_composition(string s,int tag_length,amino_acids& aas,int SOSCUTOFF); bool check_composition(string s,int tag_length,amino_acids& aas,int SOSCUTOFF);
int get_num_bins(string path,string bfn); int get_num_bins(string path,string bfn);
int calc_sos_cutoff(string path,string bfn); int calc_sos_cutoff(string path,string bfn);
char get_pr_lead(string path, string bfn);
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
...@@ -69,6 +71,27 @@ int main(int argc, char* argv[]) ...@@ -69,6 +71,27 @@ int main(int argc, char* argv[])
cout<<"num_bins = "<<num_bins<<endl; cout<<"num_bins = "<<num_bins<<endl;
string PS = "P"; string PS = "P";
int SOS_CUTOFF = calc_sos_cutoff(path,bfn); int SOS_CUTOFF = calc_sos_cutoff(path,bfn);
bool partial_run=false;
char partial_run_lead;
char c;
cout<<"Values stored in argv:"<<endl;
for(int i=0; i<argc; i++)
{
cout<<argv[i]<<endl;
}
while((c = getopt(argc,argv,":C")) != -1)
{
switch(c)
{
case 'C':
partial_run=true;
break;
default:
abort();
}
}
//string FM = argv[3]; //always set to true //string FM = argv[3]; //always set to true
// int num_bins=atoi(argv[4]); //set it from within // int num_bins=atoi(argv[4]); //set it from within
...@@ -76,7 +99,11 @@ int main(int argc, char* argv[]) ...@@ -76,7 +99,11 @@ int main(int argc, char* argv[])
//int SOS_CUTOFF=atoi(argv[6]); //set it from within //int SOS_CUTOFF=atoi(argv[6]); //set it from within
amino_acids aas; amino_acids aas;
if(partial_run)
{
partial_run_lead=get_pr_lead(path,bfn);
}
//1. read in info_file //1. read in info_file
ifstream instream; ifstream instream;
string info_file = path+"/"+bfn+"/"+bfn+"_info.txt"; string info_file = path+"/"+bfn+"/"+bfn+"_info.txt";
...@@ -265,8 +292,8 @@ int main(int argc, char* argv[]) ...@@ -265,8 +292,8 @@ int main(int argc, char* argv[])
int max(0),min(0); int max(0),min(0);
string most_unlikely=""; string most_unlikely="";
int uscore=0; int uscore=0;
do_counts(proteomes.at(i),proteomes.at(j),tag_length,array,max,min,most_unlikely,uscore,SOS_CUTOFF); do_counts(proteomes.at(i),proteomes.at(j),tag_length,array,max,min,most_unlikely,uscore,SOS_CUTOFF,partial_run,partial_run_lead);
do_counts(proteomes.at(j),proteomes.at(i),tag_length,array,max,min,most_unlikely,uscore,SOS_CUTOFF); do_counts(proteomes.at(j),proteomes.at(i),tag_length,array,max,min,most_unlikely,uscore,SOS_CUTOFF,partial_run,partial_run_lead);
//avg result; //avg result;
//result = calc_running_avgs(array,max,min); //result = calc_running_avgs(array,max,min);
...@@ -357,7 +384,7 @@ int main(int argc, char* argv[]) ...@@ -357,7 +384,7 @@ int main(int argc, char* argv[])
} }
//void do_counts(Mproteome p1, Mproteome p2, int tag_length, int array[],int& max, int& min,string& most_unlikely,int& uscore) //void do_counts(Mproteome p1, Mproteome p2, int tag_length, int array[],int& max, int& min,string& most_unlikely,int& uscore)
void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,int& max, int& min,string& most_unlikely,int& uscore,int SOS_CUTOFF) void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,int& max, int& min,string& most_unlikely,int& uscore,int SOS_CUTOFF,bool partial_run,char partial_run_lead)
{ {
int sum=0; int sum=0;
for(int i=0; i<array.size(); i++) for(int i=0; i<array.size(); i++)
...@@ -371,17 +398,19 @@ void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,in ...@@ -371,17 +398,19 @@ void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,in
} }
amino_acids aas; amino_acids aas;
for(int i=0; i<p1.proteins.size(); i++) if(!partial_run)
{ {
for(int j=0; j<p1.proteins.at(i).length(); j++) for(int i=0; i<p1.proteins.size(); i++)
{ {
string s=""; for(int j=0; j<p1.proteins.at(i).length(); j++)
double score=0.0;
for(int k=0; k<tag_length && (j+k)<p1.proteins.at(i).length(); k++)
{ {
// bool composition_ok=check_composition(p1.proteins.at(i).substr(k,tag_length),tag_length,aas,SOS_CUTOFF); string s="";
// if(composition_ok) double score=0.0;
// { for(int k=0; k<tag_length && (j+k)<p1.proteins.at(i).length(); k++)
{
// bool composition_ok=check_composition(p1.proteins.at(i).substr(k,tag_length),tag_length,aas,SOS_CUTOFF);
// if(composition_ok)
// {
s+=p1.proteins.at(i)[j+k]; s+=p1.proteins.at(i)[j+k];
int index=aas.get_aa(p1.proteins.at(i)[j+k]); int index=aas.get_aa(p1.proteins.at(i)[j+k]);
...@@ -413,6 +442,58 @@ void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,in ...@@ -413,6 +442,58 @@ void do_counts(Mproteome p1, Mproteome p2, int tag_length, vector<int>& array,in
// k=tag_length; // k=tag_length;
// j+=k-1; // j+=k-1;
// } // }
}
}
}
}
else if(partial_run)
{
for(int i=0; i<p1.proteins.size(); i++) // for all proteins in proteome p1
{
for(int j=0; j<p1.proteins.at(i).length(); j++) // for all positions in given protein
{
if(p1.proteins.at(i)[j]==partial_run_lead)
{
string s="";
double score=0.0;
for(int k=0; k<tag_length && (j+k)<p1.proteins.at(i).length(); k++)
{
// bool composition_ok=check_composition(p1.proteins.at(i).substr(k,tag_length),tag_length,aas,SOS_CUTOFF);
// if(composition_ok)
// {
s+=p1.proteins.at(i)[j+k];
int index=aas.get_aa(p1.proteins.at(i)[j+k]);
if(index>=0 && index<20)
{
score+=p1.bit_scores[index]+p2.bit_scores[index];
array[(int)(score+.5)]+=1;
if( (score+.5)<min || min==0)
{
min=score+.5;
//cout<<"min score = "<<min<<" sequence = "<<s<<endl;
}
if( (score+.5)>max)
{
max=score+.5;
//cout<<"max score = "<<max<<" sequence = "<<s<<endl;
uscore=max;
most_unlikely=s;
}
}
else
{
k=tag_length;
j+=k-1;
}
// }
// else
// {
// k=tag_length;
// j+=k-1;
// }
}
}
} }
} }
} }
...@@ -541,3 +622,21 @@ int calc_sos_cutoff(string path,string bfn) ...@@ -541,3 +622,21 @@ int calc_sos_cutoff(string path,string bfn)
instream.close(); instream.close();
return result; return result;
} }
// Returns the first character of the first tag stored in
// <path>/<bfn>/TAGS/TAGS0.txt.
//
// The file is read as an integer (the tag count) followed by a
// whitespace-delimited tag; only the first tag is consumed.
//
// path, bfn : components used to build the file name as path/bfn/TAGS/TAGS0.txt.
//
// Terminates the program with exit(1) when the file cannot be opened, or when
// the count or the first tag cannot be read. The read check matters because an
// empty or malformed file would otherwise yield '\0' as the lead character
// without any diagnostic.
char get_pr_lead(string path, string bfn)
{
    const string infile = path+"/"+bfn+"/TAGS/TAGS0.txt";
    ifstream instream;
    instream.open(infile.c_str());
    if(instream.fail())
    {
        cout<<"In function tmerg::get_pr_lead(): Failed to open infile to "<<infile<<endl;
        exit(1);
    }

    int num_tags = 0;
    string first_tag;
    // A successful string extraction always yields at least one character,
    // so first_tag[0] below is a real tag character.
    if(!(instream>>num_tags>>first_tag))
    {
        cout<<"In function tmerg::get_pr_lead(): Failed to read the first tag from "<<infile<<endl;
        exit(1);
    }
    instream.close();
    return first_tag[0];
}
#ifndef FILTER_H
#define FILTER_H
/* /*
A filter for mobile elements. SlopeTree CONSERVATION FILTER.
filter: Filter:
Reads in the merged list of kmers (produced by tmerg) and creates a filtered set of proteins Reads in the merged list of kmers (produced by tmerg)
in which proteins with kmers exhibiting unusual copy number patterns are removed. and creates a filtered set of proteins in which proteins
with kmers exhibiting unusual copy number patterns are
removed.
*/ */
#ifndef FILTER_H
#define FILTER_H
#include <iostream> #include <iostream>
#include <vector> #include <vector>
#include <sstream> #include <sstream>
......
...@@ -78,6 +78,8 @@ class mctr ...@@ -78,6 +78,8 @@ class mctr
amino_acids aas; amino_acids aas;
util u; util u;
int run_type; //0=normal run, 1=ref_set_run, 2=final pair run int run_type; //0=normal run, 1=ref_set_run, 2=final pair run
bool partial_run;
char partial_run_lead;
//Variables necessary for HGT correction on subsequent passes through mctr. //Variables necessary for HGT correction on subsequent passes through mctr.
int prob1; //ordinal of one of the 2 problematic proteomes in the whole run. int prob1; //ordinal of one of the 2 problematic proteomes in the whole run.
...@@ -151,10 +153,11 @@ class mctr ...@@ -151,10 +153,11 @@ class mctr
void populate_bitterv(); void populate_bitterv();
int get_bitterv_index(int i1, int i2); int get_bitterv_index(int i1, int i2);
int get_sos_cutoff(); int get_sos_cutoff();
char get_pr_lead();
public: public:
//constructors //constructors
mctr(string full_path_par,string dir_par, int filtering_steps_par, int filtering_option_par); //Use for the main run when doing entire set. mctr(string full_path_par,string dir_par, int filtering_steps_par, int filtering_option_par,bool partial_run_par); //Use for the main run when doing entire set.
mctr(string full_path_par, string dir_par, int filtering_steps_par, int filtering_options_par, int prob1_par, int prob2_par,vector<string>& ref_tags_par); //Use when testing a pair against a reference set. mctr(string full_path_par, string dir_par, int filtering_steps_par, int filtering_options_par, int prob1_par, int prob2_par,vector<string>& ref_tags_par); //Use when testing a pair against a reference set.
mctr(string full_path_par, string dir_par, int filtering_steps_par, int filtering_option_par, int prob1_par, int prob2_par); //Use when running two proteomes, HGT-genes removed, against each other. Final step. mctr(string full_path_par, string dir_par, int filtering_steps_par, int filtering_option_par, int prob1_par, int prob2_par); //Use when running two proteomes, HGT-genes removed, against each other. Final step.
...@@ -171,13 +174,18 @@ class mctr ...@@ -171,13 +174,18 @@ class mctr
//Count all matches between all pairs present in a MERGED_TAGS directory (and also the corresponding MERGED_TAGSscr directory, but has to be called on the data separately using a different object). //Count all matches between all pairs present in a MERGED_TAGS directory (and also the corresponding MERGED_TAGSscr directory, but has to be called on the data separately using a different object).
//This could be for a set without filtering (e.g. MERGED_TAGS plain) or with filering (e.g. MERGED_TAGS_15_8). //This could be for a set without filtering (e.g. MERGED_TAGS plain) or with filering (e.g. MERGED_TAGS_15_8).
//Without filtering: filtering_steps_par=-1, filtering_option_par=-1 //Without filtering: filtering_steps_par=-1, filtering_option_par=-1
mctr::mctr(string full_path_par, string dir_par, int filtering_steps_par, int filtering_option_par) mctr::mctr(string full_path_par, string dir_par, int filtering_steps_par, int filtering_option_par,bool partial_run_par)
{ {
cout<<"mctr::mctr(string,string,int,int)"<<endl; cout<<"mctr::mctr(string,string,int,int)"<<endl;
run_type=0; run_type=0;
partial_run=partial_run_par;
//main variables. //main variables.
u.extract_paths(full_path_par,path,bfn); u.extract_paths(full_path_par,path,bfn);
if(partial_run)
{
partial_run_lead=get_pr_lead();
}
u.get_rss(path+"/"+bfn+"/"+bfn+"_info.txt",refsetsize); u.get_rss(path+"/"+bfn+"/"+bfn+"_info.txt",refsetsize);
u.get_tl(path+"/"+bfn+"/"+bfn+"_info.txt",tag_length); u.get_tl(path+"/"+bfn+"/"+bfn+"_info.txt",tag_length);
mbkl=int(MBKL*(double)tag_length); mbkl=int(MBKL*(double)tag_length);
...@@ -404,15 +412,52 @@ void mctr::run_it0() ...@@ -404,15 +412,52 @@ void mctr::run_it0()
} }
//parallelize command. //parallelize command.
for(int i=0; i<20; i++) if(!partial_run)
{
for(int i=0; i<20; i++)
{
//Coptim1_3Dmat* corr_array_temp; //when it's parallelized, this will be set to zero, filled by each thread, then added into the real one at the end with a lock on it
all_matches.clear();
ofstream aastr;
open_str(aam_directory,i,aastr);
ofstream astr;
open_str(am_directory,i,astr);
//for(int n=0; n<all_matches.size(); n++)
//{
//all_matches_stream<<all_matches.at(n)<<endl;
//}
//all_matches.clear();
array.clear();
gnums.clear();
gene_ids.clear();
set_array_it0(i);
cout<<"First 3 rows of array: "<<endl;
cout<<array.at(0)<<" "<<gnums.at(0)<<" "<<gene_ids.at(0)<<endl;
cout<<array.at(1)<<" "<<gnums.at(1)<<" "<<gene_ids.at(1)<<endl;
cout<<array.at(2)<<" "<<gnums.at(2)<<" "<<gene_ids.at(2)<<endl;
cout<<"Last row of array: "<<endl;
cout<<array.at(array.size()-1)<<" "<<gnums.at(gnums.size()-1)<<" "<<gene_ids.at(gene_ids.size()-1)<<endl;
cout<<"Second to last row of array: "<<endl;
cout<<array.at(array.size()-2)<<endl;
cout<<"Last row of array: "<<endl;
cout<<array.at(array.size()-1)<<endl;
cout<<"Entering recursive function find_matches_it0(...)..."<<endl;
find_matches_it0(0,0,array.size()-1,"", aastr, astr);
cout<<"Escaped recursive function find_matches(...)!"<<endl;
astr.close();
}
}
else if(partial_run)
{ {
//Coptim1_3Dmat* corr_array_temp; //when it's parallelized, this will be set to zero, filled by each thread, then added into the real one at the end with a lock on it //Coptim1_3Dmat* corr_array_temp; //when it's parallelized, this will be set to zero, filled by each thread, then added into the real one at the end with a lock on it
all_matches.clear(); all_matches.clear();
ofstream aastr; ofstream aastr;
open_str(aam_directory,i,aastr); open_str(aam_directory,aas.get_aa(partial_run_lead),aastr);
ofstream astr; ofstream astr;
open_str(am_directory,i,astr); open_str(am_directory,aas.get_aa(partial_run_lead),astr);
//for(int n=0; n<all_matches.size(); n++) //for(int n=0; n<all_matches.size(); n++)
//{ //{
//all_matches_stream<<all_matches.at(n)<<endl; //all_matches_stream<<all_matches.at(n)<<endl;
...@@ -422,7 +467,7 @@ void mctr::run_it0() ...@@ -422,7 +467,7 @@ void mctr::run_it0()
array.clear(); array.clear();
gnums.clear(); gnums.clear();
gene_ids.clear(); gene_ids.clear();
set_array_it0(i); set_array_it0(aas.get_aa(partial_run_lead));
cout<<"First 3 rows of array: "<<endl; cout<<"First 3 rows of array: "<<endl;
cout<<array.at(0)<<" "<<gnums.at(0)<<" "<<gene_ids.at(0)<<endl; cout<<array.at(0)<<" "<<gnums.at(0)<<" "<<gene_ids.at(0)<<endl;
cout<<array.at(1)<<" "<<gnums.at(1)<<" "<<gene_ids.at(1)<<endl; cout<<array.at(1)<<" "<<gnums.at(1)<<" "<<gene_ids.at(1)<<endl;
...@@ -436,7 +481,7 @@ void mctr::run_it0() ...@@ -436,7 +481,7 @@ void mctr::run_it0()
cout<<"Entering recursive function find_matches_it0(...)..."<<endl; cout<<"Entering recursive function find_matches_it0(...)..."<<endl;
find_matches_it0(0,0,array.size()-1,"", aastr, astr); find_matches_it0(0,0,array.size()-1,"", aastr, astr);
cout<<"Escaped recursive function find_matches(...)!"<<endl; cout<<"Escaped recursive function find_matches(...)!"<<endl;
astr.close(); astr.close();
} }
cout<<"Done with mctr main algorithm."<<endl; cout<<"Done with mctr main algorithm."<<endl;
...@@ -3533,6 +3578,24 @@ int mctr::get_sos_cutoff() ...@@ -3533,6 +3578,24 @@ int mctr::get_sos_cutoff()
return 6.5*tag_length; return 6.5*tag_length;
} }
// Returns the first character of the first tag stored in
// <path>/<bfn>/TAGS/TAGS0.txt, where path and bfn are this object's members.
//
// The file is read as an integer (the tag count) followed by a
// whitespace-delimited tag; only the first tag is consumed.
//
// Terminates the program with exit(1) when the file cannot be opened, or when
// the count or the first tag cannot be read. The read check matters because an
// empty or malformed file would otherwise yield '\0' as the lead character
// without any diagnostic.
char mctr::get_pr_lead()
{
    const string infile = path+"/"+bfn+"/TAGS/TAGS0.txt";
    ifstream instream;
    instream.open(infile.c_str());
    if(instream.fail())
    {
        cout<<"In function mctr::get_pr_lead(): Failed to open infile to "<<infile<<endl;
        exit(1);
    }

    int num_tags = 0;
    string first_tag;
    // A successful string extraction always yields at least one character,
    // so first_tag[0] below is a real tag character.
    if(!(instream>>num_tags>>first_tag))
    {
        cout<<"In function mctr::get_pr_lead(): Failed to read the first tag from "<<infile<<endl;
        exit(1);
    }
    // Close before returning; the original placed close() after the return,
    // where it could never run.
    instream.close();
    return first_tag[0];
}
/*void mctr::fill_genesOI() /*void mctr::fill_genesOI()
{ {
ifstream instream; ifstream instream;
......
...@@ -12,6 +12,7 @@ int main(int argc, char* argv[]) ...@@ -12,6 +12,7 @@ int main(int argc, char* argv[])
if(argc<2) if(argc<2)
{ {
cout<<"Wrong input to mdist.cpp: ./mdist -p <full path> -f <filtering steps, =0 for unfiltered (e.g. all) data> -o <filtering option, =0 for unfiltered (e.g. all) data>"<<endl; cout<<"Wrong input to mdist.cpp: ./mdist -p <full path> -f <filtering steps, =0 for unfiltered (e.g. all) data> -o <filtering option, =0 for unfiltered (e.g. all) data>"<<endl;
exit(1);
} }
string full_path = argv[1]; string full_path = argv[1];
......
#include <iostream>
#include <fstream>
//#include "mefilter.h"
#include "util.h"
using namespace std;
// Forward declarations for helpers used by main(); their definitions are not
// visible in this part of the file.

// Called once per genome ordinal with the file list returned by
// util::get_files(). main() relies on `proteins` and `info_lines` being filled
// on return, and later indexes `info_lines` with gene ids taken from the tags
// (presumably both vectors are index-aligned, one entry per protein -- TODO confirm).
void populate(vector<string> files,vector<string>& proteins,vector<string>& info_lines);
// NOTE(review): presumably extracts a gene id from a string such as an info
// line; not called in the visible part of main() -- confirm against the definition.
int get_gene_id(string s);
// Presumably loads the conserved k-mers found under `consdirectory` into
// `conserved_kmers` (inferred from the names -- confirm).
// NOTE(review): the visible call in main() passes `consfile`, which is not
// declared in the visible code, and declares its vector as
// `vector<string> conserved_kmers();`, which C++ parses as a function declaration.
void read_in_conserved_kmers(string consdirectory,vector<string>& conserved_kmers);
int main(int argc, char* argv[])
{
if(argc<2)
{
cout<<"Wrong use of mefilter.cpp: ./mef <full_path_to_run>"<<endl;
exit(1);
}
string full_path=argv[1];
string consdirectory=argv[2];
cout<<"full_path="<<full_path;
util u;
string path,bfn;
cout<<"extracting paths..."<<endl;
u.extract_paths(full_path,path,bfn);
cout<<"path="<<path<<endl;
cout<<"bfn="<<bfn<<endl;
ofstream logstream;
cout<<"Opening mefilter.cpp log file..."<<endl;
string logfile = path+"/"+bfn+"/logfile_mefilter.txt";
logstream.open(logfile.c_str());
if(logstream.fail())
{
cout<<"Failed to open logstream to logfile="<<logfile<<endl;
exit(1);
}
else
{
cout<<"Successfully opened logstream to logfile="<<logfile<<endl;
logstream<<"full_path="<<full_path<<endl;
logstream<<"path="<<path<<endl;
logstream<<"bfn="<<bfn<<endl;
}
int tag_length;
u.get_tl2(full_path,tag_length);
logstream<<"tag_length="<<tag_length<<endl;
int total_size;
total_size = u.get_ts(full_path); //total number of organisms.
logstream<<"total_size="<<total_size<<endl;
cout<<"total_size="<<total_size<<endl;
vector<string> conserved_kmers();
read_in_conserved_kmers(consfile,conserved_kmers);
logstream<<"Starting loop through all genome ordinals..."<<endl;
cout<<"mefilter.cpp: Starting loop through all genome ordinals..."<<endl;
for(int i=0; i<total_size; i++)
{
//ordinal=i.
logstream<<"i="<<i<<endl;
cout<<"Doing ordinal i="<<i<<" out of "<<total_size<<endl;
vector<string> files = u.get_files(full_path,i);
cout<<"Size of files = "<<files.size()<<endl;
vector<string> proteins;
vector<string> info_lines;
populate(files,proteins,info_lines);
// vector<string> tags = u.get_tags(full_path,i);
vector<string> tags1;
vector<int> tags2;
vector<int> tags3;
u.get_tags(full_path,i,tags1,tags2,tags3);
cout<<"size of files = "<<files.size()<<endl;
cout<<"size of tags1 = "<<tags1.size()<<endl;
cout<<"size of tags2 = "<<tags2.size()<<endl;
cout<<"size of tags3 = "<<tags3.size()<<endl;
vector<int> gids;
for(int i=0; i<proteins.size(); i++)
{
gids.push_back(0);
}
//At this stage, all proteins and their info lines have been read in and are stored in the info_lines and proteins vectors. Similarly, all tags have been read in and are stored in tags.
int lindex=0;
int rindex=1;
while(lindex<tags1.size() && rindex<tags1.size())
{
if(tags1.at(lindex)[tag_length-1]!='^')
{
while(rindex<tags1.size() && tags1.at(lindex)==tags1.at(rindex))
{
rindex++;
}
}
int findex=rindex-1;
if(findex>lindex) //no cluster.
{
cout<<"Matches: "<<endl;
for(int j=lindex; j<=findex; j++)
{
int gid=tags3.at(j);
gids.at(gid)++;
cout<<tags1.at(j)<<endl;
cout<<info_lines.at(gid)<<endl;
}
cout<<endl;
}
lindex=rindex;
rindex=lindex+1;
}
cout<<"Gids vector: "<<endl;
for(int i=0; i<gids.size(); i++)
{
cout<<i<<" "<<gids.at(i)<<endl;
}
int ME_count=0;
for(int i=0; i<gids.size(); i++)
{
if(gids.at(i)>0)// && gids.at(i)>=(double)proteins.at(i).length()/2.0)
{