Commit b06e34d3 authored by David Trudgian

Few more optimizations

parent 3789dd5b
......@@ -9,7 +9,7 @@ sttag: sttag.cpp sttagger.h org.h util.h amino_acids.h
g++ $(CPPFLAGS) sttag.cpp -fopenmp -o sttag
tmerg: tmerg.cpp tmerg.h
g++ $(CPPFLAGS) tmerg.cpp -o tmerg
g++ $(CPPFLAGS) tmerg.cpp -fopenmp -o tmerg
filt: filter.cpp filter.h
g++ $(CPPFLAGS) filter.cpp -o filt
......
......@@ -5,6 +5,7 @@
#include <fstream>
#include <vector>
#include <algorithm>
#include <numeric>
#include <set>
#include <cstdlib>
#include <sstream>
......@@ -182,12 +183,16 @@ void proteome_v2::mark_keepers(vector<string> keepers)
for(int i=0; i<proteins.size(); i++)
{
for(int j=0; j<proteins.at(i).get_sequence().length(); j++)
string prot_seq = proteins.at(i).get_sequence();
int prot_length = prot_seq.length();
for(int j=0; j<prot_length; j++)
{
for(int f=0; f<keepers.size(); f++)
{
// if(keepers.at(f)==proteins.at(i).get_sequence().substr(j,keepers.at(f).length()))
if(keeper_present(keepers.at(f),proteins.at(i).get_sequence().substr(j,keepers.at(f).length())))
if(keeper_present(keepers.at(f), prot_seq.substr(j,keepers.at(f).length())))
{
cout<<"Marking keeper: i="<<i<<endl<<keepers.at(f)<<endl<<proteins.at(i).get_info_line()<<endl<<proteins.at(i).get_sequence()<<endl;
proteins.at(i).mark_keeper();
......@@ -1613,38 +1618,47 @@ void proteome_v2::write_out_gene(int index)
// Reports whether the string `s` passes validation.
// As displayed: any character outside aas.valid_aas makes the result false;
// when SUMOFSQUARES is set the verdict is whatever check_composition(s) returns;
// otherwise the result is true.
// NOTE(review): this span looks like a rendered diff hunk with the removed and
// added lines shown together (duplicate SUMOFSQUARES test, two consecutive
// returns, the valid_aas scan both up front and inside the else) — confirm the
// real statement order against the actual file before relying on it.
bool proteome_v2::is_valid(string s)
{
// Index of the first character of s that is not in aas.valid_aas, or npos if none.
std::size_t found = s.find_first_not_of(aas.valid_aas);
if(found != std::string::npos ){
return false;
}
if(SUMOFSQUARES)
if(SUMOFSQUARES)
{
bool result = check_composition(s);
// `true & result` evaluates to `result`.
return(true & result);
// Not reached as displayed: the return above always exits first.
return check_composition(s);
}else{
// This `found` shadows the outer one and repeats the same scan.
std::size_t found = s.find_first_not_of(aas.valid_aas);
if(found != std::string::npos ){
return false;
}
}
return true;
}
bool proteome_v2::check_composition(string s)
{
vector<int> array;
for(int i=0; i<20; i++)
int array[20];
for(int i=0; i<20; i++)
{
char aa = aas.get_aa(i);
int aa_count = std::count(s.begin(),s.end(),aa);
array.push_back( aa_count );
array[i] = aa_count;
}
int aa_total=0;
int result=0;
// for(int i=0; i<s.length(); i++)
for(int i=0; i<array.size(); i++)
for(int i=0; i<20; i++)
{
aa_total += array[i];
result+=array[i]*array[i];
}
// Invalid characters?
if( aa_total < s.length() )
{
return false;
}
if(result>=0 && result<sosv.size())
{
sosv.at(result)++;
......@@ -1877,22 +1891,13 @@ bool proteome_v2::keeper_present(string s1,string s2)
int count_mismatches=0;
for(int i=0; i<s1.length(); i++)
{
if(s1[i]!=s2[i])
{
count_mismatches++;
}
count_mismatches += (s1[i] != s2[i]);
}
double val = (double)count_mismatches/(double)s1.length();
if(val>KEEPERS_CUTOFF)
{
return false;
}
else
{
return true;
}
return val<KEEPERS_CUTOFF;
}
void proteome_v2::wokl(int id, ofstream& outstream)
......
......@@ -17,6 +17,7 @@ into a final sorted list.
#include <cstdlib>
#include <stdlib.h>
#include <stdio.h>
#include <omp.h>
#include "file_info.h"
#include "amino_acids.h"
......@@ -202,8 +203,14 @@ void tmerg::read_in_tags(vector<string>& tags, char current_amino_acid, int& num
//Need this number because a set of merged tags for this set is also written out
//and the total is always included at the top of these merged files.
#pragma omp parallel for
for(int i=0; i<fiv.size(); i++)
{
int file_refs = 0;
vector<string> file_tags;
// Educated guess to cut down vector resizing
file_tags.reserve( fiv.at(i)->total_tags / 20);
cout<<"tmerg::read_in_tags(vector<string>, char, int&) : current_amino_acid = "<<current_amino_acid<<endl;
if(TMERG_DEBUG)
{
......@@ -217,14 +224,14 @@ void tmerg::read_in_tags(vector<string>& tags, char current_amino_acid, int& num
while( (fiv.at(i)->strings_read_in<fiv.at(i)->total_tags) && fiv.at(i)->s[0]==current_amino_acid)
{
string ctag = fiv.at(i)->get_current_line();
tags.push_back(ctag);
file_tags.push_back(ctag);
istringstream iss(ctag);
string e;
int gnum;
iss>>e>>gnum;
if(gnum<ref_set_size)
{
num_refs++;
file_refs++;
}
fiv.at(i)->read_line();
......@@ -234,6 +241,14 @@ void tmerg::read_in_tags(vector<string>& tags, char current_amino_acid, int& num
cout<<"counter = "<<counter<<" ctag = "<<ctag<<endl;
}
}
#pragma omp critical
{
num_refs += file_refs;
tags.reserve(tags.size() + file_tags.size());
tags.insert(tags.end(), file_tags.begin(), file_tags.end());
}
cout<<"tmerg::read_in_tags(vector<string>, char, int&) : ";
cout<<"Merged "<<i+1<<" out of "<<fiv.size()<<". tags.size() = "<<tags.size()<<endl;
if(TMERG_DEBUG)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment