Commit e1929e68 authored by Raquel Bromberg's avatar Raquel Bromberg

Entropy calculation moved to its own program dentrop.cpp. 3 scripts added:...

Entropy calculation moved to its own program dentrop.cpp. 3 scripts added: dSTM.sh which runs SlopeTree from the info file generation through to the generation of the distance matrix; and dMT.sh and pFilt.sh which provide intermediate files for filtering procedures.
parent f84d0ba9
#!/bin/sh
# $1 = full path to directory containing FAA directory
# $2 = tag length
# $3 = 'B' for bacteria, 'A' for archaea, 'O' for other
./mif $1 $2
./sttag $1 -f 1 -s $3
./tmerg $1
#!/bin/sh
# $1 = full path to directory containing FAA directory
# $2 = tag length
# $3 = 'B' for bacteria, 'A' for archaea, 'O' for other
# $4 = path to directory containing names.dmp and nodes.dmp
./mif $1 $2
./sttag $1 -f 1 -s $3
wait
./tmerg $1
./cm $1
./dbc $1
./mtax $1 $4
./dent $1
./mdist $1
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <vector>
#include <cmath>
#include "util.h"
using namespace std;
void get_hist_data(string i, int& total_size_i, vector<int>& aa_array_i,string path_frag);
int main(int argc, char* argv[])
{
if(argc<2)
{
cout<<"Wrong use of dentrop. ./dentrop <full path to run>"<<endl;
exit(1);
}
string full_path=argv[1];
util u;
string path,bfn;
u.extract_paths(full_path,path,bfn);
cout<<"here4"<<endl;
int main_set_size,ref_set_size;
cout<<"here1"<<endl;
u.get_mss(path+"/"+bfn+"/"+bfn+"_info.txt",main_set_size);
cout<<"here2"<<endl;
u.get_rss(path+"/"+bfn+"/"+bfn+"_info.txt",ref_set_size);
cout<<"here3"<<endl;
ofstream outstream;
outstream.open( (path+"/"+bfn+"/"+bfn+"_entropies.txt").c_str());
if(outstream.fail())
{
cout<<"Failed to open outstream in info_file::write_out_aaentropies()"<<endl;
exit(1);
}
string path_frag=path+"/"+bfn+"/HIST/"+bfn+"_";
for(int i=0; i<ref_set_size+main_set_size-1; i++)
{
int total_size_i;
vector<int> aa_array_i;
get_hist_data(u.int_to_string(i),total_size_i,aa_array_i,path_frag);
for(int j=i+1; j<ref_set_size+main_set_size; j++)
{
int total_size_j;// = organisms.at(j).get_total_num_aa();
vector<int> aa_array_j;// = organisms.at(j).get_aa_array();
get_hist_data(u.int_to_string(j),total_size_j,aa_array_j,path_frag);
double entropy=0;
for(int k=0; k<aa_array_j.size(); k++)
{
double pi = (((double)aa_array_i.at(k)/(double)total_size_i)+((double)aa_array_j.at(k)/(double)total_size_j))/2.0;
entropy+=pi*log(pi);
}
entropy*=-1.0;
outstream<<i<<" "<<j<<" "<<entropy<<endl;
}
}
outstream.close();
}
void get_hist_data(string i, int& total_size_i, vector<int>& aa_array_i,string path_frag)
{
ifstream instream;
path_frag=path_frag+i+"_hist.txt";
instream.open(path_frag.c_str());
if(instream.fail())
{
cout<<"Failed to open instream to path_frag="<<path_frag<<endl;
exit(1);
}
int junk;
instream>>junk>>total_size_i;
for(int i=0; i<20; i++)
{
int aa_count;
double aa_freq;
instream>>aa_count>>aa_freq;
aa_array_i.push_back(aa_count);
}
instream.close();
}
#!/bin/sh
# $1 = full path to directory containing FAA directory
# $2 = tag length
# $3 = 'B' for bacteria, 'A' for archaea, 'O' for other
./mif $1 $2
./sttag $1 -f 1 -s $3
./tmerg $1
./filt $1
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment