This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository proteinortho.

commit a6e05f7c0c82073170f52b8322ca933e2c160488
Author: Andreas Tille <[email protected]>
Date:   Wed May 4 14:50:54 2016 +0200

    Imported Upstream version 5.13+dfsg
---
 Makefile                      |  17 +++---
 manual.html                   |   6 +-
 po2tree.pl                    |   4 +-
 po_tree.c                     |   8 +--
 proteinortho5.pl              |   4 +-
 proteinortho5_clean_edges.cpp | 136 ------------------------------------------
 proteinortho5_clustering.cpp  |  80 ++++++++++++++++++++++---
 7 files changed, 90 insertions(+), 165 deletions(-)

diff --git a/Makefile b/Makefile
index 3c8b861..648850e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,21 +1,18 @@
 
 INSTALLDIR=/usr/local/bin
 
-CPP      = g++
-CPPFLAGS   = -Wall -O3
+CPP = g++
+CPPFLAGS += -Wall -O3 -Wno-unused-result
 
-all: proteinortho5_clustering proteinortho5_clean_edges proteinortho5_tree
+all: proteinortho5_clustering proteinortho5_tree
 
 proteinortho5_clustering: proteinortho5_clustering.cpp
-       $(CPP) $(CPPFLAGS) -o $@ $<
+       $(CPP) $(CPPFLAGS) $(LDFLAGS) -o $@ $<
 
-proteinortho5_tree: proteinortho5_clustering.cpp
-       $(CPP) $(CPPFLAGS) -o $@ $<
+proteinortho5_tree: po_tree.c
+       $(CPP) $(CPPFLAGS) $(LDFLAGS) -o $@ $<
 
-proteinortho5_clean_edges: proteinortho5_clean_edges.cpp
-       $(CPP) $(CPPFLAGS) -o $@ $<
-
-install: proteinortho5.pl proteinortho5_clustering proteinortho5_singletons.pl 
proteinortho5_clean_edges2.pl ffadj_mcs.py po_tree po_tree.pl
+install: proteinortho5.pl proteinortho5_clustering proteinortho5_singletons.pl 
proteinortho5_clean_edges2.pl ffadj_mcs.py po2tree.pl proteinortho5_tree
        install -v $^ $(INSTALLDIR)
 
 test: proteinortho5.pl proteinortho5_clustering
diff --git a/manual.html b/manual.html
index fac0228..8ae8d68 100644
--- a/manual.html
+++ b/manual.html
@@ -4,7 +4,7 @@
        </head>
 <body>
 <h1>Proteinortho Manual / PoFF Manual</h1>
-<small>This manual corresponds to version 5.12</small>
+<small>This manual corresponds to version 5.13</small>
 <h2>Introduction</h2>
 Proteinortho is a tool to detect orthologous genes within different species.
 For doing so, it compares similarities of given gene sequences and clusters 
them to find significant groups.
@@ -38,8 +38,8 @@ The sources come with a precompiled version of Proteinortho 
for 64bit Linux. If
 <h3>Building and installing from source</h3>
 <ul>
 <li> Fetch the latest source code archive from <a 
href="http://www.bioinf.uni-leipzig.de/Software/proteinortho/"; 
target="_blank">www.bioinf.uni-leipzig.de/Software/proteinortho/</a>.
-<li> Extract the files e.g. via <code>tar -xzvf 
proteinortho_v5.06.tar.gz</code>
-<li> Change directory into the extracted folder e.g. via <code>cd 
proteinortho_v5.06</code>
+<li> Extract the files e.g. via <code>tar -xzvf 
proteinortho_v5.13.tar.gz</code>
+<li> Change directory into the extracted folder e.g. via <code>cd 
proteinortho_v5.13</code>
 <li> You can now run <code>./proteinortho5.pl</code> directly
 <li> If you want to recompile and install Proteinortho, type <code>make</code> 
followed <code>sudo make install</code> (requires root privileges).
 <li> In any case, run <code>make test</code> to make sure Proteinortho works 
as expected
diff --git a/po2tree.pl b/po2tree.pl
index 77d5435..e1a0590 100755
--- a/po2tree.pl
+++ b/po2tree.pl
@@ -34,7 +34,7 @@
 # @email [email protected]
 # @company Bioinformatics, University of Leipzig
 # @version 3.10
-# @date 08-12-2015
+# @date 16-03-2016
 #
 
##########################################################################################
 
@@ -142,7 +142,7 @@ system("cat '$file.tmp.matrix2' >>'$file.tmp.matrix'");
 
 # Run the main algorithm in C
 print STDERR "Calculating tree...\n";
-my $run = $scriptpath."po_tree";
+my $run = $scriptpath."proteinortho5_tree";
 my $out = qx($run $ARGV[0].tmp.matrix);
 $out =~ s/\[.+?\]//g;
 $out =~ s/\s:/:/g;
diff --git a/po_tree.c b/po_tree.c
index 8b6604d..769cf85 100644
--- a/po_tree.c
+++ b/po_tree.c
@@ -29,7 +29,7 @@
  *  @email [email protected]
  *  @company Bioinformatics, University of Leipzig
  *  @version 1.10 (for PO5)
- *  @date 2016-02-22
+ *  @date 2016-03-16
  */
 
 
@@ -105,7 +105,7 @@ int main(int argc, char** argv) {
 void builder(BOOL** matrix, Uint viecher, Uint ccs, char*** pnamen) {
        /* affinity matrix */
        int** aff = (int**)malloc(sizeof(int*)*viecher);
-       int i;
+       unsigned int i;
        /* allocate further memory for affinity matrix */
        for (i = 0; i<viecher; i++) {
                aff[i] = (int*)malloc(sizeof(int)*viecher);
@@ -146,7 +146,7 @@ void builder(BOOL** matrix, Uint viecher, Uint ccs, char*** 
pnamen) {
  *  returns:  nothing, but all involved data-structures represent the merge 
afterwards correctly
  */
 void merge (Uint* max, Uint ccs, Uint viecher, BOOL* away, BOOL** matrix, 
int** aff, char*** pnamen) {
-       int i, counter = 0;     
+       unsigned int i, counter = 0;    
 
        /* only keep components that are present in both species */
        /* foreach component */ 
@@ -199,7 +199,7 @@ void merge (Uint* max, Uint ccs, Uint viecher, BOOL* away, 
BOOL** matrix, int**
        baum = strcat(baum,(*pnamen)[max[1]]);
        baum = strcat(baum,":");
        baum = strcat(baum,length_1);
-/      baum = strcat(baum,")");
+       baum = strcat(baum,")");
 //     baum = strcat(baum,")[");
 //     baum = strcat(baum,anz);
 //     baum = strcat(baum,"]");
diff --git a/proteinortho5.pl b/proteinortho5.pl
index 719e64f..22bbe97 100755
--- a/proteinortho5.pl
+++ b/proteinortho5.pl
@@ -30,7 +30,7 @@
 # @author Marcus Lechner
 # @email [email protected]
 # @company University of Maruburg
-# @date 2016-02-22
+# @date 2016-04-22
 #
 
##########################################################################################
 
@@ -47,7 +47,7 @@ use Thread::Queue;
 
##########################################################################################
 # Variables
 
##########################################################################################
-our $version = "5.12";
+our $version = "5.13";
 our $step = 0;         # 0/1/2/3       -> do all / only apply step 1 / only 
apply step 2 / only apply step 3
 our $verbose = 1;      # 0/1           -> don't / be verbose
 our $debug = 0;                # 0/1           -> don't / show debug data
diff --git a/proteinortho5_clean_edges.cpp b/proteinortho5_clean_edges.cpp
deleted file mode 100644
index 0f81b3a..0000000
--- a/proteinortho5_clean_edges.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-#include <iostream>
-#include <sstream>
-#include <fstream>
-#include <string>
-#include <map>
-#include <vector>
-
-using namespace std;
-
-int parse_excluded_edges(string filename, map<string, int> &excluded_edges);
-void tokenize(const string& str, vector<string>& tokens, const string& 
delimiters);
-int rewrite_graph(const string in, map<string, int> exclude);
-string chomp(string &str);
-
-
-int main(const int argc, char *argv[]) {
-
-       string usage = "USAGE: "+string(argv[0])+" -e <EXCLUSION FILE> GRAPH1 
GRAPH2 ...";
-       
-       vector<string> graphs;
-       map<string, int> exclude;
-       for(int paras = 1; paras < argc; paras++) {
-               string arg = string(argv[paras]);
-               if( argc < 4 || arg == "-h" || arg == "--help") {
-                       cout << usage;
-                       return 0;
-               } else if(arg == "-e") {
-                       parse_excluded_edges(string(argv[++paras]), exclude);
-               } else {
-                       graphs.push_back(arg);
-               }
-       }
-
-       for(vector<string>::iterator it = graphs.begin(); it != graphs.end(); 
it++) {
-               rewrite_graph(*it, exclude);
-       }
-       
-       for(map<string, int>::iterator it = exclude.begin(); it != 
exclude.end(); it++) {
-               //cout << it->first << "\n";
-       }
-       
-}
-
-int rewrite_graph(const string in, map<string, int> exclude) {
-       ifstream graph_file(in.c_str());
-       if(!graph_file.is_open()) {
-               cerr << "could not read input file "+in;
-               return -1;
-       }       
-       
-       int count = 0;
-       string line;
-       while (getline(graph_file, line)) {
-               chomp(line);
-               vector<string> fields;
-               tokenize(line, fields, "\t");
-               if(fields.size() > 1 && fields[0].substr(0, 1) != "#") {
-                       string a = fields[0];
-                       string b = fields[1];
-                       string c = b+" "+a;
-                       if(a.compare(b) < 0 ) {
-                               c = a+" "+b;
-                       }
-                       //cout << c << endl;
-                       if(exclude.count(c) == 1) {
-                               count++;
-                               continue;
-                       }
-               }
-               
-               cout << line << endl;
-       }
-
-       graph_file.close();
-       
-       cerr << "# excluded " << count << " edges from the graph." << endl;
-       return 0;
-}
-
-string chomp(string &str) {
-       string::size_type pos = str.find_last_not_of("\n\r \t");
-       if(pos != string::npos) {
-               str = str.substr(0, pos+1);
-       }
-       return str;
-}
-    
-int parse_excluded_edges(string filename, map<string, int> &excluded_edges) {  
  
-    ifstream data(filename.c_str());
-    if(!data.is_open()) {
-       cerr << "could not open file"+filename;
-    }
-
-    string line;
-    while(getline(data,line)) {
-        
-        chomp(line);
-        vector<string> v;
-        tokenize(line, v, "\t");
-        
-        if(v.size() < 2) {
-               continue;
-        }
-        string a = v[0];
-        string b = v[1];
-        string c = b + " " + a;
-
-        if(a.compare(b) < 0) {
-               c = a + " " + b;
-        }
-        //cout << c << endl;
-               excluded_edges[c] = 1;
-    }
-    
-    data.close();
-    
-    return 0;
-}
-
-// Split a string at a certain delim
-void tokenize(const string& str, vector<string>& tokens, const string& 
delimiters = "\t") {
-    // Skip delimiters at beginning.
-    string::size_type lastPos = str.find_first_not_of(delimiters, 0);
-    // Find first "non-delimiter".
-    string::size_type pos = str.find_first_of(delimiters, lastPos);
-
-    while (string::npos != pos || string::npos != lastPos) {
-        // Found a token, add it to the vector.
-        tokens.push_back(str.substr(lastPos, pos - lastPos));
-        // Skip delimiters.  Note the "not_of"
-        lastPos = str.find_first_not_of(delimiters, pos);
-        // Find next "non-delimiter"
-        pos = str.find_first_of(delimiters, lastPos);
-    }
-}
-
diff --git a/proteinortho5_clustering.cpp b/proteinortho5_clustering.cpp
index a7bbba1..6e07ae5 100755
--- a/proteinortho5_clustering.cpp
+++ b/proteinortho5_clustering.cpp
@@ -3,7 +3,7 @@
  *     Reads edge list and splits connected components
  *     according to algebraic connectivity threshold
  *
- *     Last updated: 2014/07/07                
+ *     Last updated: 2016/04/22                
  *     Author: Marcus Lechner
  */
 
@@ -17,6 +17,7 @@
 #include <vector>
 #include <stack>
 #include <iomanip>
+#include <cstdlib>
 using namespace std;
 
 // Structs
@@ -252,7 +253,7 @@ void partition_graph() {
                // Connectivity analysis
                float connectivity = getConnectivity(current_group);
 
-               if (connectivity < param_con_threshold) {
+               if (connectivity < param_con_threshold && current_group.size() 
> 3) {
                        // Split groups is done in getConnectivity function
                        // Reset flags and repeat without incrementing protein 
counter
                        for (unsigned int i = 0; i < current_group.size(); i++) 
done[current_group[i]] = false;
@@ -260,7 +261,7 @@ void partition_graph() {
                }
        
                // Output
-               print_group(current_group,connectivity);
+               if (connectivity >= param_con_threshold) 
{print_group(current_group,connectivity);}
 
                // Print stats
                stats(protein_id,protein_counter);
@@ -300,6 +301,20 @@ vector<unsigned int> get_deg_one (vector<unsigned int>& 
nodes) {
 vector<float> generate_random_vector(const unsigned int size) {
        vector<float> x(size);
        for (unsigned int i = 0; i < size; i++) {
+         x[i] = (float)(rand() % 999+1)/1000;  // 1 bis 99
+               // at least one value must be different from the others but 
still within 0 and 1
+               if (i > 0 && x[i] == x[i-1]) {
+                       x[i] /= 3;
+               }
+//             cerr << x[i] << endl;
+       }
+       return x;
+}
+
+// Generate random vector x of size size
+vector<float> generate_random_vector_old(const unsigned int size) {
+       vector<float> x(size);
+       for (unsigned int i = 0; i < size; i++) {
          x[i] = (float)rand()/RAND_MAX;
                // at least one value must be different from the others but 
still within 0 and 1
                if (i > 0 && x[i] == x[i-1]) {
@@ -361,8 +376,43 @@ vector<float> getY(float max_degree, vector<float> x_hat, 
vector<float> x_new, v
        return x_hat;
 }
 
+
 // Remove edges connectiong two groups a and b
 void removeExternalEdges(map<unsigned int,bool>& a) {
+//             cerr << "+#" << endl;
+//             for (map<unsigned int,bool>::iterator it = a.begin(); it != 
a.end(); it++) {
+//                     unsigned int protein = it->first;
+//                     cerr << protein << endl;
+//             }
+//             cerr << "#-" << endl;
+               
+               // For each protein in a
+               for (map<unsigned int,bool>::iterator it = a.begin(); it != 
a.end(); it++) {
+                       unsigned int protein = it->first;
+                       // For each target
+                       vector<unsigned int> cleaned_edges;
+                       bool swap = false;
+                       for (vector<unsigned int>::iterator ita = 
graph[protein].edges.begin(); ita != graph[protein].edges.end(); ita++) {
+                               // If it is not present the own group, set flag
+                               if (a.find(*ita) == a.end()) {
+                                       graph_clean << graph[protein].full_name 
<< "\t" << graph[*ita ].full_name << endl; // Improved graph cleaning
+                                       swap = true;
+                               }
+                               // Otherwise, add it to the new edge list
+                               else {
+                                       cleaned_edges.push_back(*ita);
+                               }
+                       }
+                       // If changes were made, swap edge list with new one    
        
+                       if (swap) {
+                               cleaned_edges.swap(graph[protein].edges);
+                       }
+               }
+}
+
+
+// Remove edges connectiong two groups a and b
+void removeExternalEdges_old(map<unsigned int,bool>& a) {
                // For each protein in a
                for (map<unsigned int,bool>::iterator it = a.begin(); it != 
a.end(); it++) {
                        unsigned int protein = it->first;
@@ -388,7 +438,7 @@ void removeExternalEdges(map<unsigned int,bool>& a) {
 }
 
 // Split connected component according to eigenvector
-void splitGroups(vector<float>& y, vector<unsigned int>& nodes){
+void splitGroups(vector<float>& y, vector<unsigned int>& nodes, map<unsigned 
int,unsigned int> mapping){
        // Remove tree like structures in the beginning
        vector<unsigned int> one = get_deg_one(nodes);
        if (one.size() > 0) {
@@ -408,6 +458,8 @@ void splitGroups(vector<float>& y, vector<unsigned int>& 
nodes){
                else                    {groupB[nodes[i]] = true;} // cerr << 
graph[nodes[i]].full_name << " {color:#b01700}" << endl; }
        }
 
+//     cerr << groupA.size() << " -- " << groupB.size() << endl;
+
        // Catch error in laplacien calcs
        if (groupA.size() == 0 || groupB.size() == 0) {
                throw "Failed to partition subgraph! This might lead to an 
infinit loop. Please submit the .blastgraph file to 
[email protected] to help fixing this issue.";
@@ -418,6 +470,18 @@ void splitGroups(vector<float>& y, vector<unsigned int>& 
nodes){
 }
 
 float getConnectivity(vector<unsigned int>& nodes) {
+       // Special cases hotfix
+       if (nodes.size() == 2) {return 1;}
+
+       if (nodes.size() == 3) {
+               vector<unsigned int> min = get_deg_one(nodes);
+               // fully connected
+               if (min.size() == 0) {return 1;}
+               // not
+               else {return 0.667;}
+       }
+       // Hotfix end
+
        // Get max degree of nodes
        unsigned int max_degree = max_deg(nodes);
        
@@ -445,17 +509,17 @@ float getConnectivity(vector<unsigned int>& nodes) {
                x_hat = makeOrthogonal(y);
                // Get lenght (lambda) & normalize vector
                norm = nomalize(x_hat, &current_length);
-//             cerr << "IT: " << current_length << endl;
-               if (abs(current_length-last_length) < 0.000333 && iter >= 10) 
break;    // min 10 iterations (prevent convergence by chance)
+               if (abs(current_length-last_length) < 0.0001 && iter >= 20) 
break;      // min 20 iterations (prevent convergence by chance)
        }
-
        //      cerr << nodes.size() << " nodes done after " << iter << " 
iterations" << endl;
 
        float connectivity = (-current_length+2*max_degree)/(nodes.size());
 
+//     cerr << nodes.size() << " " << connectivity << endl;
+
        // Split groups if connectivity is too low, remove tree like structures 
that might have arosen
        if (connectivity < param_con_threshold) {
-               splitGroups(x_hat, nodes);
+               splitGroups(x_hat, nodes, mapping);
        }
        
        return connectivity;

-- 
Alioth's /usr/local/bin/git-commit-notice on 
/srv/git.debian.org/git/debian-med/proteinortho.git

_______________________________________________
debian-med-commit mailing list
[email protected]
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit

Reply via email to