[pktools] 27/375: added feature selection

Bas Couwenberg Wed, 03 Dec 2014 13:55:49 -0800

This is an automated email from the git hooks/post-receive script.

sebastic-guest pushed a commit to branch upstream-master
in repository pktools.


commit 094c17f13074e221f93e7c4f4e560952c79e7f82
Author: Pieter Kempeneers <kempe...@gmail.com>
Date:   Wed Jan 2 18:18:19 2013 +0100

    added feature selection
---
 ChangeLog                        |   3 +
 src/algorithms/FeatureSelector.h | 777 +++++++++++++++++++++++++++++++++++++++
 src/algorithms/Makefile.am       |   4 +-
 src/algorithms/Makefile.in       |  14 +-
 src/apps/Makefile.am             |   3 +-
 src/apps/Makefile.in             |  27 +-
 src/apps/pkclassify_svm.cc       |   5 +-
 src/apps/pkfs_svm.cc             | 549 +++++++++++++++++++++++++++
 8 files changed, 1362 insertions(+), 20 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 3cfd117..6364779 100755
--- a/ChangeLog
+++ b/ChangeLog
@@ -61,6 +61,9 @@ version 2.4
        options min and max are now set with -min (--min) and -max (--max)
  - pkclassify_svm
        do not output input file if no input data was defined in verbose mode
+       update of header information
+ - pkfs_svm
+       feature selection tool for svm classification (added)
  - pkascii2ogr
        tool to create simple vector files from coordinates in ASCII file 
(points or polygon)
  - pksensormodel
diff --git a/src/algorithms/FeatureSelector.h b/src/algorithms/FeatureSelector.h
new file mode 100644
index 0000000..503dfac
--- /dev/null
+++ b/src/algorithms/FeatureSelector.h
@@ -0,0 +1,777 @@
+/**********************************************************************
+FeatureSelector.h: select features, typical use: feature selection for 
classification
+Copyright (C) 2008-2012 Pieter Kempeneers
+
+This file is part of pktools
+
+pktools is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+pktools is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with pktools.  If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+#ifndef _FEATURESELECTOR_H_
+#define _FEATURESELECTOR_H_
+
+#include <math.h>
+#include <vector>
+#include <list>
+#include <map>
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+#include "PosValue.h"
+#include "Vector2d.h"
+#include "gsl/gsl_combination.h"
+
+using namespace std;
+
+class FeatureSelector
+{
+ public:
+  FeatureSelector(){};
+  ~FeatureSelector(){};
+  template<class T> double forwardUnivariate(Vector2d<T>& v1, Vector2d<T>& v2, 
double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double 
prior2), list<int>& subset, int maxFeatures, double prior1=0.5, double 
prior2=0.5, bool verbose=false);
+  template<class T> double forwardUnivariate(vector< Vector2d<T> >& v, double 
(*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int maxFeatures, 
bool verbose=false);
+  template<class T> double forwardUnivariate(map<string, Vector2d<T> > &v, 
map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, 
map<string, vector<T> >&), list<int>& subset, int maxFeatures, bool 
verbose=false);
+    template<class T> double forward(Vector2d<T>& v1, Vector2d<T>& v2, double 
(*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double 
prior2), list<int>& subset, int maxFeatures, double prior1=0.5, double 
prior2=0.5, bool verbose=false);
+    template<class T> double forward(vector< Vector2d<T> >& v, double 
(*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int maxFeatures, 
bool verbose=false);
+  template<class T> double forward(map<string, Vector2d<T> > &v, map<string, 
vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, map<string, 
vector<T> >&), list<int>& subset, int maxFeatures, bool verbose=false);
+    template<class T> double backward(vector< Vector2d<T> >& v, double 
(*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int minFeatures, 
bool verbose=false);
+    template<class T> double backward(Vector2d<T>& v1, Vector2d<T>& v2, double 
(*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double 
prior2), list<int>& subset, int minFeatures, double prior1=0.5, double 
prior2=0.5,bool verbose=false);
+    template<class T> double backward(map<string, Vector2d<T> > &v, 
map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, 
map<string, vector<T> >&), list<int>& subset, int minFeatures, bool 
verbose=false);
+    template<class T> double floating(vector< Vector2d<T> >& v, double 
(*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int maxFeatures=0, 
bool verbose=false);
+    template<class T> double floating(Vector2d<T>& v1, Vector2d<T>& v2, double 
(*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double 
prior2), list<int>& subset, int maxFeatures=0, double prior1=0.5, double 
prior2=0.5,bool verbose=false);
+    template<class T> double floating(map<string, Vector2d<T> > &v, 
map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, 
map<string, vector<T> >&), list<int>& subset, int maxFeatures, bool 
verbose=false);
+  template<class T> double bruteForce(Vector2d<T>& v1, Vector2d<T>& v2, double 
(*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double 
prior2), list<int>& subset, int maxFeatures=0, double prior1=0.5, double 
prior2=0.5, bool verbose=false);
+  
+  private:
+    template<class T> double addFeature(vector< Vector2d<T> >& v, double 
(*getCost)(const vector< Vector2d<T> >&), list<int>& subset, bool 
verbose=false);
+    template<class T> double addFeature(Vector2d<T>& v1, Vector2d<T>& v2, 
double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double 
prior2), list<int>& subset, double prior1=0.5, double prior2=0.5, bool 
verbose=false);
+    template<class T> double addFeature(map<string, Vector2d<T> > &v, 
map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, 
map<string, vector<T> >&), list<int>& subset, bool verbose=false);
+  template<class T> double removeFeature(vector< Vector2d<T> >& v, double 
(*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int& r, bool 
verbose=false);  
+    template<class T> double removeFeature(Vector2d<T>& v1, Vector2d<T>& v2, 
double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double 
prior2), list<int>& subset,int& r, double prior1=0.5, double prior2=0.5, bool 
verbose=false);
+    template<class T> double removeFeature(map<string, Vector2d<T> > &v, 
map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, 
map<string, vector<T> >&), list<int>& subset,int& r, bool verbose=false);
+};
+
+//sequential forward selection Univariate (N single best features)
+template<class T> double FeatureSelector::forwardUnivariate(Vector2d<T>& v1, 
Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, 
double prior1, double prior2), list<int>& subset, int maxFeatures, double 
prior1, double prior2, bool verbose){
+  int maxLevels=v1[0].size();
+  if(!maxFeatures)
+    maxFeatures=maxLevels;
+  int k=subset.size();
+  if(k>=maxFeatures)
+    return -1;
+  vector<PosValue> cost(maxLevels);
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  Vector2d<T> tmp1(v1.size());
+  Vector2d<T> tmp2(v2.size());
+  for(int ilevel=0;ilevel<maxLevels;++ilevel){
+    if(find(tmpset.begin(),tmpset.end(),ilevel)==tmpset.end()){
+      tmpset.push_back(ilevel);
+      v1.selectCols(tmpset,tmp1);
+      v2.selectCols(tmpset,tmp2);
+      try{
+       PosValue pv;
+       pv.position=ilevel;
+       pv.value=getCost(tmp1,tmp2,prior1,prior2);
+       cost[ilevel]=pv;
+      }
+      catch(...){
+       PosValue pv;
+       pv.position=ilevel;
+       pv.value=-1;
+       cost[ilevel]=pv;
+      }
+      tmpset.pop_back();
+    }
+  }
+  sort(cost.begin(),cost.end(),Compare_PosValue());//decreasing order
+  int ilevel=0;
+  while((subset.size()<maxFeatures)&&(ilevel<maxLevels)){
+    if(cost[ilevel].value>0)
+      subset.push_back(cost[ilevel].position);
+    if(verbose)
+      cout << "feature " << subset.back() << " has cost: " << 
cost[ilevel].value << endl;
+    ++ilevel;
+  }
+  double maxCost=-1;
+  while(subset.size()){
+    v1.selectCols(subset,tmp1);
+    v2.selectCols(subset,tmp2);
+    try{
+      maxCost=getCost(tmp1,tmp2,prior1,prior2);
+    }
+    catch(...){
+      subset.pop_back();
+      continue;
+    }
+    return maxCost;
+  }
+}
+
+template<class T> double FeatureSelector::forwardUnivariate(vector< 
Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& 
subset, int maxFeatures=0, bool verbose){
+  int maxLevels=v[0][0].size();
+  if(!maxFeatures)
+    maxFeatures=maxLevels;
+  int k=subset.size();
+  if(k>=maxFeatures)
+    return -1;
+  vector<PosValue> cost(maxLevels);
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  vector< Vector2d<T> > tmp(v.size());
+  for(int ilevel=0;ilevel<maxLevels;++ilevel){
+    if(find(tmpset.begin(),tmpset.end(),ilevel)==tmpset.end()){
+      tmpset.push_back(ilevel);
+      for(int iclass=0;iclass<v.size();++iclass){
+//     tmp[iclass].resize(v[iclass].size());
+       v[iclass].selectCols(tmpset,tmp[iclass]);
+      }
+      try{
+       PosValue pv;
+       pv.position=ilevel;
+       pv.value=getCost(tmp);
+       cost[ilevel]=pv;
+      }
+      catch(...){
+       PosValue pv;
+       pv.position=ilevel;
+       pv.value=-1;
+       cost[ilevel]=pv;
+      }
+      tmpset.pop_back();
+    }
+  }
+  sort(cost.begin(),cost.end(),Compare_PosValue());//decreasing order
+  int ilevel=0;
+  while((subset.size()<maxFeatures)&&(ilevel<maxLevels)){
+    if(cost[ilevel].value>0)
+      subset.push_back(cost[ilevel].position);
+    if(verbose)
+      cout << "feature " << subset.back() << " has cost: " << 
cost[ilevel].value << endl;
+    ++ilevel;
+  }
+  double maxCost=-1;
+  while(subset.size()){
+    for(int iclass=0;iclass<v.size();++iclass){
+//       tmp[iclass].resize(v[iclass].size());
+      v[iclass].selectCols(subset,tmp[iclass]);
+    }
+    try{
+      maxCost=getCost(tmp);
+    }
+    catch(...){
+      subset.pop_back();
+      continue;
+    }
+    return maxCost;
+  }
+}
+
+//sequential forward selection Multivariate (Combination of N best features)
+template<class T> double FeatureSelector::forward(Vector2d<T>& v1, 
Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, 
double prior1, double prior2), list<int>& subset, int maxFeatures, double 
prior1, double prior2, bool verbose){
+  //Select feature with the best value (get maximal cost for 1 feature)
+  double maxCost=0;
+  int maxLevels=v1[0].size();
+  if(!maxFeatures)
+    maxFeatures=maxLevels;
+  while(subset.size()<maxFeatures){
+    maxCost=addFeature(v1,v2,*getCost,subset,verbose);
+    if(verbose)
+      cout << "added " << subset.back() << ", " << subset.size() << "/" << 
maxFeatures << " features selected with cost: " << maxCost << endl;
+  }//while
+  return maxCost;
+}
+
+
+//sequential forward selection Multivariate (Combination of N best features)
+template<class T> double FeatureSelector::forward(vector< Vector2d<T> >& v, 
double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int 
maxFeatures=0, bool verbose){
+  //Select feature with the best value (get maximal cost for 1 feature)
+  double maxCost=0;
+  int maxLevels=v[0][0].size();
+  if(!maxFeatures)
+    maxFeatures=maxLevels;
+  while(subset.size()<maxFeatures){
+    maxCost=addFeature(v,*getCost,subset,verbose);
+    if(verbose)
+      cout << "added " << subset.back() << ", " << subset.size() << "/" << 
maxFeatures << " features selected with cost: " << maxCost << endl;
+  }//while
+  return maxCost;
+}
+
+template<class T> double FeatureSelector::forward(map<string, Vector2d<T> > 
&v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, 
map<string, vector<T> >&), list<int>& subset, int maxFeatures, bool verbose)
+{
+  //Select feature with the best value (get maximal cost for 1 feature)
+  double maxCost=0;
+  int maxLevels=(v.begin()->second)[0].size();
+  if(!maxFeatures)
+    maxFeatures=maxLevels;
+  while(subset.size()<maxFeatures){
+    maxCost=addFeature(v,y,*getCost,subset,verbose);
+    if(verbose)
+      cout << "added " << subset.back() << ", " << subset.size() << "/" << 
maxFeatures << " features selected with cost: " << maxCost << endl;
+  }//while
+  return maxCost;
+}
+
+//sequential backward selection
+template<class T> double FeatureSelector::backward(Vector2d<T>& v1, 
Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, 
double prior1, double prior2), list<int>& subset, int minFeatures, double 
prior1, double prior2, bool verbose){
+  //Select features with least effect on cost when removed (obtain minFeatures 
eventually)
+  double maxCost=0;
+  int removedFeature;
+  if(subset.empty()){
+    for(int iFeature=0;iFeature<v1[0].size();++iFeature)
+      subset.push_back(iFeature);
+  }//if
+  if(subset.size()==minFeatures)
+    maxCost=getCost(v1,v2,prior1,prior2);
+  while(subset.size()>minFeatures){
+    maxCost=removeFeature(v1,v2,*getCost,subset,removedFeature,verbose);
+    if(verbose)
+      cout << "removed " << removedFeature << ", " << subset.size() << "/" << 
minFeatures << " features remain with cost: " << maxCost << endl;
+  }//while
+  return maxCost;
+}
+
+//sequential backward selection
+template<class T> double FeatureSelector::backward(vector< Vector2d<T> >& v, 
double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int 
minFeatures, bool verbose){
+  //Select features with least effect on cost when removed (obtain minFeatures 
eventually)
+  double maxCost=0;
+  int removedFeature;
+  if(subset.empty()){
+    for(int iFeature=0;iFeature<v[0][0].size();++iFeature)
+      subset.push_back(iFeature);
+  }//if
+  if(subset.size()==minFeatures)
+    maxCost=getCost(v);
+  while(subset.size()>minFeatures){
+    maxCost=removeFeature(v,*getCost,subset,removedFeature,verbose);
+    if(verbose)
+      cout << "removed " << removedFeature << ", " << subset.size() << "/" << 
minFeatures << " features remain with cost: " << maxCost << endl;
+  }//while
+  return maxCost;
+}
+
+template<class T> double FeatureSelector::backward(map<string, Vector2d<T> > 
&v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, 
map<string, vector<T> >&), list<int>& subset, int minFeatures, bool verbose){
+  //Select features with least effect on cost when removed (obtain minFeatures 
eventually)
+  double maxCost=0;
+  int removedFeature;
+  if(subset.empty()){
+    for(int iFeature=0;iFeature<(v.begin()->second)[0].size();++iFeature)
+      subset.push_back(iFeature);
+  }//if
+  if(subset.size()==minFeatures)
+    maxCost=getCost(v,y);
+  while(subset.size()>minFeatures){
+    maxCost=removeFeature(v,y,*getCost,subset,removedFeature,verbose);
+    if(verbose)
+      cout << "removed " << removedFeature << ", " << subset.size() << "/" << 
minFeatures << " features remain with cost: " << maxCost << endl;
+  }//while
+  return maxCost;
+}
+
+//floating search
+template<class T> double FeatureSelector::floating(Vector2d<T>& v1, 
Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, 
double prior1, double prior2), list<int>& subset, int maxFeatures, double 
prior1, double prior2, bool verbose){
+  vector<T> cost;
+  int maxLevels=v1[0].size();
+  if(maxFeatures<1)
+    maxFeatures=maxLevels;
+//   int k=0;
+  int initialK=subset.size();
+  int k=initialK;
+  int stopK=initialK+1;
+  if(initialK>=maxFeatures)
+    return -1;
+  cost.push_back(-1);//init 0 features as cost -1
+  cost.push_back(addFeature(v1,v2,*getCost,subset,verbose));
+  ++k;
+  assert(subset.size()==k);
+  if(verbose)
+    cout << "added " << subset.back() << ", " << subset.size() << "/" << 
maxFeatures << " features selected with cost: " << cost.back() << endl;
+  while(k<maxFeatures){
+    cost.push_back(addFeature(v1,v2,*getCost,subset,verbose));
+    ++k;
+    assert(subset.size()==k);
+    if(verbose)
+    cout << "added " << subset.back() << ", " << subset.size() << "/" << 
maxFeatures << " features selected with cost: " << cost.back() << endl;
+//     while(k>1){
+    while(k>stopK){
+      int x_r;
+      double cost_r=removeFeature(v1,v2,*getCost,subset,x_r,verbose);
+      if(cost_r>cost[k-1]){
+       --k;
+       assert(subset.size()==k);
+       cost[k]=cost_r;
+       cost.pop_back();
+       if(verbose)
+         cout << "removed " << x_r << ", " << subset.size() << "/" << 
maxFeatures << " features remain with cost: " << cost_r << endl;
+       continue;
+      }
+      else if(cost_r>=0){
+       subset.push_back(x_r);
+       break;
+      }
+      else if(verbose)
+       cout << "could not remove any feature" << endl;
+      cost.pop_back();
+    }
+  }
+  return cost.back();
+}
+
+//floating search
+template<class T> double FeatureSelector::floating(vector< Vector2d<T> >& v, 
double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int 
maxFeatures, bool verbose){
+  vector<T> cost;
+  int maxLevels=v[0][0].size();
+  if(maxFeatures<1)
+    maxFeatures=maxLevels;
+  int k=subset.size();
+  if(k>=maxFeatures)
+    return -1;
+  cost.push_back(-1);//init 0 features as cost -1
+  cost.push_back(addFeature(v,*getCost,subset,verbose));
+  ++k;
+  if(verbose)
+    cout << "added " << subset.back() << ", " << cost.size()-1 << "/" << 
maxFeatures << " features selected with cost: " << cost.back() << endl;
+  while(k<maxFeatures){
+    cost.push_back(addFeature(v,*getCost,subset,verbose));
+    ++k;
+    if(verbose)
+    cout << "added " << subset.back() << ", " << cost.size()-1 << "/" << 
maxFeatures << " features selected with cost: " << cost.back() << endl;
+    while(k>1){
+      int x_r;
+      double cost_r=removeFeature(v,*getCost,subset,x_r,verbose);
+      if(cost_r>cost[k-1]){
+       --k;
+       cost[k]=cost_r;
+       cost.pop_back();
+       if(verbose)
+         cout << "removed " << x_r << ", " << cost.size()-1 << "/" << 
maxFeatures << " features remain with cost: " << cost_r << endl;
+       continue;
+      }
+      else if(cost_r>=0){
+       subset.push_back(x_r);
+       break;
+      }
+      else if(verbose)
+       cout << "could not remove any feature" << endl;
+      cost.pop_back();
+    }
+  }
+  return cost.back();
+}
+
+//floating search
+template<class T> double FeatureSelector::floating(map<string, Vector2d<T> > 
&v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, 
map<string, vector<T> >&), list<int>& subset, int maxFeatures, bool verbose){
+  vector<T> cost;
+  int maxLevels=(v.begin()->second)[0].size();
+  if(maxFeatures<1)
+    maxFeatures=maxLevels;
+  int k=subset.size();
+  if(k>=maxFeatures)
+    return -1;
+  cost.push_back(-1);//init 0 features as cost -1
+  cost.push_back(addFeature(v,y,*getCost,subset,verbose));
+  ++k;
+  if(verbose)
+    cout << "added " << subset.back() << ", " << cost.size()-1 << "/" << 
maxFeatures << " features selected with cost: " << cost.back() << endl;
+  while(k<maxFeatures){
+    cost.push_back(addFeature(v,y,*getCost,subset,verbose));
+    ++k;
+    if(verbose)
+    cout << "added " << subset.back() << ", " << cost.size()-1 << "/" << 
maxFeatures << " features selected with cost: " << cost.back() << endl;
+    while(k>1){
+      int x_r;
+      double cost_r=removeFeature(v,y,*getCost,subset,x_r,verbose);
+      if(cost_r>cost[k-1]){
+       --k;
+       cost[k]=cost_r;
+       cost.pop_back();
+       if(verbose)
+         cout << "removed " << x_r << ", " << cost.size()-1 << "/" << 
maxFeatures << " features remain with cost: " << cost_r << endl;
+       continue;
+      }
+      else if(cost_r>=0){
+       subset.push_back(x_r);
+       break;
+      }
+      else if(verbose)
+       cout << "could not remove any feature" << endl;
+      cost.pop_back();
+    }
+  }
+  return cost.back();
+}
+
+//brute force search (search for all possible combinations and select best)
+template<class T> double FeatureSelector::bruteForce(Vector2d<T>& v1, 
Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, 
double prior1, double prior2), list<int>& subset, int maxFeatures, double 
prior1, double prior2, bool verbose){
+  int maxLevels=v1[0].size();
+  if(maxFeatures<1)
+    maxFeatures=maxLevels;
+  int k=subset.size();
+  if(k>=maxFeatures)
+    return -1;
+//   gslmm::combination c(v1[0].size(),maxFeatures,false);
+  gsl_combination *c;
+  c=gsl_combination_calloc(v1[0].size(),maxFeatures);
+  
+  list<int> tmpset;//temporary set of selected features (levels)
+  Vector2d<T> tmp1(v1.size());
+  Vector2d<T> tmp2(v2.size());
+  list<int> catchset;//restore set in case of catch all the way to last level 
(no better cost)
+  //initialize maxCost with actual cost for current subset (-1 if empty 
subset) 
+  double maxCost=-1;
+  double cost;
+  if(subset.size()>=maxLevels)
+    return maxCost;
+//   c.first();
+  gsl_combination_next(c);
+  do{
+    for(int ifeature=0;ifeature<maxFeatures;++ifeature)
+      tmpset.push_back(c->data[ifeature]);
+    v1.selectCols(tmpset,tmp1);
+    v2.selectCols(tmpset,tmp2);
+    try{
+      cost=getCost(tmp1,tmp2,prior1,prior2);
+    }
+    catch(...){ //singular matrix encountered
+      catchset=tmpset;//this tmpset resulted in failure of getCost
+      if(verbose){
+       cout << "Could not get cost from set: " << endl;
+       gsl_combination_fprintf(stdout,c," %u");
+       printf("\n");
+//     c.print(stdout);
+      }      
+      tmpset.clear();
+      continue;
+    }
+    if(maxCost<cost){ //set with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+    }
+    tmpset.clear();
+  }while(gsl_combination_next(c)==GSL_SUCCESS);
+  gsl_combination_free(c);
+//   }while(c.next());  
+  if(maxCost<0) //no level added to better maxCost than current subset 
(catchset)
+    subset=catchset;
+  //test: assert list contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+
+template<class T> double FeatureSelector::addFeature(Vector2d<T>& v1, 
Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, 
double prior1, double prior2), list<int>& subset, double prior1, double prior2, 
bool verbose){
+  //select feature with the best value (get maximal cost for 1 feature)
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  list<int> catchset;//restore set in case of catch all the way to last level 
(no better cost)
+  //initialize maxCost with actual cost for current subset (-1 if empty 
subset) 
+  double maxCost=-1;
+//   if(subset.size()){
+//     try{
+//       v1.selectCols(subset,tmp1);
+//       v2.selectCols(subset,tmp2);
+//       maxCost=getCost(tmp1,tmp2);
+//     }
+//     catch(double determinant){
+//       return -1;
+//     }
+//   }
+  double cost;
+  int maxLevels=v1[0].size();
+  if(subset.size()>=maxLevels)
+    return maxCost;
+  for(int ilevel=0;ilevel<maxLevels;++ilevel){
+    Vector2d<T> tmp1(v1.size());
+    Vector2d<T> tmp2(v2.size());
+    if(find(tmpset.begin(),tmpset.end(),ilevel)!=tmpset.end())
+      continue;
+    tmpset.push_back(ilevel);
+    v1.selectCols(tmpset,tmp1);
+    v2.selectCols(tmpset,tmp2);
+    try{
+      cost=getCost(tmp1,tmp2,prior1,prior2);
+    }
+    catch(...){
+      catchset=tmpset;//this tmpset resulted in singular matrix
+      if(verbose)
+       cout << "Could not add feature " << tmpset.back() << endl;
+      tmpset.pop_back();
+      continue;
+    }
+    if(maxCost<cost){ //level with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+    }
+    tmpset.pop_back();
+  }
+  if(maxCost<0) //no level added to better maxCost than current subset 
(catchset)
+    subset=catchset;
+  //test: assert list contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+
+template<class T> double FeatureSelector::addFeature(vector< Vector2d<T> >& v, 
double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, bool 
verbose){
+  //select feature with the best value (get maximal cost for 1 feature)
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  vector< Vector2d<T> > tmp(v.size());
+  list<int> catchset;//restore set in case of catch all the way to last level 
(no better cost)
+  //initialize maxCost with actual cost for current subset (-1 if empty 
subset) 
+  double maxCost=-1;
+  double cost;
+  int maxLevels=v[0][0].size();
+  if(subset.size()>=maxLevels)
+    return maxCost;
+  for(int ilevel=0;ilevel<maxLevels;++ilevel){
+    if(find(tmpset.begin(),tmpset.end(),ilevel)!=tmpset.end())
+      continue;
+    tmpset.push_back(ilevel);
+    for(int iclass=0;iclass<v.size();++iclass){
+//       tmp[iclass].resize(v[iclass].size());
+      v[iclass].selectCols(tmpset,tmp[iclass]);
+    }
+    try{
+      cost=getCost(tmp);
+    }
+    catch(...){
+      catchset=tmpset;//this tmpset resulted in singular matrix
+      if(verbose)
+       cout << "Could not add feature " << tmpset.back() << endl;
+      tmpset.pop_back();
+      continue;
+    }
+    if(maxCost<cost){ //level with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+    }
+    tmpset.pop_back();
+  }
+  if(maxCost<0) //no level added to better maxCost than current subset 
(catchset)
+    subset=catchset;
+  //test: assert list contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+
+template<class T> double FeatureSelector::addFeature(map<string, Vector2d<T> > 
&v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, 
map<string, vector<T> >&), list<int>& subset, bool verbose){
+  //select feature with the best value (get maximal cost for 1 feature)
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  map<string, Vector2d<double> > tmp;
+  list<int> catchset;//restore set in case of catch all the way to last level 
(no better cost)
+  //initialize maxCost with actual cost for current subset (-1 if empty 
subset) 
+  map<string, Vector2d<double> >::const_iterator mit;
+  double maxCost=-1;
+  double cost;
+  int maxLevels=(v.begin()->second)[0].size();
+  if(subset.size()>=maxLevels)
+    return maxCost;
+  for(int ilevel=0;ilevel<maxLevels;++ilevel){
+    if(find(tmpset.begin(),tmpset.end(),ilevel)!=tmpset.end())
+      continue;
+    tmpset.push_back(ilevel);
+    for(mit=v.begin();mit!=v.end();++mit){
+      string theName=mit->first;
+      v[theName].selectCols(tmpset,tmp[theName]);
+    }
+    try{
+      cost=getCost(tmp,y);
+    }
+    catch(...){
+      catchset=tmpset;//this tmpset resulted in singular matrix
+      if(verbose)
+       cout << "Could not add feature " << tmpset.back() << endl;
+      tmpset.pop_back();
+      continue;
+    }
+    if(maxCost<cost){ //level with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+    }
+    tmpset.pop_back();
+  }
+  if(maxCost<0) //no level added to better maxCost than current subset 
(catchset)
+    subset=catchset;
+  //test: assert list contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+
+template<class T> double FeatureSelector::removeFeature(Vector2d<T>& v1, 
Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, 
double prior1, double prior2), list<int>& subset, int& r, double prior1, double 
prior2, bool verbose){
+  //find the feature that has the least effect on the cost when it is removed 
from subset
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  Vector2d<T> tmp1(v1.size());
+  Vector2d<T> tmp2(v2.size());
+  int nFeatures=subset.size();
+  list<int> catchset;//restore set in case of catch all the way to last level 
(no better cost)
+  //initialize maxCost with actual cost for current subset (-1 if empty 
subset) 
+  double maxCost=-1;
+//   if(subset.size()){
+//     try{
+//       v1.selectCols(subset,tmp1);
+//       v2.selectCols(subset,tmp2);
+//       maxCost=getCost(tmp1,tmp2);
+//     }
+//     catch(double determinant){
+//       return -1;
+//     }
+//   }
+  int last;
+  double cost;
+  int maxLevels=v1[0].size();
+  if(subset.size()>maxLevels||subset.empty()){
+    return maxCost;
+  }
+  list<int>::const_iterator it;
+  for(int i=0;i<nFeatures;++i){
+    last=tmpset.back();
+    tmpset.pop_back();
+    v1.selectCols(tmpset,tmp1);
+    v2.selectCols(tmpset,tmp2);
+    try{
+      cost=getCost(tmp1,tmp2,prior1,prior2);
+    }
+    catch(...){
+      catchset=tmpset;//this tmpset resulted in singular matrix
+      if(verbose)
+       cout << "Could not remove feature " << last << endl;
+      tmpset.push_front(last);
+      continue;
+    }
+    if(maxCost<cost){ //level with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+      r=last;
+    }
+    tmpset.push_front(last);
+  }
+  if(maxCost<0){//all levels removed were catched
+    subset=catchset;
+  }
+  //test: assert list contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+
+template<class T> double FeatureSelector::removeFeature(vector< Vector2d<T> >& 
v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int& r, 
bool verbose){
+  //find the feature that has the least effect on the cost when it is removed 
from subset
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  vector< Vector2d<T> > tmp(v.size());
+  int nFeatures=subset.size();
+  list<int> catchset;//restore set in case of catch all the way to last level 
(no better cost)
+  //initialize maxCost with actual cost for current subset (-1 if empty 
subset) 
+  double maxCost=-1;
+  int last;
+  double cost;
+  int maxLevels=v[0][0].size();
+  if(subset.size()>maxLevels||subset.empty()){
+    return maxCost;
+  }
+  list<int>::const_iterator it;
+  for(int i=0;i<nFeatures;++i){
+    last=tmpset.back();
+    tmpset.pop_back();
+    for(int iclass=0;iclass<v.size();++iclass){
+//       tmp[iclass].resize(v[iclass].size());
+      v[iclass].selectCols(tmpset,tmp[iclass]);
+    }
+    try{
+      cost=getCost(tmp);
+    }
+    catch(...){
+      catchset=tmpset;//this tmpset resulted in singular matrix
+      if(verbose)
+       cout << "Could not remove feature " << last << endl;
+      tmpset.push_front(last);
+      continue;
+    }
+    if(maxCost<cost){ //level with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+      r=last;
+    }
+    tmpset.push_front(last);
+  }
+  if(maxCost<0){//all levels removed were catched
+    subset=catchset;
+  }
+  //test: assert list contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+
+template<class T> double FeatureSelector::removeFeature(map<string, 
Vector2d<T> > &v, map<string, vector<T> > &y, double (*getCost)(map<string, 
Vector2d<T> >&, map<string, vector<T> >&), list<int>& subset, int& r, bool 
verbose){
+  //find the feature that has the least effect on the cost when it is removed 
from subset
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  map<string, Vector2d<double> > tmp;
+  map<string, Vector2d<double> >::const_iterator mit;
+  int nFeatures=subset.size();
+  list<int> catchset;//restore set in case of catch all the way to last level 
(no better cost)
+  //initialize maxCost with actual cost for current subset (-1 if empty 
subset) 
+  double maxCost=-1;
+  int last;
+  double cost;
+  int maxLevels=(v.begin()->second)[0].size();
+  if(subset.size()>maxLevels||subset.empty()){
+    return maxCost;
+  }
+  list<int>::const_iterator it;
+  for(int i=0;i<nFeatures;++i){
+    last=tmpset.back();
+    tmpset.pop_back();
+    for(mit=v.begin();mit!=v.end();++mit){
+      string theName=mit->first;
+      v[theName].selectCols(tmpset,tmp[theName]);
+    }
+    try{
+      cost=getCost(tmp,y);
+    }
+    catch(...){ //singular matrix encountered
+      catchset=tmpset;//this tmpset resulted in singular matrix
+      if(verbose)
+       cout << "Could not remove feature " << last << endl;
+      tmpset.push_front(last);
+      continue;
+    }
+    if(maxCost<cost){ //level with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+      r=last;
+    }
+    tmpset.push_front(last);
+  }
+  if(maxCost<0){//all levels removed were catched
+    subset=catchset;
+  }
+  //test: assert list contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+#endif /* _FEATURESELECTOR_H_ */
diff --git a/src/algorithms/Makefile.am b/src/algorithms/Makefile.am
index ba7c6e3..9f0bafa 100644
--- a/src/algorithms/Makefile.am
+++ b/src/algorithms/Makefile.am
@@ -12,7 +12,7 @@ noinst_LIBRARIES = libalgorithms.a
 libalgorithms_adir = $(includedir)/algorithms
 
 # the list of header files that belong to the library (to be installed later)
-libalgorithms_a_HEADERS = Egcs.h  Filter2d.h  Filter.h  Histogram.h 
ConfusionMatrix.h svm.h
+libalgorithms_a_HEADERS = Egcs.h Filter2d.h Filter.h Histogram.h 
ConfusionMatrix.h svm.h FeatureSelector.h
 
 if USE_FANN
 libalgorithms_a_HEADERS += myfann_cpp.h
@@ -23,5 +23,5 @@ libalgorithms_a_HEADERS += SensorModel.h
 endif
 
 # the sources to add to the library and to add to the source distribution
-libalgorithms_a_SOURCES = $(libalgorithms_a_HEADERS) Egcs.cc  Filter2d.cc  
Filter.cc  Histogram.cc ConfusionMatrix.cc svm.cpp
+libalgorithms_a_SOURCES = $(libalgorithms_a_HEADERS) Egcs.cc Filter2d.cc 
Filter.cc Histogram.cc ConfusionMatrix.cc svm.cpp
 ###############################################################################
diff --git a/src/algorithms/Makefile.in b/src/algorithms/Makefile.in
index af8c667..a0c6645 100644
--- a/src/algorithms/Makefile.in
+++ b/src/algorithms/Makefile.in
@@ -53,9 +53,9 @@ ARFLAGS = cru
 libalgorithms_a_AR = $(AR) $(ARFLAGS)
 libalgorithms_a_LIBADD =
 am__libalgorithms_a_SOURCES_DIST = Egcs.h Filter2d.h Filter.h \
-       Histogram.h ConfusionMatrix.h svm.h myfann_cpp.h SensorModel.h \
-       Egcs.cc Filter2d.cc Filter.cc Histogram.cc ConfusionMatrix.cc \
-       svm.cpp
+       Histogram.h ConfusionMatrix.h svm.h FeatureSelector.h \
+       myfann_cpp.h SensorModel.h Egcs.cc Filter2d.cc Filter.cc \
+       Histogram.cc ConfusionMatrix.cc svm.cpp
 am__objects_1 =
 am__objects_2 = $(am__objects_1) $(am__objects_1)
 am_libalgorithms_a_OBJECTS = $(am__objects_2) Egcs.$(OBJEXT) \
@@ -78,7 +78,8 @@ LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) 
$(LDFLAGS) -o $@
 SOURCES = $(libalgorithms_a_SOURCES)
 DIST_SOURCES = $(am__libalgorithms_a_SOURCES_DIST)
 am__libalgorithms_a_HEADERS_DIST = Egcs.h Filter2d.h Filter.h \
-       Histogram.h ConfusionMatrix.h svm.h myfann_cpp.h SensorModel.h
+       Histogram.h ConfusionMatrix.h svm.h FeatureSelector.h \
+       myfann_cpp.h SensorModel.h
 am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
 am__vpath_adj = case $$p in \
     $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
@@ -228,10 +229,11 @@ libalgorithms_adir = $(includedir)/algorithms
 
 # the list of header files that belong to the library (to be installed later)
 libalgorithms_a_HEADERS = Egcs.h Filter2d.h Filter.h Histogram.h \
-       ConfusionMatrix.h svm.h $(am__append_1) $(am__append_2)
+       ConfusionMatrix.h svm.h FeatureSelector.h $(am__append_1) \
+       $(am__append_2)
 
 # the sources to add to the library and to add to the source distribution
-libalgorithms_a_SOURCES = $(libalgorithms_a_HEADERS) Egcs.cc  Filter2d.cc  
Filter.cc  Histogram.cc ConfusionMatrix.cc svm.cpp
+libalgorithms_a_SOURCES = $(libalgorithms_a_HEADERS) Egcs.cc Filter2d.cc 
Filter.cc Histogram.cc ConfusionMatrix.cc svm.cpp
 all: all-am
 
 .SUFFIXES:
diff --git a/src/apps/Makefile.am b/src/apps/Makefile.am
index eb3120e..14d8f95 100644
--- a/src/apps/Makefile.am
+++ b/src/apps/Makefile.am
@@ -6,7 +6,7 @@ LDADD = $(GDAL_LDFLAGS) 
$(top_builddir)/src/algorithms/libalgorithms.a $(top_bui
 ###############################################################################
 
 # the program to build (the names of the final binaries)
-bin_PROGRAMS = pkinfo pkcrop pkreclass pkgetmask pksetmask pkcreatect 
pkdumpimg pkdumpogr pksieve pkstat pkstatogr pkegcs pkextract pkfillnodata 
pkfilter pkdsm2shadow pkmosaic pkndvi pkpolygonize pkascii2img pkdiff 
pkclassify_svm pkascii2ogr #pkxcorimg pkgeom
+bin_PROGRAMS = pkinfo pkcrop pkreclass pkgetmask pksetmask pkcreatect 
pkdumpimg pkdumpogr pksieve pkstat pkstatogr pkegcs pkextract pkfillnodata 
pkfilter pkdsm2shadow pkmosaic pkndvi pkpolygonize pkascii2img pkdiff 
pkclassify_svm pkfs_svm pkascii2ogr #pkxcorimg pkgeom
 if USE_FANN
 bin_PROGRAMS += pkclassify_nn
 pkclassify_nn_SOURCES = $(top_srcdir)/src/algorithms/myfann_cpp.h 
pkclassify_nn.h pkclassify_nn.cc
@@ -49,6 +49,7 @@ pkndvi_SOURCES = pkndvi.cc
 pkpolygonize_SOURCES = pkpolygonize.cc
 pkdiff_SOURCES = pkdiff.cc
 pkclassify_svm_SOURCES = $(top_srcdir)/src/algorithms/svm.h 
$(top_srcdir)/src/algorithms/svm.cpp pkclassify_svm.cc
+pkfs_svm_SOURCES = $(top_srcdir)/src/algorithms/svm.h 
$(top_srcdir)/src/algorithms/svm.cpp pkfs_svm.cc
 pkascii2img_SOURCES = pkascii2img.cc
 pkascii2ogr_SOURCES = pkascii2ogr.cc
 #pkxcorimg_SOURCES= pkxcorimg.cc
diff --git a/src/apps/Makefile.in b/src/apps/Makefile.in
index 61c9158..184c40a 100644
--- a/src/apps/Makefile.in
+++ b/src/apps/Makefile.in
@@ -39,8 +39,8 @@ bin_PROGRAMS = pkinfo$(EXEEXT) pkcrop$(EXEEXT) 
pkreclass$(EXEEXT) \
        pkextract$(EXEEXT) pkfillnodata$(EXEEXT) pkfilter$(EXEEXT) \
        pkdsm2shadow$(EXEEXT) pkmosaic$(EXEEXT) pkndvi$(EXEEXT) \
        pkpolygonize$(EXEEXT) pkascii2img$(EXEEXT) pkdiff$(EXEEXT) \
-       pkclassify_svm$(EXEEXT) pkascii2ogr$(EXEEXT) $(am__EXEEXT_1) \
-       $(am__EXEEXT_2) $(am__EXEEXT_3)
+       pkclassify_svm$(EXEEXT) pkfs_svm$(EXEEXT) pkascii2ogr$(EXEEXT) \
+       $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3)
 @USE_FANN_TRUE@am__append_1 = pkclassify_nn
 @USE_LAS_TRUE@am__append_2 = pklas2img
 @USE_NLOPT_TRUE@am__append_3 = pksensormodel
@@ -149,6 +149,12 @@ pkfilter_LDADD = $(LDADD)
 pkfilter_DEPENDENCIES = $(am__DEPENDENCIES_1) \
        $(top_builddir)/src/algorithms/libalgorithms.a \
        $(top_builddir)/src/imageclasses/libimageClasses.a
+am_pkfs_svm_OBJECTS = svm.$(OBJEXT) pkfs_svm.$(OBJEXT)
+pkfs_svm_OBJECTS = $(am_pkfs_svm_OBJECTS)
+pkfs_svm_LDADD = $(LDADD)
+pkfs_svm_DEPENDENCIES = $(am__DEPENDENCIES_1) \
+       $(top_builddir)/src/algorithms/libalgorithms.a \
+       $(top_builddir)/src/imageclasses/libimageClasses.a
 am_pkgetmask_OBJECTS = pkgetmask.$(OBJEXT)
 pkgetmask_OBJECTS = $(am_pkgetmask_OBJECTS)
 pkgetmask_LDADD = $(LDADD)
@@ -236,18 +242,18 @@ SOURCES = $(pkascii2img_SOURCES) $(pkascii2ogr_SOURCES) \
        $(pkdsm2shadow_SOURCES) $(pkdumpimg_SOURCES) \
        $(pkdumpogr_SOURCES) $(pkegcs_SOURCES) $(pkextract_SOURCES) \
        $(pkfillnodata_SOURCES) $(pkfilter_SOURCES) \
-       $(pkgetmask_SOURCES) $(pkinfo_SOURCES) $(pklas2img_SOURCES) \
-       $(pkmosaic_SOURCES) $(pkndvi_SOURCES) $(pkpolygonize_SOURCES) \
-       $(pkreclass_SOURCES) $(pksensormodel_SOURCES) \
-       $(pksetmask_SOURCES) $(pksieve_SOURCES) $(pkstat_SOURCES) \
-       $(pkstatogr_SOURCES)
+       $(pkfs_svm_SOURCES) $(pkgetmask_SOURCES) $(pkinfo_SOURCES) \
+       $(pklas2img_SOURCES) $(pkmosaic_SOURCES) $(pkndvi_SOURCES) \
+       $(pkpolygonize_SOURCES) $(pkreclass_SOURCES) \
+       $(pksensormodel_SOURCES) $(pksetmask_SOURCES) \
+       $(pksieve_SOURCES) $(pkstat_SOURCES) $(pkstatogr_SOURCES)
 DIST_SOURCES = $(pkascii2img_SOURCES) $(pkascii2ogr_SOURCES) \
        $(am__pkclassify_nn_SOURCES_DIST) $(pkclassify_svm_SOURCES) \
        $(pkcreatect_SOURCES) $(pkcrop_SOURCES) $(pkdiff_SOURCES) \
        $(pkdsm2shadow_SOURCES) $(pkdumpimg_SOURCES) \
        $(pkdumpogr_SOURCES) $(pkegcs_SOURCES) $(pkextract_SOURCES) \
        $(pkfillnodata_SOURCES) $(pkfilter_SOURCES) \
-       $(pkgetmask_SOURCES) $(pkinfo_SOURCES) \
+       $(pkfs_svm_SOURCES) $(pkgetmask_SOURCES) $(pkinfo_SOURCES) \
        $(am__pklas2img_SOURCES_DIST) $(pkmosaic_SOURCES) \
        $(pkndvi_SOURCES) $(pkpolygonize_SOURCES) $(pkreclass_SOURCES) \
        $(am__pksensormodel_SOURCES_DIST) $(pksetmask_SOURCES) \
@@ -398,6 +404,7 @@ pkndvi_SOURCES = pkndvi.cc
 pkpolygonize_SOURCES = pkpolygonize.cc
 pkdiff_SOURCES = pkdiff.cc
 pkclassify_svm_SOURCES = $(top_srcdir)/src/algorithms/svm.h 
$(top_srcdir)/src/algorithms/svm.cpp pkclassify_svm.cc
+pkfs_svm_SOURCES = $(top_srcdir)/src/algorithms/svm.h 
$(top_srcdir)/src/algorithms/svm.cpp pkfs_svm.cc
 pkascii2img_SOURCES = pkascii2img.cc
 pkascii2ogr_SOURCES = pkascii2ogr.cc
 all: all-am
@@ -513,6 +520,9 @@ pkfillnodata$(EXEEXT): $(pkfillnodata_OBJECTS) 
$(pkfillnodata_DEPENDENCIES)
 pkfilter$(EXEEXT): $(pkfilter_OBJECTS) $(pkfilter_DEPENDENCIES) 
        @rm -f pkfilter$(EXEEXT)
        $(CXXLINK) $(pkfilter_OBJECTS) $(pkfilter_LDADD) $(LIBS)
+pkfs_svm$(EXEEXT): $(pkfs_svm_OBJECTS) $(pkfs_svm_DEPENDENCIES) 
+       @rm -f pkfs_svm$(EXEEXT)
+       $(CXXLINK) $(pkfs_svm_OBJECTS) $(pkfs_svm_LDADD) $(LIBS)
 pkgetmask$(EXEEXT): $(pkgetmask_OBJECTS) $(pkgetmask_DEPENDENCIES) 
        @rm -f pkgetmask$(EXEEXT)
        $(CXXLINK) $(pkgetmask_OBJECTS) $(pkgetmask_LDADD) $(LIBS)
@@ -570,6 +580,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pkextract.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pkfillnodata.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pkfilter.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pkfs_svm.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pkgetmask.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pkinfo.Po@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pklas2img.Po@am__quote@
diff --git a/src/apps/pkclassify_svm.cc b/src/apps/pkclassify_svm.cc
index 43508cb..6d5e911 100644
--- a/src/apps/pkclassify_svm.cc
+++ b/src/apps/pkclassify_svm.cc
@@ -1,5 +1,5 @@
 /**********************************************************************
-pkclassify_svm.cc: classify raster image using Artificial Neural Network
+pkclassify_svm.cc: classify raster image using Support Vector Machine
 Copyright (C) 2008-2012 Pieter Kempeneers
 
 This file is part of pktools
@@ -149,7 +149,7 @@ int main(int argc, char *argv[])
     exit(0);
   }
   if(help_opt[0]){
-    std::cout << "usage: pkclassify_nn -i testimage -o outputimage -t training 
[OPTIONS]" << std::endl;
+    std::cout << "usage: pkclassify_svm -i testimage -o outputimage -t 
training [OPTIONS]" << std::endl;
     exit(0);
   }
 
@@ -174,7 +174,6 @@ int main(int argc, char *argv[])
   unsigned int totalSamples=0;
   int nreclass=0;
   vector<int> vcode;//unique class codes in recode string
-  // vector<FANN::neural_net> net(nbag);//the neural network
   vector<struct svm_model*> svm(nbag);
   vector<struct svm_parameter> param(nbag);
 
diff --git a/src/apps/pkfs_svm.cc b/src/apps/pkfs_svm.cc
new file mode 100644
index 0000000..03b56c8
--- /dev/null
+++ b/src/apps/pkfs_svm.cc
@@ -0,0 +1,549 @@
+/**********************************************************************
+pkfs_svm.cc: feature selection for svm classifier
+Copyright (C) 2008-2012 Pieter Kempeneers
+
+This file is part of pktools
+
+pktools is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+pktools is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with pktools.  If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+#include <vector>
+#include <map>
+#include <algorithm>
+#include "imageclasses/ImgReaderGdal.h"
+#include "imageclasses/ImgWriterGdal.h"
+#include "imageclasses/ImgReaderOgr.h"
+#include "imageclasses/ImgWriterOgr.h"
+#include "base/Optionpk.h"
+#include "algorithms/ConfusionMatrix.h"
+#include "algorithms/FeatureSelector.h"
+#include "algorithms/svm.h"
+#include "pkclassify_nn.h"
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
+
+double getCost(const vector<Vector2d<float> > &trainingFeatures)
+{
+  //test
+  std::cout << "debug0" << std::endl;
+  bool verbose=true;
+  unsigned short nFeatures=trainingFeatures[0][0].size();
+  //todo: set SVM parameters through command line options
+  struct svm_parameter param;
+  // param.svm_type = svm_type_opt[0];
+  // param.kernel_type = kernel_type_opt[0];
+  // param.degree = kernel_degree_opt[0];
+  // param.gamma = (gamma_opt[0]>0)? gamma_opt[0] : 1.0/nFeatures;
+  // param.coef0 = coef0_opt[0];
+  // param.nu = nu_opt[0];
+  // param.cache_size = cache_opt[0];
+  // param.C = ccost_opt[0];
+  // param.eps = epsilon_tol_opt[0];
+  // param.p = epsilon_loss_opt[0];
+  // param.shrinking = (shrinking_opt[0])? 1 : 0;
+  // param.probability = (prob_est_opt[0])? 1 : 0;
+  // param.nr_weight = 0;//not used: I use priors and balancing
+  // param.weight_label = NULL;
+  // param.weight = NULL;
+  //todo: make parameter
+  unsigned int cv=2;
+  param.svm_type = 0;
+  param.kernel_type = 2;
+  param.degree = 3;
+  param.gamma = 1.0/nFeatures;
+  param.coef0 = 0;
+  param.nu = 0.5;
+  param.cache_size = 100.0;
+  param.C = 1;
+  param.eps = 0.001;
+  param.p = 0.1;
+  param.shrinking = 0;
+  param.probability = 0;
+  param.nr_weight = 0;//not used: I use priors and balancing
+  param.weight_label = NULL;
+  param.weight = NULL;
+  struct svm_model* svm;
+  struct svm_problem prob;
+  struct svm_node* x_space;
+  unsigned int ntraining=0;
+  unsigned short nclass=trainingFeatures.size();
+
+  //test
+  std::cout << "debug1" << std::endl;
+
+  for(int iclass=0;iclass<nclass;++iclass)
+    ntraining+=trainingFeatures[iclass].size();
+
+  prob.l=ntraining;
+  prob.y = Malloc(double,prob.l);
+  prob.x = Malloc(struct svm_node *,prob.l);
+  //test
+  std::cout << "debug2" << std::endl;
+  x_space = Malloc(struct svm_node,(nFeatures+1)*ntraining);
+  unsigned long int spaceIndex=0;
+  int lIndex=0;
+  for(int iclass=0;iclass<nclass;++iclass){
+    for(int isample=0;isample<trainingFeatures[iclass].size();++isample){
+      //test
+      std::cout << "..." << iclass << std::endl;
+      prob.x[lIndex]=&(x_space[spaceIndex]);
+      for(int ifeature=0;ifeature<nFeatures;++ifeature){
+        x_space[spaceIndex].index=ifeature+1;
+        x_space[spaceIndex].value=trainingFeatures[iclass][isample][ifeature];
+        //test
+        std::cout << " " << x_space[spaceIndex].index << ":" << 
x_space[spaceIndex].value;
+        ++spaceIndex;
+      }
+      x_space[spaceIndex++].index=-1;
+      prob.y[lIndex]=iclass;
+      ++lIndex;
+    }
+  }
+  //test
+  std::cout << "debug3" << std::endl;
+
+  assert(lIndex==prob.l);
+  if(verbose)
+    std::cout << "checking parameters" << std::endl;
+  svm_check_parameter(&prob,&param);
+  if(verbose)
+    std::cout << "parameters ok, training" << std::endl;
+  svm=svm_train(&prob,&param);
+  if(verbose)
+    std::cout << "SVM is now trained" << std::endl;
+
+  std::cout << "Confusion matrix" << std::endl;
+  ConfusionMatrix cm(nclass);
+  double *target = Malloc(double,prob.l);
+  svm_cross_validation(&prob,&param,cv,target);
+  assert(param.svm_type != EPSILON_SVR&&param.svm_type != NU_SVR);//only for 
regression
+  int total_correct=0;
+  for(int i=0;i<prob.l;i++)
+    cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1);
+  assert(cm.nReference());
+  // std::cout << "Kappa: " << cm.kappa() << std::endl;
+  // double se95_oa=0;
+  // double doa=0;
+  // doa=cm.oa_pct(&se95_oa);
+  // std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")"  << 
std::endl;
+
+  // *NOTE* Because svm_model contains pointers to svm_problem, you can
+  // not free the memory used by svm_problem if you are still using the
+  // svm_model produced by svm_train(). 
+  // however, we will re-train the svm later on after the feature selection
+  free(target);
+  free(prob.y);
+  free(prob.x);
+  free(x_space);
+  svm_free_and_destroy_model(&(svm));
+  return(cm.kappa());
+}
+
+int main(int argc, char *argv[])
+{
+  map<short,int> reclassMap;
+  vector<int> vreclass;
+  vector<double> priors;
+  
+  //--------------------------- command line options 
------------------------------------
+
+  std::string versionString="version ";
+  versionString+=VERSION;
+  versionString+=", Copyright (C) 2008-2012 Pieter Kempeneers.\n\
+   This program comes with ABSOLUTELY NO WARRANTY; for details type use option 
-h.\n\
+   This is free software, and you are welcome to redistribute it\n\
+   under certain conditions; use option --license for details.";
+  Optionpk<bool> version_opt("\0","version",versionString,false);
+  Optionpk<bool> license_opt("lic","license","show license information",false);
+  Optionpk<bool> help_opt("h","help","shows this help info",false);
+  Optionpk<bool> todo_opt("\0","todo","",false);
+  Optionpk<string> training_opt("t", "training", "training shape file. A 
single shape file contains all training features (must be set as: B0, B1, 
B2,...) for all classes (class numbers identified by label option). Use 
multiple training files for bootstrap aggregation (alternative to the bag and 
bsize options, where a random subset is taken from a single training file)"); 
+  Optionpk<string> label_opt("\0", "label", "identifier for class label in 
training shape file.","label"); 
+  Optionpk<unsigned short> maxFeatures_opt("n", "nf", "number of features to 
select (0 to select all)", 0);
+  Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 
--rc=23 to reclass first two classes to 12 and 23 resp.).", 0);
+  Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data 
to this number of samples for each class", 0);
+  Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less 
then min, do not take this class into account", 0);
+  Optionpk<double> start_opt("s", "start", "start band sequence number (set to 
0)",0); 
+  Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 for 
all bands)", 0); 
+  Optionpk<double> offset_opt("\0", "offset", "offset value for each spectral 
band input features: refl[band]=(DN[band]-offset[band])/scale[band]", 0.0);
+  Optionpk<double> scale_opt("\0", "scale", "scale value for each spectral 
band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale 
min and max in each band to -1.0 and 1.0)", 0.0);
+  Optionpk<unsigned short> aggreg_opt("a", "aggreg", "how to combine 
aggregated classifiers, see also rc option (0: no aggregation, 1: sum rule, 2: 
max rule).",0);
+  Optionpk<double> priors_opt("p", "prior", "prior probabilities for each 
class (e.g., -p 0.3 -p 0.3 -p 0.2 )", 0.0); 
+  Optionpk<unsigned short> svm_type_opt("svmt", "svmtype", "type of SVM (0: 
C-SVC, 1: nu-SVC, 2: one-class SVM, 3: epsilon-SVR,        4: nu-SVR)",0);
+  Optionpk<unsigned short> kernel_type_opt("kt", "kerneltype", "type of kernel 
function (0: linear: u'*v, 1: polynomial: (gamma*u'*v + coef0)^degree, 2: 
radial basis function: exp(-gamma*|u-v|^2), 3: sigmoid: tanh(gamma*u'*v + 
coef0), 4: precomputed kernel (kernel values in training_set_file)",2);
+  Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "degree in kernel 
function",3);
+  Optionpk<float> gamma_opt("g", "gamma", "gamma in kernel function",0);
+  Optionpk<float> coef0_opt("c0", "coef0", "coef0 in kernel function",0);
+  Optionpk<float> ccost_opt("cc", "ccost", "the parameter C of C-SVC, 
epsilon-SVR, and nu-SVR",1);
+  Optionpk<float> nu_opt("nu", "nu", "the parameter nu of nu-SVC, one-class 
SVM, and nu-SVR",0.5);
+  Optionpk<float> epsilon_loss_opt("eloss", "eloss", "the epsilon in loss 
function of epsilon-SVR",0.1);
+  Optionpk<int> cache_opt("cache", "cache", "cache memory size in MB",100);
+  Optionpk<float> epsilon_tol_opt("etol", "etol", "the tolerance of 
termination criterion",0.001);
+  Optionpk<bool> shrinking_opt("shrink", "shrink", "whether to use the 
shrinking heuristics",false);
+  Optionpk<bool> prob_est_opt("pe", "probest", "whether to train a SVC or SVR 
model for probability estimates",false);
+  // Optionpk<bool> weight_opt("wi", "wi", "set the parameter C of class i to 
weight*C, for C-SVC",true);
+  Optionpk<unsigned int> cv_opt("cv", "cv", "n-fold cross validation mode",2);
+  Optionpk<short> verbose_opt("v", "verbose", "set to: 0 (results only), 1 
(confusion matrix), 2 (debug)",0);
+
+  version_opt.retrieveOption(argc,argv);
+  license_opt.retrieveOption(argc,argv);
+  help_opt.retrieveOption(argc,argv);
+  todo_opt.retrieveOption(argc,argv);
+  training_opt.retrieveOption(argc,argv);
+  maxFeatures_opt.retrieveOption(argc,argv);
+  label_opt.retrieveOption(argc,argv);
+  reclass_opt.retrieveOption(argc,argv);
+  balance_opt.retrieveOption(argc,argv);
+  minSize_opt.retrieveOption(argc,argv);
+  start_opt.retrieveOption(argc,argv);
+  end_opt.retrieveOption(argc,argv);
+  offset_opt.retrieveOption(argc,argv);
+  scale_opt.retrieveOption(argc,argv);
+  aggreg_opt.retrieveOption(argc,argv);
+  priors_opt.retrieveOption(argc,argv);
+  svm_type_opt.retrieveOption(argc,argv);
+  kernel_type_opt.retrieveOption(argc,argv);
+  kernel_degree_opt.retrieveOption(argc,argv);
+  gamma_opt.retrieveOption(argc,argv);
+  coef0_opt.retrieveOption(argc,argv);
+  ccost_opt.retrieveOption(argc,argv);
+  nu_opt.retrieveOption(argc,argv);
+  epsilon_loss_opt.retrieveOption(argc,argv);
+  cache_opt.retrieveOption(argc,argv);
+  epsilon_tol_opt.retrieveOption(argc,argv);
+  shrinking_opt.retrieveOption(argc,argv);
+  prob_est_opt.retrieveOption(argc,argv);
+  cv_opt.retrieveOption(argc,argv);
+  verbose_opt.retrieveOption(argc,argv);
+
+  if(version_opt[0]||todo_opt[0]){
+    std::cout << version_opt.getHelp() << std::endl;
+    std::cout << "todo: " << todo_opt.getHelp() << std::endl;
+    exit(0);
+  }
+  if(license_opt[0]){
+    std::cout << Optionpk<bool>::getGPLv3License() << std::endl;
+    exit(0);
+  }
+  if(help_opt[0]){
+    std::cout << "usage: pkfs_svm -t training [OPTIONS]" << std::endl;
+    exit(0);
+  }
+
+  assert(training_opt[0].size());
+  if(verbose_opt[0]>=1)
+    std::cout << "training shape file: " << training_opt[0] << std::endl;
+
+  unsigned int totalSamples=0;
+  int nreclass=0;
+  vector<int> vcode;//unique class codes in recode string
+
+  unsigned short nclass=0;
+  int nband=0;
+  int startBand=2;//first two bands represent X and Y pos
+
+  vector<double> offset;
+  vector<double> scale;
+  vector< Vector2d<float> > trainingPixels;//[class][sample][band]
+
+  if(reclass_opt.size()>1){
+    vreclass.resize(reclass_opt.size());
+    for(int iclass=0;iclass<reclass_opt.size();++iclass){
+      reclassMap[iclass]=reclass_opt[iclass];
+      vreclass[iclass]=reclass_opt[iclass];
+    }
+  }
+  if(priors_opt.size()>1){//priors from argument list
+    priors.resize(priors_opt.size());
+    double normPrior=0;
+    for(int iclass=0;iclass<priors_opt.size();++iclass){
+      priors[iclass]=priors_opt[iclass];
+      normPrior+=priors[iclass];
+    }
+    //normalize
+    for(int iclass=0;iclass<priors_opt.size();++iclass)
+      priors[iclass]/=normPrior;
+  }
+
+
+  //----------------------------------- Training 
-------------------------------
+  struct svm_problem prob;
+  vector<string> fields;
+  //organize training data
+  trainingPixels.clear();
+  map<int,Vector2d<float> > trainingMap;
+  if(verbose_opt[0]>=1)
+    std::cout << "reading imageShape file " << training_opt[0] << std::endl;
+  try{
+    
totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,start_opt[0],end_opt[0],label_opt[0],verbose_opt[0]);
+    if(trainingMap.size()<2){
+      string errorstring="Error: could not read at least two classes from 
training file";
+      throw(errorstring);
+    }
+  }
+  catch(string error){
+    cerr << error << std::endl;
+    exit(1);
+  }
+  catch(...){
+    cerr << "error catched" << std::endl;
+    exit(1);
+  }
+  //delete class 0
+  if(verbose_opt[0]>=1)
+    std::cout << "erasing class 0 from training set (" << 
trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl;
+  totalSamples-=trainingMap[0].size();
+  trainingMap.erase(0);
+  //convert map to vector
+  short iclass=0;
+  if(reclass_opt.size()==1){//no reclass option, read classes from shape
+    reclassMap.clear();
+    vreclass.clear();
+  }
+  if(verbose_opt[0]>1)
+    std::cout << "training pixels: " << std::endl;
+  map<int,Vector2d<float> >::iterator mapit=trainingMap.begin();
+  while(mapit!=trainingMap.end()){
+    //       for(map<int,Vector2d<float> >::const_iterator 
mapit=trainingMap.begin();mapit!=trainingMap.end();++mapit){
+    //delete small classes
+    if((mapit->second).size()<minSize_opt[0]){
+      trainingMap.erase(mapit);
+      continue;
+      //todo: beware of reclass option: delete this reclass if no samples are 
left in this classes!!
+    }
+    if(reclass_opt.size()==1){//no reclass option, read classes from shape
+      reclassMap[iclass]=(mapit->first);
+      vreclass.push_back(mapit->first);
+    }
+    trainingPixels.push_back(mapit->second);
+    if(verbose_opt[0]>1)
+      std::cout << mapit->first << ": " << (mapit->second).size() << " 
samples" << std::endl;
+    ++iclass;
+    ++mapit;
+  }
+  nclass=trainingPixels.size();
+  nband=trainingPixels[0][0].size()-2;//X and Y//trainingPixels[0][0].size();
+  assert(reclassMap.size()==nclass);
+
+  //do not remove outliers here: could easily be obtained through ogr2ogr 
-where 'B2<110' output.shp input.shp
+  //balance training data
+  if(balance_opt[0]>0){
+    if(random)
+      srand(time(NULL));
+    totalSamples=0;
+    for(int iclass=0;iclass<nclass;++iclass){
+      if(trainingPixels[iclass].size()>balance_opt[0]){
+        while(trainingPixels[iclass].size()>balance_opt[0]){
+          int index=rand()%trainingPixels[iclass].size();
+          trainingPixels[iclass].erase(trainingPixels[iclass].begin()+index);
+        }
+      }
+      else{
+        int oldsize=trainingPixels[iclass].size();
+        for(int 
isample=trainingPixels[iclass].size();isample<balance_opt[0];++isample){
+          int index = rand()%oldsize;
+          trainingPixels[iclass].push_back(trainingPixels[iclass][index]);
+        }
+      }
+      totalSamples+=trainingPixels[iclass].size();
+    }
+    assert(totalSamples==nclass*balance_opt[0]);
+  }
+    
+  //set scale and offset
+  offset.resize(nband);
+  scale.resize(nband);
+  if(offset_opt.size()>1)
+    assert(offset_opt.size()==nband);
+  if(scale_opt.size()>1)
+    assert(scale_opt.size()==nband);
+  Histogram hist;
+  for(int iband=0;iband<nband;++iband){
+    if(verbose_opt[0]>=1)
+      std::cout << "scaling for band" << iband << std::endl;
+    offset[iband]=(offset_opt.size()==1)?offset_opt[0]:offset_opt[iband];
+    scale[iband]=(scale_opt.size()==1)?scale_opt[0]:scale_opt[iband];
+    //search for min and maximum
+    if(scale[iband]<=0){
+      float theMin=trainingPixels[0][0][iband+startBand];
+      float theMax=trainingPixels[0][0][iband+startBand];
+      for(int iclass=0;iclass<nclass;++iclass){
+        for(int isample=0;isample<trainingPixels[iclass].size();++isample){
+          if(theMin>trainingPixels[iclass][isample][iband+startBand])
+            theMin=trainingPixels[iclass][isample][iband+startBand];
+          if(theMax<trainingPixels[iclass][isample][iband+startBand])
+            theMax=trainingPixels[iclass][isample][iband+startBand];
+        }
+      }
+      offset[iband]=theMin+(theMax-theMin)/2.0;
+      scale[iband]=(theMax-theMin)/2.0;
+      if(verbose_opt[0]>=1){
+        std::cout << "Extreme image values for band " << iband << ": [" << 
theMin << "," << theMax << "]" << std::endl;
+        std::cout << "Using offset, scale: " << offset[iband] << ", " << 
scale[iband] << std::endl;
+        std::cout << "scaled values for band " << iband << ": [" << 
(theMin-offset[iband])/scale[iband] << "," << 
(theMax-offset[iband])/scale[iband] << "]" << std::endl;
+      }
+    }
+  }
+
+  //recode vreclass to ordered vector, starting from 0 to nreclass
+  vcode.clear();
+  if(verbose_opt[0]>=1){
+    std::cout << "before recoding: " << std::endl;
+    for(int iclass = 0; iclass < vreclass.size(); iclass++)
+      std::cout << " " << vreclass[iclass];
+    std::cout << std::endl; 
+  }
+  vector<int> vord=vreclass;//ordered vector, starting from 0 to nreclass
+  map<short,int> mreclass;
+  for(int ic=0;ic<vreclass.size();++ic){
+    if(mreclass.find(vreclass[ic])==mreclass.end())
+      mreclass[vreclass[ic]]=iclass++;
+  }
+  for(int ic=0;ic<vreclass.size();++ic)
+    vord[ic]=mreclass[vreclass[ic]];
+  //construct uniqe class codes
+  while(!vreclass.empty()){
+    vcode.push_back(*(vreclass.begin()));
+    //delete all these entries from vreclass
+    vector<int>::iterator vit;
+    
while((vit=find(vreclass.begin(),vreclass.end(),vcode.back()))!=vreclass.end())
+      vreclass.erase(vit);
+  }
+  if(verbose_opt[0]>=1){
+    std::cout << "recode values: " << std::endl;
+    for(int icode=0;icode<vcode.size();++icode)
+      std::cout << vcode[icode] << " ";
+    std::cout << std::endl;
+  }
+  vreclass=vord;
+  if(verbose_opt[0]>=1){
+    std::cout << "after recoding: " << std::endl;
+    for(int iclass = 0; iclass < vord.size(); iclass++)
+      std::cout << " " << vord[iclass];
+    std::cout << std::endl; 
+  }
+      
+  vector<int> vuniqueclass=vreclass;
+  //remove duplicate elements from vuniqueclass
+  sort( vuniqueclass.begin(), vuniqueclass.end() );
+  vuniqueclass.erase( unique( vuniqueclass.begin(), vuniqueclass.end() ), 
vuniqueclass.end() );
+  nreclass=vuniqueclass.size();
+  if(verbose_opt[0]>=1){
+    std::cout << "unique classes: " << std::endl;
+    for(int iclass = 0; iclass < vuniqueclass.size(); iclass++)
+      std::cout << " " << vuniqueclass[iclass];
+    std::cout << std::endl; 
+    std::cout << "number of reclasses: " << nreclass << std::endl;
+  }
+    
+  if(priors_opt.size()==1){//default: equal priors for each class
+    priors.resize(nclass);
+    for(int iclass=0;iclass<nclass;++iclass)
+      priors[iclass]=1.0/nclass;
+  }
+  assert(priors_opt.size()==1||priors_opt.size()==nclass);
+    
+  if(verbose_opt[0]>=1){
+    std::cout << "number of bands: " << nband << std::endl;
+    std::cout << "number of classes: " << nclass << std::endl;
+    std::cout << "priors:";
+    for(int iclass=0;iclass<nclass;++iclass)
+      std::cout << " " << priors[iclass];
+    std::cout << std::endl;
+  }
+
+  //Calculate features of trainig set
+  vector< Vector2d<float> > trainingFeatures(nclass);
+  for(int iclass=0;iclass<nclass;++iclass){
+    int nctraining=0;
+    if(verbose_opt[0]>=1)
+      std::cout << "calculating features for class " << iclass << std::endl;
+    if(random)
+      srand(time(NULL));
+    nctraining=trainingPixels[iclass].size();//bagSize_opt[0] given in % of 
training size
+    if(verbose_opt[0]>=1)
+      std::cout << "nctraining class " << iclass << ": " << nctraining << 
std::endl;
+    int index=0;
+      
+    trainingFeatures[iclass].resize(nctraining);
+    for(int isample=0;isample<nctraining;++isample){
+      //scale pixel values according to scale and offset!!!
+      for(int iband=0;iband<nband;++iband){
+        assert(trainingPixels[iclass].size()>isample);
+        assert(trainingPixels[iclass][isample].size()>iband+startBand);
+        assert(offset.size()>iband);
+        assert(scale.size()>iband);
+        float value=trainingPixels[iclass][isample][iband+startBand];
+        
trainingFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
+      }
+    }
+    assert(trainingFeatures[iclass].size()==nctraining);
+  }
+    
+  unsigned int ntraining=0;
+  for(int iclass=0;iclass<nclass;++iclass){
+    std::cout << "..." << iclass << "..." << std::endl;
+    if(verbose_opt[0]>=1)
+      std::cout << "training sample size for class " << vcode[iclass] << ": " 
<< trainingFeatures[iclass].size() << std::endl;
+    ntraining+=trainingFeatures[iclass].size();
+  }
+
+  int nFeatures=trainingFeatures[0][0].size();
+  int maxFeatures=(maxFeatures_opt[0])? maxFeatures_opt[0] : nFeatures;
+  list<int> subset;//set of selected features (levels) for each class 
combination
+  double cost=0;
+  FeatureSelector selector;
+  if(maxFeatures==nFeatures){
+    subset.clear();
+    for(int ifeature=0;ifeature<nFeatures;++ifeature)
+      subset.push_back(ifeature);
+    // cost=getCost(trainingFeatures[iclass1],trainingFeatures[iclass2]);
+  }
+  else{
+    try{
+      //test
+      std::cout << "debug3" << std::endl;
+      
cost=selector.floating(trainingFeatures,&getCost,subset,maxFeatures,(verbose_opt[0]>0)
 ? true:false);
+    }
+    catch(...){
+      std::cout << "catched feature selection" << std::endl;
+      exit(1);
+    }
+  }
+  if(verbose_opt[0]){
+    cout <<"cost: " << cost << endl;
+    cout << "levels: ";
+    for(list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit){
+      cout << *lit;
+      if((*(lit))!=subset.back())
+        cout <<",";
+      else
+        cout << endl;
+    }
+  }
+
+  // *NOTE* Because svm_model contains pointers to svm_problem, you can
+  // not free the memory used by svm_problem if you are still using the
+  // svm_model produced by svm_train(). 
+
+  // free(prob.y);
+  // free(prob.x);
+  // free(x_space);
+  // svm_destroy_param(&param);
+  return 0;
+}
+                            

-- 
Alioth's /usr/local/bin/git-commit-notice on 
/srv/git.debian.org/git/pkg-grass/pktools.git

_______________________________________________
Pkg-grass-devel mailing list
Pkg-grass-devel@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-grass-devel

[pktools] 27/375: added feature selection

Reply via email to