This is an automated email from the git hooks/post-receive script. sebastic-guest pushed a commit to branch upstream-master in repository pktools.
commit 2b73cf4a846816f60724b2b0ee3c619773c76333 Author: Pieter Kempeneers <kempe...@gmail.com> Date: Fri Feb 1 18:14:48 2013 +0100 confusion matrix supports bagging for pkclassify_svm.cc, balancing and bagsize for each class. Still to be implemented for pkclassify_nn.cc --- src/apps/pkascii2img.cc | 1 - src/apps/pkascii2ogr.cc | 1 - src/apps/pkclassify_nn.cc | 1 - src/apps/pkclassify_svm.cc | 114 ++++++++++++++++++++++++--------------------- src/apps/pkcrop.cc | 24 +++++----- src/apps/pkextract.cc | 2 +- src/apps/pklas2img.cc | 4 +- 7 files changed, 76 insertions(+), 71 deletions(-) diff --git a/src/apps/pkascii2img.cc b/src/apps/pkascii2img.cc index 1b51861..4e3e064 100644 --- a/src/apps/pkascii2img.cc +++ b/src/apps/pkascii2img.cc @@ -43,7 +43,6 @@ int main(int argc, char *argv[]) bool doProcess;//stop process when program was invoked with help option (-h --help) try{ doProcess=input_opt.retrieveOption(argc,argv); - input_opt.retrieveOption(argc,argv); output_opt.retrieveOption(argc,argv); dataType_opt.retrieveOption(argc,argv); imageType_opt.retrieveOption(argc,argv); diff --git a/src/apps/pkascii2ogr.cc b/src/apps/pkascii2ogr.cc index 60791ba..7d90515 100644 --- a/src/apps/pkascii2ogr.cc +++ b/src/apps/pkascii2ogr.cc @@ -40,7 +40,6 @@ int main(int argc, char *argv[]) bool doProcess;//stop process when program was invoked with help option (-h --help) try{ doProcess=input_opt.retrieveOption(argc,argv); - input_opt.retrieveOption(argc,argv); output_opt.retrieveOption(argc,argv); colX_opt.retrieveOption(argc,argv); colY_opt.retrieveOption(argc,argv); diff --git a/src/apps/pkclassify_nn.cc b/src/apps/pkclassify_nn.cc index afcd750..8a2a2bd 100644 --- a/src/apps/pkclassify_nn.cc +++ b/src/apps/pkclassify_nn.cc @@ -120,7 +120,6 @@ int main(int argc, char *argv[]) bool doProcess;//stop process when program was invoked with help option (-h --help) try{ doProcess=input_opt.retrieveOption(argc,argv); - input_opt.retrieveOption(argc,argv); training_opt.retrieveOption(argc,argv); label_opt.retrieveOption(argc,argv); reclass_opt.retrieveOption(argc,argv); diff --git a/src/apps/pkclassify_svm.cc b/src/apps/pkclassify_svm.cc index 8cd1029..913f5f3 100644 --- a/src/apps/pkclassify_svm.cc +++ b/src/apps/pkclassify_svm.cc @@ -114,7 +114,7 @@ int main(int argc, char *argv[]) // Optionpk<bool> weight_opt("wi", "wi", "set the parameter C of class i to weight*C, for C-SVC",true); Optionpk<unsigned short> comb_opt("c", "comb", "how to combine bootstrap aggregation classifiers (0: sum rule, 1: product rule, 2: max rule). Also used to aggregate classes with rc option.",0); Optionpk<unsigned short> bag_opt("\0", "bag", "Number of bootstrap aggregations", 1); - Optionpk<int> bagSize_opt("\0", "bsize", "Percentage of features used from available training features for each bootstrap aggregation", 100); + Optionpk<int> bagSize_opt("\0", "bsize", "Percentage of features used from available training features for each bootstrap aggregation (one size for all classes, or a different size for each class respectively", 100); Optionpk<string> classBag_opt("\0", "class", "output for each individual bootstrap aggregation"); Optionpk<string> mask_opt("\0", "mask", "mask image (see also mvalue option"); Optionpk<short> maskValue_opt("\0", "mvalue", "mask value(s) not to consider for classification (use negative values if only these values should be taken into account). Values will be taken over in classification image.", 0); @@ -195,6 +195,7 @@ int main(int argc, char *argv[]) if(verbose_opt[0]>=1) std::cout << "number of bootstrap aggregations: " << nbag << std::endl; + unsigned int totalSamples=0; vector<short> vcode;//unique reclass codes (e.g., -rc 1 -rc 1 -rc 2 -rc 2 -> vcode[0]=1,vcode[1]=2) vector<struct svm_model*> svm(nbag); @@ -247,6 +248,7 @@ int main(int argc, char *argv[]) std::sort(band_opt.begin(),band_opt.end()); //----------------------------------- Training ------------------------------- + ConfusionMatrix cm; vector< vector<double> > offset(nbag); vector< vector<double> > scale(nbag); map<string,Vector2d<float> > trainingMap; @@ -272,6 +274,7 @@ int main(int argc, char *argv[]) // struct svm_node *x_space; vector<string> fields; + for(int ibag=0;ibag<nbag;++ibag){ //organize training data if(ibag<training_opt.size()){//if bag contains new training pixels @@ -351,26 +354,27 @@ int main(int argc, char *argv[]) //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp //balance training data if(balance_opt[0]>0){ + while(balance_opt.size()<nclass) + balance_opt.push_back(balance_opt.back()); if(random) srand(time(NULL)); totalSamples=0; for(short iclass=0;iclass<nclass;++iclass){ - if(trainingPixels[iclass].size()>balance_opt[0]){ - while(trainingPixels[iclass].size()>balance_opt[0]){ + if(trainingPixels[iclass].size()>balance_opt[iclass]){ + while(trainingPixels[iclass].size()>balance_opt[iclass]){ int index=rand()%trainingPixels[iclass].size(); trainingPixels[iclass].erase(trainingPixels[iclass].begin()+index); } } else{ int oldsize=trainingPixels[iclass].size(); - for(int isample=trainingPixels[iclass].size();isample<balance_opt[0];++isample){ + for(int isample=trainingPixels[iclass].size();isample<balance_opt[iclass];++isample){ int index = rand()%oldsize; trainingPixels[iclass].push_back(trainingPixels[iclass][index]); } } totalSamples+=trainingPixels[iclass].size(); } - assert(totalSamples==nclass*balance_opt[0]); } //set scale and offset @@ -477,6 +481,7 @@ int main(int argc, char *argv[]) } assert(priors_opt.size()==1||priors_opt.size()==nclass); + //set priors priorsReclass.resize(nreclass); for(short iclass=0;iclass<nreclass;++iclass){ priorsReclass[iclass]=0; @@ -485,6 +490,9 @@ int main(int argc, char *argv[]) priorsReclass[iclass]+=priors[ic]; } } + //set bagsize for each class if not done already via command line + while(bagSize_opt.size()<nclass) + bagSize_opt.push_back(bagSize_opt.back()); if(verbose_opt[0]>=1){ std::cout << "number of bands: " << nband << std::endl; @@ -494,6 +502,28 @@ int main(int argc, char *argv[]) std::cout << " " << priors[iclass]; std::cout << std::endl; } + // ConfusionMatrix cm(nclass); + map<string,Vector2d<float> >::iterator mapit=trainingMap.begin(); + if(reclass_opt.empty()){ + while(mapit!=trainingMap.end()){ + cm.pushBackClassName(mapit->first); + ++mapit; + } + } + else{ + if(verbose_opt[0]>1) + std::cout << "classes for confusion matrix: " << std::endl; + for(short iclass=0;iclass<nreclass;++iclass){ + ostringstream os; + os << vcode[iclass]; + if(verbose_opt[0]>1) + std::cout << os.str() << " "; + cm.pushBackClassName(os.str()); + } + if(verbose_opt[0]>1) + std::cout << std::endl; + } + assert(cm.size()==nreclass); }//if(!ibag) //Calculate features of trainig set @@ -504,12 +534,12 @@ int main(int argc, char *argv[]) std::cout << "calculating features for class " << iclass << std::endl; if(random) srand(time(NULL)); - nctraining=(bagSize_opt[0]<100)? trainingPixels[iclass].size()/100.0*bagSize_opt[0] : trainingPixels[iclass].size();//bagSize_opt[0] given in % of training size + nctraining=(bagSize_opt[iclass]<100)? trainingPixels[iclass].size()/100.0*bagSize_opt[iclass] : trainingPixels[iclass].size();//bagSize_opt[0] given in % of training size if(nctraining<=0) nctraining=1; assert(nctraining<=trainingPixels[iclass].size()); int index=0; - if(bagSize_opt[0]<100) + if(bagSize_opt[iclass]<100) random_shuffle(trainingPixels[iclass].begin(),trainingPixels[iclass].end()); trainingFeatures[iclass].resize(nctraining); @@ -578,64 +608,40 @@ int main(int argc, char *argv[]) if(verbose_opt[0]) std::cout << "parameters ok, training" << std::endl; svm[ibag]=svm_train(&prob[ibag],¶m[ibag]); - - if(verbose_opt[0]>1) - std::cout << "SVM is now trained" << std::endl; if(cv_opt[0]>0){ - //todo: implement reclassification - // ConfusionMatrix cm(nclass); - ConfusionMatrix cm; - map<string,Vector2d<float> >::iterator mapit=trainingMap.begin(); - if(reclass_opt.empty()){ - while(mapit!=trainingMap.end()){ - cm.pushBackClassName(mapit->first); - ++mapit; - } - } - else{ - if(verbose_opt[0]>1) - std::cout << "classes for confusion matrix: " << std::endl; - for(short iclass=0;iclass<nreclass;++iclass){ - ostringstream os; - os << vcode[iclass]; - if(verbose_opt[0]>1) - std::cout << os.str() << " "; - cm.pushBackClassName(os.str()); - } - if(verbose_opt[0]>1) - std::cout << std::endl; - } - assert(cm.size()==nreclass); - double *target = Malloc(double,prob[ibag].l); svm_cross_validation(&prob[ibag],¶m[ibag],cv_opt[0],target); assert(param[ibag].svm_type != EPSILON_SVR&¶m[ibag].svm_type != NU_SVR);//only for regression for(int i=0;i<prob[ibag].l;i++) - cm.incrementResult(cm.getClass(vreclass[prob[ibag].y[i]]),cm.getClass(vreclass[target[i]]),1); - assert(cm.nReference()); - std::cout << cm << std::endl; - cout << "class #samples userAcc prodAcc" << endl; - double se95_ua=0; - double se95_pa=0; - double se95_oa=0; - double dua=0; - double dpa=0; - double doa=0; - for(short iclass=0;iclass<cm.nClasses();++iclass){ - dua=cm.ua_pct(cm.getClass(iclass),&se95_ua); - dpa=cm.pa_pct(cm.getClass(iclass),&se95_pa); - cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl; - } - std::cout << "Kappa: " << cm.kappa() << std::endl; - doa=cm.oa_pct(&se95_oa); - std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")" << std::endl; + cm.incrementResult(cm.getClass(vreclass[prob[ibag].y[i]]),cm.getClass(vreclass[target[i]]),1.0/nbag); free(target); - } + } + if(verbose_opt[0]>1) + std::cout << "SVM is now trained" << std::endl; // *NOTE* Because svm_model contains pointers to svm_problem, you can // not free the memory used by svm_problem if you are still using the // svm_model produced by svm_train(). }//for ibag + if(cv_opt[0]>0){ + assert(cm.nReference()); + std::cout << cm << std::endl; + cout << "class #samples userAcc prodAcc" << endl; + double se95_ua=0; + double se95_pa=0; + double se95_oa=0; + double dua=0; + double dpa=0; + double doa=0; + for(short iclass=0;iclass<cm.nClasses();++iclass){ + dua=cm.ua_pct(cm.getClass(iclass),&se95_ua); + dpa=cm.pa_pct(cm.getClass(iclass),&se95_pa); + cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl; + } + std::cout << "Kappa: " << cm.kappa() << std::endl; + doa=cm.oa_pct(&se95_oa); + std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")" << std::endl; + } //--------------------------------- end of training ----------------------------------- if(input_opt.empty()) diff --git a/src/apps/pkcrop.cc b/src/apps/pkcrop.cc index 6a4e71b..158ed5f 100644 --- a/src/apps/pkcrop.cc +++ b/src/apps/pkcrop.cc @@ -30,8 +30,8 @@ along with pktools. If not, see <http://www.gnu.org/licenses/>. int main(int argc, char *argv[]) { - Optionpk<string> input_opt("i", "input", "Input image file(s). If input contains multiple images, a multi-band output is created", ""); - Optionpk<string> output_opt("o", "output", "Output image file", ""); + Optionpk<string> input_opt("i", "input", "Input image file(s). If input contains multiple images, a multi-band output is created"); + Optionpk<string> output_opt("o", "output", "Output image file"); Optionpk<string> projection_opt("p", "projection", "projection in EPSG format (leave blank to copy from input file, use EPSG:3035 to use European projection and to force to European grid", ""); Optionpk<string> extent_opt("e", "extent", "get boundary from extent from polygons in vector file", ""); Optionpk<bool> mask_opt("m","mask","mask values out of polygon in extent file to flag option (tip: for better performance, use gdal_rasterize -i -burn 0 -l extent extent.shp output (with output the result of pkcrop)",false); @@ -56,7 +56,6 @@ int main(int argc, char *argv[]) bool doProcess;//stop process when program was invoked with help option (-h --help) try{ doProcess=input_opt.retrieveOption(argc,argv); - input_opt.retrieveOption(argc,argv); output_opt.retrieveOption(argc,argv); projection_opt.retrieveOption(argc,argv); extent_opt.retrieveOption(argc,argv); @@ -87,6 +86,14 @@ int main(int argc, char *argv[]) std::cout << "short option -h shows basic options only, use long option --help to show all options" << std::endl; exit(0);//help was invoked, stop processing } + if(input_opt.empty()){ + std::cerr << "No input file provided (use option -i). Use pkinfo --help for help information" << std::endl; + exit(0);//help was invoked, stop processing + } + if(output_opt.empty()){ + std::cerr << "No output file provided (use option -i). Use pkinfo --help for help information" << std::endl; + exit(0);//help was invoked, stop processing + } RESAMPLE theResample; if(resample_opt[0]=="near"){ @@ -307,16 +314,11 @@ int main(int argc, char *argv[]) else if(imgReader.isGeoRef()) imgWriter.setProjection(imgReader.getProjection()); if(colorTable_opt.size()){ - if(verbose_opt[0]) - cout << "set colortable " << colorTable_opt[0] << endl; - assert(imgWriter.getDataType()==GDT_Byte); - imgWriter.setColorTable(colorTable_opt[0]); + if(colorTable_opt[0]!="none") + imgWriter.setColorTable(colorTable_opt[0]); } - else if(imgReader.getColorTable()!=NULL){ - if(verbose_opt[0]) - cout << "set colortable from input image" << endl; + else if (imgReader.getColorTable()!=NULL)//copy colorTable from input image imgWriter.setColorTable(imgReader.getColorTable()); - } } double startCol=uli; double endCol=lri; diff --git a/src/apps/pkextract.cc b/src/apps/pkextract.cc index b18b17c..e6e46ad 100644 --- a/src/apps/pkextract.cc +++ b/src/apps/pkextract.cc @@ -46,7 +46,7 @@ int main(int argc, char *argv[]) Optionpk<string> bufferOutput_opt("bu", "bu", "Buffer output shape file", ""); Optionpk<short> geo_opt("g", "geo", "geo coordinates", 1); Optionpk<short> down_opt("down", "down", "down sampling factor. Can be used to create grid points", 1); - Optionpk<float> threshold_opt("t", "threshold", "threshold for selecting samples (randomly). Provide probability in percentage (>0) or absolute (<0). Use multiple threshold values (e.g. -t 80 -t 60) is more classes are to be extracted with random selection. Use value 100 to select all pixels for selected class(es)", 100); + Optionpk<float> threshold_opt("t", "threshold", "threshold for selecting samples (randomly). Provide probability in percentage (>0) or absolute (<0). Use multiple threshold values (e.g. -t 80 -t 60) if more classes are to be extracted with random selection. Use value 100 to select all pixels for selected class(es)", 100); Optionpk<double> min_opt("min", "min", "minimum number of samples to select (0)", 0); Optionpk<short> boundary_opt("bo", "boundary", "boundary for selecting the sample", 1); Optionpk<short> rbox_opt("rb", "rbox", "rectangular boundary box (total width in m) to draw around the selected pixel. Can not combined with class option. Use multiple rbox options for multiple boundary boxes. Use value 0 for no box)", 0); diff --git a/src/apps/pklas2img.cc b/src/apps/pklas2img.cc index afbf4ad..fd330a2 100644 --- a/src/apps/pklas2img.cc +++ b/src/apps/pklas2img.cc @@ -450,8 +450,8 @@ int main(int argc,char **argv) { cout << errorString << endl; exit(1); } - int newdimx=2*(dimx-1)+1; - int newdimy=2*(dimy-1)+1;//from PE&RS vol 71 pp313-324 + int newdimx=(dimx==1)? 3: 2*(dimx-1)+1; + int newdimy=(dimx==1)? 3: 2*(dimy-1)+1;//from PE&RS vol 71 pp313-324 hThreshold=hThreshold_opt[0]+maxSlope_opt[0]*(newdimx-dimx)*dx_opt[0]; dimx=newdimx; dimy=newdimy; -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git _______________________________________________ Pkg-grass-devel mailing list Pkg-grass-devel@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-grass-devel