Do you have any context around the locations in the training file? If not, you 
might be getting the entire sentence because you are basically training it to 
find a word.

Mg

> On May 18, 2014, at 3:30 PM, lina abu jaradeh <[email protected]> wrote:
> 
> I am trying to train an en-ner-location.bin file using OpenNLP in Java. The 
> thing is, I got the training text file in the following format:
> <START:location> Fontana <END> .
> <START:location> Palo Verde <END> .
> <START:location> Picacho <END> .
> I stored it in a text file called citytrain.txt; the file contains 120770 
> lines of city names. Then I used the following code to train the file:
> import java.io.BufferedOutputStream;
> import java.io.BufferedReader;
> import java.io.File;
> import java.io.FileInputStream;
> import java.io.FileOutputStream;
> import java.io.FileReader;
> import java.io.IOException;
> import java.io.InputStream;
> import java.nio.charset.Charset;
> import java.util.Collections;
> 
> import opennlp.tools.namefind.NameFinderME;
> import opennlp.tools.namefind.NameSample;
> import opennlp.tools.namefind.NameSampleDataStream;
> import opennlp.tools.namefind.TokenNameFinderModel;
> import opennlp.tools.tokenize.Tokenizer;
> import opennlp.tools.tokenize.TokenizerME;
> import opennlp.tools.tokenize.TokenizerModel;
> import opennlp.tools.util.ObjectStream;
> import opennlp.tools.util.PlainTextByLineStream;
> import opennlp.tools.util.Span;
> import opennlp.tools.util.featuregen.AdaptiveFeatureGenerator;
> 
> public class TrainNames {     
>        @SuppressWarnings("deprecation")
>        public void TrainNames() throws IOException{
>            //File baseDir = new File("src/test/resources");
>            //File destDir = new File("target");
>            //<start id="ne-train"/>
>            File inFile = new File("citytrain.txt");
>            NameSampleDataStream nss = new NameSampleDataStream( //<co 
> id="co.opennlp.name.initnamestream"/>
>              new PlainTextByLineStream(
>                new java.io.FileReader(inFile)));
> 
>            int iterations = 100;
>            int cutoff = 5;
>            TokenNameFinderModel model = NameFinderME.train( //<co 
> id="co.opennlp.name.train"/>
>                "en", // language
>                "location", // type
>                nss, 
>                (AdaptiveFeatureGenerator) null,
>                Collections.<String,Object>emptyMap(),
>                iterations,
>                cutoff);
>            
>            File outFile = new File("en-ner-locationNews.bin");
>            FileOutputStream outFileStream = new FileOutputStream(outFile);
>            model.serialize(outFileStream);
>        }
> }
> 
> When I used the output bin file to test it on a string, to pull Fontana out 
> of the string, the result returned the whole string... I don't know why or 
> what I am doing wrong. The following is the code I used to extract Fontana 
> from a string:
> import java.io.FileInputStream;
> 
> import java.io.FileNotFoundException;
> import java.io.IOException;
> import java.io.InputStream;
> import opennlp.tools.namefind.NameFinderME;
> import opennlp.tools.namefind.TokenNameFinderModel;
> import opennlp.tools.util.InvalidFormatException;
> import opennlp.tools.util.Span;
> import opennlp.tools.tokenize.Tokenizer;
> import opennlp.tools.tokenize.TokenizerME;
> import opennlp.tools.tokenize.TokenizerModel;
> import opennlp.tools.tokenize.SimpleTokenizer;
> import opennlp.tools.sentdetect.SentenceDetectorME;
> import opennlp.tools.sentdetect.SentenceModel;
> 
> import org.xml.sax.SAXException;
> 
> 
> public class CityFinder {
> 
>    public String Tokens[];
> 
>    public static void main(String[] args) throws IOException, SAXException {
> 
>        CityFinder toi = new CityFinder();
>        String cnt;
>        cnt="John is planning to specialize in Electrical Engineering in UC 
> Fontana and pursue a career with IBM.";
>        toi.tokenization(cnt);
>        String cities = toi.namefind(toi.Tokens);
>        String org = toi.orgfind(toi.Tokens);
> 
>        System.out.println("City name is : "+cities);
>        System.out.println("organization name is: "+org);
> 
>    }
>        public String namefind(String cnt[]) {
>        InputStream is;
>        TokenNameFinderModel tnf;
>        NameFinderME nf;
>        String sd = "";
>        try {
>            is = new FileInputStream("en-ner-locationNew.bin");
>            tnf = new TokenNameFinderModel(is);
>            nf = new NameFinderME(tnf);
>            Span sp[] = nf.find(cnt);
>            String a[] = Span.spansToStrings(sp, cnt);
>            StringBuilder fd = new StringBuilder();
>            int l = a.length;
> 
>            for (int j = 0; j < l; j++) {
>                fd = fd.append(a[j] + "\n");
> 
>            }
>            sd = fd.toString();
> 
>        } catch (FileNotFoundException e) {
> 
>            e.printStackTrace();
>        } catch (InvalidFormatException e) {
> 
>            e.printStackTrace();
>        } catch (IOException e) {
> 
>            e.printStackTrace();
>        }
>        return sd;
>    }
> 
>    public String orgfind(String cnt[]) {
>        InputStream is;
>        TokenNameFinderModel tnf;
>        NameFinderME nf;
>        String sd = "";
>        try {
>            is = new FileInputStream("en-ner-organization.bin");
>            tnf = new TokenNameFinderModel(is);
>            nf = new NameFinderME(tnf);
>            Span sp[] = nf.find(cnt);
>            String a[] = Span.spansToStrings(sp, cnt);
>            StringBuilder fd = new StringBuilder();
>            int l = a.length;
>            for (int j = 0; j < l; j++) {
>                fd = fd.append(a[j] + "\n");
> 
>            }
> 
>            sd = fd.toString();
> 
>        } catch (FileNotFoundException e) {
> 
>            e.printStackTrace();
>        } catch (InvalidFormatException e) {
> 
>            e.printStackTrace();
>        } catch (IOException e) {
> 
>            e.printStackTrace();
>        }
>        return sd;
> 
>    }
>    public void tokenization(String tokens) {
> 
>        InputStream is;
>        TokenizerModel tm;
>        try {
>            is = new FileInputStream("en-token.bin");
>            tm = new TokenizerModel(is);
>            Tokenizer tz = new TokenizerME(tm);
>            Tokens = tz.tokenize(tokens);
>            // System.out.println(Tokens[1]);
>        } catch (IOException e) {
>            e.printStackTrace();
>        }
>    }
> 
> }
> Can you please let me know what I did wrong?

Reply via email to