There is an internal buffer in AdaptiveLogisticRegression; the
NullPointerException is thrown when the backend CrossFoldLearners
start training on the buffered examples.

The default size of the buffer is 500, and the exception is thrown
when I pass the 501st example to the ALR.


On Mon, May 30, 2011 at 2:27 AM, Ted Dunning <ted.dunn...@gmail.com> wrote:
> This usually means that you have fed the ALR enough data for it to push a
> batch of learning into the evolutionary algorithm.  That means that there
> isn't any best result yet.
> Getting that null doesn't impact the model, but you have to watch out for
> it.
>
> On Sun, May 29, 2011 at 1:23 AM, XiaoboGu <guxiaobo1...@gmail.com> wrote:
>>
>> Hi,
>>
>>        The main process for MAHOUT-696 is as following, but it will always
>> cause a NullPointerException after the first call to getBest, can we
>> continue training AdaptiveLogisticRegressions after using getBest() to score
>> some new lines just as TrainLogistic does?
>>
>>
>>
>> double logPEstimate = 0;
>>                        int k = 0;
>>
>>                        CsvRecordFactory csv = lmp.getCsvRecordFactory();
>>                        model = lmp.createAdaptiveLogisticRegression();
>>                        State<Wrapper, CrossFoldLearner> best = null;
>>                        CrossFoldLearner learner = null;
>>
>>                        for (int pass = 0; pass < passes; pass++) {
>>                                BufferedReader in = open(inputFile);
>>
>>                                // read variable names
>>                                csv.firstLine(in.readLine());
>>
>>                                String line = in.readLine();
>>
>>                                while (line != null) {
>>                                        // for each new line, get target
>> and predictors
>>                                        Vector input = new
>> RandomAccessSparseVector(lmp.getNumFeatures());
>>                                        int targetValue =
>> csv.processLine(line, input);
>>
>>                                        // update model
>>                                        model.train(targetValue, input);
>>
>>                                        k ++;
>>
>>                                        if (scores && (k % (skipscorenum +
>> 1) == 0) ) {
>>
>>                                                best = model.getBest();
>>                                                if (null != best) {
>>                                                        learner =
>> best.getPayload().getLearner();
>>                                                }
>>                                                if (learner != null) {
>>                                                // check performance while
>> this is still news
>>                                                double logP =
>> learner.logLikelihood(targetValue, input);
>>                                                if
>> (!Double.isInfinite(logP)) {
>>                                                        if (k < 20) {
>>
>>  logPEstimate = (k * logPEstimate + logP)
>>
>>      / (k + 1);
>>                                                        } else {
>>
>>  logPEstimate = 0.95 * logPEstimate + 0.05
>>
>>      * logP;
>>                                                        }
>>                                                }
>>                                                double p =
>> learner.classifyScalar(input);
>>
>>  output.printf(Locale.ENGLISH,
>>                                                                "%10d %2d
>> %10.2f %2.4f %10.4f %10.4f\n",
>>                                                                k,
>> targetValue,
>>
>>  learner.percentCorrect(), p, logP,
>>
>>  logPEstimate);
>>                                                }else{
>>
>>  output.printf(Locale.ENGLISH,
>>
>>  "%10d %2d %s\n", k, targetValue,
>>
>>  "AdaptiveLogisticRegression is not ready for scoring ... ");
>>                                                }
>>                                        }
>>
>>
>>                                        line = in.readLine();
>>                                }
>>                                in.close();
>>                        }
>>
>>
>>
>>
>>       100  1 AdaptiveLogisticRegression is not ready for scoring ...
>>       200  0 AdaptiveLogisticRegression is not ready for scoring ...
>>       300  1 AdaptiveLogisticRegression is not ready for scoring ...
>>       400  0 AdaptiveLogisticRegression is not ready for scoring ...
>>       500  1 AdaptiveLogisticRegression is not ready for scoring ...
>> Exception in thread "main" java.lang.IllegalStateException:
>> java.lang.NullPointerException
>>        at
>> org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression.trainWithBufferedExamples(AdaptiveLogisticRegression.java:144)
>>        at
>> org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression.train(AdaptiveLogisticRegression.java:117)
>>        at
>> org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression.train(AdaptiveLogisticRegression.java:103)
>>        at
>> org.apache.mahout.classifier.sgd.TrainAdaptiveLogistic.main(TrainAdaptiveLogistic.java:72)
>> Caused by: java.lang.NullPointerException
>>        at
>> org.apache.mahout.classifier.sgd.CrossFoldLearner.train(CrossFoldLearner.java:134)
>>        at
>> org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression$Wrapper.train(AdaptiveLogisticRegression.java:411)
>>        at
>> org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression$1.apply(AdaptiveLogisticRegression.java:128)
>>        at
>> org.apache.mahout.classifier.sgd.AdaptiveLogisticRegression$1.apply(AdaptiveLogisticRegression.java:1)
>>        at
>> org.apache.mahout.ep.EvolutionaryProcess$1.call(EvolutionaryProcess.java:146)
>>        at
>> org.apache.mahout.ep.EvolutionaryProcess$1.call(EvolutionaryProcess.java:1)
>>        at
>> java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:303)
>>        at java.util.concurrent.FutureTask.run(FutureTask.java:138)
>>        at
>> java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
>>        at
>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
>>        at java.lang.Thread.run(Thread.java:662)
>>
>
>

Reply via email to