Author: srowen Date: Wed Feb 17 12:52:25 2010 New Revision: 910951 URL: http://svn.apache.org/viewvc?rev=910951&view=rev Log: Fixed possible bug that would mix up PreferenceArray / Collection in obscure situation when reusing previously loaded file data
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=910951&r1=910950&r2=910951&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Wed Feb 17 12:52:25 2010 @@ -98,7 +98,7 @@ * * <p> * It is possible and likely useful to subclass this class and customize its behavior to accommodate - * application-specific needs and input formats. See {@link #processLine(String, FastByIDMap)} and + * application-specific needs and input formats. 
See {@link #processLine(String, FastByIDMap, boolean)} and * {@link #processLineWithoutID(String, FastByIDMap)} */ public class FileDataModel implements DataModel { @@ -203,10 +203,10 @@ FastByIDMap<Collection<Preference>> data = new FastByIDMap<Collection<Preference>>(); FileLineIterator iterator = new FileLineIterator(dataFile, false); - processFile(iterator, data); + processFile(iterator, data, false); for (File updateFile : findUpdateFiles()) { - processFile(new FileLineIterator(updateFile, false), data); + processFile(new FileLineIterator(updateFile, false), data, false); } return new GenericDataModel(GenericDataModel.toDataMap(data, true)); @@ -216,7 +216,7 @@ FastByIDMap<PreferenceArray> rawData = ((GenericDataModel) delegate).getRawUserData(); for (File updateFile : findUpdateFiles()) { - processFile(new FileLineIterator(updateFile, false), rawData); + processFile(new FileLineIterator(updateFile, false), rawData, true); } return new GenericDataModel(rawData); @@ -308,13 +308,15 @@ return delimiter; } - protected void processFile(FileLineIterator dataOrUpdateFileIterator, FastByIDMap<?> data) { + protected void processFile(FileLineIterator dataOrUpdateFileIterator, + FastByIDMap<?> data, + boolean fromPriorData) { log.info("Reading file info..."); AtomicInteger count = new AtomicInteger(); while (dataOrUpdateFileIterator.hasNext()) { String line = dataOrUpdateFileIterator.next(); if (line.length() > 0) { - processLine(line, data); + processLine(line, data, fromPriorData); int currentCount = count.incrementAndGet(); if (currentCount % 1000000 == 0) { log.info("Processed {} lines", currentCount); @@ -341,8 +343,13 @@ * line from input data file * @param data * all data read so far, as a mapping from user IDs to preferences + * @param fromPriorData an implementation detail -- if true, data will map IDs to + * {@link PreferenceArray} since the framework is attempting to read and update raw + * data that is already in memory. 
Otherwise it maps to {@link Collection}s of + * {@link Preference}s, since it's reading fresh data. Subclasses must be prepared + * to handle this wrinkle. */ - protected void processLine(String line, FastByIDMap<?> data) { + protected void processLine(String line, FastByIDMap<?> data, boolean fromPriorData) { if ((line.length() == 0) || (line.charAt(0) == COMMENT_CHAR)) { return; @@ -379,7 +386,7 @@ // This is kind of gross but need to handle two types of storage Object maybePrefs = data.get(userID); - if (maybePrefs instanceof PreferenceArray) { + if (fromPriorData) { PreferenceArray prefs = (PreferenceArray) maybePrefs; if (preferenceValueString.length() == 0) { Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java?rev=910951&r1=910950&r2=910951&view=diff ============================================================================== --- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java (original) +++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterDataModel.java Wed Feb 17 12:52:25 2010 @@ -57,12 +57,12 @@ protected DataModel buildModel() throws IOException { FastByIDMap<Collection<Preference>> data = new FastByIDMap<Collection<Preference>>(); FileLineIterator iterator = new FileLineIterator(getDataFile(), false); - processFile(iterator, data); + processFile(iterator, data, false); return new GenericDataModel(GenericDataModel.toDataMap(data, true)); } @Override - protected void processLine(String line, FastByIDMap<?> rawData) { + protected void processLine(String line, FastByIDMap<?> rawData, boolean fromPriorData) { FastByIDMap<Collection<Preference>> data = (FastByIDMap<Collection<Preference>>) rawData; String[] jokePrefs = line.split(","); int count = 
Integer.parseInt(jokePrefs[0]);