Repository: mahout
Updated Branches:
  refs/heads/master b5c63caf9 -> 864ba1aea


Revert "MAHOUT-1649: Upgrade to Lucene 4.10.x, this closes apache/mahout#114"

This reverts commit 670a7d219e4eab8c7735083d52cefa13e81197fb.


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/864ba1ae
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/864ba1ae
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/864ba1ae

Branch: refs/heads/master
Commit: 864ba1aea23ece6491acff38adab0cd1b0d29354
Parents: b5c63ca
Author: Andrew Musselman <[email protected]>
Authored: Wed Apr 8 10:21:39 2015 -0700
Committer: Andrew Musselman <[email protected]>
Committed: Wed Apr 8 10:21:39 2015 -0700

----------------------------------------------------------------------
 .../mahout/classifier/NewsgroupHelper.java      |   2 +-
 integration/pom.xml                             |  10 -
 .../mahout/text/LuceneSegmentInputFormat.java   |   4 +-
 .../mahout/text/LuceneSegmentInputSplit.java    |   4 +-
 .../mahout/text/LuceneSegmentRecordReader.java  |   3 +-
 .../mahout/text/LuceneStorageConfiguration.java |   4 +-
 .../text/MailArchivesClusteringAnalyzer.java    |  22 +-
 .../text/ReadOnlyFileSystemDirectory.java       | 354 +++++++++++++++++++
 .../text/SequenceFilesFromLuceneStorage.java    |   1 +
 .../SequenceFilesFromLuceneStorageDriver.java   |   3 +-
 .../SequenceFilesFromMailArchivesMapper.java    |  29 +-
 .../text/wikipedia/WikipediaAnalyzer.java       |  10 +-
 .../mahout/utils/regex/AnalyzerTransformer.java |   2 +-
 .../mahout/common/lucene/AnalyzerUtils.java     |   4 +-
 .../encoders/InteractionValueEncoder.java       |   6 +-
 .../mahout/classifier/ConfusionMatrixTest.java  |   4 +-
 .../classifier/df/DecisionForestTest.java       |   1 +
 .../apache/mahout/classifier/df/data/Utils.java |  10 +-
 .../mapreduce/partial/PartialBuilderTest.java   |  16 +-
 .../classifier/mlp/TestNeuralNetwork.java       |  11 +-
 .../classifier/naivebayes/NaiveBayesTest.java   |  17 +-
 .../encoders/TextValueEncoderTest.java          |   2 +-
 pom.xml                                         |   4 +-
 23 files changed, 448 insertions(+), 75 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
----------------------------------------------------------------------
diff --git a/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java b/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
index 2c857cc..3674a57 100644
--- a/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
+++ b/examples/src/main/java/org/apache/mahout/classifier/NewsgroupHelper.java
@@ -60,7 +60,7 @@ public final class NewsgroupHelper {
   private static final long WEEK = 7 * 24 * 3600;
 
   private final Random rand = RandomUtils.getRandom();
-  private final Analyzer analyzer = new StandardAnalyzer();
+  private final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
   private final FeatureVectorEncoder encoder = new StaticWordValueEncoder("body");
   private final FeatureVectorEncoder bias = new ConstantValueEncoder("Intercept");
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/pom.xml
----------------------------------------------------------------------
diff --git a/integration/pom.xml b/integration/pom.xml
index 9dcc03a..fcb85cb 100644
--- a/integration/pom.xml
+++ b/integration/pom.xml
@@ -139,16 +139,6 @@
     </dependency>
 
     <dependency>
-      <groupId>org.apache.solr</groupId>
-      <artifactId>solr-core</artifactId>
-      <version>${lucene.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>commons-httpclient</groupId>
-      <artifactId>commons-httpclient</artifactId>
-      <version>3.1</version>
-    </dependency>
-    <dependency>
       <groupId>org.mongodb</groupId>
       <artifactId>mongo-java-driver</artifactId>
       <version>2.11.2</version>

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
index 60d48ce..1c4f8de 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
@@ -32,7 +32,6 @@ import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentInfos;
-import org.apache.solr.store.hdfs.HdfsDirectory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -53,7 +52,8 @@ public class LuceneSegmentInputFormat extends InputFormat {
 
     List<Path> indexPaths = lucene2SeqConfiguration.getIndexPaths();
     for (Path indexPath : indexPaths) {
-      HdfsDirectory directory = new HdfsDirectory(indexPath, configuration);
+      ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath,
+          false, configuration);
 
       SegmentInfos segmentInfos = new SegmentInfos();
       segmentInfos.read(directory);

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
index f30c7fb..1441e32 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
@@ -24,7 +24,6 @@ import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.lucene.index.SegmentCommitInfo;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.index.SegmentInfos;
-import org.apache.solr.store.hdfs.HdfsDirectory;
 
 import java.io.DataInput;
 import java.io.DataOutput;
@@ -89,7 +88,8 @@
    * @throws IOException if an error occurs when accessing the directory
    */
   public SegmentCommitInfo getSegment(Configuration configuration) throws IOException {
-    HdfsDirectory directory = new HdfsDirectory(indexPath, configuration);
+    ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath,
+        false, configuration);
 
     SegmentInfos segmentInfos = new SegmentInfos();
     segmentInfos.read(directory);
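Both segment classes above open the index the same way. For context, a minimal usage sketch of reading segment metadata through the restored ReadOnlyFileSystemDirectory (defined later in this commit); the index path is hypothetical, and create=false opens an existing index read-only:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.lucene.index.SegmentCommitInfo;
    import org.apache.lucene.index.SegmentInfos;

    Configuration conf = new Configuration();
    Path indexPath = new Path("/tmp/lucene-index"); // hypothetical index location
    ReadOnlyFileSystemDirectory directory =
        new ReadOnlyFileSystemDirectory(FileSystem.get(conf), indexPath, false, conf);
    SegmentInfos segmentInfos = new SegmentInfos();
    segmentInfos.read(directory); // loads the current segments_N metadata
    for (SegmentCommitInfo segment : segmentInfos) {
      System.out.println(segment.info.name + ": " + segment.info.getDocCount() + " docs");
    }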
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
index d41ead2..485e856 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
@@ -62,8 +62,9 @@ public class LuceneSegmentRecordReader extends RecordReader<Text, NullWritable>
     for (String field : lucene2SeqConfiguration.getFields()) {
       LuceneIndexHelper.fieldShouldExistInIndex(segmentReader, field);
     }
+
     Weight weight = lucene2SeqConfiguration.getQuery().createWeight(searcher);
-    scorer = weight.scorer(segmentReader.getContext(), segmentReader.getLiveDocs());
+    scorer = weight.scorer(segmentReader.getContext(), false, false, null);
     if (scorer == null) {
       throw new IllegalArgumentException("Could not create query scorer for query: " +
           lucene2SeqConfiguration.getQuery());
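The scorer call above is one place where the two Lucene lines differ in method signature rather than just constructor arguments, so the revert cannot be a pure version bump. Roughly, with weight and segmentReader as in the hunk:

    // Lucene 4.6.x (restored): scorer(AtomicReaderContext context,
    //     boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs)
    scorer = weight.scorer(segmentReader.getContext(), false, false, null);

    // Lucene 4.10.x (reverted): the two boolean flags moved off this method,
    // leaving scorer(AtomicReaderContext context, Bits acceptDocs)
    //scorer = weight.scorer(segmentReader.getContext(), segmentReader.getLiveDocs());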
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java b/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
index 7eed822..b36f3e9 100644
--- a/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
+++ b/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
@@ -40,12 +40,12 @@ import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.util.Version;
 import org.apache.mahout.common.Pair;
 import org.apache.mahout.common.iterator.sequencefile.PathFilters;
 import org.apache.mahout.common.iterator.sequencefile.PathType;
 import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
 
+import static org.apache.lucene.util.Version.LUCENE_46;
 
 /**
  * Holds all the configuration for {@link SequenceFilesFromLuceneStorage}, which generates a sequence file
@@ -213,7 +213,7 @@ public class LuceneStorageConfiguration implements Writable {
       }
       idField = in.readUTF();
       fields = Arrays.asList(in.readUTF().split(SEPARATOR_FIELDS));
-      query = new QueryParser(Version.LUCENE_4_10_3, "query", new StandardAnalyzer(Version.LUCENE_4_10_3)).parse(in.readUTF());
+      query = new QueryParser(LUCENE_46, "query", new StandardAnalyzer(LUCENE_46)).parse(in.readUTF());
       maxHits = in.readInt();
     } catch (ParseException e) {
       throw new RuntimeException("Could not deserialize " + this.getClass().getName(), e);

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java b/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
index 4f6ba78..8776c5f 100644
--- a/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
+++ b/integration/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
@@ -21,6 +21,7 @@ import java.io.Reader;
 import java.util.Arrays;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
@@ -41,13 +42,13 @@ import org.apache.lucene.util.Version;
  * stop words, excluding non-alpha-numeric tokens, and porter stemming.
  */
 public final class MailArchivesClusteringAnalyzer extends StopwordAnalyzerBase {
-  private static final Version LUCENE_VERSION = Version.LUCENE_4_10_3;
-
+  private static final Version LUCENE_VERSION = Version.LUCENE_46;
+
   // extended set of stop words composed of common mail terms like "hi",
   // HTML tags, and Java keywords asmany of the messages in the archives
   // are subversion check-in notifications
-  private static final CharArraySet STOP_SET = new CharArraySet(Arrays.asList(
+  private static final CharArraySet STOP_SET = new CharArraySet(LUCENE_VERSION, Arrays.asList(
     "3d","7bit","a0","about","above","abstract","across","additional","after",
     "afterwards","again","against","align","all","almost","alone","along",
     "already","also","although","always","am","among","amongst","amoungst",
@@ -107,17 +108,22 @@ public final class MailArchivesClusteringAnalyzer extends StopwordAnalyzerBase {
   private static final Matcher MATCHER = ALPHA_NUMERIC.matcher("");
 
   public MailArchivesClusteringAnalyzer() {
-    super(STOP_SET);
+    super(LUCENE_VERSION, STOP_SET);
   }
 
+  public MailArchivesClusteringAnalyzer(CharArraySet stopSet) {
+    super(LUCENE_VERSION, stopSet);
+
+  }
+
   @Override
   protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
-    Tokenizer tokenizer = new StandardTokenizer(reader);
-    TokenStream result = new StandardFilter(tokenizer);
-    result = new LowerCaseFilter(result);
+    Tokenizer tokenizer = new StandardTokenizer(LUCENE_VERSION, reader);
+    TokenStream result = new StandardFilter(LUCENE_VERSION, tokenizer);
+    result = new LowerCaseFilter(LUCENE_VERSION, result);
     result = new ASCIIFoldingFilter(result);
     result = new AlphaNumericMaxLengthFilter(result);
-    result = new StopFilter(result, STOP_SET);
+    result = new StopFilter(LUCENE_VERSION, result, STOP_SET);
     result = new PorterStemFilter(result);
     return new TokenStreamComponents(tokenizer, result);
   }
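Whichever constructor is used, the analyzer is consumed through the standard Lucene 4.x TokenStream contract. A minimal sketch of running text through the analyzer above (field name and sample text are illustrative):

    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    Analyzer analyzer = new MailArchivesClusteringAnalyzer();
    TokenStream stream = analyzer.tokenStream("body", new StringReader("Hi all, checked in a fix"));
    CharTermAttribute termAttribute = stream.addAttribute(CharTermAttribute.class);
    stream.reset(); // required before the first incrementToken()
    while (stream.incrementToken()) {
      System.out.println(termAttribute.toString()); // lower-cased, stop-filtered, stemmed
    }
    stream.end();
    stream.close();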
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java b/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
new file mode 100644
index 0000000..e97e35b
--- /dev/null
+++ b/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
@@ -0,0 +1,354 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.text;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.lucene.store.BaseDirectory;
+import org.apache.lucene.store.BufferedIndexInput;
+import org.apache.lucene.store.BufferedIndexOutput;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.Lock;
+import org.apache.lucene.store.LockFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Collection;
+
+//TODO: is there a better way of doing this in Lucene 4.x?
+
+/**
+ * This class implements a read-only Lucene Directory on top of a general FileSystem.
+ * Currently it does not support locking.
+ * <p/>
+ * // TODO: Rename to FileSystemReadOnlyDirectory
+ */
+public class ReadOnlyFileSystemDirectory extends BaseDirectory {
+
+  private final FileSystem fs;
+  private final Path directory;
+  private final int ioFileBufferSize;
+
+  private static final Logger log = LoggerFactory.getLogger(ReadOnlyFileSystemDirectory.class);
+
+  /**
+   * Constructor
+   *
+   * @param fs - filesystem
+   * @param directory - directory path
+   * @param create - if true create the directory
+   * @param conf - MR Job Configuration
+   * @throws IOException
+   */
+
+  public ReadOnlyFileSystemDirectory(FileSystem fs, Path directory, boolean create,
+                                     Configuration conf) throws IOException {
+
+    this.fs = fs;
+    this.directory = directory;
+    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);
+
+    if (create) {
+      create();
+    }
+
+    boolean isDir = false;
+    try {
+      FileStatus status = fs.getFileStatus(directory);
+      if (status != null) {
+        isDir = status.isDir();
+      }
+    } catch (IOException e) {
+      log.error(e.getMessage(), e);
+    }
+    if (!isDir) {
+      throw new IOException(directory + " is not a directory");
+    }
+  }
+
+
+  private void create() throws IOException {
+    if (!fs.exists(directory)) {
+      fs.mkdirs(directory);
+    }
+
+    boolean isDir = false;
+    try {
+      FileStatus status = fs.getFileStatus(directory);
+      if (status != null) {
+        isDir = status.isDir();
+      }
+    } catch (IOException e) {
+      log.error(e.getMessage(), e);
+    }
+    if (!isDir) {
+      throw new IOException(directory + " is not a directory");
+    }
+
+    // clear old index files
+    FileStatus[] fileStatus =
+        fs.listStatus(directory, LuceneIndexFileNameFilter.getFilter());
+    for (FileStatus status : fileStatus) {
+      if (!fs.delete(status.getPath(), true)) {
+        throw new IOException("Cannot delete index file "
+            + status.getPath());
+      }
+    }
+  }
+
+  public String[] list() throws IOException {
+    FileStatus[] fileStatus =
+        fs.listStatus(directory, LuceneIndexFileNameFilter.getFilter());
+    String[] result = new String[fileStatus.length];
+    for (int i = 0; i < fileStatus.length; i++) {
+      result[i] = fileStatus[i].getPath().getName();
+    }
+    return result;
+  }
+
+  @Override
+  public String[] listAll() throws IOException {
+    return list();
+  }
+
+  @Override
+  public boolean fileExists(String name) throws IOException {
+    return fs.exists(new Path(directory, name));
+  }
+
+  @Override
+  public long fileLength(String name) throws IOException {
+    return fs.getFileStatus(new Path(directory, name)).getLen();
+  }
+
+  @Override
+  public void deleteFile(String name) throws IOException {
+    if (!fs.delete(new Path(directory, name), true)) {
+      throw new IOException("Cannot delete index file " + name);
+    }
+  }
+
+  @Override
+  public IndexOutput createOutput(String name, IOContext context) throws IOException {
+    //TODO: What should we be doing with the IOContext here, if anything?
+    Path file = new Path(directory, name);
+    if (fs.exists(file) && !fs.delete(file, true)) {
+      // delete the existing one if applicable
+      throw new IOException("Cannot overwrite index file " + file);
+    }
+
+    return new FileSystemIndexOutput(file, ioFileBufferSize);
+  }
+
+  @Override
+  public void sync(Collection<String> names) throws IOException {
+    // do nothing, as this is read-only
+  }
+
+  @Override
+  public IndexInput openInput(String name, IOContext context) throws IOException {
+    return new FileSystemIndexInput(new Path(directory, name), ioFileBufferSize);
+  }
+
+  @Override
+  public Lock makeLock(final String name) {
+    return new Lock() {
+      public boolean obtain() {
+        return true;
+      }
+
+      public void release() {
+      }
+
+      public boolean isLocked() {
+        throw new UnsupportedOperationException();
+      }
+
+      public String toString() {
+        return "Lock@" + new Path(directory, name);
+      }
+    };
+  }
+
+  @Override
+  public void clearLock(String name) throws IOException {
+    // do nothing
+  }
+
+  @Override
+  public void close() throws IOException {
+    // do not close the file system
+  }
+
+  @Override
+  public void setLockFactory(LockFactory lockFactory) throws IOException {
+    // do nothing
+  }
+
+  @Override
+  public LockFactory getLockFactory() {
+    return null;
+  }
+
+  @Override
+  public String toString() {
+    return this.getClass().getName() + "@" + directory;
+  }
+
+  private class FileSystemIndexInput extends BufferedIndexInput implements Cloneable {
+
+    // shared by clones
+    private class Descriptor {
+      public final FSDataInputStream in;
+      public long position; // cache of in.getPos()
+
+      public Descriptor(Path file, int ioFileBufferSize) throws IOException {
+        this.in = fs.open(file, ioFileBufferSize);
+      }
+    }
+
+    private final Path filePath; // for debugging
+    private final Descriptor descriptor;
+    private final long length;
+    private boolean isOpen;
+    private boolean isClone;
+
+    public FileSystemIndexInput(Path path, int ioFileBufferSize)
+        throws IOException {
+      super("FSII_" + path.getName(), ioFileBufferSize);
+      filePath = path;
+      descriptor = new Descriptor(path, ioFileBufferSize);
+      length = fs.getFileStatus(path).getLen();
+      isOpen = true;
+    }
+
+    @Override
+    protected void readInternal(byte[] b, int offset, int len)
+        throws IOException {
+      long position = getFilePointer();
+      if (position != descriptor.position) {
+        descriptor.in.seek(position);
+        descriptor.position = position;
+      }
+      int total = 0;
+      do {
+        int i = descriptor.in.read(b, offset + total, len - total);
+        if (i == -1) {
+          throw new IOException("Read past EOF");
+        }
+        descriptor.position += i;
+        total += i;
+      } while (total < len);
+    }
+
+    @Override
+    public void close() throws IOException {
+      if (!isClone) {
+        if (isOpen) {
+          descriptor.in.close();
+          isOpen = false;
+        } else {
+          throw new IOException("Index file " + filePath + " already closed");
+        }
+      }
+    }
+
+    @Override
+    protected void seekInternal(long position) {
+      // handled in readInternal()
+    }
+
+    @Override
+    public long length() {
+      return length;
+    }
+
+    @Override
+    protected void finalize() throws Throwable {
+      super.finalize();
+      if (!isClone && isOpen) {
+        close(); // close the file
+      }
+    }
+
+    @Override
+    public BufferedIndexInput clone() {
+      FileSystemIndexInput clone = (FileSystemIndexInput) super.clone();
+      clone.isClone = true;
+      return clone;
+    }
+  }
+
+  private class FileSystemIndexOutput extends BufferedIndexOutput {
+
+    private final Path filePath; // for debugging
+    private final FSDataOutputStream out;
+    private boolean isOpen;
+
+    public FileSystemIndexOutput(Path path, int ioFileBufferSize)
+        throws IOException {
+      filePath = path;
+      // overwrite is true by default
+      out = fs.create(path, true, ioFileBufferSize);
+      isOpen = true;
+    }
+
+    @Override
+    public void flushBuffer(byte[] b, int offset, int size) throws IOException {
+      out.write(b, offset, size);
+    }
+
+    @Override
+    public void close() throws IOException {
+      if (isOpen) {
+        super.close();
+        out.close();
+        isOpen = false;
+      } else {
+        throw new IOException("Index file " + filePath + " already closed");
+      }
+    }
+
+    @Override
+    public void seek(long pos) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public long length() throws IOException {
+      return out.getPos();
+    }
+
+    @Override
+    protected void finalize() throws Throwable {
+      super.finalize();
+      if (isOpen) {
+        close(); // close the file
+      }
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
index 4906d3a..b7fd495 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
@@ -82,6 +82,7 @@ public class SequenceFilesFromLuceneStorage {
       processedDocs = writerCollector.processedDocs;
       Closeables.close(sequenceFileWriter, false);
       directory.close();
+      //searcher.close();
       reader.close();
     }
   }

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
index 4de372f..1bd3f3e 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
@@ -96,7 +96,8 @@ public class SequenceFilesFromLuceneStorageDriver extends AbstractJob {
     if (hasOption(OPTION_QUERY)) {
       try {
         String queryString = COMPILE.matcher(getOption(OPTION_QUERY)).replaceAll("");
-        QueryParser queryParser = new QueryParser(queryString, new StandardAnalyzer());
+        QueryParser queryParser = new QueryParser(Version.LUCENE_46, queryString,
+            new StandardAnalyzer(Version.LUCENE_46));
         query = queryParser.parse(queryString);
       } catch (ParseException e) {
         throw new IllegalArgumentException(e.getMessage(), e);
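The driver change above restores the 4.6-style QueryParser, which, like the analyzers, takes a leading Version argument. A minimal sketch (field name and query string are illustrative):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.util.Version;

    QueryParser parser = new QueryParser(Version.LUCENE_46, "text",
        new StandardAnalyzer(Version.LUCENE_46));
    Query query = parser.parse("mahout AND lucene"); // throws ParseException on bad syntax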
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchivesMapper.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchivesMapper.java b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchivesMapper.java
index 07226d3..203e8fb 100644
--- a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchivesMapper.java
+++ b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromMailArchivesMapper.java
@@ -17,21 +17,9 @@
 
 package org.apache.mahout.text;
 
-import java.io.ByteArrayInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
 import com.google.common.base.Joiner;
 import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.BytesWritable;
@@ -44,6 +32,17 @@ import org.apache.mahout.common.iterator.FileLineIterable;
 import org.apache.mahout.utils.email.MailOptions;
 import org.apache.mahout.utils.email.MailProcessor;
 
+import java.io.ByteArrayInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
 import static org.apache.mahout.text.SequenceFilesFromMailArchives.BODY_OPTION;
 import static org.apache.mahout.text.SequenceFilesFromMailArchives.BODY_SEPARATOR_OPTION;
 import static org.apache.mahout.text.SequenceFilesFromMailArchives.CHARSET_OPTION;
@@ -94,13 +93,13 @@ public class SequenceFilesFromMailArchivesMapper extends Mapper<IntWritable, Byt
       options.setCharset(charset);
     }
 
-    List<Pattern> patterns = new ArrayList<>(5);
+    List<Pattern> patterns = Lists.newArrayListWithCapacity(5);
     // patternOrder is used downstream so that we can know what order the
     // text is in instead
    // of encoding it in the string, which
    // would require more processing later to remove it pre feature
    // selection.
-    Map<String, Integer> patternOrder = new HashMap<>();
+    Map<String, Integer> patternOrder = Maps.newHashMap();
     int order = 0;
     if (!configuration.get(FROM_OPTION[1], "").equals("")) {
       patterns.add(MailProcessor.FROM_PREFIX);

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
index eae3d6d..ad55ba7 100644
--- a/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
+++ b/integration/src/main/java/org/apache/mahout/text/wikipedia/WikipediaAnalyzer.java
@@ -34,19 +34,19 @@ import org.apache.lucene.util.Version;
 public class WikipediaAnalyzer extends StopwordAnalyzerBase {
 
   public WikipediaAnalyzer() {
-    super(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+    super(Version.LUCENE_46, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
   }
 
   public WikipediaAnalyzer(CharArraySet stopSet) {
-    super(stopSet);
+    super(Version.LUCENE_46, stopSet);
   }
 
   @Override
   protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
     Tokenizer tokenizer = new WikipediaTokenizer(reader);
-    TokenStream result = new StandardFilter(tokenizer);
-    result = new LowerCaseFilter(result);
-    result = new StopFilter(result, getStopwordSet());
+    TokenStream result = new StandardFilter(Version.LUCENE_46, tokenizer);
+    result = new LowerCaseFilter(Version.LUCENE_46, result);
+    result = new StopFilter(Version.LUCENE_46, result, getStopwordSet());
     return new TokenStreamComponents(tokenizer, result);
   }
 }

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
----------------------------------------------------------------------
diff --git a/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java b/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
index 16623c9..36b166a 100644
--- a/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
+++ b/integration/src/main/java/org/apache/mahout/utils/regex/AnalyzerTransformer.java
@@ -37,7 +37,7 @@ public class AnalyzerTransformer implements RegexTransformer {
   private static final Logger log = LoggerFactory.getLogger(AnalyzerTransformer.class);
 
   public AnalyzerTransformer() {
-    this(new StandardAnalyzer());
+    this(new StandardAnalyzer(Version.LUCENE_46), "text");
   }
 
   public AnalyzerTransformer(Analyzer analyzer) {
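The collection changes in this mapper (and in the tests further down) are mechanical: the reverted commit had swapped Guava's collection factory methods for Java 7 diamond construction, and the revert restores the Guava style. Side by side, with the element types taken from the hunk above:

    import java.util.List;
    import java.util.Map;
    import java.util.regex.Pattern;
    import com.google.common.collect.Lists;
    import com.google.common.collect.Maps;

    // restored Guava style: the factory methods infer the type parameters
    List<Pattern> patterns = Lists.newArrayListWithCapacity(5);
    Map<String, Integer> patternOrder = Maps.newHashMap();

    // reverted Java 7 style
    //List<Pattern> patterns = new ArrayList<>(5);
    //Map<String, Integer> patternOrder = new HashMap<>();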
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java b/mr/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
index cfaac07..37ca383 100644
--- a/mr/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
+++ b/mr/src/main/java/org/apache/mahout/common/lucene/AnalyzerUtils.java
@@ -32,7 +32,7 @@ public final class AnalyzerUtils {
    * @throws ClassNotFoundException - {@link ClassNotFoundException}
    */
   public static Analyzer createAnalyzer(String analyzerClassName) throws ClassNotFoundException {
-    return createAnalyzer(analyzerClassName, Version.LUCENE_4_10_3);
+    return createAnalyzer(analyzerClassName, Version.LUCENE_46);
   }
 
   public static Analyzer createAnalyzer(String analyzerClassName, Version version) throws ClassNotFoundException {
@@ -47,7 +47,7 @@ public final class AnalyzerUtils {
    * @return {@link Analyzer}
    */
   public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass) {
-    return createAnalyzer(analyzerClass, Version.LUCENE_4_10_3);
+    return createAnalyzer(analyzerClass, Version.LUCENE_46);
   }
 
   public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass, Version version) {

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/main/java/org/apache/mahout/vectorizer/encoders/InteractionValueEncoder.java
----------------------------------------------------------------------
diff --git a/mr/src/main/java/org/apache/mahout/vectorizer/encoders/InteractionValueEncoder.java b/mr/src/main/java/org/apache/mahout/vectorizer/encoders/InteractionValueEncoder.java
index e0f6ce1..0be8823 100644
--- a/mr/src/main/java/org/apache/mahout/vectorizer/encoders/InteractionValueEncoder.java
+++ b/mr/src/main/java/org/apache/mahout/vectorizer/encoders/InteractionValueEncoder.java
@@ -18,9 +18,11 @@ package org.apache.mahout.vectorizer.encoders;
 
 import java.util.Locale;
 
-import org.apache.commons.io.Charsets;
+
 import org.apache.mahout.math.Vector;
 
+import com.google.common.base.Charsets;
+
 public class InteractionValueEncoder extends FeatureVectorEncoder {
   private final FeatureVectorEncoder firstEncoder;
   private final FeatureVectorEncoder secondEncoder;
@@ -86,7 +88,7 @@ public class InteractionValueEncoder extends FeatureVectorEncoder {
         int n = (k + j) % data.size();
         if (isTraceEnabled()) {
           trace(String.format("%s:%s", new String(originalForm1, Charsets.UTF_8), new String(originalForm2,
-              Charsets.UTF_8)), n);
+            Charsets.UTF_8)), n);
         }
         data.set(n, data.get(n) + w);
       }

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/classifier/ConfusionMatrixTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/ConfusionMatrixTest.java b/mr/src/test/java/org/apache/mahout/classifier/ConfusionMatrixTest.java
index 8edc99b..3ffff85 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/ConfusionMatrixTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/ConfusionMatrixTest.java
@@ -17,11 +17,11 @@
 
 package org.apache.mahout.classifier;
 
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Map;
 
+import com.google.common.collect.Lists;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.math.Matrix;
 import org.junit.Test;
@@ -102,7 +102,7 @@ public final class ConfusionMatrixTest extends MahoutTestCase {
   }
 
   private static ConfusionMatrix fillConfusionMatrix(int[][] values, String[] labels, String defaultLabel) {
-    Collection<String> labelList = new ArrayList<>();
+    Collection<String> labelList = Lists.newArrayList();
     labelList.add(labels[0]);
     labelList.add(labels[1]);
 
     ConfusionMatrix confusionMatrix = new ConfusionMatrix(labelList, defaultLabel);

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java
index d7ab09c..f1ec07f 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/DecisionForestTest.java
@@ -143,6 +143,7 @@ public final class DecisionForestTest extends MahoutTestCase {
     Data testData = DataLoader.loadData(dataset, TEST_DATA);
 
     double noValue = dataset.valueOf(4, "no");
+    double yesValue = dataset.valueOf(4, "yes");
     assertEquals(noValue, forest.classify(testData.getDataset(), rng, testData.get(0)), EPSILON);
     // This one is tie-broken -- 1 is OK too
     //assertEquals(yesValue, forest.classify(testData.getDataset(), rng, testData.get(1)), EPSILON);

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java b/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
index db62d85..1cf8b6a 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/data/Utils.java
@@ -23,8 +23,9 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Random;
 
+import com.google.common.base.Charsets;
+import com.google.common.io.Closeables;
 import com.google.common.io.Files;
-import org.apache.commons.io.Charsets;
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -233,12 +234,17 @@ public final class Utils {
   }
 
   private static void writeDataToFile(String[] sData, Path path) throws IOException {
-    try (BufferedWriter output = Files.newWriter(new File(path.toString()), Charsets.UTF_8)){
+    BufferedWriter output = null;
+    try {
+      output = Files.newWriter(new File(path.toString()), Charsets.UTF_8);
       for (String line : sData) {
         output.write(line);
        output.write('\n');
      }
+    } finally {
+      Closeables.close(output, false);
     }
+
   }
 
   public static Path writeDataToTestFile(String[] sData) throws IOException {
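The resource-handling changes in the tests follow a single pattern: try-with-resources blocks go back to explicit try/finally with Guava's Closeables. The shape, as in writeDataToFile above (the file path here is hypothetical):

    import java.io.BufferedWriter;
    import java.io.File;
    import com.google.common.base.Charsets;
    import com.google.common.io.Closeables;
    import com.google.common.io.Files;

    BufferedWriter output = null;
    try {
      output = Files.newWriter(new File("/tmp/data.txt"), Charsets.UTF_8);
      output.write("line\n");
    } finally {
      Closeables.close(output, false); // false: rethrow any IOException from close()
    }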
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java
index e41071c..3903c33 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/df/mapreduce/partial/PartialBuilderTest.java
@@ -18,24 +18,25 @@
 
 package org.apache.mahout.classifier.df.mapreduce.partial;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Random;
 
+import com.google.common.collect.Lists;
+import com.google.common.io.Closeables;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.SequenceFile.Writer;
 import org.apache.hadoop.mapreduce.Job;
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.classifier.df.builder.DefaultTreeBuilder;
 import org.apache.mahout.classifier.df.builder.TreeBuilder;
 import org.apache.mahout.classifier.df.mapreduce.MapredOutput;
 import org.apache.mahout.classifier.df.node.Leaf;
 import org.apache.mahout.classifier.df.node.Node;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.common.RandomUtils;
 import org.junit.Test;
 
 public final class PartialBuilderTest extends MahoutTestCase {
@@ -65,10 +66,15 @@ public final class PartialBuilderTest extends MahoutTestCase {
     FileSystem fs = base.getFileSystem(conf);
 
     Path outputFile = new Path(base, "PartialBuilderTest.seq");
-    try (Writer writer = SequenceFile.createWriter(fs, conf, outputFile, TreeID.class, MapredOutput.class)){
+    Writer writer = SequenceFile.createWriter(fs, conf, outputFile,
+        TreeID.class, MapredOutput.class);
+
+    try {
       for (int index = 0; index < NUM_TREES; index++) {
         writer.append(keys[index], values[index]);
       }
+    } finally {
+      Closeables.close(writer, false);
     }
 
     // load the output and make sure its valid
@@ -110,7 +116,7 @@ public final class PartialBuilderTest extends MahoutTestCase {
   private static void randomKeyValues(Random rng, TreeID[] keys, MapredOutput[] values, int[] firstIds) {
     int index = 0;
     int firstId = 0;
-    Collection<Integer> partitions = new ArrayList<>();
+    Collection<Integer> partitions = Lists.newArrayList();
 
     for (int p = 0; p < NUM_MAPS; p++) {
       // select a random partition, not yet selected
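PartialBuilderTest applies the same pattern to a Hadoop SequenceFile writer. A condensed sketch of the restored sequence (the output path is hypothetical; keys, values, and NUM_TREES are as in the test):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.SequenceFile.Writer;
    import com.google.common.io.Closeables;

    Configuration conf = new Configuration();
    Path outputFile = new Path("/tmp/PartialBuilderTest.seq"); // hypothetical path
    FileSystem fs = outputFile.getFileSystem(conf);
    Writer writer = SequenceFile.createWriter(fs, conf, outputFile,
        TreeID.class, MapredOutput.class);
    try {
      for (int index = 0; index < NUM_TREES; index++) {
        writer.append(keys[index], values[index]); // one record per tree
      }
    } finally {
      Closeables.close(writer, false);
    }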
http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/classifier/mlp/TestNeuralNetwork.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/mlp/TestNeuralNetwork.java b/mr/src/test/java/org/apache/mahout/classifier/mlp/TestNeuralNetwork.java
index 917bf1a..ebe5424 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/mlp/TestNeuralNetwork.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/mlp/TestNeuralNetwork.java
@@ -19,14 +19,11 @@ package org.apache.mahout.classifier.mlp;
 
 import java.io.File;
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 
-import com.google.common.io.Files;
 import org.apache.commons.csv.CSVUtils;
-import org.apache.commons.io.Charsets;
 import org.apache.mahout.classifier.mlp.NeuralNetwork.TrainingMethod;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.math.DenseMatrix;
@@ -35,6 +32,10 @@ import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.Vector;
 import org.junit.Test;
 
+import com.google.common.base.Charsets;
+import com.google.common.collect.Lists;
+import com.google.common.io.Files;
+
 /** Test the functionality of {@link NeuralNetwork}. */
 public class TestNeuralNetwork extends MahoutTestCase {
@@ -217,7 +218,7 @@ public class TestNeuralNetwork extends MahoutTestCase {
     File cancerDataset = getTestTempFile("cancer.csv");
     writeLines(cancerDataset, Datasets.CANCER);
 
-    List<Vector> records = new ArrayList<>();
+    List<Vector> records = Lists.newArrayList();
     // Returns a mutable list of the data
     List<String> cancerDataSetList = Files.readLines(cancerDataset, Charsets.UTF_8);
     // Skip the header line, hence remove the first element in the list
@@ -271,7 +272,7 @@ public class TestNeuralNetwork extends MahoutTestCase {
     writeLines(irisDataset, Datasets.IRIS);
     int numOfClasses = 3;
 
-    List<Vector> records = new ArrayList<>();
+    List<Vector> records = Lists.newArrayList();
     // Returns a mutable list of the data
     List<String> irisDataSetList = Files.readLines(irisDataset, Charsets.UTF_8);
     // Skip the header line, hence remove the first element in the list

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java b/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
index b0672bf..abd666e 100644
--- a/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
+++ b/mr/src/test/java/org/apache/mahout/classifier/naivebayes/NaiveBayesTest.java
@@ -19,6 +19,7 @@ package org.apache.mahout.classifier.naivebayes;
 
 import java.io.File;
 
+import com.google.common.io.Closeables;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -64,8 +65,10 @@ public class NaiveBayesTest extends MahoutTestCase {
     outputDir.delete();
     tempDir = getTestTempDir("tmp");
 
-    try (SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf,
-        new Path(inputFile.getAbsolutePath()), Text.class, VectorWritable.class)) {
+    SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf), conf,
+        new Path(inputFile.getAbsolutePath()), Text.class, VectorWritable.class);
+
+    try {
       writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
       writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
       writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_DOMESTIC));
@@ -76,6 +79,8 @@ public class NaiveBayesTest extends MahoutTestCase {
       writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_YELLOW, TYPE_SUV, ORIGIN_DOMESTIC));
       writer.append(LABEL_NOT_STOLEN, trainingInstance(COLOR_RED, TYPE_SUV, ORIGIN_IMPORTED));
       writer.append(LABEL_STOLEN, trainingInstance(COLOR_RED, TYPE_SPORTS, ORIGIN_IMPORTED));
+    } finally {
+      Closeables.close(writer, false);
     }
   }
@@ -83,8 +88,8 @@ public class NaiveBayesTest extends MahoutTestCase {
   public void toyData() throws Exception {
     TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
     trainNaiveBayes.setConf(conf);
-    trainNaiveBayes.run(new String[]{"--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
-        "--tempDir", tempDir.getAbsolutePath()});
+    trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
+        "--tempDir", tempDir.getAbsolutePath() });
 
     NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);
@@ -102,9 +107,9 @@ public class NaiveBayesTest extends MahoutTestCase {
   public void toyDataComplementary() throws Exception {
     TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
     trainNaiveBayes.setConf(conf);
-    trainNaiveBayes.run(new String[]{"--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
+    trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(),
         "--trainComplementary",
-        "--tempDir", tempDir.getAbsolutePath()});
+        "--tempDir", tempDir.getAbsolutePath() });
 
     NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/mr/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java b/mr/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java
index 3b7c93e..4446fef 100644
--- a/mr/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java
+++ b/mr/src/test/java/org/apache/mahout/vectorizer/encoders/TextValueEncoderTest.java
@@ -70,7 +70,7 @@ public final class TextValueEncoderTest extends MahoutTestCase {
   @Test
   public void testLuceneEncoding() throws Exception {
     LuceneTextValueEncoder enc = new LuceneTextValueEncoder("text");
-    enc.setAnalyzer(new WhitespaceAnalyzer());
+    enc.setAnalyzer(new WhitespaceAnalyzer(Version.LUCENE_46));
     Vector v1 = new DenseVector(200);
     enc.addToVector("test1 and more", v1);
     enc.flush(1, v1);

http://git-wip-us.apache.org/repos/asf/mahout/blob/864ba1ae/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index ab1734d..80b4a20 100644
--- a/pom.xml
+++ b/pom.xml
@@ -115,8 +115,8 @@
     <mfindbugs.version>2.5.2</mfindbugs.version>
     <mjavadoc.version>2.9.1</mjavadoc.version>
     <hbase.version>1.0.0</hbase.version>
-    <lucene.version>4.10.3</lucene.version>
-    <slf4j.version>1.7.12</slf4j.version>
+    <lucene.version>4.6.1</lucene.version>
+    <slf4j.version>1.7.10</slf4j.version>
    <scala.compat.version>2.10</scala.compat.version>
    <scala.version>2.10.4</scala.version>
    <spark.version>1.1.1</spark.version>
