mahout#170

smarthi Tue, 03 Nov 2015 17:31:58 -0800

MAHOUT-1782: Remove code for lucene2seq, this closes apache/mahout#170


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/82e78a8c
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/82e78a8c
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/82e78a8c

Branch: refs/heads/master
Commit: 82e78a8c9a1beee637ad381baf3ff9d4bb4297cc
Parents: 708cc4f
Author: smarthi <[email protected]>
Authored: Tue Nov 3 20:30:37 2015 -0500
Committer: smarthi <[email protected]>
Committed: Tue Nov 3 20:30:37 2015 -0500

----------------------------------------------------------------------
 .../mahout/text/LuceneIndexFileNameFilter.java  |  62 ----
 .../apache/mahout/text/LuceneIndexHelper.java   |  41 ---
 .../mahout/text/LuceneSegmentInputFormat.java   |  80 -----
 .../mahout/text/LuceneSegmentInputSplit.java    | 107 ------
 .../mahout/text/LuceneSegmentRecordReader.java  | 103 ------
 .../apache/mahout/text/LuceneSeqFileHelper.java |  54 ---
 .../mahout/text/LuceneStorageConfiguration.java | 333 -----------------
 .../text/ReadOnlyFileSystemDirectory.java       | 355 -------------------
 .../text/SequenceFilesFromLuceneStorage.java    | 139 --------
 .../SequenceFilesFromLuceneStorageDriver.java   | 140 --------
 .../SequenceFilesFromLuceneStorageMRJob.java    |  66 ----
 .../SequenceFilesFromLuceneStorageMapper.java   |  83 -----
 .../mahout/text/AbstractLuceneStorageTest.java  | 107 ------
 .../text/LuceneSegmentInputFormatTest.java      |  85 -----
 .../text/LuceneSegmentInputSplitTest.java       |  88 -----
 .../text/LuceneSegmentRecordReaderTest.java     | 121 -------
 .../text/LuceneStorageConfigurationTest.java    |  49 ---
 ...equenceFilesFromLuceneStorageDriverTest.java | 174 ---------
 ...SequenceFilesFromLuceneStorageMRJobTest.java |  87 -----
 .../SequenceFilesFromLuceneStorageTest.java     | 244 -------------
 src/conf/driver.classes.default.props           |   1 -
 21 files changed, 2519 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java
----------------------------------------------------------------------
diff --git 
a/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java
 
b/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java
deleted file mode 100644
index c505fcb..0000000
--- 
a/integration/src/main/java/org/apache/mahout/text/LuceneIndexFileNameFilter.java
+++ /dev/null
@@ -1,62 +0,0 @@
-package org.apache.mahout.text;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.lucene.index.IndexFileNames;
-
-import java.util.regex.Pattern;
-
-/**
- * A wrapper class to convert an IndexFileNameFilter which implements
- * java.io.FilenameFilter to an org.apache.hadoop.fs.PathFilter.
- */
-@Deprecated
-final class LuceneIndexFileNameFilter implements PathFilter {
-
-  private static final LuceneIndexFileNameFilter LUCENE_INDEX_FILE_NAME_FILTER 
= new LuceneIndexFileNameFilter();
-
-  /**
-   * Get a static instance.
-   *
-   * @return the static instance
-   */
-  public static LuceneIndexFileNameFilter getFilter() {
-    return LUCENE_INDEX_FILE_NAME_FILTER;
-  }
-
-  private LuceneIndexFileNameFilter() {}
-
-  //TODO: Lucene defines this in IndexFileNames, but it is package private,
-  // so make sure it doesn't change w/ new releases.
-  private static final Pattern CODEC_FILE_PATTERN = 
Pattern.compile("_[a-z0-9]+(_.*)?\\..*");
-
-  public boolean accept(Path path) {
-    String name = path.getName();
-    if (CODEC_FILE_PATTERN.matcher(name).matches() || 
name.startsWith(IndexFileNames.SEGMENTS)) {
-      return true;
-    }
-    for (String extension : IndexFileNames.INDEX_EXTENSIONS) {
-      if (name.endsWith(extension)) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
----------------------------------------------------------------------
diff --git 
a/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java 
b/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
deleted file mode 100644
index 465e51b..0000000
--- a/integration/src/main/java/org/apache/mahout/text/LuceneIndexHelper.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package org.apache.mahout.text;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexableField;
-
-import java.io.IOException;
-
-/**
- * Utility for checking if a field is stored in a Lucene index.
- */
-public class LuceneIndexHelper {
-
-  private LuceneIndexHelper() {
-
-  }
-
-  public static void fieldShouldExistInIndex(IndexReader reader, String 
fieldName) throws IOException {
-    IndexableField field = reader.document(0).getField(fieldName);
-    if (field == null || !field.fieldType().stored()) {
-      throw new IllegalArgumentException("Field '" + fieldName +
-          "' is possibly not stored since first document in index does not 
contain this field.");
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
----------------------------------------------------------------------
diff --git 
a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
 
b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
deleted file mode 100644
index 1b5d717..0000000
--- 
a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputFormat.java
+++ /dev/null
@@ -1,80 +0,0 @@
-package org.apache.mahout.text;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputFormat;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.lucene.index.SegmentCommitInfo;
-import org.apache.lucene.index.SegmentInfos;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * {@link InputFormat} implementation which splits a Lucene index at the 
segment level.
- */
-@Deprecated
-public class LuceneSegmentInputFormat extends InputFormat {
-
-  private static final Logger LOG = 
LoggerFactory.getLogger(LuceneSegmentInputFormat.class);
-
-  @Override
-  public List<LuceneSegmentInputSplit> getSplits(JobContext context) throws 
IOException, InterruptedException {
-    Configuration configuration = context.getConfiguration();
-
-    LuceneStorageConfiguration lucene2SeqConfiguration = new 
LuceneStorageConfiguration(configuration);
-
-    List<LuceneSegmentInputSplit> inputSplits = new ArrayList<>();
-
-    List<Path> indexPaths = lucene2SeqConfiguration.getIndexPaths();
-    for (Path indexPath : indexPaths) {
-      ReadOnlyFileSystemDirectory directory = new 
ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath,
-                                                                              
false, configuration);
-      SegmentInfos segmentInfos = new SegmentInfos();
-      segmentInfos.read(directory);
-
-      for (SegmentCommitInfo segmentInfo : segmentInfos) {
-        LuceneSegmentInputSplit inputSplit = new 
LuceneSegmentInputSplit(indexPath, segmentInfo.info.name,
-                                                                         
segmentInfo.sizeInBytes());
-        inputSplits.add(inputSplit);
-        LOG.info("Created {} byte input split for index '{}' segment {}", 
segmentInfo.sizeInBytes(), indexPath.toUri(),
-                 segmentInfo.info.name);
-      }
-    }
-
-    return inputSplits;
-  }
-
-  @Override
-  public RecordReader<Text, NullWritable> createRecordReader(InputSplit 
inputSplit, TaskAttemptContext context)
-    throws IOException, InterruptedException {
-    LuceneSegmentRecordReader luceneSegmentRecordReader = new 
LuceneSegmentRecordReader();
-    luceneSegmentRecordReader.initialize(inputSplit, context);
-    return luceneSegmentRecordReader;
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
----------------------------------------------------------------------
diff --git 
a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java 
b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
deleted file mode 100644
index 12949f5..0000000
--- 
a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentInputSplit.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.lucene.index.SegmentCommitInfo;
-import org.apache.lucene.index.SegmentInfo;
-import org.apache.lucene.index.SegmentInfos;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-
-/**
- * {@link InputSplit} implementation that represents a Lucene segment.
- */
-@Deprecated
-public class LuceneSegmentInputSplit extends InputSplit implements Writable {
-
-  private Path indexPath;
-  private String segmentInfoName;
-  private long length;
-
-  public LuceneSegmentInputSplit() {
-    // For deserialization
-  }
-
-  public LuceneSegmentInputSplit(Path indexPath, String segmentInfoName, long 
length) {
-    this.indexPath = indexPath;
-    this.segmentInfoName = segmentInfoName;
-    this.length = length;
-  }
-
-  @Override
-  public long getLength() throws IOException, InterruptedException {
-    return length;
-  }
-
-  @Override
-  public String[] getLocations() throws IOException, InterruptedException {
-    return new String[]{};
-  }
-
-  public String getSegmentInfoName() {
-    return segmentInfoName;
-  }
-
-  public Path getIndexPath() {
-    return indexPath;
-  }
-
-  @Override
-  public void write(DataOutput out) throws IOException {
-    out.writeUTF(indexPath.toString());
-    out.writeUTF(segmentInfoName);
-    out.writeLong(length);
-  }
-
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    this.indexPath = new Path(in.readUTF());
-    this.segmentInfoName = in.readUTF();
-    this.length = in.readLong();
-  }
-
-  /**
-   * Get the {@link SegmentInfo} of this {@link InputSplit} via the given 
{@link Configuration}
-   *
-   * @param configuration the configuration used to locate the index
-   * @return the segment info or throws exception if not found
-   * @throws IOException if an error occurs when accessing the directory
-   */
-  public SegmentCommitInfo getSegment(Configuration configuration) throws 
IOException {
-    ReadOnlyFileSystemDirectory directory = new 
ReadOnlyFileSystemDirectory(FileSystem.get(configuration), indexPath,
-                                                                            
false, configuration);
-
-    SegmentInfos segmentInfos = new SegmentInfos();
-    segmentInfos.read(directory);
-
-    for (SegmentCommitInfo segmentInfo : segmentInfos) {
-      if (segmentInfo.info.name.equals(segmentInfoName)) {
-        return segmentInfo;
-      }
-    }
-
-    throw new IllegalArgumentException("No such segment: '" + segmentInfoName
-        + "' in directory " + directory.toString());
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
----------------------------------------------------------------------
diff --git 
a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
 
b/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
deleted file mode 100644
index 66d37f7..0000000
--- 
a/integration/src/main/java/org/apache/mahout/text/LuceneSegmentRecordReader.java
+++ /dev/null
@@ -1,103 +0,0 @@
-package org.apache.mahout.text;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.InputSplit;
-import org.apache.hadoop.mapreduce.RecordReader;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.lucene.index.SegmentCommitInfo;
-import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Weight;
-import org.apache.lucene.store.IOContext;
-
-import java.io.IOException;
-
-/**
- * {@link RecordReader} implementation for Lucene segments. Each {@link 
InputSplit} contains a separate Lucene segment.
- * Emits records consisting of a {@link Text} document ID key and a {@link NullWritable} value.
- */
-@Deprecated
-public class LuceneSegmentRecordReader extends RecordReader<Text, 
NullWritable> {
-
-  public static final int USE_TERM_INFO = 1;
-
-  private SegmentReader segmentReader;
-  private Scorer scorer;
-
-  private int nextDocId;
-  private Text key = new Text();
-
-  @Override
-  public void initialize(InputSplit split, TaskAttemptContext context) throws 
IOException, InterruptedException {
-    LuceneSegmentInputSplit inputSplit = (LuceneSegmentInputSplit) split;
-
-    Configuration configuration = context.getConfiguration();
-    LuceneStorageConfiguration lucene2SeqConfiguration = new 
LuceneStorageConfiguration(configuration);
-
-    SegmentCommitInfo segmentInfo = inputSplit.getSegment(configuration);
-    segmentReader = new SegmentReader(segmentInfo, USE_TERM_INFO, 
IOContext.READ);
-
-
-    IndexSearcher searcher = new IndexSearcher(segmentReader);
-    String idField = lucene2SeqConfiguration.getIdField();
-    LuceneIndexHelper.fieldShouldExistInIndex(segmentReader, idField);
-    for (String field : lucene2SeqConfiguration.getFields()) {
-        LuceneIndexHelper.fieldShouldExistInIndex(segmentReader, field);
-    }
-
-    Weight weight = lucene2SeqConfiguration.getQuery().createWeight(searcher);
-    scorer = weight.scorer(segmentReader.getContext(), false, false, null);
-    if (scorer == null) {
-      throw new IllegalArgumentException("Could not create query scorer for 
query: "
-          + lucene2SeqConfiguration.getQuery());
-    }
-  }
-
-  @Override
-  public boolean nextKeyValue() throws IOException, InterruptedException {
-    nextDocId = scorer.nextDoc();
-
-    return nextDocId != Scorer.NO_MORE_DOCS;
-  }
-
-  @Override
-  public Text getCurrentKey() throws IOException, InterruptedException {
-    key.set(String.valueOf(nextDocId));
-    return key;
-  }
-
-  @Override
-  public NullWritable getCurrentValue() throws IOException, 
InterruptedException {
-    return NullWritable.get();
-  }
-
-  @Override
-  public float getProgress() throws IOException, InterruptedException {
-    //this is a rough estimate, due to the possible inaccuracies of cost
-    return scorer.cost() == 0 ? 0 : (float) nextDocId / scorer.cost();
-  }
-
-  @Override
-  public void close() throws IOException {
-    segmentReader.close();
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java
----------------------------------------------------------------------
diff --git 
a/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java 
b/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java
deleted file mode 100644
index e6dc84a..0000000
--- a/integration/src/main/java/org/apache/mahout/text/LuceneSeqFileHelper.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text;
-
-import com.google.common.base.Strings;
-import org.apache.hadoop.io.Text;
-import org.apache.lucene.document.Document;
-
-import java.util.List;
-
-import static org.apache.commons.lang.StringUtils.isNotBlank;
-
-/**
- *
- *
- **/
-@Deprecated
-class LuceneSeqFileHelper {
-
-  public static final String SEPARATOR_FIELDS = " ";
-  public static final int USE_TERM_INFOS = 1;
-
-  private LuceneSeqFileHelper() {}
-
-  public static void populateValues(Document document, Text theValue, 
List<String> fields) {
-
-    StringBuilder valueBuilder = new StringBuilder();
-    for (int i = 0; i < fields.size(); i++) {
-      String field = fields.get(i);
-      String fieldValue = document.get(field);
-      if (isNotBlank(fieldValue)) {
-        valueBuilder.append(fieldValue);
-        if (i != fields.size() - 1) {
-          valueBuilder.append(SEPARATOR_FIELDS);
-        }
-      }
-    }
-    theValue.set(Strings.nullToEmpty(valueBuilder.toString()));
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
----------------------------------------------------------------------
diff --git 
a/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
 
b/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
deleted file mode 100644
index 735fb5d..0000000
--- 
a/integration/src/main/java/org/apache/mahout/text/LuceneStorageConfiguration.java
+++ /dev/null
@@ -1,333 +0,0 @@
-package org.apache.mahout.text;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-
-import com.google.common.base.Preconditions;
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.DefaultStringifier;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.DocumentStoredFieldVisitor;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.queryparser.classic.QueryParser;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.Query;
-import org.apache.mahout.common.Pair;
-import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.PathType;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
-
-import static org.apache.lucene.util.Version.LUCENE_46;
-
-/**
- * Holds all the configuration for {@link SequenceFilesFromLuceneStorage}, 
which generates a sequence file
- * with id as the key and a content field as value.
- */
-@Deprecated
-public class LuceneStorageConfiguration implements Writable {
-
-  private static final Query DEFAULT_QUERY = new MatchAllDocsQuery();
-  private static final int DEFAULT_MAX_HITS = Integer.MAX_VALUE;
-
-  static final String KEY = 
"org.apache.mahout.text.LuceneIndexToSequenceFiles";
-
-  static final String SEPARATOR_FIELDS = ",";
-  static final String SEPARATOR_PATHS = ",";
-
-  private Configuration configuration;
-  private List<Path> indexPaths;
-  private Path sequenceFilesOutputPath;
-  private String idField;
-  private List<String> fields;
-  private Query query;
-  private int maxHits;
-
-  /**
-   * Create a configuration bean with all mandatory parameters.
-   *
-   * @param configuration           Hadoop configuration for writing 
sequencefiles
-   * @param indexPaths              paths to the index
-   * @param sequenceFilesOutputPath path to output the sequence file
-   * @param idField                 field used for the key of the sequence file
-   * @param fields                  field(s) used for the value of the 
sequence file
-   */
-  public LuceneStorageConfiguration(Configuration configuration, List<Path> 
indexPaths, Path sequenceFilesOutputPath,
-                                    String idField, List<String> fields) {
-    Preconditions.checkArgument(configuration != null, "Parameter 
'configuration' cannot be null");
-    Preconditions.checkArgument(indexPaths != null, "Parameter 'indexPaths' 
cannot be null");
-    Preconditions.checkArgument(indexPaths != null && !indexPaths.isEmpty(), 
"Parameter 'indexPaths' cannot be empty");
-    Preconditions.checkArgument(sequenceFilesOutputPath != null, "Parameter 
'sequenceFilesOutputPath' cannot be null");
-    Preconditions.checkArgument(idField != null, "Parameter 'idField' cannot 
be null");
-    Preconditions.checkArgument(fields != null, "Parameter 'fields' cannot be 
null");
-    Preconditions.checkArgument(fields != null && !fields.isEmpty(), 
"Parameter 'fields' cannot be empty");
-
-    this.configuration = configuration;
-    this.indexPaths = indexPaths;
-    this.sequenceFilesOutputPath = sequenceFilesOutputPath;
-    this.idField = idField;
-    this.fields = fields;
-
-    this.query = DEFAULT_QUERY;
-    this.maxHits = DEFAULT_MAX_HITS;
-  }
-
-  public LuceneStorageConfiguration() {
-    // Used during deserialization. Do not call directly.
-  }
-
-  /**
-   * Deserializes a {@link LuceneStorageConfiguration} from a {@link 
Configuration}.
-   *
-   * @param conf the {@link Configuration} object with a serialized {@link 
LuceneStorageConfiguration}
-   * @throws IOException if deserialization fails
-   */
-  public LuceneStorageConfiguration(Configuration conf) throws IOException {
-    Preconditions.checkNotNull(conf, "Parameter 'configuration' cannot be 
null");
-
-    String serializedConfigString = conf.get(KEY);
-
-    if (serializedConfigString == null) {
-      throw new IllegalArgumentException("Parameter 'configuration' does not 
contain a serialized " + this.getClass());
-    }
-
-    LuceneStorageConfiguration luceneStorageConf = 
DefaultStringifier.load(conf, KEY, LuceneStorageConfiguration.class);
-
-    this.configuration = conf;
-    this.indexPaths = luceneStorageConf.getIndexPaths();
-    this.sequenceFilesOutputPath = 
luceneStorageConf.getSequenceFilesOutputPath();
-    this.idField = luceneStorageConf.getIdField();
-    this.fields = luceneStorageConf.getFields();
-    this.query = luceneStorageConf.getQuery();
-    this.maxHits = luceneStorageConf.getMaxHits();
-  }
-
-  /**
-   * Serializes this object in a Hadoop {@link Configuration}
-   *
-   * @return a {@link Configuration} object with a String serialization
-   * @throws IOException if serialization fails
-   */
-  public Configuration serialize() throws IOException {
-    DefaultStringifier.store(configuration, this, KEY);
-
-    return new Configuration(configuration);
-  }
-
-  /**
-   * Returns an {@link Iterator} which returns (Text, Text) {@link Pair}s of 
the produced sequence files.
-   *
-   * @return iterator
-   */
-  public Iterator<Pair<Text, Text>> getSequenceFileIterator() {
-    return new SequenceFileDirIterable<Text, Text>(sequenceFilesOutputPath, 
PathType.LIST, PathFilters.logsCRCFilter(),
-                                                   configuration).iterator();
-  }
-
-  public Configuration getConfiguration() {
-    return configuration;
-  }
-
-  public Path getSequenceFilesOutputPath() {
-    return sequenceFilesOutputPath;
-  }
-
-  public List<Path> getIndexPaths() {
-    return indexPaths;
-  }
-
-  public String getIdField() {
-    return idField;
-  }
-
-  public List<String> getFields() {
-    return fields;
-  }
-
-  public void setQuery(Query query) {
-    this.query = query;
-  }
-
-  public Query getQuery() {
-    return query;
-  }
-
-  public void setMaxHits(int maxHits) {
-    this.maxHits = maxHits;
-  }
-
-  public int getMaxHits() {
-    return maxHits;
-  }
-
-  public DocumentStoredFieldVisitor getStoredFieldVisitor() {
-    Set<String> fieldSet = new HashSet<>(Collections.singleton(idField));
-    fieldSet.addAll(fields);
-    return new DocumentStoredFieldVisitor(fieldSet);
-  }
-
-  @Override
-  public void write(DataOutput out) throws IOException {
-    out.writeUTF(sequenceFilesOutputPath.toString());
-    out.writeUTF(StringUtils.join(indexPaths, SEPARATOR_PATHS));
-    out.writeUTF(idField);
-    out.writeUTF(StringUtils.join(fields, SEPARATOR_FIELDS));
-    out.writeUTF(query.toString());
-    out.writeInt(maxHits);
-  }
-
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    try {
-      sequenceFilesOutputPath = new Path(in.readUTF());
-      indexPaths = new ArrayList<>();
-      String[] indexPaths = in.readUTF().split(SEPARATOR_PATHS);
-      for (String indexPath : indexPaths) {
-        this.indexPaths.add(new Path(indexPath));
-      }
-      idField = in.readUTF();
-      fields = Arrays.asList(in.readUTF().split(SEPARATOR_FIELDS));
-      query = new QueryParser(LUCENE_46, "query", new 
StandardAnalyzer(LUCENE_46)).parse(in.readUTF());
-      maxHits = in.readInt();
-    } catch (ParseException e) {
-      throw new RuntimeException("Could not deserialize " + 
this.getClass().getName(), e);
-    }
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (this == o) {
-      return true;
-    }
-    if (o == null || getClass() != o.getClass()) {
-      return false;
-    }
-
-    LuceneStorageConfiguration that = (LuceneStorageConfiguration) o;
-
-    if (maxHits != that.maxHits) {
-      return false;
-    }
-    if (fields != null ? !fields.equals(that.fields) : that.fields != null) {
-      return false;
-    }
-    if (idField != null) {
-      if (!idField.equals(that.idField)) {
-        return false;
-      } else {
-        if (indexPaths != null) {
-          if (query != null) {
-            if (sequenceFilesOutputPath != null) {
-              return indexPaths.equals(that.indexPaths) && 
sequenceFilesOutputPath.equals(that.sequenceFilesOutputPath) && 
query.equals(that.query);
-            } else {
-              return indexPaths.equals(that.indexPaths) && 
that.sequenceFilesOutputPath == null && query.equals(that.query);
-            }
-          } else {
-            // query == null
-            if (that.query == null && indexPaths.equals(that.indexPaths)) {
-              if (sequenceFilesOutputPath != null) {
-                return 
sequenceFilesOutputPath.equals(that.sequenceFilesOutputPath);
-              } else {
-                return that.sequenceFilesOutputPath == null;
-              }
-            } else {
-              return false;
-            }
-          }
-        } else {
-          // indexPaths == null
-          if (that.indexPaths == null) {
-            if (query != null) {
-              if (sequenceFilesOutputPath != null) {
-                return 
sequenceFilesOutputPath.equals(that.sequenceFilesOutputPath) && 
query.equals(that.query);
-              } else {
-                return that.sequenceFilesOutputPath == null && 
query.equals(that.query);
-              }
-            } else {
-              if (that.query == null) {
-                if (sequenceFilesOutputPath != null) {
-                  return 
sequenceFilesOutputPath.equals(that.sequenceFilesOutputPath);
-                } else {
-                  return that.sequenceFilesOutputPath == null;
-                }
-              } else {
-                return false;
-              }
-            }
-          } else {
-            return false;
-          }
-        }
-      }
-    } else {
-      if (that.idField != null) {
-        return false;
-      } else {
-        if (indexPaths != null) {
-          if (query != null) {
-            if (sequenceFilesOutputPath != null) {
-              return !!indexPaths.equals(that.indexPaths) && 
!!query.equals(that.query) && 
!!sequenceFilesOutputPath.equals(that.sequenceFilesOutputPath);
-            } else {
-              return !!indexPaths.equals(that.indexPaths) && 
!!query.equals(that.query) && !(that.sequenceFilesOutputPath != null);
-            }
-          } else {
-            if (sequenceFilesOutputPath != null) {
-              return !!indexPaths.equals(that.indexPaths) && !(that.query != 
null) && !!sequenceFilesOutputPath.equals(that.sequenceFilesOutputPath);
-            } else {
-              return !!indexPaths.equals(that.indexPaths) && !(that.query != 
null) && !(that.sequenceFilesOutputPath != null);
-            }
-          }
-        } else {
-          if (query != null) {
-            if (sequenceFilesOutputPath != null) {
-              return that.indexPaths == null && query.equals(that.query) && 
sequenceFilesOutputPath.equals(that.sequenceFilesOutputPath);
-            } else {
-              return that.indexPaths == null && query.equals(that.query) && 
that.sequenceFilesOutputPath == null;
-            }
-          } else {
-            return that.indexPaths == null && that.query == null && 
(sequenceFilesOutputPath != null ? 
sequenceFilesOutputPath.equals(that.sequenceFilesOutputPath) : 
that.sequenceFilesOutputPath == null);
-          }
-        }
-      }
-    }
-
-  }
-
-  @Override
-  public int hashCode() {
-    int result = indexPaths != null ? indexPaths.hashCode() : 0;
-    result = 31 * result + (sequenceFilesOutputPath != null ? 
sequenceFilesOutputPath.hashCode() : 0);
-    result = 31 * result + (idField != null ? idField.hashCode() : 0);
-    result = 31 * result + (fields != null ? fields.hashCode() : 0);
-    result = 31 * result + (query != null ? query.hashCode() : 0);
-    result = 31 * result + maxHits;
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
----------------------------------------------------------------------
diff --git 
a/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
 
b/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
deleted file mode 100644
index cd8137f..0000000
--- 
a/integration/src/main/java/org/apache/mahout/text/ReadOnlyFileSystemDirectory.java
+++ /dev/null
@@ -1,355 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.text;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.lucene.store.BaseDirectory;
-import org.apache.lucene.store.BufferedIndexInput;
-import org.apache.lucene.store.BufferedIndexOutput;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.Lock;
-import org.apache.lucene.store.LockFactory;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.util.Collection;
-
-//TODO: is there a better way of doing this in Lucene 4.x?
-
-/**
- * This class implements a read-only Lucene Directory on top of a general 
FileSystem.
- * Currently it does not support locking.
- * <p/>
- * // TODO: Rename to FileSystemReadOnlyDirectory
- */
-@Deprecated
-public class ReadOnlyFileSystemDirectory extends BaseDirectory {
-
-  private final FileSystem fs;
-  private final Path directory;
-  private final int ioFileBufferSize;
-
-  private static final Logger log = 
LoggerFactory.getLogger(ReadOnlyFileSystemDirectory.class);
-
-      /**
-       * Constructor
-       *
-       * @param fs - filesystem
-       * @param directory - directory path
-       * @param create - if true create the directory
-       * @param conf - MR Job Configuration
-       * @throws IOException
-       */
-
-  public ReadOnlyFileSystemDirectory(FileSystem fs, Path directory, boolean 
create,
-                                     Configuration conf) throws IOException {
-
-    this.fs = fs;
-    this.directory = directory;
-    this.ioFileBufferSize = conf.getInt("io.file.buffer.size", 4096);
-
-    if (create) {
-      create();
-    }
-
-    boolean isDir = false;
-    try {
-      FileStatus status = fs.getFileStatus(directory);
-      if (status != null) {
-        isDir = status.isDir();
-      }
-    } catch (IOException e) {
-      log.error(e.getMessage(), e);
-    }
-    if (!isDir) {
-      throw new IOException(directory + " is not a directory");
-    }
-  }
-
-
-  private void create() throws IOException {
-    if (!fs.exists(directory)) {
-      fs.mkdirs(directory);
-    }
-
-    boolean isDir = false;
-    try {
-      FileStatus status = fs.getFileStatus(directory);
-      if (status != null) {
-        isDir = status.isDir();
-      }
-    } catch (IOException e) {
-      log.error(e.getMessage(), e);
-    }
-    if (!isDir) {
-      throw new IOException(directory + " is not a directory");
-    }
-
-    // clear old index files
-    FileStatus[] fileStatus =
-            fs.listStatus(directory, LuceneIndexFileNameFilter.getFilter());
-    for (FileStatus status : fileStatus) {
-      if (!fs.delete(status.getPath(), true)) {
-        throw new IOException("Cannot delete index file "
-                + status.getPath());
-      }
-    }
-  }
-
-  public String[] list() throws IOException {
-    FileStatus[] fileStatus =
-            fs.listStatus(directory, LuceneIndexFileNameFilter.getFilter());
-    String[] result = new String[fileStatus.length];
-    for (int i = 0; i < fileStatus.length; i++) {
-      result[i] = fileStatus[i].getPath().getName();
-    }
-    return result;
-  }
-
-  @Override
-  public String[] listAll() throws IOException {
-    return list();
-  }
-
-  @Override
-  public boolean fileExists(String name) throws IOException {
-    return fs.exists(new Path(directory, name));
-  }
-
-  @Override
-  public long fileLength(String name) throws IOException {
-    return fs.getFileStatus(new Path(directory, name)).getLen();
-  }
-
-  @Override
-  public void deleteFile(String name) throws IOException {
-    if (!fs.delete(new Path(directory, name), true)) {
-      throw new IOException("Cannot delete index file " + name);
-    }
-  }
-
-  @Override
-  public IndexOutput createOutput(String name, IOContext context) throws 
IOException {
-    //TODO: What should we be doing with the IOContext here, if anything?
-    Path file = new Path(directory, name);
-    if (fs.exists(file) && !fs.delete(file, true)) {
-      // delete the existing one if applicable
-      throw new IOException("Cannot overwrite index file " + file);
-    }
-
-    return new FileSystemIndexOutput(file, ioFileBufferSize);
-  }
-
-  @Override
-  public void sync(Collection<String> names) throws IOException {
-    // do nothing, as this is read-only
-  }
-
-  @Override
-  public IndexInput openInput(String name, IOContext context) throws 
IOException {
-    return new FileSystemIndexInput(new Path(directory, name), 
ioFileBufferSize);
-  }
-
-  @Override
-  public Lock makeLock(final String name) {
-    return new Lock() {
-      public boolean obtain() {
-        return true;
-      }
-
-      public void release() {
-      }
-
-      public boolean isLocked() {
-        throw new UnsupportedOperationException();
-      }
-
-      public String toString() {
-        return "Lock@" + new Path(directory, name);
-      }
-    };
-  }
-
-  @Override
-  public void clearLock(String name) throws IOException {
-    // do nothing
-  }
-
-  @Override
-  public void close() throws IOException {
-    // do not close the file system
-  }
-
-  @Override
-  public void setLockFactory(LockFactory lockFactory) throws IOException {
-    // do nothing
-  }
-
-  @Override
-  public LockFactory getLockFactory() {
-    return null;
-  }
-
-  @Override
-  public String toString() {
-    return this.getClass().getName() + "@" + directory;
-  }
-
-  private class FileSystemIndexInput extends BufferedIndexInput implements 
Cloneable {
-
-    // shared by clones
-    private class Descriptor {
-      public final FSDataInputStream in;
-      public long position; // cache of in.getPos()
-
-      public Descriptor(Path file, int ioFileBufferSize) throws IOException {
-        this.in = fs.open(file, ioFileBufferSize);
-      }
-    }
-
-    private final Path filePath; // for debugging
-    private final Descriptor descriptor;
-    private final long length;
-    private boolean isOpen;
-    private boolean isClone;
-
-    public FileSystemIndexInput(Path path, int ioFileBufferSize)
-      throws IOException {
-      super("FSII_" + path.getName(), ioFileBufferSize);
-      filePath = path;
-      descriptor = new Descriptor(path, ioFileBufferSize);
-      length = fs.getFileStatus(path).getLen();
-      isOpen = true;
-    }
-
-    @Override
-    protected void readInternal(byte[] b, int offset, int len)
-      throws IOException {
-      long position = getFilePointer();
-      if (position != descriptor.position) {
-        descriptor.in.seek(position);
-        descriptor.position = position;
-      }
-      int total = 0;
-      do {
-        int i = descriptor.in.read(b, offset + total, len - total);
-        if (i == -1) {
-          throw new IOException("Read past EOF");
-        }
-        descriptor.position += i;
-        total += i;
-      } while (total < len);
-    }
-
-    @Override
-    public void close() throws IOException {
-      if (!isClone) {
-        if (isOpen) {
-          descriptor.in.close();
-          isOpen = false;
-        } else {
-          throw new IOException("Index file " + filePath + " already closed");
-        }
-      }
-    }
-
-    @Override
-    protected void seekInternal(long position) {
-      // handled in readInternal()
-    }
-
-    @Override
-    public long length() {
-      return length;
-    }
-
-    @Override
-    protected void finalize() throws Throwable {
-      super.finalize();
-      if (!isClone && isOpen) {
-        close(); // close the file
-      }
-    }
-
-    @Override
-    public BufferedIndexInput clone() {
-      FileSystemIndexInput clone = (FileSystemIndexInput) super.clone();
-      clone.isClone = true;
-      return clone;
-    }
-  }
-
-  private class FileSystemIndexOutput extends BufferedIndexOutput {
-
-    private final Path filePath; // for debugging
-    private final FSDataOutputStream out;
-    private boolean isOpen;
-
-    public FileSystemIndexOutput(Path path, int ioFileBufferSize)
-      throws IOException {
-      filePath = path;
-      // overwrite is true by default
-      out = fs.create(path, true, ioFileBufferSize);
-      isOpen = true;
-    }
-
-    @Override
-    public void flushBuffer(byte[] b, int offset, int size) throws IOException 
{
-      out.write(b, offset, size);
-    }
-
-    @Override
-    public void close() throws IOException {
-      if (isOpen) {
-        super.close();
-        out.close();
-        isOpen = false;
-      } else {
-        throw new IOException("Index file " + filePath + " already closed");
-      }
-    }
-
-    @Override
-    public void seek(long pos) throws IOException {
-      throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public long length() throws IOException {
-      return out.getPos();
-    }
-
-    @Override
-    protected void finalize() throws Throwable {
-      super.finalize();
-      if (isOpen) {
-        close(); // close the file
-      }
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
----------------------------------------------------------------------
diff --git 
a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
 
b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
deleted file mode 100644
index 84953c2..0000000
--- 
a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorage.java
+++ /dev/null
@@ -1,139 +0,0 @@
-package org.apache.mahout.text;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-
-import com.google.common.base.Strings;
-import com.google.common.io.Closeables;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Text;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.DocumentStoredFieldVisitor;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import static org.apache.commons.lang.StringUtils.isBlank;
-
-/**
- * Generates a sequence file from a Lucene index with a specified id field as 
the key and a content field as the value.
- * Configure this class with a {@link LuceneStorageConfiguration} bean.
- */
-@Deprecated
-public class SequenceFilesFromLuceneStorage {
-  private static final Logger log = 
LoggerFactory.getLogger(SequenceFilesFromLuceneStorage.class);
-
-  /**
-   * Generates a sequence files from a Lucene index via the given {@link 
LuceneStorageConfiguration}
-   *
-   * @param lucene2seqConf configuration bean
-   * @throws java.io.IOException if index cannot be opened or sequence file 
could not be written
-   */
-  public void run(final LuceneStorageConfiguration lucene2seqConf) throws 
IOException {
-    List<Path> indexPaths = lucene2seqConf.getIndexPaths();
-    int processedDocs = 0;
-
-    for (Path indexPath : indexPaths) {
-      Directory directory = FSDirectory.open(new 
File(indexPath.toUri().getPath()));
-      IndexReader reader = DirectoryReader.open(directory);
-      IndexSearcher searcher = new IndexSearcher(reader);
-
-      LuceneIndexHelper.fieldShouldExistInIndex(reader, 
lucene2seqConf.getIdField());
-      for (String field : lucene2seqConf.getFields()) {
-        LuceneIndexHelper.fieldShouldExistInIndex(reader, field);
-      }
-
-      Configuration configuration = lucene2seqConf.getConfiguration();
-      FileSystem fileSystem = FileSystem.get(configuration);
-      Path sequenceFilePath = new 
Path(lucene2seqConf.getSequenceFilesOutputPath(), indexPath.getName());
-      final SequenceFile.Writer sequenceFileWriter = new 
SequenceFile.Writer(fileSystem, configuration,
-          sequenceFilePath, Text.class, Text.class);
-
-      SeqFileWriterCollector writerCollector = new 
SeqFileWriterCollector(lucene2seqConf, sequenceFileWriter,
-          processedDocs);
-      searcher.search(lucene2seqConf.getQuery(), writerCollector);
-      log.info("Wrote " + writerCollector.processedDocs + " documents in " + 
sequenceFilePath.toUri());
-      processedDocs = writerCollector.processedDocs;
-      Closeables.close(sequenceFileWriter, false);
-      directory.close();
-      //searcher.close();
-      reader.close();
-    }
-  }
-
-  private static class SeqFileWriterCollector extends Collector {
-    private final LuceneStorageConfiguration lucene2seqConf;
-    private final SequenceFile.Writer sequenceFileWriter;
-    public int processedDocs;
-    AtomicReaderContext arc;
-
-    SeqFileWriterCollector(LuceneStorageConfiguration lucene2seqConf, 
SequenceFile.Writer sequenceFileWriter,
-                           int processedDocs) {
-      this.lucene2seqConf = lucene2seqConf;
-      this.sequenceFileWriter = sequenceFileWriter;
-      this.processedDocs = processedDocs;
-    }
-
-    @Override
-    public void setScorer(Scorer scorer) throws IOException {
-      //don't care about scoring, we just want the matches
-    }
-
-    @Override
-    public void collect(int docNum) throws IOException {
-      if (processedDocs < lucene2seqConf.getMaxHits()) {
-        final DocumentStoredFieldVisitor storedFieldVisitor = 
lucene2seqConf.getStoredFieldVisitor();
-        arc.reader().document(docNum, storedFieldVisitor);
-
-        Document doc = storedFieldVisitor.getDocument();
-        List<String> fields = lucene2seqConf.getFields();
-        Text theKey = new 
Text(Strings.nullToEmpty(doc.get(lucene2seqConf.getIdField())));
-        Text theValue = new Text();
-        LuceneSeqFileHelper.populateValues(doc, theValue, fields);
-        //if they are both empty, don't write
-        if (isBlank(theKey.toString()) && isBlank(theValue.toString())) {
-          return;
-        }
-        sequenceFileWriter.append(theKey, theValue);
-        processedDocs++;
-      }
-    }
-
-    @Override
-    public void setNextReader(AtomicReaderContext context) throws IOException {
-      arc = context;
-    }
-
-    @Override
-    public boolean acceptsDocsOutOfOrder() {
-      return true;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
----------------------------------------------------------------------
diff --git 
a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
 
b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
deleted file mode 100644
index 9685b85..0000000
--- 
a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageDriver.java
+++ /dev/null
@@ -1,140 +0,0 @@
-package org.apache.mahout.text;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.regex.Pattern;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.queryparser.classic.QueryParser;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.util.Version;
-import org.apache.mahout.common.AbstractJob;
-import org.apache.mahout.common.commandline.DefaultOptionCreator;
-
-/**
- * Driver class for the lucene2seq program. Converts text contents of stored 
fields of a lucene index into a Hadoop
- * SequenceFile. The key of the sequence file is the document ID and the value 
is the concatenated text of the specified
- * stored field(s).
- *
- * Deprecated as of Mahout 0.11.0
- */
-
-@Deprecated
-public class SequenceFilesFromLuceneStorageDriver extends AbstractJob {
-
-  static final String OPTION_ID_FIELD = "idField";
-  static final String OPTION_FIELD = "fields";
-  static final String OPTION_QUERY = "query";
-  static final String OPTION_MAX_HITS = "maxHits";
-
-  static final Query DEFAULT_QUERY = new MatchAllDocsQuery();
-  static final int DEFAULT_MAX_HITS = Integer.MAX_VALUE;
-
-  static final String SEPARATOR_FIELDS = ",";
-  static final String QUERY_DELIMITER = "'";
-  private static final Pattern COMPILE = Pattern.compile(QUERY_DELIMITER);
-
-  public static void main(String[] args) throws Exception {
-    ToolRunner.run(new SequenceFilesFromLuceneStorageDriver(), args);
-  }
-
-  @Override
-  public int run(String[] args) throws Exception {
-    addOutputOption();
-    addInputOption();
-    //addOption(OPTION_LUCENE_DIRECTORY, "d", "Lucene directory / directories. 
Comma separated.", true);
-    addOption(OPTION_ID_FIELD, "id", "The field in the index containing the 
id", true);
-    addOption(OPTION_FIELD, "f", "The stored field(s) in the index containing 
text", true);
-
-    addOption(OPTION_QUERY, "q", "(Optional) Lucene query. Defaults to " + 
DEFAULT_QUERY.getClass().getSimpleName());
-    addOption(OPTION_MAX_HITS, "n", "(Optional) Max hits. Defaults to " + 
DEFAULT_MAX_HITS);
-    addOption(DefaultOptionCreator.methodOption().create());
-
-    if (parseArguments(args) == null) {
-      return -1;
-    }
-
-    Configuration configuration = getConf();
-
-    String[] paths = getInputPath().toString().split(",");
-    List<Path> indexPaths = new ArrayList<>();
-    for (String path : paths) {
-      indexPaths.add(new Path(path));
-    }
-
-    Path sequenceFilesOutputPath = getOutputPath();
-
-    String idField = getOption(OPTION_ID_FIELD);
-    String fields = getOption(OPTION_FIELD);
-
-    LuceneStorageConfiguration lucene2SeqConf = 
newLucene2SeqConfiguration(configuration,
-            indexPaths,
-            sequenceFilesOutputPath,
-            idField,
-            Arrays.asList(fields.split(SEPARATOR_FIELDS)));
-
-    Query query = DEFAULT_QUERY;
-    if (hasOption(OPTION_QUERY)) {
-      try {
-        String queryString = 
COMPILE.matcher(getOption(OPTION_QUERY)).replaceAll("");
-        QueryParser queryParser = new QueryParser(Version.LUCENE_46, 
queryString,
-            new StandardAnalyzer(Version.LUCENE_46));
-        query = queryParser.parse(queryString);
-      } catch (ParseException e) {
-        throw new IllegalArgumentException(e.getMessage(), e);
-      }
-    }
-    lucene2SeqConf.setQuery(query);
-
-    int maxHits = DEFAULT_MAX_HITS;
-    if (hasOption(OPTION_MAX_HITS)) {
-      String maxHitsString = getOption(OPTION_MAX_HITS);
-      maxHits = Integer.valueOf(maxHitsString);
-    }
-    lucene2SeqConf.setMaxHits(maxHits);
-
-    if (hasOption(DefaultOptionCreator.METHOD_OPTION)
-        && getOption(DefaultOptionCreator.METHOD_OPTION).equals("sequential")) 
{
-      new SequenceFilesFromLuceneStorage().run(lucene2SeqConf);
-    } else {
-      new SequenceFilesFromLuceneStorageMRJob().run(lucene2SeqConf);
-    }
-    return 0;
-  }
-
-  public LuceneStorageConfiguration newLucene2SeqConfiguration(Configuration 
configuration,
-                                                               List<Path> 
indexPaths,
-                                                               Path 
sequenceFilesOutputPath,
-                                                               String idField,
-                                                               List<String> 
fields) {
-    return new LuceneStorageConfiguration(
-            configuration,
-            indexPaths,
-            sequenceFilesOutputPath,
-            idField,
-            fields);
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java
----------------------------------------------------------------------
diff --git 
a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java
 
b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java
deleted file mode 100644
index 787bf15..0000000
--- 
a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMRJob.java
+++ /dev/null
@@ -1,66 +0,0 @@
-package org.apache.mahout.text;
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import com.google.common.base.Joiner;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
-import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
-import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
-
-import java.io.IOException;
-
-/**
- * Generates a sequence file from a Lucene index via MapReduce. Uses a 
specified id field as the key and a content field
- * as the value. Configure this class with a {@link 
LuceneStorageConfiguration} bean.
- */
-@Deprecated
-public class SequenceFilesFromLuceneStorageMRJob {
-
-  public void run(LuceneStorageConfiguration lucene2seqConf) {
-    try {
-      Configuration configuration = lucene2seqConf.serialize();
-
-      Job job = new Job(configuration, "LuceneIndexToSequenceFiles: " + 
lucene2seqConf.getIndexPaths() + " -> M/R -> "
-          + lucene2seqConf.getSequenceFilesOutputPath());
-
-      job.setMapOutputKeyClass(Text.class);
-      job.setMapOutputValueClass(Text.class);
-
-      job.setOutputKeyClass(Text.class);
-      job.setOutputValueClass(Text.class);
-
-      job.setOutputFormatClass(SequenceFileOutputFormat.class);
-
-      job.setMapperClass(SequenceFilesFromLuceneStorageMapper.class);
-
-      job.setInputFormatClass(LuceneSegmentInputFormat.class);
-
-      FileInputFormat.setInputPaths(job, 
Joiner.on(',').skipNulls().join(lucene2seqConf.getIndexPaths().iterator()));
-      FileOutputFormat.setOutputPath(job, 
lucene2seqConf.getSequenceFilesOutputPath());
-
-      job.setJarByClass(SequenceFilesFromLuceneStorageMRJob.class);
-      job.setNumReduceTasks(0);
-
-      job.waitForCompletion(true);
-    } catch (IOException | InterruptedException | ClassNotFoundException e) {
-      throw new RuntimeException(e);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
----------------------------------------------------------------------
diff --git 
a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
 
b/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
deleted file mode 100644
index 5feceef..0000000
--- 
a/integration/src/main/java/org/apache/mahout/text/SequenceFilesFromLuceneStorageMapper.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.text;
-
-import com.google.common.base.Strings;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.DocumentStoredFieldVisitor;
-import org.apache.lucene.index.SegmentCommitInfo;
-import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.store.IOContext;
-
-import java.io.IOException;
-import java.util.List;
-
-/**
- * Maps document IDs to key value pairs with ID field as the key and the concatenated stored field(s)
- * as value.
- */
-@Deprecated
-public class SequenceFilesFromLuceneStorageMapper extends Mapper<Text, 
NullWritable, Text, Text> {
-
-  public enum DataStatus { EMPTY_KEY, EMPTY_VALUE, EMPTY_BOTH }
-
-  private LuceneStorageConfiguration l2sConf;
-  private SegmentReader segmentReader;
-
-  @Override
-  protected void setup(Context context) throws IOException, 
InterruptedException {
-    Configuration configuration = context.getConfiguration();
-    l2sConf = new LuceneStorageConfiguration(configuration);
-    LuceneSegmentInputSplit inputSplit = (LuceneSegmentInputSplit) 
context.getInputSplit();
-    SegmentCommitInfo segmentInfo = inputSplit.getSegment(configuration);
-    segmentReader = new SegmentReader(segmentInfo, 
LuceneSeqFileHelper.USE_TERM_INFOS, IOContext.READ);
-  }
-
-  @Override
-  protected void map(Text key, NullWritable text, Context context) throws 
IOException, InterruptedException {
-    int docId = Integer.valueOf(key.toString());
-    DocumentStoredFieldVisitor storedFieldVisitor = 
l2sConf.getStoredFieldVisitor();
-    segmentReader.document(docId, storedFieldVisitor);
-    Document document = storedFieldVisitor.getDocument();
-    List<String> fields = l2sConf.getFields();
-    Text theKey = new 
Text(Strings.nullToEmpty(document.get(l2sConf.getIdField())));
-    Text theValue = new Text();
-    LuceneSeqFileHelper.populateValues(document, theValue, fields);
-    //if they are both empty, don't write
-    if (StringUtils.isBlank(theKey.toString()) && 
StringUtils.isBlank(theValue.toString())) {
-      context.getCounter(DataStatus.EMPTY_BOTH).increment(1);
-      return;
-    }
-    if (StringUtils.isBlank(theKey.toString())) {
-      context.getCounter(DataStatus.EMPTY_KEY).increment(1);
-    } else if (StringUtils.isBlank(theValue.toString())) {
-      context.getCounter(DataStatus.EMPTY_VALUE).increment(1);
-    }
-    context.write(theKey, theValue);
-  }
-
-  @Override
-  protected void cleanup(Context context) throws IOException, 
InterruptedException {
-    segmentReader.close();
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java b/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
deleted file mode 100644
index 3164092..0000000
--- a/integration/src/test/java/org/apache/mahout/text/AbstractLuceneStorageTest.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.apache.lucene.util.Version;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.common.Pair;
-import org.apache.mahout.text.doc.MultipleFieldsDocument;
-import org.apache.mahout.text.doc.NumericFieldDocument;
-import org.apache.mahout.text.doc.SingleFieldDocument;
-import org.apache.mahout.text.doc.TestDocument;
-
-/**
- * Abstract test for working with Lucene storage.
- */
-@Deprecated
-public abstract class AbstractLuceneStorageTest extends MahoutTestCase {
-
-  protected Path indexPath1;
-  protected Path indexPath2;
-  protected List<TestDocument> docs = new ArrayList<>();
-  protected List<TestDocument> misshapenDocs = new ArrayList<>();
-
-  @Override
-  public void setUp() throws Exception {
-    super.setUp();
-    indexPath1 = getTestTempDirPath("index1");
-    indexPath2 = getTestTempDirPath("index2");
-    for (int i = 0; i < 2000; i++) {
-      docs.add(new SingleFieldDocument(String.valueOf(i), "This is test 
document " + i));
-    }
-    misshapenDocs.add(new SingleFieldDocument("", "This doc has an empty id"));
-    misshapenDocs.add(new SingleFieldDocument("empty_value", ""));
-  }
-
-  protected void commitDocuments(Directory directory, Iterable<TestDocument> 
theDocs) throws IOException{
-    IndexWriter indexWriter = new IndexWriter(directory, new 
IndexWriterConfig(Version.LUCENE_46, new StandardAnalyzer(Version.LUCENE_46)));
-
-    for (TestDocument singleFieldDocument : theDocs) {
-      indexWriter.addDocument(singleFieldDocument.asLuceneDocument());
-    }
-
-    indexWriter.commit();
-    indexWriter.close();
-  }
-
-  protected void commitDocuments(Directory directory, TestDocument... 
documents) throws IOException {
-    commitDocuments(directory, Arrays.asList(documents));
-  }
-
-  protected void assertMultipleFieldsDocumentEquals(MultipleFieldsDocument 
expected, Pair<Text, Text> actual) {
-    assertEquals(expected.getId(), actual.getFirst().toString());
-    assertEquals(expected.getField() + " " + expected.getField1() + " " + 
expected.getField2(), actual.getSecond().toString());
-  }
-
-  protected void assertNumericFieldEquals(NumericFieldDocument expected, 
Pair<Text, Text> actual) {
-    assertEquals(expected.getId(), actual.getFirst().toString());
-    assertEquals(expected.getField() + " " + expected.getNumericField(), 
actual.getSecond().toString());
-  }
-
-  protected FSDirectory getDirectory(File indexPath) throws IOException {
-    return FSDirectory.open(indexPath);
-  }
-
-  protected File getIndexPath1AsFile() {
-    return new File(indexPath1.toUri().getPath());
-  }
-
-  protected Path getIndexPath1() {
-    return indexPath1;
-  }
-
-  protected File getIndexPath2AsFile() {
-    return new File(indexPath2.toUri().getPath());
-  }
-
-  protected Path getIndexPath2() {
-    return indexPath2;
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputFormatTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputFormatTest.java b/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputFormatTest.java
deleted file mode 100644
index ee81a32..0000000
--- a/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputFormatTest.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.JobID;
-import org.apache.mahout.common.HadoopUtil;
-import org.apache.mahout.text.doc.SingleFieldDocument;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
-import java.util.Collections;
-import java.util.List;
-@Deprecated
-public class LuceneSegmentInputFormatTest extends AbstractLuceneStorageTest {
-
-  private LuceneSegmentInputFormat inputFormat;
-  private JobContext jobContext;
-  private Configuration conf;
-
-  @Before
-  public void before() throws Exception {
-    inputFormat = new LuceneSegmentInputFormat();
-    LuceneStorageConfiguration lucene2SeqConf = new
-    LuceneStorageConfiguration(getConfiguration(), 
Collections.singletonList(indexPath1), new Path("output"), "id", 
Collections.singletonList("field"));
-    conf = lucene2SeqConf.serialize();
-
-    jobContext = getJobContext(conf, new JobID());
-  }
-
-  @After
-  public void after() throws IOException {
-    HadoopUtil.delete(conf, indexPath1);
-  }
-
-  @Test
-  public void testGetSplits() throws IOException, InterruptedException {
-    SingleFieldDocument doc1 = new SingleFieldDocument("1", "This is simple 
document 1");
-    SingleFieldDocument doc2 = new SingleFieldDocument("2", "This is simple 
document 2");
-    SingleFieldDocument doc3 = new SingleFieldDocument("3", "This is simple 
document 3");
-
-    //generate 3 segments
-    commitDocuments(getDirectory(getIndexPath1AsFile()), doc1);
-    commitDocuments(getDirectory(getIndexPath1AsFile()), doc2);
-    commitDocuments(getDirectory(getIndexPath1AsFile()), doc3);
-
-    List<LuceneSegmentInputSplit> splits = inputFormat.getSplits(jobContext);
-    Assert.assertEquals(3, splits.size());
-  }
-
-  // Use reflection to abstract this incompatibility between Hadoop 1 & 2 APIs.
-  private JobContext getJobContext(Configuration conf, JobID jobID) throws
-      ClassNotFoundException, NoSuchMethodException, IllegalAccessException,
-      InvocationTargetException, InstantiationException {
-    Class<? extends JobContext> clazz;
-    if (!JobContext.class.isInterface()) {
-      clazz = JobContext.class;
-    } else {
-      clazz = (Class<? extends JobContext>)
-          Class.forName("org.apache.hadoop.mapreduce.task.JobContextImpl");
-    }
-    return clazz.getConstructor(Configuration.class, JobID.class)
-        .newInstance(conf, jobID);
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java b/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java
deleted file mode 100644
index 5375610..0000000
--- a/integration/src/test/java/org/apache/mahout/text/LuceneSegmentInputSplitTest.java
+++ /dev/null
@@ -1,88 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.lucene.index.SegmentCommitInfo;
-import org.apache.lucene.index.SegmentReader;
-import org.apache.lucene.store.IOContext;
-import org.apache.mahout.common.HadoopUtil;
-import org.apache.mahout.text.doc.SingleFieldDocument;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import java.io.IOException;
-import java.util.List;
-
-import static java.util.Arrays.asList;
-@Deprecated
-public class LuceneSegmentInputSplitTest extends AbstractLuceneStorageTest {
-
-  private Configuration configuration;
-
-  @Before
-  public void before() throws IOException {
-    configuration = getConfiguration();
-  }
-
-  @After
-  public void after() throws IOException {
-    HadoopUtil.delete(configuration, indexPath1);
-  }
-
-  @Test
-  public void testGetSegment() throws Exception {
-    SingleFieldDocument doc1 = new SingleFieldDocument("1", "This is simple 
document 1");
-    SingleFieldDocument doc2 = new SingleFieldDocument("2", "This is simple 
document 2");
-    SingleFieldDocument doc3 = new SingleFieldDocument("3", "This is simple 
document 3");
-
-    List<SingleFieldDocument> docs = asList(doc1, doc2, doc3);
-    for (SingleFieldDocument doc : docs) {
-      commitDocuments(getDirectory(getIndexPath1AsFile()), doc);
-    }
-
-    assertSegmentContainsOneDoc("_0");
-    assertSegmentContainsOneDoc("_1");
-    assertSegmentContainsOneDoc("_2");
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testGetSegmentNonExistingSegment() throws Exception {
-    SingleFieldDocument doc1 = new SingleFieldDocument("1", "This is simple 
document 1");
-    SingleFieldDocument doc2 = new SingleFieldDocument("2", "This is simple 
document 2");
-    SingleFieldDocument doc3 = new SingleFieldDocument("3", "This is simple 
document 3");
-
-    List<SingleFieldDocument> docs = asList(doc1, doc2, doc3);
-    for (SingleFieldDocument doc : docs) {
-      commitDocuments(getDirectory(getIndexPath1AsFile()), doc);
-    }
-
-    LuceneSegmentInputSplit inputSplit = new 
LuceneSegmentInputSplit(indexPath1, "_3", 1000);
-    inputSplit.getSegment(configuration);
-  }
-
-  private void assertSegmentContainsOneDoc(String segmentName) throws 
IOException {
-    LuceneSegmentInputSplit inputSplit = new 
LuceneSegmentInputSplit(indexPath1, segmentName, 1000);
-    SegmentCommitInfo segment = inputSplit.getSegment(configuration);
-    SegmentReader segmentReader = new SegmentReader(segment, 1, 
IOContext.READ);//SegmentReader.get(true, segment, 1);
-    assertEquals(segmentName, segment.info.name);
-    assertEquals(1, segmentReader.numDocs());
-  }
-
-
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java b/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java
deleted file mode 100644
index 8a23ecb..0000000
--- a/integration/src/test/java/org/apache/mahout/text/LuceneSegmentRecordReaderTest.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text;
-
-import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
-import java.util.Collections;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.lucene.index.SegmentCommitInfo;
-import org.apache.lucene.index.SegmentInfos;
-import org.apache.mahout.common.HadoopUtil;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import static org.apache.mahout.text.doc.SingleFieldDocument.FIELD;
-import static org.apache.mahout.text.doc.SingleFieldDocument.ID_FIELD;
-@Deprecated
-public class LuceneSegmentRecordReaderTest extends AbstractLuceneStorageTest {
-  private Configuration configuration;
-
-  private LuceneSegmentRecordReader recordReader;
-
-  private SegmentInfos segmentInfos;
-
-  @Before
-  public void before() throws IOException, InterruptedException {
-    LuceneStorageConfiguration lucene2SeqConf = new 
LuceneStorageConfiguration(getConfiguration(),
-        Collections.singletonList(getIndexPath1()), new Path("output"), 
ID_FIELD,
-        Collections.singletonList(FIELD));
-    configuration = lucene2SeqConf.serialize();
-    recordReader = new LuceneSegmentRecordReader();
-    commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500));
-    commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(500, 
1000));
-    segmentInfos = new SegmentInfos();
-    segmentInfos.read(getDirectory(getIndexPath1AsFile()));
-  }
-
-  @After
-  public void after() throws IOException {
-    HadoopUtil.delete(configuration, getIndexPath1());
-  }
-
-  @Test
-  public void testKey() throws Exception {
-    for (SegmentCommitInfo segmentInfo : segmentInfos) {
-      int docId = 0;
-      LuceneSegmentInputSplit inputSplit = new 
LuceneSegmentInputSplit(getIndexPath1(),
-          segmentInfo.info.name, segmentInfo.sizeInBytes());
-      TaskAttemptContext context = getTaskAttemptContext(configuration, new 
TaskAttemptID());
-      recordReader.initialize(inputSplit, context);
-      for (int i = 0; i < 500; i++){
-        recordReader.nextKeyValue();
-        //we can't be sure of the order we are getting the segments, so we have to fudge here a bit on the id,
-        // but it is either id: i or i + 500
-        assertTrue("i = " + i + " docId= " +
-            docId, 
String.valueOf(docId).equals(recordReader.getCurrentKey().toString()) ||
-            
String.valueOf(docId+500).equals(recordReader.getCurrentKey().toString()));
-        assertEquals(NullWritable.get(), recordReader.getCurrentValue());
-        docId++;
-      }
-    }
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testNonExistingIdField() throws Exception {
-    configuration = new LuceneStorageConfiguration(getConfiguration(),
-        Collections.singletonList(getIndexPath1()), new Path("output"), 
"nonExistingId",
-        Collections.singletonList(FIELD)).serialize();
-    SegmentCommitInfo segmentInfo = segmentInfos.iterator().next();
-    LuceneSegmentInputSplit inputSplit = new 
LuceneSegmentInputSplit(getIndexPath1(),
-        segmentInfo.info.name, segmentInfo.sizeInBytes());
-    TaskAttemptContext context = getTaskAttemptContext(configuration, new 
TaskAttemptID());
-    recordReader.initialize(inputSplit, context);
-  }
-
-  @Test(expected = IllegalArgumentException.class)
-  public void testNonExistingField() throws Exception {
-    configuration = new LuceneStorageConfiguration(getConfiguration(), 
Collections.singletonList(getIndexPath1()),
-        new Path("output"), ID_FIELD, 
Collections.singletonList("nonExistingField")).serialize();
-    SegmentCommitInfo segmentInfo = segmentInfos.iterator().next();
-    LuceneSegmentInputSplit inputSplit = new 
LuceneSegmentInputSplit(getIndexPath1(),
-        segmentInfo.info.name, segmentInfo.sizeInBytes());
-    TaskAttemptContext context = getTaskAttemptContext(configuration, new 
TaskAttemptID());
-    recordReader.initialize(inputSplit, context);
-  }
-
-  // Use reflection to abstract this incompatibility between Hadoop 1 & 2 APIs.
-  private TaskAttemptContext getTaskAttemptContext(Configuration conf, 
TaskAttemptID jobID) throws
-      ClassNotFoundException, NoSuchMethodException, IllegalAccessException,
-      InvocationTargetException, InstantiationException {
-    Class<? extends TaskAttemptContext> clazz;
-    if (!TaskAttemptContext.class.isInterface()) {
-      clazz = TaskAttemptContext.class;
-    } else {
-      clazz = (Class<? extends TaskAttemptContext>)
-          
Class.forName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl");
-    }
-    return clazz.getConstructor(Configuration.class, TaskAttemptID.class)
-        .newInstance(conf, jobID);
-  }
-}

http://git-wip-us.apache.org/repos/asf/mahout/blob/82e78a8c/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java
----------------------------------------------------------------------
diff --git a/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java b/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java
deleted file mode 100644
index e24066c..0000000
--- a/integration/src/test/java/org/apache/mahout/text/LuceneStorageConfigurationTest.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.text;
-
-import java.io.IOException;
-import java.util.Collections;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-@Deprecated
-public class LuceneStorageConfigurationTest extends MahoutTestCase {
-  
-  @Test
-  public void testSerialization() throws Exception {
-    Configuration configuration = getConfiguration();
-    Path indexPath = new Path("indexPath");
-    Path outputPath = new Path("outputPath");
-    LuceneStorageConfiguration luceneStorageConfiguration =
-      new LuceneStorageConfiguration(configuration, 
Collections.singletonList(indexPath), outputPath,
-          "id", Collections.singletonList("field"));
-
-    Configuration serializedConfiguration = 
luceneStorageConfiguration.serialize();
-
-    LuceneStorageConfiguration deSerializedConfiguration = new 
LuceneStorageConfiguration(serializedConfiguration);
-
-    assertEquals(luceneStorageConfiguration, deSerializedConfiguration);
-  }
-  
-  @Test(expected = IllegalArgumentException.class)
-  public void testSerializationNotSerialized() throws IOException {
-    new LuceneStorageConfiguration(getConfiguration());
-  }
-}

[2/2] mahout git commit: MAHOUT-1782: Remove code for lucene2seq, this closes apache/mahout#170

Reply via email to