Author: tomwhite
Date: Tue Jan 15 02:15:50 2008
New Revision: 612069

URL: http://svn.apache.org/viewvc?rev=612069&view=rev
Log:
HADOOP-2532.  Add to MapFile a getClosest method that returns the key that 
comes just before if the key is not present.  Contributed by stack.

Added:
    lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMapFile.java
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=612069&r1=612068&r2=612069&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Jan 15 02:15:50 2008
@@ -80,6 +80,9 @@
     HADOOP-1873. Implement user permissions for Map/Reduce framework.
     (Hairong Kuang via shv)
 
+    HADOOP-2532.  Add to MapFile a getClosest method that returns the key
+    that comes just before if the key is not present.  (stack via tomwhite)
+
   IMPROVEMENTS
 
     HADOOP-2045.  Change committer list on website to a table, so that

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java?rev=612069&r1=612068&r2=612069&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java Tue Jan 15 
02:15:50 2008
@@ -368,13 +368,31 @@
     /** 
      * Positions the reader at the named key, or if none such exists, at the
      * first entry after the named key.
-     * 
+     *
      * @return  0   - exact match found
      *          < 0 - positioned at next record
      *          1   - no more records in file
      */
     private synchronized int seekInternal(WritableComparable key)
       throws IOException {
+      return seekInternal(key, false);
+    }
+
+    /** 
+     * Positions the reader at the named key, or if none such exists, at the
+     * key that falls just before or just after dependent on how the
+     * <code>before</code> parameter is set.
+     * 
+     * @param before - IF true, and <code>key</code> does not exist, position
+     * file at entry that falls just before <code>key</code>.  Otherwise,
+     * position file at record that sorts just after.
+     * @return  0   - exact match found
+     *          < 0 - positioned at next record
+     *          1   - no more records in file
+     */
+    private synchronized int seekInternal(WritableComparable key,
+        final boolean before)
+      throws IOException {
       readIndex();                                // make sure index is read
 
       if (seekIndex != -1                         // seeked before
@@ -397,10 +415,18 @@
       
       if (nextKey == null)
         nextKey = comparator.newKey();
-
+      long oldPosition = -1;
+      if (before) {
+        oldPosition = data.getPosition();
+      }
       while (data.next(nextKey)) {
         int c = comparator.compare(key, nextKey);
         if (c <= 0) {                             // at or beyond desired
+          if (before && c != 0) {
+            // Need to back up to previous record.
+            data.seek(oldPosition);
+            data.next(nextKey);
+          }
           return c;
         }
       }
@@ -447,15 +473,34 @@
 
     /** 
      * Finds the record that is the closest match to the specified key.
+     * Returns <code>key</code> or if it does not exist, at the first entry
+     * after the named key.
+     * 
+-     * @param key       - key that we're trying to find
+-     * @param val       - data value if key is found
+-     * @return          - the key that was the closest match or null if eof.
+     */
+    public synchronized WritableComparable getClosest(WritableComparable key,
+      Writable val)
+    throws IOException {
+      return getClosest(key, val, false);
+    }
+
+    /** 
+     * Finds the record that is the closest match to the specified key.
      * 
      * @param key       - key that we're trying to find
      * @param val       - data value if key is found
-     * @return          - returns the key that was the closest match or null 
if eof.
+     * @param before    - IF true, and <code>key</code> does not exist, return
+     * the first entry that falls just before the <code>key</code>.  Otherwise,
+     * return the record that sorts just after.
+     * @return          - the key that was the closest match or null if eof.
      */
-    public synchronized WritableComparable getClosest(WritableComparable key, 
Writable val)
+    public synchronized WritableComparable getClosest(WritableComparable key,
+        Writable val, final boolean before)
       throws IOException {
       
-      if (seekInternal(key) > 0) {
+      if (seekInternal(key, before) > 0) {
         return null;
       }
       data.getCurrentValue(val);
@@ -465,7 +510,7 @@
     /** Close the map. */
     public synchronized void close() throws IOException {
       if (!indexClosed) {
-       index.close();
+        index.close();
       }
       data.close();
     }

Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMapFile.java
URL: 
http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMapFile.java?rev=612069&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMapFile.java (added)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMapFile.java Tue Jan 
15 02:15:50 2008
@@ -0,0 +1,84 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.io;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import junit.framework.TestCase;
+
+public class TestMapFile extends TestCase {
+  private static Configuration conf = new Configuration();
+
+  /**
+   * Test getClosest feature.
+   * @throws Exception
+   */
+  public void testGetClosest() throws Exception {
+    // Write a mapfile of simple data: keys are 
+    Path dirName = new Path(System.getProperty("test.build.data",".") +
+      getName() + ".mapfile"); 
+    FileSystem fs = FileSystem.getLocal(conf);
+    Path qualifiedDirName = fs.makeQualified(dirName);
+    MapFile.Writer writer = new MapFile.Writer(conf, fs,
+      qualifiedDirName.toString(), Text.class, Text.class);
+    // Make an index entry for each insertion.
+    writer.setIndexInterval(1);
+    // Add entries up to 100 in intervals of ten.
+    final int FIRST_KEY = 10;
+    for (int i = FIRST_KEY; i < 100; i += 10) {
+      String iStr = Integer.toString(i);
+      Text t = new Text("00".substring(iStr.length()) + iStr);
+      writer.append(t, t);
+    }
+    writer.close();
+    // Now do getClosest on created mapfile.
+    MapFile.Reader reader = new MapFile.Reader(fs, qualifiedDirName.toString(),
+      conf);
+    Text key = new Text("55");
+    Text value = new Text();
+    Text closest = (Text)reader.getClosest(key, value);
+    // Assert that closest after 55 is 60
+    assertTrue(closest.equals(new Text("60")));
+    // Get closest that falls before the passed key: 50
+    closest = (Text)reader.getClosest(key, value, true);
+    assertTrue(closest.equals(new Text("50")));
+    // Test get closest when we pass explicit key
+    final Text TWENTY = new Text("20");
+    closest = (Text)reader.getClosest(TWENTY, value);
+    assertTrue(closest.equals(new Text(TWENTY)));
+    closest = (Text)reader.getClosest(TWENTY, value, true);
+    assertTrue(closest.equals(new Text(TWENTY)));
+    // Test what happens at boundaries.  Assert if searching a key that is
+    // less than first key in the mapfile, that the first key is returned.
+    key = new Text("00");
+    closest = (Text)reader.getClosest(key, value);
+    assertEquals(Integer.parseInt(closest.toString()), FIRST_KEY);
+    closest = (Text)reader.getClosest(key, value, true);
+    assertEquals(Integer.parseInt(closest.toString()), FIRST_KEY);
+    // Assert that null is returned if key is > last entry in mapfile.
+    key = new Text("99");
+    closest = (Text)reader.getClosest(key, value);
+    assertNull(closest);
+    closest = (Text)reader.getClosest(key, value, true);
+    assertNull(closest);
+  }
+}
\ No newline at end of file


Reply via email to