Author: tomwhite Date: Tue Jan 15 02:15:50 2008 New Revision: 612069 URL: http://svn.apache.org/viewvc?rev=612069&view=rev Log: HADOOP-2532. Add to MapFile a getClosest method that returns the key that comes just before if the key is not present. Contributed by stack.
Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMapFile.java Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=612069&r1=612068&r2=612069&view=diff ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Tue Jan 15 02:15:50 2008 @@ -80,6 +80,9 @@ HADOOP-1873. Implement user permissions for Map/Reduce framework. (Hairong Kuang via shv) + HADOOP-2532. Add to MapFile a getClosest method that returns the key + that comes just before if the key is not present. (stack via tomwhite) + IMPROVEMENTS HADOOP-2045. Change committer list on website to a table, so that Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java?rev=612069&r1=612068&r2=612069&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/io/MapFile.java Tue Jan 15 02:15:50 2008 @@ -368,13 +368,31 @@ /** * Positions the reader at the named key, or if none such exists, at the * first entry after the named key. - * + * * @return 0 - exact match found * < 0 - positioned at next record * 1 - no more records in file */ private synchronized int seekInternal(WritableComparable key) throws IOException { + return seekInternal(key, false); + } + + /** + * Positions the reader at the named key, or if none such exists, at the + * key that falls just before or just after dependent on how the + * <code>before</code> parameter is set. + * + * @param before - IF true, and <code>key</code> does not exist, position + * file at entry that falls just before <code>key</code>. Otherwise, + * position file at record that sorts just after. + * @return 0 - exact match found + * < 0 - positioned at next record + * 1 - no more records in file + */ + private synchronized int seekInternal(WritableComparable key, + final boolean before) + throws IOException { readIndex(); // make sure index is read if (seekIndex != -1 // seeked before @@ -397,10 +415,18 @@ if (nextKey == null) nextKey = comparator.newKey(); - + long oldPosition = -1; + if (before) { + oldPosition = data.getPosition(); + } while (data.next(nextKey)) { int c = comparator.compare(key, nextKey); if (c <= 0) { // at or beyond desired + if (before && c != 0) { + // Need to back up to previous record. + data.seek(oldPosition); + data.next(nextKey); + } return c; } } @@ -447,15 +473,34 @@ /** * Finds the record that is the closest match to the specified key. + * Returns <code>key</code> or if it does not exist, at the first entry + * after the named key. + * +- * @param key - key that we're trying to find +- * @param val - data value if key is found +- * @return - the key that was the closest match or null if eof. + */ + public synchronized WritableComparable getClosest(WritableComparable key, + Writable val) + throws IOException { + return getClosest(key, val, false); + } + + /** + * Finds the record that is the closest match to the specified key. * * @param key - key that we're trying to find * @param val - data value if key is found - * @return - returns the key that was the closest match or null if eof. + * @param before - IF true, and <code>key</code> does not exist, return + * the first entry that falls just before the <code>key</code>. Otherwise, + * return the record that sorts just after. + * @return - the key that was the closest match or null if eof. */ - public synchronized WritableComparable getClosest(WritableComparable key, Writable val) + public synchronized WritableComparable getClosest(WritableComparable key, + Writable val, final boolean before) throws IOException { - if (seekInternal(key) > 0) { + if (seekInternal(key, before) > 0) { return null; } data.getCurrentValue(val); @@ -465,7 +510,7 @@ /** Close the map. */ public synchronized void close() throws IOException { if (!indexClosed) { - index.close(); + index.close(); } data.close(); } Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMapFile.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMapFile.java?rev=612069&view=auto ============================================================================== --- lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMapFile.java (added) +++ lucene/hadoop/trunk/src/test/org/apache/hadoop/io/TestMapFile.java Tue Jan 15 02:15:50 2008 @@ -0,0 +1,84 @@ +/** + * Copyright 2007 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.io; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import junit.framework.TestCase; + +public class TestMapFile extends TestCase { + private static Configuration conf = new Configuration(); + + /** + * Test getClosest feature. + * @throws Exception + */ + public void testGetClosest() throws Exception { + // Write a mapfile of simple data: keys are + Path dirName = new Path(System.getProperty("test.build.data",".") + + getName() + ".mapfile"); + FileSystem fs = FileSystem.getLocal(conf); + Path qualifiedDirName = fs.makeQualified(dirName); + MapFile.Writer writer = new MapFile.Writer(conf, fs, + qualifiedDirName.toString(), Text.class, Text.class); + // Make an index entry for each insertion. + writer.setIndexInterval(1); + // Add entries up to 100 in intervals of ten. + final int FIRST_KEY = 10; + for (int i = FIRST_KEY; i < 100; i += 10) { + String iStr = Integer.toString(i); + Text t = new Text("00".substring(iStr.length()) + iStr); + writer.append(t, t); + } + writer.close(); + // Now do getClosest on created mapfile. + MapFile.Reader reader = new MapFile.Reader(fs, qualifiedDirName.toString(), + conf); + Text key = new Text("55"); + Text value = new Text(); + Text closest = (Text)reader.getClosest(key, value); + // Assert that closest after 55 is 60 + assertTrue(closest.equals(new Text("60"))); + // Get closest that falls before the passed key: 50 + closest = (Text)reader.getClosest(key, value, true); + assertTrue(closest.equals(new Text("50"))); + // Test get closest when we pass explicit key + final Text TWENTY = new Text("20"); + closest = (Text)reader.getClosest(TWENTY, value); + assertTrue(closest.equals(new Text(TWENTY))); + closest = (Text)reader.getClosest(TWENTY, value, true); + assertTrue(closest.equals(new Text(TWENTY))); + // Test what happens at boundaries. Assert if searching a key that is + // less than first key in the mapfile, that the first key is returned. + key = new Text("00"); + closest = (Text)reader.getClosest(key, value); + assertEquals(Integer.parseInt(closest.toString()), FIRST_KEY); + closest = (Text)reader.getClosest(key, value, true); + assertEquals(Integer.parseInt(closest.toString()), FIRST_KEY); + // Assert that null is returned if key is > last entry in mapfile. + key = new Text("99"); + closest = (Text)reader.getClosest(key, value); + assertNull(closest); + closest = (Text)reader.getClosest(key, value, true); + assertNull(closest); + } +} \ No newline at end of file