Author: cdouglas
Date: Fri Sep 11 07:38:01 2009
New Revision: 813698
URL: http://svn.apache.org/viewvc?rev=813698&view=rev
Log:
HADOOP-6196. Fix a bug in SequenceFile.Reader where syncing within the header
would cause the reader to read the sync marker as a record. Contributed by Jay
Booth
Added:
hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestSequenceFileSync.java
Modified:
hadoop/common/trunk/CHANGES.txt
hadoop/common/trunk/src/java/org/apache/hadoop/io/SequenceFile.java
Modified: hadoop/common/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/CHANGES.txt?rev=813698&r1=813697&r2=813698&view=diff
==============================================================================
--- hadoop/common/trunk/CHANGES.txt (original)
+++ hadoop/common/trunk/CHANGES.txt Fri Sep 11 07:38:01 2009
@@ -1008,6 +1008,10 @@
HADOOP-6181. Fix .eclipse.templates/.classpath for avro and jets3t jar
files. (Carlos Valiente via szetszwo)
+ HADOOP-6196. Fix a bug in SequenceFile.Reader where syncing within the
+ header would cause the reader to read the sync marker as a record. (Jay
+ Booth via cdouglas)
+
Release 0.20.1 - Unreleased
INCOMPATIBLE CHANGES
Modified: hadoop/common/trunk/src/java/org/apache/hadoop/io/SequenceFile.java
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/src/java/org/apache/hadoop/io/SequenceFile.java?rev=813698&r1=813697&r2=813698&view=diff
==============================================================================
--- hadoop/common/trunk/src/java/org/apache/hadoop/io/SequenceFile.java
(original)
+++ hadoop/common/trunk/src/java/org/apache/hadoop/io/SequenceFile.java Fri Sep
11 07:38:01 2009
@@ -1397,6 +1397,7 @@
private byte[] syncCheck = new byte[SYNC_HASH_SIZE];
private boolean syncSeen;
+ private long headerEnd;
private long end;
private int keyLength;
private int recordLength;
@@ -1546,6 +1547,7 @@
if (version > 1) { // if version > 1
in.readFully(sync); // read sync bytes
+ headerEnd = in.getPos(); // record end of header
}
// Initialize... *not* if this we are constructing a temporary Reader
@@ -2210,6 +2212,14 @@
return;
}
+ if (position < headerEnd) {
+ // seek directly to first record
+ in.seek(headerEnd);
+ // note the sync marker "seen" in the header
+ syncSeen = true;
+ return;
+ }
+
try {
seek(position+4); // skip escape
in.readFully(syncCheck);
Added:
hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestSequenceFileSync.java
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestSequenceFileSync.java?rev=813698&view=auto
==============================================================================
---
hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestSequenceFileSync.java
(added)
+++
hadoop/common/trunk/src/test/core/org/apache/hadoop/io/TestSequenceFileSync.java
Fri Sep 11 07:38:01 2009
@@ -0,0 +1,107 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.io;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Random;
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import static org.junit.Assert.*;
+
+public class TestSequenceFileSync {
+ private static final int NUMRECORDS = 2000;
+ private static final int RECORDSIZE = 80;
+ private static final Random rand = new Random();
+
+ private final static String REC_FMT = "%d RECORDID %d : ";
+
+
+ private static void forOffset(SequenceFile.Reader reader,
+ IntWritable key, Text val, int iter, long off, int expectedRecord)
+ throws IOException {
+ val.clear();
+ reader.sync(off);
+ reader.next(key, val);
+ assertEquals(key.get(), expectedRecord);
+ final String test = String.format(REC_FMT, expectedRecord, expectedRecord);
+ assertEquals("Invalid value " + val, 0, val.find(test, 0));
+ }
+
+ @Test
+ public void testLowSyncpoint() throws IOException {
+ final Configuration conf = new Configuration();
+ final FileSystem fs = FileSystem.getLocal(conf);
+ final Path path = new Path(System.getProperty("test.build.data", "/tmp"),
+ "sequencefile.sync.test");
+ final IntWritable input = new IntWritable();
+ final Text val = new Text();
+ SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
+ IntWritable.class, Text.class);
+ try {
+ writeSequenceFile(writer, NUMRECORDS);
+ for (int i = 0; i < 5 ; i++) {
+ final SequenceFile.Reader reader =
+ new SequenceFile.Reader(fs, path, conf);
+ try {
+ forOffset(reader, input, val, i, 0, 0);
+ forOffset(reader, input, val, i, 65, 0);
+ forOffset(reader, input, val, i, 2000, 21);
+ forOffset(reader, input, val, i, 0, 0);
+ } finally {
+ reader.close();
+ }
+ }
+ } finally {
+ fs.delete(path, false);
+ }
+ }
+
+ public static void writeSequenceFile(SequenceFile.Writer writer,
+ int numRecords) throws IOException {
+ final IntWritable key = new IntWritable();
+ final Text val = new Text();
+ for (int numWritten = 0; numWritten < numRecords; ++numWritten) {
+ key.set(numWritten);
+ randomText(val, numWritten, RECORDSIZE);
+ writer.append(key, val);
+ }
+ writer.close();
+ }
+
+ static void randomText(Text val, int id, int recordSize) {
+ val.clear();
+ final StringBuilder ret = new StringBuilder(recordSize);
+ ret.append(String.format(REC_FMT, id, id));
+ recordSize -= ret.length();
+ for (int i = 0; i < recordSize; ++i) {
+ ret.append(rand.nextInt(9));
+ }
+ val.set(ret.toString());
+ }
+}