Author: cdouglas
Date: Thu Mar 26 08:40:32 2009
New Revision: 758557
URL: http://svn.apache.org/viewvc?rev=758557&view=rev
Log:
HADOOP-5459. Fix undetected CRC errors where intermediate output is closed
before it has been completely consumed.
Added:
hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestIFileStreams.java
Modified:
hadoop/core/branches/branch-0.20/CHANGES.txt
hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IFileInputStream.java
Modified: hadoop/core/branches/branch-0.20/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/CHANGES.txt?rev=758557&r1=758556&r2=758557&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.20/CHANGES.txt Thu Mar 26 08:40:32 2009
@@ -808,6 +808,9 @@
HADOOP-5066. Building binary tarball should not build docs/javadocs, copy
src, or run jdiff. (Giridharan Kesavan via cdouglas)
+ HADOOP-5459. Fix undetected CRC errors where intermediate output is closed
+ before it has been completely consumed. (cdouglas)
+
Release 0.19.2 - Unreleased
BUG FIXES
Modified:
hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IFileInputStream.java
URL:
http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IFileInputStream.java?rev=758557&r1=758556&r2=758557&view=diff
==============================================================================
---
hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IFileInputStream.java
(original)
+++
hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/IFileInputStream.java
Thu Mar 26 08:40:32 2009
@@ -18,6 +18,7 @@
package org.apache.hadoop.mapred;
+import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
@@ -36,7 +37,7 @@
private final long dataLength;
private DataChecksum sum;
private long currentOffset = 0;
- private byte b[];
+ private final byte b[] = new byte[1];
private byte csum[] = null;
private int checksumSize;
@@ -52,14 +53,24 @@
checksumSize = sum.getChecksumSize();
length = len;
dataLength = length - checksumSize;
- b = new byte[1];
}
/**
- * Close the input stream.
+ * Close the input stream. Note that we need to read to the end of the
+ * stream to validate the checksum.
*/
@Override
public void close() throws IOException {
+ if (currentOffset < dataLength) { // data is still unread: drain it before closing
+ byte[] t = new byte[Math.min((int) // scratch buffer for the bytes being discarded
+ (Integer.MAX_VALUE & (dataLength - currentOffset)), 32 * 1024)]; // low 31 bits of the remaining count, capped at 32 KiB; NOTE(review): the mask yields 0 when the remaining count is a multiple of 2^31
+ while (currentOffset < dataLength) {
+ int n = read(t, 0, t.length); // read() is outside this hunk; presumably advances currentOffset and validates the checksum at end of data - confirm
+ if (0 == n) { // no bytes returned: fail instead of retrying
+ throw new EOFException("Could not validate checksum");
+ }
+ }
+ } // NOTE(review): an exception thrown while draining skips in.close() below; confirm that is intended
in.close();
}
Added:
hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestIFileStreams.java
URL:
http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestIFileStreams.java?rev=758557&view=auto
==============================================================================
---
hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestIFileStreams.java
(added)
+++
hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestIFileStreams.java
Thu Mar 26 08:40:32 2009
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred;
+
+import org.apache.hadoop.fs.ChecksumException;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.DataOutputBuffer;
+
+import junit.framework.TestCase;
+
+public class TestIFileStreams extends TestCase { // round-trip and corruption tests for IFileOutputStream / IFileInputStream
+
+ public void testIFileStream() throws Exception { // clean round trip: every byte reads back and close() does not throw
+ final int DLEN = 100; // number of payload bytes written
+ DataOutputBuffer dob = new DataOutputBuffer(DLEN + 4); // payload plus 4 extra bytes, presumably the checksum trailer - confirm
+ IFileOutputStream ifos = new IFileOutputStream(dob);
+ for (int i = 0; i < DLEN; ++i) {
+ ifos.write(i);
+ }
+ ifos.close();
+ DataInputBuffer dib = new DataInputBuffer();
+ dib.reset(dob.getData(), DLEN + 4); // expose the first DLEN + 4 bytes of the written buffer as input
+ IFileInputStream ifis = new IFileInputStream(dib, 104); // 104 == DLEN + 4, the same length handed to dib.reset
+ for (int i = 0; i < DLEN; ++i) {
+ assertEquals(i, ifis.read());
+ }
+ ifis.close(); // any exception here propagates and fails the test
+ }
+
+ public void testBadIFileStream() throws Exception { // one corrupted payload byte must produce a ChecksumException
+ final int DLEN = 100;
+ DataOutputBuffer dob = new DataOutputBuffer(DLEN + 4);
+ IFileOutputStream ifos = new IFileOutputStream(dob);
+ for (int i = 0; i < DLEN; ++i) {
+ ifos.write(i);
+ }
+ ifos.close();
+ DataInputBuffer dib = new DataInputBuffer();
+ final byte[] b = dob.getData();
+ ++b[17]; // corrupt the byte at index 17 after the stream was written
+ dib.reset(b, DLEN + 4);
+ IFileInputStream ifis = new IFileInputStream(dib, 104);
+ int i = 0;
+ try {
+ while (i < DLEN) {
+ if (17 == i) {
+ assertEquals(18, ifis.read()); // the corrupted byte reads back as 18 instead of 17
+ } else {
+ assertEquals(i, ifis.read());
+ }
+ ++i;
+ }
+ ifis.close();
+ } catch (ChecksumException e) {
+ assertEquals("Unexpected bad checksum", DLEN - 1, i); // the exception must surface while reading the last payload byte
+ return;
+ }
+ fail("Did not detect bad data in checksum"); // reached only when no ChecksumException was thrown
+ }
+
+ public void testBadLength() throws Exception { // wrong declared length plus early close() must produce a ChecksumException
+ final int DLEN = 100;
+ DataOutputBuffer dob = new DataOutputBuffer(DLEN + 4);
+ IFileOutputStream ifos = new IFileOutputStream(dob);
+ for (int i = 0; i < DLEN; ++i) {
+ ifos.write(i);
+ }
+ ifos.close();
+ DataInputBuffer dib = new DataInputBuffer();
+ dib.reset(dob.getData(), DLEN + 4);
+ IFileInputStream ifis = new IFileInputStream(dib, 100); // declared length is 4 less than the DLEN + 4 used in the other tests
+ int i = 0;
+ try {
+ while (i < DLEN - 8) { // stop reading 8 bytes short of DLEN
+ assertEquals(i++, ifis.read());
+ }
+ ifis.close(); // expected to throw ChecksumException
+ } catch (ChecksumException e) {
+ assertEquals("Checksum before close", i, DLEN - 8); // all DLEN - 8 reads must finish before the exception; NOTE(review): JUnit order is (message, expected, actual), these look swapped
+ return;
+ }
+ fail("Did not detect bad data in checksum"); // reached only when no ChecksumException was thrown
+ }
+
+}