Author: eli
Date: Thu May 10 23:15:53 2012
New Revision: 1336945
URL: http://svn.apache.org/viewvc?rev=1336945&view=rev
Log:
HADOOP-8361. Avoid out-of-memory problems when deserializing strings.
Contributed by Colin Patrick McCabe
Modified:
hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java
Modified: hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1336945&r1=1336944&r2=1336945&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt
(original)
+++ hadoop/common/trunk/hadoop-common-project/hadoop-common/CHANGES.txt Thu May
10 23:15:53 2012
@@ -305,6 +305,9 @@ Release 2.0.0 - UNRELEASED
HADOOP-8340. SNAPSHOT build versions should compare as less than their eventual
final release. (todd)
+ HADOOP-8361. Avoid out-of-memory problems when deserializing strings.
+ (Colin Patrick McCabe via eli)
+
OPTIMIZATIONS
BUG FIXES
Modified:
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java?rev=1336945&r1=1336944&r2=1336945&view=diff
==============================================================================
---
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java
(original)
+++
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java
Thu May 10 23:15:53 2012
@@ -254,7 +254,7 @@ public class FileStatus implements Writa
// Writable
//////////////////////////////////////////////////
public void write(DataOutput out) throws IOException {
- Text.writeString(out, getPath().toString());
+ Text.writeString(out, getPath().toString(), Text.ONE_MEGABYTE);
out.writeLong(getLen());
out.writeBoolean(isDirectory());
out.writeShort(getReplication());
@@ -262,16 +262,16 @@ public class FileStatus implements Writa
out.writeLong(getModificationTime());
out.writeLong(getAccessTime());
getPermission().write(out);
- Text.writeString(out, getOwner());
- Text.writeString(out, getGroup());
+ Text.writeString(out, getOwner(), Text.ONE_MEGABYTE);
+ Text.writeString(out, getGroup(), Text.ONE_MEGABYTE);
out.writeBoolean(isSymlink());
if (isSymlink()) {
- Text.writeString(out, getSymlink().toString());
+ Text.writeString(out, getSymlink().toString(), Text.ONE_MEGABYTE);
}
}
public void readFields(DataInput in) throws IOException {
- String strPath = Text.readString(in);
+ String strPath = Text.readString(in, Text.ONE_MEGABYTE);
this.path = new Path(strPath);
this.length = in.readLong();
this.isdir = in.readBoolean();
@@ -280,10 +280,10 @@ public class FileStatus implements Writa
modification_time = in.readLong();
access_time = in.readLong();
permission.readFields(in);
- owner = Text.readString(in);
- group = Text.readString(in);
+ owner = Text.readString(in, Text.ONE_MEGABYTE);
+ group = Text.readString(in, Text.ONE_MEGABYTE);
if (in.readBoolean()) {
- this.symlink = new Path(Text.readString(in));
+ this.symlink = new Path(Text.readString(in, Text.ONE_MEGABYTE));
} else {
this.symlink = null;
}
Modified:
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java?rev=1336945&r1=1336944&r2=1336945&view=diff
==============================================================================
---
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java
(original)
+++
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java
Thu May 10 23:15:53 2012
@@ -84,8 +84,8 @@ public class PermissionStatus implements
/** {@inheritDoc} */
public void readFields(DataInput in) throws IOException {
- username = Text.readString(in);
- groupname = Text.readString(in);
+ username = Text.readString(in, Text.ONE_MEGABYTE);
+ groupname = Text.readString(in, Text.ONE_MEGABYTE);
permission = FsPermission.read(in);
}
@@ -110,8 +110,8 @@ public class PermissionStatus implements
String username,
String groupname,
FsPermission permission) throws IOException {
- Text.writeString(out, username);
- Text.writeString(out, groupname);
+ Text.writeString(out, username, Text.ONE_MEGABYTE);
+ Text.writeString(out, groupname, Text.ONE_MEGABYTE);
permission.write(out);
}
Modified:
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java?rev=1336945&r1=1336944&r2=1336945&view=diff
==============================================================================
---
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java
(original)
+++
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java
Thu May 10 23:15:53 2012
@@ -53,6 +53,8 @@ import org.apache.hadoop.classification.
public class Text extends BinaryComparable
implements WritableComparable<BinaryComparable> {
+ static final int SHORT_STRING_MAX = 1024 * 1024;
+
private static ThreadLocal<CharsetEncoder> ENCODER_FACTORY =
new ThreadLocal<CharsetEncoder>() {
protected CharsetEncoder initialValue() {
@@ -412,6 +414,8 @@ public class Text extends BinaryComparab
return bytes;
}
+ static final public int ONE_MEGABYTE = 1024 * 1024;
+
/** Read a UTF8 encoded string from in
*/
public static String readString(DataInput in) throws IOException {
@@ -420,7 +424,17 @@ public class Text extends BinaryComparab
in.readFully(bytes, 0, length);
return decode(bytes);
}
-
+
+ /** Read a UTF8 encoded string with a maximum size
+ */
+ public static String readString(DataInput in, int maxLength)
+ throws IOException {
+ int length = WritableUtils.readVIntInRange(in, 0, maxLength - 1);
+ byte [] bytes = new byte[length];
+ in.readFully(bytes, 0, length);
+ return decode(bytes);
+ }
+
/** Write a UTF8 encoded string to out
*/
public static int writeString(DataOutput out, String s) throws IOException {
@@ -431,6 +445,22 @@ public class Text extends BinaryComparab
return length;
}
+ /** Write a UTF8 encoded string with a maximum size to out
+ */
+ public static int writeString(DataOutput out, String s, int maxLength)
+ throws IOException {
+ ByteBuffer bytes = encode(s);
+ int length = bytes.limit();
+ if (length >= maxLength) {
+ throw new IOException("string was too long to write! Expected " +
+ "less than " + maxLength + " bytes, but got " +
+ length + " bytes.");
+ }
+ WritableUtils.writeVInt(out, length);
+ out.write(bytes.array(), 0, length);
+ return length;
+ }
+
////// states for validateUTF8
private static final int LEAD_BYTE = 0;
Modified:
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java
URL:
http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java?rev=1336945&r1=1336944&r2=1336945&view=diff
==============================================================================
---
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java
(original)
+++
hadoop/common/trunk/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java
Thu May 10 23:15:53 2012
@@ -20,6 +20,7 @@ package org.apache.hadoop.io;
import junit.framework.TestCase;
+import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.util.Random;
@@ -107,7 +108,6 @@ public class TestText extends TestCase {
}
}
-
public void testIO() throws Exception {
DataOutputBuffer out = new DataOutputBuffer();
DataInputBuffer in = new DataInputBuffer();
@@ -136,6 +136,40 @@ public class TestText extends TestCase {
assertTrue(before.equals(after2));
}
}
+
+ public void doTestLimitedIO(String str, int strLen) throws IOException {
+ DataOutputBuffer out = new DataOutputBuffer();
+ DataInputBuffer in = new DataInputBuffer();
+
+ out.reset();
+ try {
+ Text.writeString(out, str, strLen);
+ fail("expected writeString to fail when told to write a string " +
+ "that was too long! The string was '" + str + "'");
+ } catch (IOException e) {
+ }
+ Text.writeString(out, str, strLen + 1);
+
+ // test that it reads correctly
+ in.reset(out.getData(), out.getLength());
+ in.mark(strLen);
+ String after;
+ try {
+ after = Text.readString(in, strLen);
+ fail("expected readString to fail when told to read a string " +
+ "that was too long! The string was '" + str + "'");
+ } catch (IOException e) {
+ }
+ in.reset();
+ after = Text.readString(in, strLen + 1);
+ assertTrue(str.equals(after));
+ }
+
+ public void testLimitedIO() throws Exception {
+ doTestLimitedIO("abcd", 4);
+ doTestLimitedIO("", 0);
+ doTestLimitedIO("1", 1);
+ }
public void testCompare() throws Exception {
DataOutputBuffer out1 = new DataOutputBuffer();