Author: cutting
Date: Wed Mar 25 11:44:09 2009
New Revision: 758232
URL: http://svn.apache.org/viewvc?rev=758232&view=rev
Log:
HADOOP-5491. In contrib/index, better control memory usage. Contributed by
Ning Li.
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateCombiner.java
hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateConfiguration.java
hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IntermediateForm.java
hadoop/core/trunk/src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/TestIndexUpdater.java
Modified: hadoop/core/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=758232&r1=758231&r2=758232&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Wed Mar 25 11:44:09 2009
@@ -181,6 +181,9 @@
HADOOP-4788. Set Fair scheduler to assign both a map and a reduce on each
heartbeat by default. (matei)
+ HADOOP-5491. In contrib/index, better control memory usage.
+ (Ning Li via cutting)
+
OPTIMIZATIONS
BUG FIXES
Modified:
hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateCombiner.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateCombiner.java?rev=758232&r1=758231&r2=758232&view=diff
==============================================================================
---
hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateCombiner.java
(original)
+++
hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateCombiner.java
Wed Mar 25 11:44:09 2009
@@ -40,6 +40,8 @@
static final Log LOG = LogFactory.getLog(IndexUpdateCombiner.class);
IndexUpdateConfiguration iconf;
+ long maxSizeInBytes;
+ long nearMaxSizeInBytes;
/* (non-Javadoc)
* @see org.apache.hadoop.mapred.Reducer#reduce(java.lang.Object,
java.util.Iterator, org.apache.hadoop.mapred.OutputCollector,
org.apache.hadoop.mapred.Reporter)
@@ -48,17 +50,47 @@
OutputCollector<Shard, IntermediateForm> output, Reporter reporter)
throws IOException {
- LOG.info("Construct a form writer for " + key);
- IntermediateForm form = new IntermediateForm();
- form.configure(iconf);
+ String message = key.toString();
+ IntermediateForm form = null;
+
while (values.hasNext()) {
IntermediateForm singleDocForm = values.next();
- form.process(singleDocForm);
+ long formSize = form == null ? 0 : form.totalSizeInBytes();
+ long singleDocFormSize = singleDocForm.totalSizeInBytes();
+
+ if (form != null && formSize + singleDocFormSize > maxSizeInBytes) {
+ closeForm(form, message);
+ output.collect(key, form);
+ form = null;
+ }
+
+ if (form == null && singleDocFormSize >= nearMaxSizeInBytes) {
+ output.collect(key, singleDocForm);
+ } else {
+ if (form == null) {
+ form = createForm(message);
+ }
+ form.process(singleDocForm);
+ }
}
- form.closeWriter();
- LOG.info("Closed the form writer for " + key + ", form = " + form);
- output.collect(key, form);
+ if (form != null) {
+ closeForm(form, message);
+ output.collect(key, form);
+ }
+ }
+
+ private IntermediateForm createForm(String message) throws IOException {
+ LOG.info("Construct a form writer for " + message);
+ IntermediateForm form = new IntermediateForm();
+ form.configure(iconf);
+ return form;
+ }
+
+ private void closeForm(IntermediateForm form, String message)
+ throws IOException {
+ form.closeWriter();
+ LOG.info("Closed the form writer for " + message + ", form = " + form);
}
/* (non-Javadoc)
@@ -66,6 +98,8 @@
*/
public void configure(JobConf job) {
iconf = new IndexUpdateConfiguration(job);
+ maxSizeInBytes = iconf.getMaxRAMSizeInBytes();
+ nearMaxSizeInBytes = maxSizeInBytes - (maxSizeInBytes >>> 3); // 7/8 of max
}
/* (non-Javadoc)
Modified:
hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateConfiguration.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateConfiguration.java?rev=758232&r1=758231&r2=758232&view=diff
==============================================================================
---
hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateConfiguration.java
(original)
+++
hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IndexUpdateConfiguration.java
Wed Mar 25 11:44:09 2009
@@ -235,4 +235,20 @@
conf.setBoolean("sea.use.compound.file", useCompoundFile);
}
+ /**
+ * Get the max ram index size in bytes. The default is 50M.
+ * @return the max ram index size in bytes
+ */
+ public long getMaxRAMSizeInBytes() {
+ return conf.getLong("sea.max.ramsize.bytes", 50L << 20);
+ }
+
+ /**
+ * Set the max ram index size in bytes.
+ * @param b the max ram index size in bytes
+ */
+ public void setMaxRAMSizeInBytes(long b) {
+ conf.setLong("sea.max.ramsize.bytes", b);
+ }
+
}
Modified:
hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IntermediateForm.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IntermediateForm.java?rev=758232&r1=758231&r2=758232&view=diff
==============================================================================
---
hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IntermediateForm.java
(original)
+++
hadoop/core/trunk/src/contrib/index/src/java/org/apache/hadoop/contrib/index/mapred/IntermediateForm.java
Wed Mar 25 11:44:09 2009
@@ -151,6 +151,19 @@
}
}
+ /**
+ * The total size of files in the directory and ram used by the index writer.
+ * It does not include memory used by the delete list.
+ * @return the total size in bytes
+ */
+ public long totalSizeInBytes() throws IOException {
+ long size = dir.sizeInBytes();
+ if (writer != null) {
+ size += writer.ramSizeInBytes();
+ }
+ return size;
+ }
+
/* (non-Javadoc)
* @see java.lang.Object#toString()
*/
Modified:
hadoop/core/trunk/src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/TestIndexUpdater.java
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/TestIndexUpdater.java?rev=758232&r1=758231&r2=758232&view=diff
==============================================================================
---
hadoop/core/trunk/src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/TestIndexUpdater.java
(original)
+++
hadoop/core/trunk/src/contrib/index/src/test/org/apache/hadoop/contrib/index/mapred/TestIndexUpdater.java
Wed Mar 25 11:44:09 2009
@@ -151,6 +151,7 @@
iconf.setIndexMaxFieldLength(2);
iconf.setIndexUseCompoundFile(true);
iconf.setIndexMaxNumSegments(1);
+ iconf.setMaxRAMSizeInBytes(20480);
long versionNumber = -1;
long generation = -1;