Author: rohini
Date: Wed Mar 23 20:43:27 2016
New Revision: 1736379
URL: http://svn.apache.org/viewvc?rev=1736379&view=rev
Log:
PIG-4847: POPartialAgg processing and spill improvements (rohini)
Modified:
pig/trunk/conf/pig.properties
pig/trunk/src/org/apache/pig/PigConfiguration.java
pig/trunk/src/org/apache/pig/impl/util/SpillableMemoryManager.java
Modified: pig/trunk/conf/pig.properties
URL:
http://svn.apache.org/viewvc/pig/trunk/conf/pig.properties?rev=1736379&r1=1736378&r2=1736379&view=diff
==============================================================================
--- pig/trunk/conf/pig.properties (original)
+++ pig/trunk/conf/pig.properties Wed Mar 23 20:43:27 2016
@@ -193,18 +193,16 @@
#
# pig.spill.gc.activation.size=40000000
-# For heaps of 1GB and less, SpillableMemoryManager spill will be triggered
-# if the fraction of biggest heap exceeds the usage threshold. Default is 0.7
-# pig.spill.memory.usage.threshold.fraction=0.7
+# Spill will be triggered if the used fraction of the Old Generation heap exceeds the
usage or collection threshold.
+# For bigger heap sizes, using a fixed size for collection and usage
thresholds will
+# utilize memory better than a percentage of the heap.
+# The usage threshold is therefore calculated as
+# Max(HeapSize * pig.spill.memory.usage.threshold.fraction, HeapSize -
pig.spill.unused.memory.threshold.size)
+# and the collection threshold is calculated as
+# Max(HeapSize * pig.spill.collection.threshold.fraction, HeapSize -
pig.spill.unused.memory.threshold.size)
-# For heaps of 1GB and less, SpillableMemoryManager spill will be triggered
-# if the fraction of big heap exceeds the collection threshold. Default is 0.7
+# pig.spill.memory.usage.threshold.fraction=0.7
# pig.spill.collection.threshold.fraction=0.7
-
-# For heaps bigger than 1GB, we want to use a fixed size for collection and
-# usage thresholds to better utilize memory. SpillableMemoryManager spill will
be triggered
-# if the unused heap size falls below this threshold.
-# Default is 350 MB
# pig.spill.unused.memory.threshold.size=367001600
# Maximum amount of data to replicate using the distributed cache when doing
Modified: pig/trunk/src/org/apache/pig/PigConfiguration.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/PigConfiguration.java?rev=1736379&r1=1736378&r2=1736379&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/PigConfiguration.java (original)
+++ pig/trunk/src/org/apache/pig/PigConfiguration.java Wed Mar 23 20:43:27 2016
@@ -390,19 +390,24 @@ public class PigConfiguration {
// SpillableMemoryManager settings
/**
- * SpillableMemoryManager spill will be triggered if the fraction of
biggest heap exceeds the usage threshold
+ * Spill will be triggered if the fraction of biggest heap exceeds the
usage threshold.
+ * If {@link PigConfiguration#PIG_SPILL_UNUSED_MEMORY_THRESHOLD_SIZE} is
greater than zero, then the usage threshold is calculated as
+ * Max(HeapSize * PIG_SPILL_MEMORY_USAGE_THRESHOLD_FRACTION, HeapSize -
PIG_SPILL_UNUSED_MEMORY_THRESHOLD_SIZE)
* Default is 0.7
*/
public static final String PIG_SPILL_MEMORY_USAGE_THRESHOLD_FRACTION =
"pig.spill.memory.usage.threshold.fraction";
/**
- * SpillableMemoryManager spill will be triggered if the fraction of
biggest heap exceeds the collection threshold
+ * Spill will be triggered if the fraction of biggest heap exceeds the
collection threshold.
+ * If {@link PigConfiguration#PIG_SPILL_UNUSED_MEMORY_THRESHOLD_SIZE} is
greater than zero, then the collection threshold is calculated as
+ * Max(HeapSize * PIG_SPILL_COLLECTION_THRESHOLD_FRACTION, HeapSize -
PIG_SPILL_UNUSED_MEMORY_THRESHOLD_SIZE)
* Default is 0.7
*/
public static final String PIG_SPILL_COLLECTION_THRESHOLD_FRACTION =
"pig.spill.collection.threshold.fraction";
/**
- * SpillableMemoryManager spill will be triggered when unused memory falls
below the threshold.
+ * Spill will be triggered when unused memory falls below the threshold.
+ * Default is 350MB
*/
public static final String PIG_SPILL_UNUSED_MEMORY_THRESHOLD_SIZE =
"pig.spill.unused.memory.threshold.size";
Modified: pig/trunk/src/org/apache/pig/impl/util/SpillableMemoryManager.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/util/SpillableMemoryManager.java?rev=1736379&r1=1736378&r2=1736379&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/util/SpillableMemoryManager.java
(original)
+++ pig/trunk/src/org/apache/pig/impl/util/SpillableMemoryManager.java Wed Mar
23 20:43:27 2016
@@ -146,12 +146,13 @@ public class SpillableMemoryManager impl
long tenuredHeapSize = tenuredHeap.getUsage().getMax();
memoryThresholdSize = (long)(tenuredHeapSize *
memoryThresholdFraction);
collectionThresholdSize = (long)(tenuredHeapSize *
collectionMemoryThresholdFraction);
- if (tenuredHeapSize > ONE_GB) {
- // If heap is 1G which is most default we will be spilling around
~700MB with 300MB still unused with default 0.7 threshold
+ if (unusedMemoryThreshold > 0) {
+ // For a 1G heap we will be spilling around ~700MB with 300MB
still unused with default 0.7 threshold
// For bigger heaps, we still want to spill when there is 300MB
unused (plus another 50MB for buffer) and not at 70%.
// For example, for a 4G heap we want to start spilling at 3.65GB and not at
2.8GB (70%) for better use of memory
- memoryThresholdSize = tenuredHeapSize - unusedMemoryThreshold;
- collectionThresholdSize = tenuredHeapSize - unusedMemoryThreshold;
+ long unusedThreshold = tenuredHeapSize - unusedMemoryThreshold;
+ memoryThresholdSize = Math.max(memoryThresholdSize,
unusedThreshold);
+ collectionThresholdSize = Math.max(collectionThresholdSize,
unusedThreshold);
}
// we want to set both collection and usage threshold alerts to be