Because we want to spill the big bags first since they are the ones that are
going to make a difference. If we really are short on memory, there are
probably a few big bags that we need to put on disk to free up the memory.
ben
On Monday 24 March 2008 07:05:54 pi song wrote:
> Can anyone tell me why, in the Memory Manager, we sort the resource list
> by size before we spill?
>
> Cheers,
> Pi
>
> ---Code----
> Collections.sort(spillables, new
> Comparator<WeakReference<Spillable>>() {
>
> /**
> * We don't lock anything, so this sort may not be stable if a
> * WeakReference suddenly becomes null, but it will be close enough.
> */
> public int compare(WeakReference<Spillable> o1Ref,
> WeakReference<Spillable> o2Ref) {
> Spillable o1 = o1Ref.get();
> Spillable o2 = o2Ref.get();
> if (o1 == null && o2 == null) {
> return 0;
> }
> if (o1 == null) {
> return -1;
> }
> if (o2 == null) {
> return 1;
> }
> long o1Size = o1.getMemorySize();
> long o2Size = o2.getMemorySize();
>
> if (o1Size == o2Size) {
> return 0;
> }
> if (o1Size < o2Size) {
> return -1;
> }
> return 1;
> }
> });
> long estimatedFreed = 0;
> int count = 0;
> for (i = spillables.iterator(); i.hasNext();) {
> count++ ;
> Spillable s = i.next().get();
> // Still need to check for null here, even after we removed
> // above, because the reference may have gone bad on us
> // since the last check.
> if (s == null) {
> i.remove();
> continue;
> }
> long toBeFreed = s.getMemorySize();
> /////pilog.info("Spilling element# " + count + " " +
> /////    s.getClass().getSimpleName() + "#" +
> /////    ((org.apache.pig.data.DataBag)s).bagId + " Size=" +toBeFreed) ;
> s.spill();
> estimatedFreed += toBeFreed;
> if (estimatedFreed > toFree) {
> /////pilog.info("Met the target estimatedFreed:" +
> /////    estimatedFreed + " > toFree:" + toFree) ;
> break;
> }
> }