[ 
https://issues.apache.org/jira/browse/MAHOUT-235?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12795704#action_12795704
 ] 

Ted Dunning edited comment on MAHOUT-235 at 12/31/09 11:24 PM:
---------------------------------------------------------------

I just looked at this.  Didn't find the problem, but did make some comments in 
the code and clarified it a bit.  My guess is that the main partition loop 
leaves b and c in the wrong order.  If i get another chance to look at this, I 
will put in some assertions that match what I think the loop invariants should 
be.  That will probably make things obvious.

Here is my commented version fo the key sort routine:
{noformat}
  private static void quickSort0(int start, int end, IntComparator comp, 
Swapper swap) {
    int length = end - start;
    if (length < 7) {
      insertionSort(start, end, comp, swap);
      return;
    }
    int middle = (start + end) / 2;
    if (length > 7) {
      int bottom = start;
      int top = end - 1;
      if (length > 40) {
        // for lots of data, bottom, middle and top are medians near the 
beginning, middle or end of the data
        int skosh = length / 8;
        bottom = med3(bottom, bottom + skosh, bottom + (2 * skosh), comp);
        middle = med3(middle - skosh, middle, middle + skosh, comp);
        top = med3(top - (2 * skosh), top - skosh, top, comp);
      }
      middle = med3(bottom, middle, top, comp);
    }

    int partitionIndex = middle; // an index, not a value.

    // regions from a to b and from c to d are what we will recursively sort
    int a, b, c, d;
    a = b = start;
    c = d = end - 1;
    while (b <= c) {
      // copy all values equal to the partition value to before a..b.  In the 
process, advance b
      // as long as values less than the partition or equal are found, also 
stop when a..b collides with c..d
      int comparison = comp.compare(b, partitionIndex);
      while (b <= c && comparison <= 0) {
        if (comparison == 0) {
          swap.swap(a, b);
          a++;
        }
        b++;
        comparison = comp.compare(b, partitionIndex);
      }
      // at this point [start..a) has partition values, [a..b) has values < 
partition
      // also, either b>c or v[b] > partition value

      comparison = comp.compare(c, partitionIndex);
      while (c >= b && comparison >= 0) {
        if (comparison == 0) {
          swap.swap(c, d);
          d--;
        }
        c--;
        comparison = comp.compare(c, partitionIndex);
      }
      // now we also know that [d..end] contains partition values,
      // [c..d) contains values > partition value
      // also, either b>c or (v[b] > partition OR v[c] < partition)

      if (b <= c) {
        // v[b] > partition OR v[c] < partition
        // swapping will let us continue to grow the two regions
        swap.swap(b, c);
        b++;
        c--;
      }
    }
    // now we know
    // b = c+1
    // [start..a) and [d..end) contain partition value
    // all of [a..b) are less than partition
    // all of [c..d) are greater than partition

    // shift [a..b) to beginning
    length = Math.min(a - start, b - a);
    int l = start;
    int h = b - length;
    while (length-- > 0) {
      swap.swap(l, h);
      l++;
      h++;
    }

    // shift [c..d) to end
    length = Math.min(d - c, end - 1 - d);
    l = b;
    h = end - length;
     while (length-- > 0) {
      swap.swap(l, h);
      l++;
      h++;
    }

    // recurse left and right
    length = b - a;
    if (length > 0) {
      quickSort0(start, start + length, comp, swap);
    }

    length = d - c;
    if (length > 0) {
      quickSort0(end - length, end, comp, swap);
    }
  }

  /**
   * In-place insertion sort that is fast for pre-sorted data.
   *
   * @param start Where to start sorting (inclusive)
   * @param end   Where to stop (exclusive)
   * @param comp  Sort order.
   * @param swap  How to swap items.
   */
  private static void insertionSort(int start, int end, IntComparator comp, 
Swapper swap) {
    for (int i = start + 1; i < end; i++) {
      for (int j = i; j > start && comp.compare(j - 1, j) > 0; j--) {
        swap.swap(j - 1, j);
      }
    }
  }
{noformat}


      was (Author: tdunning):
    
I just looked at this.  Didn't find the problem, but did make some comments in 
the code and clarified it a bit.  My guess is that the main partition loop 
leaves b and c in the wrong order.  If i get another chance to look at this, I 
will put in some assertions that match what I think the loop invariants should 
be.  That will probably make things obvious.

Here is my commented version fo the key sort routine:
{unformatted}
  private static void quickSort0(int start, int end, IntComparator comp, 
Swapper swap) {
    int length = end - start;
    if (length < 7) {
      insertionSort(start, end, comp, swap);
      return;
    }
    int middle = (start + end) / 2;
    if (length > 7) {
      int bottom = start;
      int top = end - 1;
      if (length > 40) {
        // for lots of data, bottom, middle and top are medians near the 
beginning, middle or end of the data
        int skosh = length / 8;
        bottom = med3(bottom, bottom + skosh, bottom + (2 * skosh), comp);
        middle = med3(middle - skosh, middle, middle + skosh, comp);
        top = med3(top - (2 * skosh), top - skosh, top, comp);
      }
      middle = med3(bottom, middle, top, comp);
    }

    int partitionIndex = middle; // an index, not a value.

    // regions from a to b and from c to d are what we will recursively sort
    int a, b, c, d;
    a = b = start;
    c = d = end - 1;
    while (b <= c) {
      // copy all values equal to the partition value to before a..b.  In the 
process, advance b
      // as long as values less than the partition or equal are found, also 
stop when a..b collides with c..d
      int comparison = comp.compare(b, partitionIndex);
      while (b <= c && comparison <= 0) {
        if (comparison == 0) {
          swap.swap(a, b);
          a++;
        }
        b++;
        comparison = comp.compare(b, partitionIndex);
      }
      // at this point [start..a) has partition values, [a..b) has values < 
partition
      // also, either b>c or v[b] > partition value

      comparison = comp.compare(c, partitionIndex);
      while (c >= b && comparison >= 0) {
        if (comparison == 0) {
          swap.swap(c, d);
          d--;
        }
        c--;
        comparison = comp.compare(c, partitionIndex);
      }
      // now we also know that [d..end] contains partition values,
      // [c..d) contains values > partition value
      // also, either b>c or (v[b] > partition OR v[c] < partition)

      if (b <= c) {
        // v[b] > partition OR v[c] < partition
        // swapping will let us continue to grow the two regions
        swap.swap(b, c);
        b++;
        c--;
      }
    }
    // now we know
    // b = c+1
    // [start..a) and [d..end) contain partition value
    // all of [a..b) are less than partition
    // all of [c..d) are greater than partition

    // shift [a..b) to beginning
    length = Math.min(a - start, b - a);
    int l = start;
    int h = b - length;
    while (length-- > 0) {
      swap.swap(l, h);
      l++;
      h++;
    }

    // shift [c..d) to end
    length = Math.min(d - c, end - 1 - d);
    l = b;
    h = end - length;
     while (length-- > 0) {
      swap.swap(l, h);
      l++;
      h++;
    }

    // recurse left and right
    length = b - a;
    if (length > 0) {
      quickSort0(start, start + length, comp, swap);
    }

    length = d - c;
    if (length > 0) {
      quickSort0(end - length, end, comp, swap);
    }
  }

  /**
   * In-place insertion sort that is fast for pre-sorted data.
   *
   * @param start Where to start sorting (inclusive)
   * @param end   Where to stop (exclusive)
   * @param comp  Sort order.
   * @param swap  How to swap items.
   */
  private static void insertionSort(int start, int end, IntComparator comp, 
Swapper swap) {
    for (int i = start + 1; i < end; i++) {
      for (int j = i; j > start && comp.compare(j - 1, j) > 0; j--) {
        swap.swap(j - 1, j);
      }
    }
  }
{unformatted}

  
> GenericSorting.java also needs replacing
> ----------------------------------------
>
>                 Key: MAHOUT-235
>                 URL: https://issues.apache.org/jira/browse/MAHOUT-235
>             Project: Mahout
>          Issue Type: Bug
>          Components: Math
>    Affects Versions: 0.3
>            Reporter: Benson Margulies
>         Attachments: oops.diff
>
>
> It looks like GenericSorting.java is more code of the same dubious parentage 
> that needs the same treatment.

-- 
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.

Reply via email to