Revision: 8456
Author:   [email protected]
Date:     Wed Jun 29 01:35:10 2011
Log: Improvement to SmiLexicalCompare. Landing http://codereview.chromium.org/7261008 for Stephen Adams
http://code.google.com/p/v8/source/detail?r=8456

Added:
 /branches/bleeding_edge/src/misc-intrinsics.h
Modified:
 /branches/bleeding_edge/src/runtime.cc
 /branches/bleeding_edge/src/runtime.h
 /branches/bleeding_edge/test/mjsunit/array-sort.js

=======================================
--- /dev/null
+++ /branches/bleeding_edge/src/misc-intrinsics.h       Wed Jun 29 01:35:10 2011
@@ -0,0 +1,89 @@
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef V8_MISC_INTRINSICS_H_
+#define V8_MISC_INTRINSICS_H_
+
+#include "../include/v8.h"
+#include "globals.h"
+
+namespace v8 {
+namespace internal {
+
+// Returns the index of the leading 1 bit, counting the least significant bit at +// index 0. (1 << IntegerLog2(x)) is a mask for the most significant bit of x.
+// Result is undefined if input is zero.
+int IntegerLog2(uint32_t value);
+
+#if defined(__GNUC__)
+
+inline int IntegerLog2(uint32_t value) {
+  return 31 - __builtin_clz(value);
+}
+
+#elif defined(_MSC_VER)
+
+#pragma intrinsic(_BitScanReverse)
+
+inline int IntegerLog2(uint32_t value) {
+ unsigned long result; // NOLINT: MSVC intrinsic demands this type.
+  _BitScanReverse(&result, value);
+  return result;
+}
+
+#else
+
+// Default version using regular operations. Code taken from:
+// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog
+inline int IntegerLog2(uint32_t value) {
+  int result, shift;
+
+  shift = (value > 0xFFFF) << 4;
+  value >>= shift;
+  result = shift;
+
+  shift = (value > 0xFF) << 3;
+  value >>= shift;
+  result |= shift;
+
+  shift = (value > 0xF) << 2;
+  value >>= shift;
+  result |= shift;
+
+  shift = (value > 0x3) << 1;
+  value >>= shift;
+  result |= shift;
+
+  result |= (value >> 1);
+
+  return result;
+}
+#endif
+
+} }  // namespace v8::internal
+
+#endif  // V8_MISC_INTRINSICS_H_
=======================================
--- /branches/bleeding_edge/src/runtime.cc      Wed Jun 29 01:32:12 2011
+++ /branches/bleeding_edge/src/runtime.cc      Wed Jun 29 01:35:10 2011
@@ -45,6 +45,7 @@
 #include "json-parser.h"
 #include "liveedit.h"
 #include "liveobjectlist-inl.h"
+#include "misc-intrinsics.h"
 #include "parser.h"
 #include "platform.h"
 #include "runtime-profiler.h"
@@ -6642,50 +6643,69 @@
   // If the integers are equal so are the string representations.
   if (x_value == y_value) return Smi::FromInt(EQUAL);

-  // If one of the integers are zero the normal integer order is the
+  // If one of the integers is zero the normal integer order is the
   // same as the lexicographic order of the string representations.
-  if (x_value == 0 || y_value == 0) return Smi::FromInt(x_value - y_value);
+  if (x_value == 0 || y_value == 0)
+    return Smi::FromInt(x_value < y_value ? LESS : GREATER);

   // If only one of the integers is negative the negative number is
   // smallest because the char code of '-' is less than the char code
   // of any digit.  Otherwise, we make both values positive.
+
+  // Use unsigned values otherwise the logic is incorrect for -MIN_INT on
+  // architectures using 32-bit Smis.
+  uint32_t x_scaled = x_value;
+  uint32_t y_scaled = y_value;
   if (x_value < 0 || y_value < 0) {
     if (y_value >= 0) return Smi::FromInt(LESS);
     if (x_value >= 0) return Smi::FromInt(GREATER);
-    x_value = -x_value;
-    y_value = -y_value;
+    x_scaled = -x_value;
+    y_scaled = -y_value;
   }

-  // Arrays for the individual characters of the two Smis.  Smis are
-  // 31 bit integers and 10 decimal digits are therefore enough.
-  // TODO(isolates): maybe we should simply allocate 20 bytes on the stack.
- int* x_elms = isolate->runtime_state()->smi_lexicographic_compare_x_elms(); - int* y_elms = isolate->runtime_state()->smi_lexicographic_compare_y_elms();
-
-
-  // Convert the integers to arrays of their decimal digits.
-  int x_index = 0;
-  int y_index = 0;
-  while (x_value > 0) {
-    x_elms[x_index++] = x_value % 10;
-    x_value /= 10;
-  }
-  while (y_value > 0) {
-    y_elms[y_index++] = y_value % 10;
-    y_value /= 10;
-  }
-
-  // Loop through the arrays of decimal digits finding the first place
-  // where they differ.
-  while (--x_index >= 0 && --y_index >= 0) {
-    int diff = x_elms[x_index] - y_elms[y_index];
-    if (diff != 0) return Smi::FromInt(diff);
+  static const uint32_t kPowersOf10[] = {
+    1, 10, 100, 1000, 10*1000, 100*1000,
+    1000*1000, 10*1000*1000, 100*1000*1000,
+    1000*1000*1000
+  };
+
+  // If the integers have the same number of decimal digits they can be
+  // compared directly as the numeric order is the same as the
+  // lexicographic order.  If one integer has fewer digits, it is scaled
+  // by some power of 10 to have the same number of digits as the longer
+  // integer.  If the scaled integers are equal it means the shorter
+  // integer comes first in the lexicographic order.
+
+  // From http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+  int x_log2 = IntegerLog2(x_scaled);
+  int x_log10 = ((x_log2 + 1) * 1233) >> 12;
+  x_log10 -= x_scaled < kPowersOf10[x_log10];
+
+  int y_log2 = IntegerLog2(y_scaled);
+  int y_log10 = ((y_log2 + 1) * 1233) >> 12;
+  y_log10 -= y_scaled < kPowersOf10[y_log10];
+
+  int tie = EQUAL;
+
+  if (x_log10 < y_log10) {
+    // X has fewer digits.  We would like to simply scale up X but that
+    // might overflow, e.g when comparing 9 with 1_000_000_000, 9 would
+    // be scaled up to 9_000_000_000. So we scale up by the next
+    // smallest power and scale down Y to drop one digit. It is OK to
+    // drop one digit from the longer integer since the final digit is
+    // past the length of the shorter integer.
+    x_scaled *= kPowersOf10[y_log10 - x_log10 - 1];
+    y_scaled /= 10;
+    tie = LESS;
+  } else if (y_log10 < x_log10) {
+    y_scaled *= kPowersOf10[x_log10 - y_log10 - 1];
+    x_scaled /= 10;
+    tie = GREATER;
   }

-  // If one array is a suffix of the other array, the longest array is
-  // the representation of the largest of the Smis in the
-  // lexicographic ordering.
-  return Smi::FromInt(x_index - y_index);
+  if (x_scaled < y_scaled) return Smi::FromInt(LESS);
+  if (x_scaled > y_scaled) return Smi::FromInt(GREATER);
+  return Smi::FromInt(tie);
 }


=======================================
--- /branches/bleeding_edge/src/runtime.h       Wed Jun 29 00:41:42 2011
+++ /branches/bleeding_edge/src/runtime.h       Wed Jun 29 01:35:10 2011
@@ -519,12 +519,6 @@
   StringInputBuffer* string_locale_compare_buf2() {
     return &string_locale_compare_buf2_;
   }
-  int* smi_lexicographic_compare_x_elms() {
-    return smi_lexicographic_compare_x_elms_;
-  }
-  int* smi_lexicographic_compare_y_elms() {
-    return smi_lexicographic_compare_y_elms_;
-  }

  private:
   RuntimeState() {}
@@ -536,8 +530,6 @@
   StringInputBuffer string_input_buffer_compare_bufy_;
   StringInputBuffer string_locale_compare_buf1_;
   StringInputBuffer string_locale_compare_buf2_;
-  int smi_lexicographic_compare_x_elms_[10];
-  int smi_lexicographic_compare_y_elms_[10];

   friend class Isolate;
   friend class Runtime;
=======================================
--- /branches/bleeding_edge/test/mjsunit/array-sort.js Mon Dec 20 06:57:51 2010 +++ /branches/bleeding_edge/test/mjsunit/array-sort.js Wed Jun 29 01:35:10 2011
@@ -70,30 +70,59 @@
   a.sort();
assertArrayEquals([-1000000000, -10000000000, -1000000001, 1000000000, 10000000000, 1000000001], a);

-
-  for (var xb = 1; xb <= 1000 * 1000 * 1000; xb *= 10) {
+  // Other cases are tested implicitly in TestSmiLexicographicCompare.
+}
+
+TestNumberSort();
+
+function TestSmiLexicographicCompare() {
+
+  assertFalse(%_IsSmi(2147483648), 'Update test for >32 bit Smi');
+
+  // Collect a list of interesting Smis.
+  var seen = {};
+  var smis = [];
+  function add(x) {
+    if (x | 0 == x) {
+      x = x | 0;  // Canonicalizes to Smi if 32-bit signed and fits in Smi.
+    }
+    if (%_IsSmi(x) && !seen[x]) {
+      seen[x] = 1;
+      smis.push(x);
+    }
+  }
+  function addSigned(x) {
+    add(x);
+    add(-x);
+  }
+
+  var BIGGER_THAN_ANY_SMI = 10 * 1000 * 1000 * 1000;
+  for (var xb = 1; xb <= BIGGER_THAN_ANY_SMI; xb *= 10) {
     for (var xf = 0; xf <= 9; xf++) {
       for (var xo = -1; xo <= 1; xo++) {
-        for (var yb = 1; yb <= 1000 * 1000 * 1000; yb *= 10) {
-          for (var yf = 0; yf <= 9; yf++) {
-            for (var yo = -1; yo <= 1; yo++) {
-              var x = xb * xf + xo;
-              var y = yb * yf + yo;
-              if (!%_IsSmi(x)) continue;
-              if (!%_IsSmi(y)) continue;
-              var lex = %SmiLexicographicCompare(x, y);
-              if (lex < 0) lex = -1;
-              if (lex > 0) lex = 1;
- assertEquals(lex, (x == y) ? 0 : ((x + "") < (y + "") ? -1 : 1), x + " < " + y);
-            }
-          }
-        }
+        addSigned(xb * xf + xo);
       }
     }
   }
+
+  for (var yb = 1; yb <= BIGGER_THAN_ANY_SMI; yb *= 2) {
+    for (var yo = -2; yo <= 2; yo++) {
+      addSigned(yb + yo);
+    }
+  }
+
+  for (var i = 0; i < smis.length; i++) {
+    for (var j = 0; j < smis.length; j++) {
+      var x = smis[i];
+      var y = smis[j];
+      var lex = %SmiLexicographicCompare(x, y);
+      var expected = (x == y) ? 0 : ((x + "") < (y + "") ? -1 : 1);
+      assertEquals(lex, expected, x + " < " + y);
+    }
+  }
 }

-TestNumberSort();
+TestSmiLexicographicCompare();


 // Test lexicographical string sorting.

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to