[PATCH] D36423: [libc++] Introsort based sorting function

2017-09-20 Thread DIVYA SHANMUGHAN via Phabricator via cfe-commits
DIVYA added a comment.

ping


https://reviews.llvm.org/D36423



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D36423: [libc++] Introsort based sorting function

2017-08-21 Thread DIVYA SHANMUGHAN via Phabricator via cfe-commits
DIVYA updated this revision to Diff 111998.
DIVYA added a comment.

- added test qsort_worst_uint32 in algorithm.bench.cpp


https://reviews.llvm.org/D36423

Files:
  benchmarks/GenerateInput.hpp
  benchmarks/algorithms.bench.cpp
  include/algorithm

Index: include/algorithm
===
--- include/algorithm
+++ include/algorithm
@@ -642,6 +642,7 @@
 #include  // needed to provide swap_ranges.
 #include 
 #include 
+#include 
 #include 
 
 #if defined(__IBMCPP__)
@@ -3994,7 +3995,14 @@
 
 template 
 void
-__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+__partial_sort(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator,
+_Compare);
+
+// Using introsort algorithm for sorting
+template 
+void
+__intro_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, 
+   typename iterator_traits<_RandomAccessIterator>::difference_type __depth_limit)
 {
 // _Compare is known to be a reference type
 typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
@@ -4029,6 +4037,12 @@
 _VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp);
 return;
 }
+if (__depth_limit == 0)
+{
+__partial_sort<_Compare>(__first,__last,__last,__comp);
+return;
+}
+
 // __len > 5
 _RandomAccessIterator __m = __first;
 _RandomAccessIterator __lm1 = __last;
@@ -4172,19 +4186,33 @@
 // sort smaller range with recursive call and larger with tail recursion elimination
 if (__i - __first < __last - __i)
 {
-_VSTD::__sort<_Compare>(__first, __i, __comp);
-// _VSTD::__sort<_Compare>(__i+1, __last, __comp);
+_VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit);
+// _VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, __depth_limit);
 __first = ++__i;
 }
 else
 {
-_VSTD::__sort<_Compare>(__i+1, __last, __comp);
-// _VSTD::__sort<_Compare>(__first, __i, __comp);
+_VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, __depth_limit);
+// _VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit);
 __last = __i;
 }
+--__depth_limit;
 }
 }
 
+template 
+void
+__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+{
+
+  // Threshold(or depth limit) for introsort is taken to be 2*log2(size)
+  typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
+  const difference_type __dp = __last - __first;
+  difference_type __depth_limit = __last == __first ? 0 : _VSTD::log2(__dp);
+  __depth_limit *= 2;
+  __intro_sort<_Compare>(__first, __last, __comp, __depth_limit);
+}
+
 // This forwarder keeps the top call and the recursive calls using the same instantiation, forcing a reference _Compare
 template 
 inline _LIBCPP_INLINE_VISIBILITY
Index: benchmarks/algorithms.bench.cpp
===
--- benchmarks/algorithms.bench.cpp
+++ benchmarks/algorithms.bench.cpp
@@ -5,7 +5,7 @@
 #include "benchmark/benchmark_api.h"
 #include "GenerateInput.hpp"
 
-constexpr std::size_t TestNumInputs = 1024;
+constexpr std::size_t TestNumInputs = 1024*64;
 
 template 
 void BM_Sort(benchmark::State& st, GenInputs gen) {
@@ -58,5 +58,8 @@
 BENCHMARK_CAPTURE(BM_Sort, single_element_strings,
 getDuplicateStringInputs)->Arg(TestNumInputs);
 
+BENCHMARK_CAPTURE(BM_Sort, qsort_worst_uint32,
+getQSortKiller)->Arg(TestNumInputs);
+
 
 BENCHMARK_MAIN()
Index: benchmarks/GenerateInput.hpp
===
--- benchmarks/GenerateInput.hpp
+++ benchmarks/GenerateInput.hpp
@@ -138,5 +138,40 @@
 return cinputs;
 }
 
+template 
+inline std::vector make_killer(size_t N) {
+std::vector inputs;
+uint32_t candidate = 0;
+uint32_t num_solid = 0;
+uint32_t gas = N - 1;
+
+std::vector tmp(N);
+inputs.resize(N);
+
+for (T i = 0; i < N; ++i) {
+tmp[i] = i;
+inputs[i] = gas;
+}
+
+std::sort(tmp.begin(), tmp.end(), [&](T x, T y) {
+if (inputs[x] == gas && inputs[y] == gas) {
+if (x == candidate) inputs[x] = num_solid++;
+else inputs[y] = num_solid++;
+}
+
+if (inputs[x] == gas) candidate = x;
+else if (inputs[y] == gas) candidate = y;
+
+return inputs[x] < inputs[y];
+});
+return inputs;
+}
+
+
+template 
+inline std::vector getQSortKiller(size_t N){
+std::vector inputs = make_killer(N);
+return inputs;
+}
 
 #endif // BENCHMARK_GENERATE_INPUT_HPP
___
cfe-commits mailing list
cfe-commits@lists.llvm.org

[PATCH] D36423: [libc++] Introsort based sorting function

2017-08-14 Thread DIVYA SHANMUGHAN via Phabricator via cfe-commits
DIVYA added a comment.

Added benchmark from Aditya's repo into the libcxx benchmark code base


https://reviews.llvm.org/D36423



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D36423: [libc++] Introsort based sorting function

2017-08-14 Thread DIVYA SHANMUGHAN via Phabricator via cfe-commits
DIVYA updated this revision to Diff 111027.

https://reviews.llvm.org/D36423

Files:
  benchmarks/algorithms.bench.cpp
  include/algorithm
  include/rng_utils.h
  include/test_configs.h
  include/test_utils.h

Index: include/test_utils.h
===
--- /dev/null
+++ include/test_utils.h
@@ -0,0 +1,270 @@
+#ifndef TEST_UTILS_H
+#define TEST_UTILS_H
+
+#include "rng_utils.h"
+#include
+#include
+
+// TODO: Add more aggregates.
+struct aggregate {
+  int first;
+  int second;
+  int third;
+  int fourth;
+  aggregate() : first(0), second(0), third(0), fourth(0)
+  {}
+  // This is a hacky constructor for ::find on associative containers to work.
+  aggregate(int i)
+: first(i), second(i), third(i), fourth(i)
+  {}
+  aggregate(int i, int j, int k, int l)
+: first(i), second(j), third(k), fourth(l)
+  {}
+
+  aggregate& operator++() {
+++first;
+++second;
+++third;
+++fourth;
+return *this;
+  }
+  aggregate operator++(int) {
+aggregate N(*this);
+++(*this);
+return N;
+  }
+
+  bool operator<(const aggregate& i) const {
+return first < i.first;
+  }
+
+  bool operator>(const aggregate& i) const {
+return i < *this;
+  }
+
+  bool operator==(const aggregate& i) const {
+return first == i.first;
+  }
+
+  bool operator!=(const aggregate& i) const {
+return !(*this == i);
+  }
+};
+
+// Hasher for aggregate data type.
+namespace std {
+  template <>
+  struct hash
+  {
+std::size_t operator()(const aggregate& k) const
+{
+  using std::hash;
+  // Hash and combine using bit-shift.
+  return ((hash()(k.first)
+   ^ (hash()(k.second) << 1)) >> 1)
+   ^ (hash()(k.third) << 1)
+   ^ (hash()(k.fourth) << 1);
+}
+  };
+}
+
+template
+struct remove_const { typedef T type; };
+
+// value_type of a std::map is std::pair
+template
+struct remove_const { typedef std::pair type; };
+
+template
+T get_rand(random_device , int max) {
+  return r.get_rand(T(0), T(max));
+}
+
+template<>
+std::pair get_rand>(random_device , int max) {
+  return std::make_pair(r.get_rand(0, max), r.get_rand(0, max));
+}
+
+template<>
+aggregate get_rand(random_device , int max) {
+  return aggregate(r.get_rand(0, max));
+}
+
+template<>
+std::pair
+get_rand>(random_device , int max) {
+  return std::make_pair(r.get_rand(0, max), r.get_rand(0, max));
+}
+
+template
+T increment(T ) {
+  return ++i;
+}
+
+// value_type of a std::map is std::pair
+template<>
+std::pair increment>(std::pair ) {
+  return std::make_pair(++i.first, i.second);
+}
+
+template<>
+std::pair
+increment>(std::pair ) {
+  return std::make_pair(++i.first, i.second);
+}
+
+template
+T init() {
+  return T(0);
+}
+
+template<>
+std::pair init>() {
+  return std::make_pair(0, 0);
+}
+
+template<>
+std::pair init>() {
+  return std::make_pair(0, 0);
+}
+
+template  class Container, class value_type>
+void fill_random(Container ,
+int max = RAND_MAX) {
+  random_device r;
+  for (auto  : v)
+e = get_rand(r, max);
+}
+
+template 
+void fill_random(T begin, T end, int max = RAND_MAX) {
+  typedef typename std::iterator_traits::value_type value_type;
+  random_device r;
+  for (auto it = begin; it != end; ++it)
+*it = get_rand(r, max);
+}
+
+// It can work with char* or std::string.
+template
+void fill_random_chars(T begin, T end, bool upper) {
+  char max = upper ? 'Z' : 'z';
+  char min = upper ? 'A' : 'a';
+  auto it = begin;
+  typedef typename std::iterator_traits::value_type value_type;
+  random_device r;
+  for (; it != end -1; ++it) {
+*it = get_rand(r, max) * (max - min) + min;
+assert(*it >= min);
+assert(*it <= max);
+  }
+  *it = '\0';
+}
+
+// TODO: Create a template class such that all the
+// APIs of STL containers can be exercised in a concise way.
+// for example insert, push_back, pop_back, push_front, pop_front, advance
+
+// TODO: Benchmark memory allocated on heap vs. stack.
+template 
+void fill_seq(T begin, T end) {
+  typedef typename std::iterator_traits::value_type value_type;
+  //random_device r;
+  value_type j = init();// = get_rand(r, RAND_MAX);
+  for (auto it = begin; it != end; ++it, increment(j))
+*it = j;
+}
+
+template  class Container, class value_type>
+void fill_seq(Container ) {
+  fill_seq(std::begin(v), std::end(v));
+}
+
+// Size of vector \p v to be constructed is \p size
+template 
+void make_killer(int size, std::vector& v) {
+  int candidate = 0;
+  int num_solid = 0;
+  int gas = size - 1;
+
+  std::vector tmp(size);
+  v.resize(size);
+
+  for (T i = 0; i < size; ++i) {
+   

[PATCH] D36423: [libc++] Introsort based sorting function

2017-08-09 Thread DIVYA SHANMUGHAN via Phabricator via cfe-commits
DIVYA added a comment.

Link to algorithm.bench.cpp benchmark 
https://github.com/hiraditya/std-benchmark/blob/master/cxx/algorithm.bench.cpp




Comment at: include/algorithm:4208
+
+  // Threshold(or depth limit) for introsort is taken to be 2*log2(size)
+  typedef typename iterator_traits<_RandomAccessIterator>::difference_type 
difference_type;

bcraig wrote:
> This comment says basically the same thing as the code.  The comment would be 
> more useful if it said why 2*log2(size) is used.
We tested the code with depth limit from log2(size) to 4*log2(size).It was 
giving good performance around 2*log2(size).So the depth limit was fixed a this 
value.


https://reviews.llvm.org/D36423



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D36423: [libc++] Introsort based sorting function

2017-08-08 Thread DIVYA SHANMUGHAN via Phabricator via cfe-commits
DIVYA added a comment.

benchmarks/algorithms.bench.cpp Results

With old code (in ns)

BM_sort_std_common/16384  : 730752

BM_sort_std_common/32768  :  1.58E+06 

BM_sort_std_ascending/16384:17160.5   

BM_sort_std_ascending/32768 :   35350.1   

BM_sort_std_descending/16384  : 35809 

BM_sort_std_descending/32768  : 72133 

BM_sort_std_list_with_vector/16384  : 124250

BM_sort_std_list_with_vector/32768  : 247705

BM_sort_std_worst_quick/16384   :   1.03E+07  

BM_sort_std_worst_quick/32768:  4.04E+07

With new code (in ns)

BM_sort_std_common/16384  :  
720510   
BM_sort_std_common/32768:   
1.55E+06  
BM_sort_std_ascending/16384: 
17164.9  
BM_sort_std_ascending/32768: 
34726.7  
BM_sort_std_descending/16384   :  
35671   
BM_sort_std_descending/32768 :   
72100.7  
BM_sort_std_list_with_vector/16384   : 
125816   
BM_sort_std_list_with_vector/32768  :   
247450  
BM_sort_std_worst_quick/16384  :
987016
BM_sort_std_worst_quick/32768:  
2.14E+06


https://reviews.llvm.org/D36423



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D36423: [libc++] Introsort based sorting function

2017-08-07 Thread DIVYA SHANMUGHAN via Phabricator via cfe-commits
DIVYA created this revision.

The sorting algorithm currently employed in libc+ library uses quicksort with 
tail recursion elimination, as a result of which the worst case complexity 
turns out to be O(N^2).
This patch reduces the worst case time complexity, by employing Introsort 
algorithm. Introsort is a sorting technique, which begins with quicksort and 
when the recursion depth (or depth limit) goes beyond a threshold value, then 
it switches to Heapsort .As a result the worst case complexity reduces to 
O(NlogN)

Worked in collaboration with Aditya Kumar.


https://reviews.llvm.org/D36423

Files:
  include/algorithm


Index: include/algorithm
===
--- include/algorithm
+++ include/algorithm
@@ -642,6 +642,7 @@
 #include  // needed to provide swap_ranges.
 #include 
 #include 
+#include 
 #include 
 
 #if defined(__IBMCPP__)
@@ -3994,7 +3995,14 @@
 
 template 
 void
-__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare 
__comp)
+__partial_sort(_RandomAccessIterator, _RandomAccessIterator, 
_RandomAccessIterator,
+_Compare);
+
+// Using introsort algorithm for sorting
+template 
+void
+__intro_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, 
_Compare __comp, 
+   typename iterator_traits<_RandomAccessIterator>::difference_type 
__depth_limit)
 {
 // _Compare is known to be a reference type
 typedef typename iterator_traits<_RandomAccessIterator>::difference_type 
difference_type;
@@ -4029,6 +4037,12 @@
 _VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp);
 return;
 }
+if (__depth_limit == 0)
+{
+__partial_sort<_Compare>(__first,__last,__last,__comp);
+return;
+}
+
 // __len > 5
 _RandomAccessIterator __m = __first;
 _RandomAccessIterator __lm1 = __last;
@@ -4172,19 +4186,33 @@
 // sort smaller range with recursive call and larger with tail 
recursion elimination
 if (__i - __first < __last - __i)
 {
-_VSTD::__sort<_Compare>(__first, __i, __comp);
-// _VSTD::__sort<_Compare>(__i+1, __last, __comp);
+_VSTD::__intro_sort<_Compare>(__first, __i, __comp, __depth_limit);
+// _VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, 
__depth_limit);
 __first = ++__i;
 }
 else
 {
-_VSTD::__sort<_Compare>(__i+1, __last, __comp);
-// _VSTD::__sort<_Compare>(__first, __i, __comp);
+_VSTD::__intro_sort<_Compare>(__i+1, __last, __comp, 
__depth_limit);
+// _VSTD::__intro_sort<_Compare>(__first, __i, __comp, 
__depth_limit);
 __last = __i;
 }
+--__depth_limit;
 }
 }
 
+template 
+void
+__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare 
__comp)
+{
+
+  // Threshold(or depth limit) for introsort is taken to be 2*log2(size)
+  typedef typename iterator_traits<_RandomAccessIterator>::difference_type 
difference_type;
+  const difference_type __dp = __last - __first;
+  difference_type __depth_limit = __last == __first ? 0 : _VSTD::log2(__dp);
+  __depth_limit *=2;
+  __intro_sort<_Compare>(__first, __last, __comp, __depth_limit);
+}
+
 // This forwarder keeps the top call and the recursive calls using the same 
instantiation, forcing a reference _Compare
 template 
 inline _LIBCPP_INLINE_VISIBILITY


Index: include/algorithm
===
--- include/algorithm
+++ include/algorithm
@@ -642,6 +642,7 @@
 #include  // needed to provide swap_ranges.
 #include 
 #include 
+#include 
 #include 
 
 #if defined(__IBMCPP__)
@@ -3994,7 +3995,14 @@
 
 template 
 void
-__sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+__partial_sort(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator,
+_Compare);
+
+// Using introsort algorithm for sorting
+template 
+void
+__intro_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, 
+   typename iterator_traits<_RandomAccessIterator>::difference_type __depth_limit)
 {
 // _Compare is known to be a reference type
 typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type;
@@ -4029,6 +4037,12 @@
 _VSTD::__insertion_sort_3<_Compare>(__first, __last, __comp);
 return;
 }
+if (__depth_limit == 0)
+{
+__partial_sort<_Compare>(__first,__last,__last,__comp);
+return;
+}
+
 // __len > 5
 _RandomAccessIterator __m = __first;
 _RandomAccessIterator __lm1 = __last;
@@ -4172,19 +4186,33 @@
 // sort smaller range with recursive call and larger with tail recursion elimination
 if (__i - __first < __last - __i)
 {
-