[PATCH] D39308: [libcxx] Keep track of heap allocated regex states

2018-05-15 Thread Marshall Clow via Phabricator via cfe-commits
mclow.lists added a comment.
Herald added subscribers: bixia, christof.

What we need to do here is to here is to add a new macro 
`_LIBCPP_ABI_REGEX_MEMORY` in `<__config>` (around line 89) and then make these 
changes available only when this macro is defined.

By default - we get no change.
If `_LIBCPP_ABI_REGEX_MEMORY` is defined, we get the new behavior.

Then we can start adding other ABI changes as well.


https://reviews.llvm.org/D39308



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39308: [libcxx] Keep track of heap allocated regex states

2017-10-27 Thread Tim Shen via Phabricator via cfe-commits
timshen updated this revision to Diff 120711.
timshen added a comment.

Remove the uses of variadic template and auto.

I'm not sure of how to implement this without a ABI change (the addition of 
__storage_).


https://reviews.llvm.org/D39308

Files:
  libcxx/include/regex

Index: libcxx/include/regex
===
--- libcxx/include/regex
+++ libcxx/include/regex
@@ -765,6 +765,8 @@
 #include 
 #include 
 
+#include <__debug>
+
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
 #endif
@@ -1404,16 +1406,8 @@
 _LIBCPP_INLINE_VISIBILITY
 explicit __owns_one_state(__node<_CharT>* __s)
 : base(__s) {}
-
-virtual ~__owns_one_state();
 };
 
-template 
-__owns_one_state<_CharT>::~__owns_one_state()
-{
-delete this->first();
-}
-
 // __empty_state
 
 template 
@@ -1507,20 +1501,12 @@
 explicit __owns_two_states(__node<_CharT>* __s1, base* __s2)
 : base(__s1), __second_(__s2) {}
 
-virtual ~__owns_two_states();
-
 _LIBCPP_INLINE_VISIBILITY
 base*  second() const {return __second_;}
 _LIBCPP_INLINE_VISIBILITY
 base*& second()   {return __second_;}
 };
 
-template 
-__owns_two_states<_CharT>::~__owns_two_states()
-{
-delete __second_;
-}
-
 // __loop
 
 template 
@@ -2493,17 +2479,51 @@
 typedef typename _Traits::locale_type   locale_type;
 
 private:
+typedef _VSTD::__state<_CharT> __state;
+typedef _VSTD::__node<_CharT> __node;
+
+// Shared, append-only storage for regexes.
+class __storage
+{
+  // invariant: __v_ is never nullptr.
+  shared_ptr>> __v_;
+
+public:
+  __storage() : __v_(make_shared>>()) {}
+  __storage(const __storage&) = default;
+  __storage(__storage&& __rhs) : __v_(std::move(__rhs.__v_))
+  { __rhs.__clear(); }
+
+  __storage& operator=(const __storage&) = default;
+  __storage& operator=(__storage&& __rhs)
+  {
+__v_ = std::move(__rhs.__v_);
+__rhs.__clear();
+return *this;
+  }
+
+  __node* __push(__node* __n)
+  {
+_LIBCPP_ASSERT(__v_.use_count() == 1,
+   "__push should be called only "
+   "when the storage is not shared");
+__v_->emplace_back(unique_ptr<__node>(__n));
+return __n;
+  }
+
+  void __clear()
+  { __v_ = make_shared>>(); }
+};
+
 _Traits   __traits_;
 flag_type __flags_;
 unsigned __marked_count_;
 unsigned __loop_count_;
 int __open_count_;
-shared_ptr<__empty_state<_CharT> > __start_;
+__storage __storage_;
+__empty_state<_CharT>* __start_;
 __owns_one_state<_CharT>* __end_;
 
-typedef _VSTD::__state<_CharT> __state;
-typedef _VSTD::__node<_CharT> __node;
-
 public:
 // constants:
 static const regex_constants::syntax_option_type icase = regex_constants::icase;
@@ -2521,40 +2541,40 @@
 _LIBCPP_INLINE_VISIBILITY
 basic_regex()
 : __flags_(), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {}
 _LIBCPP_INLINE_VISIBILITY
 explicit basic_regex(const value_type* __p, flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__p, __p + __traits_.length(__p));}
 _LIBCPP_INLINE_VISIBILITY
 basic_regex(const value_type* __p, size_t __len, flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__p, __p + __len);}
 // basic_regex(const basic_regex&) = default;
 // basic_regex(basic_regex&&) = default;
 template 
 _LIBCPP_INLINE_VISIBILITY
 explicit basic_regex(const basic_string& __p,
  flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__p.begin(), __p.end());}
 template 
 _LIBCPP_INLINE_VISIBILITY
 basic_regex(_ForwardIterator __first, _ForwardIterator __last,
 flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__first, __last);}
 #ifndef _LIBCPP_CXX03_LANG
 _LIBCPP_INLINE_VISIBILITY
 basic_regex(initializer_list __il,
 flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 

[PATCH] D39308: [libcxx] Keep track of heap allocated regex states

2017-10-26 Thread Tim Shen via Phabricator via cfe-commits
timshen added a comment.

In https://reviews.llvm.org/D39308#907424, @mclow.lists wrote:

> A couple of notes.


Sorry for the oversights, when a C++11(-only :) contributor doesn't care about 
ABI stability, nor exceptions, he contributes naive code. :P I'll fix them.

> - This change means that  now requires C++11 (the new `__push` 
> function w/ the varargs).  I don't know how important that is; but I'm pretty 
> sure libc++ currently provides `` in C++03 mode.
> - This is an ABI change; existing code that was compiled with the "old" 
> `` implementation will not interoperate with this.

Can you give an example on what would go wrong, while it's not expected to go 
wrong?

> - I think that there may be some exception-safety issues in `__push`; if the 
> `push_back` throws, I think we leak.


https://reviews.llvm.org/D39308



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39308: [libcxx] Keep track of heap allocated regex states

2017-10-25 Thread Marshall Clow via Phabricator via cfe-commits
mclow.lists added a comment.

I can confirm that with this patch the (large) regex that used to cause a stack 
overflow does not any more.


https://reviews.llvm.org/D39308



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39308: [libcxx] Keep track of heap allocated regex states

2017-10-25 Thread Marshall Clow via Phabricator via cfe-commits
mclow.lists added a comment.

A couple of notes.

- This change means that  now requires C++11 (the new `__push` function 
w/ the varargs).  I don't know how important that is; but I'm pretty sure 
libc++ currently provides `` in C++03 mode.
- This is an ABI change; existing code that was compiled with the "old" 
`` implementation will not interoperate with this.
- I think that there may be some exception-safety issues in `__push`; if the 
`push_back` throws, I think we leak.




Comment at: libcxx/include/regex:4642
+{
+  auto __ret = new _NodeType(std::forward<_Args>(__args)...);
+  __storage_.__push(__ret);

Why `auto` here? 

How about `unique_ptr<_NodeType>` instead, and then
__storage_.__push(__ret.get());
return __ret.release()



https://reviews.llvm.org/D39308



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D39308: [libcxx] Keep track of heap allocated regex states

2017-10-25 Thread Tim Shen via Phabricator via cfe-commits
timshen updated this revision to Diff 120334.
timshen added a comment.

Add an assertion to __push.


https://reviews.llvm.org/D39308

Files:
  libcxx/include/regex

Index: libcxx/include/regex
===
--- libcxx/include/regex
+++ libcxx/include/regex
@@ -765,6 +765,8 @@
 #include 
 #include 
 
+#include <__debug>
+
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
 #endif
@@ -1404,16 +1406,8 @@
 _LIBCPP_INLINE_VISIBILITY
 explicit __owns_one_state(__node<_CharT>* __s)
 : base(__s) {}
-
-virtual ~__owns_one_state();
 };
 
-template 
-__owns_one_state<_CharT>::~__owns_one_state()
-{
-delete this->first();
-}
-
 // __empty_state
 
 template 
@@ -1507,20 +1501,12 @@
 explicit __owns_two_states(__node<_CharT>* __s1, base* __s2)
 : base(__s1), __second_(__s2) {}
 
-virtual ~__owns_two_states();
-
 _LIBCPP_INLINE_VISIBILITY
 base*  second() const {return __second_;}
 _LIBCPP_INLINE_VISIBILITY
 base*& second()   {return __second_;}
 };
 
-template 
-__owns_two_states<_CharT>::~__owns_two_states()
-{
-delete __second_;
-}
-
 // __loop
 
 template 
@@ -2493,17 +2479,50 @@
 typedef typename _Traits::locale_type   locale_type;
 
 private:
+typedef _VSTD::__state<_CharT> __state;
+typedef _VSTD::__node<_CharT> __node;
+
+// Shared, append-only storage for regexes.
+class __storage
+{
+  // invariant: __v_ is never nullptr.
+  shared_ptr>> __v_;
+
+public:
+  __storage() : __v_(make_shared>>()) {}
+  __storage(const __storage&) = default;
+  __storage(__storage&& __rhs) : __v_(std::move(__rhs.__v_))
+  { __rhs.__clear(); }
+
+  __storage& operator=(const __storage&) = default;
+  __storage& operator=(__storage&& __rhs)
+  {
+__v_ = std::move(__rhs.__v_);
+__rhs.__clear();
+return *this;
+  }
+
+  void __push(__node* __n)
+  {
+_LIBCPP_ASSERT(__v_.use_count() == 1,
+   "__push should be called only "
+   "when the storage is not shared");
+__v_->emplace_back(__n);
+  }
+
+  void __clear()
+  { __v_ = make_shared>>(); }
+};
+
 _Traits   __traits_;
 flag_type __flags_;
 unsigned __marked_count_;
 unsigned __loop_count_;
 int __open_count_;
-shared_ptr<__empty_state<_CharT> > __start_;
+__storage __storage_;
+__empty_state<_CharT>* __start_;
 __owns_one_state<_CharT>* __end_;
 
-typedef _VSTD::__state<_CharT> __state;
-typedef _VSTD::__node<_CharT> __node;
-
 public:
 // constants:
 static const regex_constants::syntax_option_type icase = regex_constants::icase;
@@ -2521,40 +2540,40 @@
 _LIBCPP_INLINE_VISIBILITY
 basic_regex()
 : __flags_(), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {}
 _LIBCPP_INLINE_VISIBILITY
 explicit basic_regex(const value_type* __p, flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__p, __p + __traits_.length(__p));}
 _LIBCPP_INLINE_VISIBILITY
 basic_regex(const value_type* __p, size_t __len, flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__p, __p + __len);}
 // basic_regex(const basic_regex&) = default;
 // basic_regex(basic_regex&&) = default;
 template 
 _LIBCPP_INLINE_VISIBILITY
 explicit basic_regex(const basic_string& __p,
  flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__p.begin(), __p.end());}
 template 
 _LIBCPP_INLINE_VISIBILITY
 basic_regex(_ForwardIterator __first, _ForwardIterator __last,
 flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__first, __last);}
 #ifndef _LIBCPP_CXX03_LANG
 _LIBCPP_INLINE_VISIBILITY
 basic_regex(initializer_list __il,
 flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__il.begin(), __il.end());}
 #endif  // _LIBCPP_CXX03_LANG
 
@@ -2656,7 +2675,8 @@
 locale_type imbue(locale_type __loc)
 {
 

[PATCH] D39308: [libcxx] Keep track of heap allocated regex states

2017-10-25 Thread Tim Shen via Phabricator via cfe-commits
timshen created this revision.
Herald added a subscriber: sanjoy.
Herald added a reviewer: EricWF.

Build abstraction on regex's allocation

This fixes a fuzzer crasher from a huge input (provided by Marshall), which 
seems to be a stackoverflow during destruction. However, I can't reproduce 
Marshall's crasher even before the change.

No new tests are added, but all old tests pass. I didn't really run valgrind on 
tests to check for memory leak, as the new allocation is basically correct by 
construction (only one call site uses `new`, no explicit `delete` calls).


https://reviews.llvm.org/D39308

Files:
  libcxx/include/regex

Index: libcxx/include/regex
===
--- libcxx/include/regex
+++ libcxx/include/regex
@@ -1404,16 +1404,8 @@
 _LIBCPP_INLINE_VISIBILITY
 explicit __owns_one_state(__node<_CharT>* __s)
 : base(__s) {}
-
-virtual ~__owns_one_state();
 };
 
-template 
-__owns_one_state<_CharT>::~__owns_one_state()
-{
-delete this->first();
-}
-
 // __empty_state
 
 template 
@@ -1507,20 +1499,12 @@
 explicit __owns_two_states(__node<_CharT>* __s1, base* __s2)
 : base(__s1), __second_(__s2) {}
 
-virtual ~__owns_two_states();
-
 _LIBCPP_INLINE_VISIBILITY
 base*  second() const {return __second_;}
 _LIBCPP_INLINE_VISIBILITY
 base*& second()   {return __second_;}
 };
 
-template 
-__owns_two_states<_CharT>::~__owns_two_states()
-{
-delete __second_;
-}
-
 // __loop
 
 template 
@@ -2493,17 +2477,45 @@
 typedef typename _Traits::locale_type   locale_type;
 
 private:
+typedef _VSTD::__state<_CharT> __state;
+typedef _VSTD::__node<_CharT> __node;
+
+// Shared, append-only storage for regexes.
+class __storage
+{
+  // invariant: __v_ is never nullptr.
+  shared_ptr>> __v_;
+
+public:
+  __storage() : __v_(make_shared>>()) {}
+  __storage(const __storage&) = default;
+  __storage(__storage&& __rhs) : __v_(std::move(__rhs.__v_))
+  { __rhs.__clear(); }
+
+  __storage& operator=(const __storage&) = default;
+  __storage& operator=(__storage&& __rhs)
+  {
+__v_ = std::move(__rhs.__v_);
+__rhs.__clear();
+return *this;
+  }
+
+  void __push(__node* __n)
+  { __v_->emplace_back(__n); }
+
+  void __clear()
+  { __v_ = make_shared>>(); }
+};
+
 _Traits   __traits_;
 flag_type __flags_;
 unsigned __marked_count_;
 unsigned __loop_count_;
 int __open_count_;
-shared_ptr<__empty_state<_CharT> > __start_;
+__storage __storage_;
+__empty_state<_CharT>* __start_;
 __owns_one_state<_CharT>* __end_;
 
-typedef _VSTD::__state<_CharT> __state;
-typedef _VSTD::__node<_CharT> __node;
-
 public:
 // constants:
 static const regex_constants::syntax_option_type icase = regex_constants::icase;
@@ -2521,40 +2533,40 @@
 _LIBCPP_INLINE_VISIBILITY
 basic_regex()
 : __flags_(), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {}
 _LIBCPP_INLINE_VISIBILITY
 explicit basic_regex(const value_type* __p, flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__p, __p + __traits_.length(__p));}
 _LIBCPP_INLINE_VISIBILITY
 basic_regex(const value_type* __p, size_t __len, flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__p, __p + __len);}
 // basic_regex(const basic_regex&) = default;
 // basic_regex(basic_regex&&) = default;
 template 
 _LIBCPP_INLINE_VISIBILITY
 explicit basic_regex(const basic_string& __p,
  flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__p.begin(), __p.end());}
 template 
 _LIBCPP_INLINE_VISIBILITY
 basic_regex(_ForwardIterator __first, _ForwardIterator __last,
 flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)
 {__parse(__first, __last);}
 #ifndef _LIBCPP_CXX03_LANG
 _LIBCPP_INLINE_VISIBILITY
 basic_regex(initializer_list __il,
 flag_type __f = regex_constants::ECMAScript)
 : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0),
-  __end_(0)
+  __start_(0), __end_(0)