[PATCH] D39308: [libcxx] Keep track of heap allocated regex states
mclow.lists added a comment. Herald added subscribers: bixia, christof. What we need to do here is to here is to add a new macro `_LIBCPP_ABI_REGEX_MEMORY` in `<__config>` (around line 89) and then make these changes available only when this macro is defined. By default - we get no change. If `_LIBCPP_ABI_REGEX_MEMORY` is defined, we get the new behavior. Then we can start adding other ABI changes as well. https://reviews.llvm.org/D39308 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D39308: [libcxx] Keep track of heap allocated regex states
timshen updated this revision to Diff 120711. timshen added a comment. Remove the uses of variadic template and auto. I'm not sure of how to implement this without a ABI change (the addition of __storage_). https://reviews.llvm.org/D39308 Files: libcxx/include/regex Index: libcxx/include/regex === --- libcxx/include/regex +++ libcxx/include/regex @@ -765,6 +765,8 @@ #include #include +#include <__debug> + #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif @@ -1404,16 +1406,8 @@ _LIBCPP_INLINE_VISIBILITY explicit __owns_one_state(__node<_CharT>* __s) : base(__s) {} - -virtual ~__owns_one_state(); }; -template -__owns_one_state<_CharT>::~__owns_one_state() -{ -delete this->first(); -} - // __empty_state template @@ -1507,20 +1501,12 @@ explicit __owns_two_states(__node<_CharT>* __s1, base* __s2) : base(__s1), __second_(__s2) {} -virtual ~__owns_two_states(); - _LIBCPP_INLINE_VISIBILITY base* second() const {return __second_;} _LIBCPP_INLINE_VISIBILITY base*& second() {return __second_;} }; -template -__owns_two_states<_CharT>::~__owns_two_states() -{ -delete __second_; -} - // __loop template @@ -2493,17 +2479,51 @@ typedef typename _Traits::locale_type locale_type; private: +typedef _VSTD::__state<_CharT> __state; +typedef _VSTD::__node<_CharT> __node; + +// Shared, append-only storage for regexes. +class __storage +{ + // invariant: __v_ is never nullptr. + shared_ptr>> __v_; + +public: + __storage() : __v_(make_shared >>()) {} + __storage(const __storage&) = default; + __storage(__storage&& __rhs) : __v_(std::move(__rhs.__v_)) + { __rhs.__clear(); } + + __storage& operator=(const __storage&) = default; + __storage& operator=(__storage&& __rhs) + { +__v_ = std::move(__rhs.__v_); +__rhs.__clear(); +return *this; + } + + __node* __push(__node* __n) + { +_LIBCPP_ASSERT(__v_.use_count() == 1, + "__push should be called only " + "when the storage is not shared"); +__v_->emplace_back(unique_ptr<__node>(__n)); +return __n; + } + + void __clear() + { __v_ = make_shared >>(); } +}; + _Traits __traits_; flag_type __flags_; unsigned __marked_count_; unsigned __loop_count_; int __open_count_; -shared_ptr<__empty_state<_CharT> > __start_; +__storage __storage_; +__empty_state<_CharT>* __start_; __owns_one_state<_CharT>* __end_; -typedef _VSTD::__state<_CharT> __state; -typedef _VSTD::__node<_CharT> __node; - public: // constants: static const regex_constants::syntax_option_type icase = regex_constants::icase; @@ -2521,40 +2541,40 @@ _LIBCPP_INLINE_VISIBILITY basic_regex() : __flags_(), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {} _LIBCPP_INLINE_VISIBILITY explicit basic_regex(const value_type* __p, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__p, __p + __traits_.length(__p));} _LIBCPP_INLINE_VISIBILITY basic_regex(const value_type* __p, size_t __len, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__p, __p + __len);} // basic_regex(const basic_regex&) = default; // basic_regex(basic_regex&&) = default; template _LIBCPP_INLINE_VISIBILITY explicit basic_regex(const basic_string & __p, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__p.begin(), __p.end());} template _LIBCPP_INLINE_VISIBILITY basic_regex(_ForwardIterator __first, _ForwardIterator __last, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__first, __last);} #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY basic_regex(initializer_list __il, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0)
[PATCH] D39308: [libcxx] Keep track of heap allocated regex states
timshen added a comment. In https://reviews.llvm.org/D39308#907424, @mclow.lists wrote: > A couple of notes. Sorry for the oversights, when a C++11(-only :) contributor doesn't care about ABI stability, nor exceptions, he contributes naive code. :P I'll fix them. > - This change means that now requires C++11 (the new `__push` > function w/ the varargs). I don't know how important that is; but I'm pretty > sure libc++ currently provides `` in C++03 mode. > - This is an ABI change; existing code that was compiled with the "old" > `` implementation will not interoperate with this. Can you give an example on what would go wrong, while it's not expected to go wrong? > - I think that there may be some exception-safety issues in `__push`; if the > `push_back` throws, I think we leak. https://reviews.llvm.org/D39308 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D39308: [libcxx] Keep track of heap allocated regex states
mclow.lists added a comment. I can confirm that with this patch the (large) regex that used to cause a stack overflow does not any more. https://reviews.llvm.org/D39308 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D39308: [libcxx] Keep track of heap allocated regex states
mclow.lists added a comment. A couple of notes. - This change means that now requires C++11 (the new `__push` function w/ the varargs). I don't know how important that is; but I'm pretty sure libc++ currently provides `` in C++03 mode. - This is an ABI change; existing code that was compiled with the "old" `` implementation will not interoperate with this. - I think that there may be some exception-safety issues in `__push`; if the `push_back` throws, I think we leak. Comment at: libcxx/include/regex:4642 +{ + auto __ret = new _NodeType(std::forward<_Args>(__args)...); + __storage_.__push(__ret); Why `auto` here? How about `unique_ptr<_NodeType>` instead, and then __storage_.__push(__ret.get()); return __ret.release() https://reviews.llvm.org/D39308 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D39308: [libcxx] Keep track of heap allocated regex states
timshen updated this revision to Diff 120334. timshen added a comment. Add an assertion to __push. https://reviews.llvm.org/D39308 Files: libcxx/include/regex Index: libcxx/include/regex === --- libcxx/include/regex +++ libcxx/include/regex @@ -765,6 +765,8 @@ #include #include +#include <__debug> + #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif @@ -1404,16 +1406,8 @@ _LIBCPP_INLINE_VISIBILITY explicit __owns_one_state(__node<_CharT>* __s) : base(__s) {} - -virtual ~__owns_one_state(); }; -template -__owns_one_state<_CharT>::~__owns_one_state() -{ -delete this->first(); -} - // __empty_state template @@ -1507,20 +1501,12 @@ explicit __owns_two_states(__node<_CharT>* __s1, base* __s2) : base(__s1), __second_(__s2) {} -virtual ~__owns_two_states(); - _LIBCPP_INLINE_VISIBILITY base* second() const {return __second_;} _LIBCPP_INLINE_VISIBILITY base*& second() {return __second_;} }; -template -__owns_two_states<_CharT>::~__owns_two_states() -{ -delete __second_; -} - // __loop template @@ -2493,17 +2479,50 @@ typedef typename _Traits::locale_type locale_type; private: +typedef _VSTD::__state<_CharT> __state; +typedef _VSTD::__node<_CharT> __node; + +// Shared, append-only storage for regexes. +class __storage +{ + // invariant: __v_ is never nullptr. + shared_ptr>> __v_; + +public: + __storage() : __v_(make_shared >>()) {} + __storage(const __storage&) = default; + __storage(__storage&& __rhs) : __v_(std::move(__rhs.__v_)) + { __rhs.__clear(); } + + __storage& operator=(const __storage&) = default; + __storage& operator=(__storage&& __rhs) + { +__v_ = std::move(__rhs.__v_); +__rhs.__clear(); +return *this; + } + + void __push(__node* __n) + { +_LIBCPP_ASSERT(__v_.use_count() == 1, + "__push should be called only " + "when the storage is not shared"); +__v_->emplace_back(__n); + } + + void __clear() + { __v_ = make_shared >>(); } +}; + _Traits __traits_; flag_type __flags_; unsigned __marked_count_; unsigned __loop_count_; int __open_count_; -shared_ptr<__empty_state<_CharT> > __start_; +__storage __storage_; +__empty_state<_CharT>* __start_; __owns_one_state<_CharT>* __end_; -typedef _VSTD::__state<_CharT> __state; -typedef _VSTD::__node<_CharT> __node; - public: // constants: static const regex_constants::syntax_option_type icase = regex_constants::icase; @@ -2521,40 +2540,40 @@ _LIBCPP_INLINE_VISIBILITY basic_regex() : __flags_(), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {} _LIBCPP_INLINE_VISIBILITY explicit basic_regex(const value_type* __p, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__p, __p + __traits_.length(__p));} _LIBCPP_INLINE_VISIBILITY basic_regex(const value_type* __p, size_t __len, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__p, __p + __len);} // basic_regex(const basic_regex&) = default; // basic_regex(basic_regex&&) = default; template _LIBCPP_INLINE_VISIBILITY explicit basic_regex(const basic_string & __p, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__p.begin(), __p.end());} template _LIBCPP_INLINE_VISIBILITY basic_regex(_ForwardIterator __first, _ForwardIterator __last, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__first, __last);} #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY basic_regex(initializer_list __il, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__il.begin(), __il.end());} #endif // _LIBCPP_CXX03_LANG @@ -2656,7 +2675,8 @@ locale_type imbue(locale_type __loc) {
[PATCH] D39308: [libcxx] Keep track of heap allocated regex states
timshen created this revision. Herald added a subscriber: sanjoy. Herald added a reviewer: EricWF. Build abstraction on regex's allocation This fixes a fuzzer crasher from a huge input (provided by Marshall), which seems to be a stackoverflow during destruction. However, I can't reproduce Marshall's crasher even before the change. No new tests are added, but all old tests pass. I didn't really run valgrind on tests to check for memory leak, as the new allocation is basically correct by construction (only one call site uses `new`, no explicit `delete` calls). https://reviews.llvm.org/D39308 Files: libcxx/include/regex Index: libcxx/include/regex === --- libcxx/include/regex +++ libcxx/include/regex @@ -1404,16 +1404,8 @@ _LIBCPP_INLINE_VISIBILITY explicit __owns_one_state(__node<_CharT>* __s) : base(__s) {} - -virtual ~__owns_one_state(); }; -template -__owns_one_state<_CharT>::~__owns_one_state() -{ -delete this->first(); -} - // __empty_state template @@ -1507,20 +1499,12 @@ explicit __owns_two_states(__node<_CharT>* __s1, base* __s2) : base(__s1), __second_(__s2) {} -virtual ~__owns_two_states(); - _LIBCPP_INLINE_VISIBILITY base* second() const {return __second_;} _LIBCPP_INLINE_VISIBILITY base*& second() {return __second_;} }; -template -__owns_two_states<_CharT>::~__owns_two_states() -{ -delete __second_; -} - // __loop template @@ -2493,17 +2477,45 @@ typedef typename _Traits::locale_type locale_type; private: +typedef _VSTD::__state<_CharT> __state; +typedef _VSTD::__node<_CharT> __node; + +// Shared, append-only storage for regexes. +class __storage +{ + // invariant: __v_ is never nullptr. + shared_ptr>> __v_; + +public: + __storage() : __v_(make_shared >>()) {} + __storage(const __storage&) = default; + __storage(__storage&& __rhs) : __v_(std::move(__rhs.__v_)) + { __rhs.__clear(); } + + __storage& operator=(const __storage&) = default; + __storage& operator=(__storage&& __rhs) + { +__v_ = std::move(__rhs.__v_); +__rhs.__clear(); +return *this; + } + + void __push(__node* __n) + { __v_->emplace_back(__n); } + + void __clear() + { __v_ = make_shared >>(); } +}; + _Traits __traits_; flag_type __flags_; unsigned __marked_count_; unsigned __loop_count_; int __open_count_; -shared_ptr<__empty_state<_CharT> > __start_; +__storage __storage_; +__empty_state<_CharT>* __start_; __owns_one_state<_CharT>* __end_; -typedef _VSTD::__state<_CharT> __state; -typedef _VSTD::__node<_CharT> __node; - public: // constants: static const regex_constants::syntax_option_type icase = regex_constants::icase; @@ -2521,40 +2533,40 @@ _LIBCPP_INLINE_VISIBILITY basic_regex() : __flags_(), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {} _LIBCPP_INLINE_VISIBILITY explicit basic_regex(const value_type* __p, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__p, __p + __traits_.length(__p));} _LIBCPP_INLINE_VISIBILITY basic_regex(const value_type* __p, size_t __len, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__p, __p + __len);} // basic_regex(const basic_regex&) = default; // basic_regex(basic_regex&&) = default; template _LIBCPP_INLINE_VISIBILITY explicit basic_regex(const basic_string & __p, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__p.begin(), __p.end());} template _LIBCPP_INLINE_VISIBILITY basic_regex(_ForwardIterator __first, _ForwardIterator __last, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0) {__parse(__first, __last);} #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY basic_regex(initializer_list __il, flag_type __f = regex_constants::ECMAScript) : __flags_(__f), __marked_count_(0), __loop_count_(0), __open_count_(0), - __end_(0) + __start_(0), __end_(0)