[
https://issues.apache.org/jira/browse/STDCXX-249?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12566455#action_12566455
]
Martin Sebor commented on STDCXX-249:
-------------------------------------
Great numbers!
The patch looks good to me. Maybe just a few minor suggestions:
{code}
Index: istream.cc
===================================================================
--- istream.cc (revision 618349)
+++ istream.cc (working copy)
@@ -788,24 +788,23 @@
{
_RWSTD_ASSERT (0 != __is.rdbuf ());
+ const _TYPENAME basic_istream<_CharT, _Traits>::sentry
+ __ipfx (__is /* , noskipws = false */);
+
ios_base::iostate __err = ios_base::goodbit;
- _RWSTD_SIZE_T __gcount = 0;
+ typedef _RWSTD_SIZE_T _SizeT;
+ // count of characters read from stream
+ _SizeT __gcount = 0;
+
_TRY {
- const _TYPENAME basic_istream<_CharT, _Traits>::sentry
- __ipfx (__is /* , noskipws = false */);
-
if (__ipfx) {
- basic_streambuf<_CharT, _Traits>* const __rdbuf = __is.rdbuf ();
+ __str.clear ();
- // FIXME: code commented out to work around an HP aCC 3.14.10
- // bug #JAGac86264
-
- // typedef _TYPENAME
- // basic_string<_CharT, _Traits, _Allocator>::size_type
-
- const _RWSTD_SIZE_T __maxlen =
+ // maximum number of characters we can read
+ _RWSTD_SIZE_T __n =
__is.width () ? __is.width () : __str.max_size ();
+ // __is.width (0); // ensure reset in case of exception?
{code}
I agree we should be exception safe but unless this is the only place where we
don't take exceptions into consideration I'd do that in a separate pass
throughout all of iostreams.
{code}
- __str.clear ();
+ basic_streambuf<_CharT, _Traits>* const __rdbuf = __is.rdbuf ();
const ctype<_CharT> &__ctp =
_USE_FACET (ctype<_CharT>, __is.getloc ());
- // increment gcount only _after_ sbumpc() but _before_
- // the subsequent call to sgetc() to correctly reflect
- // the number of extracted characters in the presence
- // of exceptions thrown from streambuf virtuals
- for ( ; __maxlen != __gcount; __rdbuf->sbumpc (), ++__gcount) {
+#ifndef _RWSTD_NO_FRIEND_TEMPLATE
- const _TYPENAME _Traits::int_type __c (__rdbuf->sgetc ());
+ for ( ; __n != 0; ) {
{code}
Since there is no expression in the for statement wouldn't a while loop be
better here?
{code}
+ const _CharT* const __gptr = __rdbuf->gptr ();
+ const _CharT* const __egptr = __rdbuf->egptr ();
+
+ // maximum number of characters would want to extract
+ _SizeT __navail = __egptr - __gptr;
+ if (__n < __navail)
+ __navail = __n;
+
+ if (__navail) {
+
+ // find the delimeter in the squence if it exists, or
+ // get pointer to end of sequence
+ const _CharT* __pdel = __gptr;
+ for (/**/; __pdel != __egptr; ++__pdel) {
+
+ const _TYPENAME _Traits::int_type __c
+ (_Traits::to_int_type(*__pdel));
+
+ if (_Traits::eq_int_type (__c, _Traits::eof ())) {
+ __err = ios_base::eofbit;
+ break;
+ }
+
+ if (__ctp.is (__ctp.space, *__pdel))
+ break;
+ }
+
+ // __pdel is either pointing to a delimiter or one past
+ // the end of the input stream get area. if it is past
+ // the end, then set it to null so the above loop acts
+ // similar to strpbrk()
+ if (__pdel == __egptr) {
+ __pdel = 0;
+ }
+
+ if (__pdel) { // < __egptr
{code}
What does the comment mean here? That (__pdel < __egptr) holds? Would an
assertion instead be overkill here?
{code}
+ __navail = __pdel - __gptr + 1;
+ __n -= __navail - 1;
+ }
+ else if (__n == __navail) // == __egptr
{code}
And here that __pdel == __egptr? (Same as above.)
{code}
+ __n -= --__navail;
+ else
+ __n -= __navail;
+
+ // store characters excluding the delimiter
+ __str.append (__gptr, __navail - !!__pdel);
+
+ __gcount += __navail;
+
+ // advance gptr() by the number of extracted
+ // characters, including the delimiter
+ __rdbuf->gbump (__navail);
+
+ // we found a delimiter before the end of the get area,
break out
{code}
Can you trim the comment to 78 characters or less? :)
{code}
+ // of outer loop
+ if (__pdel) {
+ break;
+ }
+
+ if (2 > __n && _SizeT (__egptr - __gptr) != __navail) {
+ __err = ios_base::failbit;
+ break;
+ }
+ }
+ else {
+
+ // n data in buffer, trigger underflow()
+ // note that streambuf may be unbuffered
+ const _TYPENAME _Traits::int_type __c
+ = __rdbuf->sgetc ();
+
+ if (_Traits::eq_int_type (__c, _Traits::eof ())) {
+ __err = ios_base::eofbit;
+ break;
+ }
+
+ const _TYPENAME _Traits::char_type __ch
+ = _Traits::to_char_type (__c);
{code}
The equals should go on the line above.
{code}
+
+ if (__ctp.is (__ctp.space, __ch))
+ break;
+
+ __str.push_back (__ch);
+ --__n;
+
+ __rdbuf->sbumpc ();
+
+ // increment gcount only _after_ sbumpc() but _before_
+ // the subsequent call to sgetc() to correctly reflect
+ // the number of extracted characters in the presence
+ // of exceptions thrown from streambuf virtuals
+ ++__gcount;
+
+ // honestly, it seems that we should not append to __str
+ // until after sbumpc(). if we leave it like this, then
+ // __str will have the character in it, but the stream
+ // may also.
{code}
That wouldn't be good. The
[21.string.io.cpp|http://svn.apache.org/repos/asf/stdcxx/trunk/tests/strings/21.string.io.cpp]
test passes so maybe it needs to be tightened up to expose this problem? And
if/when it does expose it, I agree we should deal with it.
{code}
+ }
+ }
+
+#else // if defined (_RWSTD_NO_FRIEND_TEMPLATE)
+
+ for ( ; __n != 0; ) {
+
+ const _TYPENAME _Traits::int_type __c
+ = __rdbuf->sgetc ();
+
if (_Traits::eq_int_type (__c, _Traits::eof ())) {
__err = ios_base::eofbit;
break;
}
- // convert to char_type so that isspace works correctly
- const _TYPENAME _Traits::char_type
- __ch = _Traits::to_char_type (__c);
+ const _TYPENAME _Traits::char_type __ch
+ = _Traits::to_char_type (__c);
{code}
Same as above with the equals.
{code}
if (__ctp.is (__ctp.space, __ch))
break;
__str.push_back (__ch);
+ --__n;
+
+ __rdbuf->sbumpc ();
+
+ // increment gcount only _after_ sbumpc() but _before_
+ // the subsequent call to sgetc() to correctly reflect
+ // the number of extracted characters in the presence
+ // of exceptions thrown from streambuf virtuals
+ ++__gcount;
}
+#endif // if defined (_RWSTD_NO_FRIEND_TEMPLATE)
+
+
__is.width (0);
}
}
_CATCH (...) {
- __is.setstate (__is.badbit | _RW::__rw_rethrow);
+ __is.setstate (ios_base::badbit | _RW::__rw_rethrow);
{code}
I realize we're not consistent in our usage of the bits but I think we should
pick one of the two styles and use it consistently throughout, unless there's a
reason not to (such as if {{ios_base}} is an incomplete type).
{code}
}
if (!__gcount)
@@ -852,7 +965,7 @@
__is.setstate (__err);
return __is;
-}
+}
_EXPORT
Index: streambuf
===================================================================
--- streambuf (revision 618349)
+++ streambuf (working copy)
@@ -415,6 +415,12 @@
getline (basic_istream<_Char2, _Traits2>&,
basic_string<_Char2, _Traits2, _Allocator>&, _Char2);
+ _EXPORT
+ template <class _Char2, class _Traits2, class _Allocator>
+ friend basic_istream<_Char2, _Traits2>&
+ operator>> (basic_istream<_Char2, _Traits2>&,
+ basic_string<_Char2, _Traits2, _Allocator>&);
+
#endif // _RWSTD_NO_FRIEND_TEMPLATE
};
{code}
> std::operator>>(istream, string&) inefficient
> ---------------------------------------------
>
> Key: STDCXX-249
> URL: https://issues.apache.org/jira/browse/STDCXX-249
> Project: C++ Standard Library
> Issue Type: Improvement
> Components: 21. Strings
> Affects Versions: 4.1.2, 4.1.3, 4.1.4, 4.2.0
> Environment: all
> Reporter: Martin Sebor
> Assignee: Travis Vitek
> Priority: Minor
> Fix For: 4.2.1
>
> Attachments: stdcxx-249.patch
>
> Original Estimate: 8h
> Time Spent: 9h
> Remaining Estimate: 0h
>
> The string extractor reads and appends one character at a time. It could be
> made more efficient by extracting and appending all non-whitespace characters
> that are available in the buffer in chunks. See the definition of the
> operator here:
> http://svn.apache.org/viewvc/stdcxx/trunk/include/istream.cc?revision=383265
--
This message is automatically generated by JIRA.
-
You can reply to this email to add a comment to the issue online.