filter/source/xmlfilterdetect/filterdetect.cxx |    7 ++++++-
sal/rtl/ustring.cxx                            |   24 +++++++++++++-----------
2 files changed, 19 insertions(+), 12 deletions(-)
New commits: commit ba08fe174660dc4abdda0aec02dc52b9c7fdb7b5 Author: Michael Stahl <[email protected]> Date: Thu Nov 6 18:32:35 2014 +0100 filter: use more appropriate conversion from ASCII for arbitrary input ... with unknown encoding, mapping non-ASCII chars to something that doesn't cause trouble (in FilterDetect::detect()). Change-Id: Ibf2a2e2fd7c0c00e55042d2ccad173fab7a1b0bd diff --git a/filter/source/xmlfilterdetect/filterdetect.cxx b/filter/source/xmlfilterdetect/filterdetect.cxx index a6f0089..b4d614f 100644 --- a/filter/source/xmlfilterdetect/filterdetect.cxx +++ b/filter/source/xmlfilterdetect/filterdetect.cxx @@ -115,7 +115,12 @@ OUString SAL_CALL FilterDetect::detect( com::sun::star::uno::Sequence< com::sun: } if ( nUniPos == 3 || ( nUniPos == 0 && !bTryUtf16 ) ) // UTF-8 or non-Unicode - resultString = OStringToOUString( read_uInt8s_ToOString( *pInStream, nSize ), RTL_TEXTENCODING_UTF8 ); + { + OString const str(read_uInt8s_ToOString(*pInStream, nSize)); + resultString = OUString(str.getStr(), str.getLength(), + RTL_TEXTENCODING_ASCII_US, + RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT|RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT); + } else if ( nUniPos == 2 || bTryUtf16 ) // UTF-16 resultString = read_uInt16s_ToOUString( *pInStream, nSize ); commit aaccd8560e555d7e3cf062ca3672b77702397a70 Author: Michael Stahl <[email protected]> Date: Thu Nov 6 18:31:24 2014 +0100 sal: it is impossible to have non-ASCII char here ...because rtl_ImplGetFastUTF8UnicodeLen has already checked that. 
Change-Id: I17f2b80f374073934a8f0b1a97099d4dec89ce4e diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx index 1d30dcc..3e4f5c3 100644 --- a/sal/rtl/ustring.cxx +++ b/sal/rtl/ustring.cxx @@ -696,10 +696,7 @@ retry: pBuffer = (*ppThis)->buffer; do { - /* Check ASCII range */ - SAL_WARN_IF( ((unsigned char)*pStr) > 127, "rtl.string", - "rtl_string2UString_status() - UTF8 test encoding is wrong" ); - + assert(((unsigned char)*pStr) <= 127); *pBuffer = *pStr; pBuffer++; pStr++; commit 5936a64c2aa275992ce231183b35711d8da74ace Author: Michael Stahl <[email protected]> Date: Thu Nov 6 18:27:25 2014 +0100 sal: do not ignore conversion flags for RTL_TEXTENCODING_ASCII_US Keep the fast path fast, fall back to the text encoder in case there's a fly in the ointment. Change-Id: I94507856a7f3170f770adb741aa1e282d0d2400c diff --git a/sal/rtl/ustring.cxx b/sal/rtl/ustring.cxx index c7622bd..1d30dcc 100644 --- a/sal/rtl/ustring.cxx +++ b/sal/rtl/ustring.cxx @@ -641,23 +641,28 @@ static void rtl_string2UString_status( rtl_uString** ppThis, return; } pBuffer = (*ppThis)->buffer; + sal_Int32 nLenCopy(nLen); + const sal_Char *pStrCopy(pStr); do { /* Check ASCII range */ - SAL_WARN_IF( ((unsigned char)*pStr) > 127, "rtl.string", - "rtl_string2UString_status() - Found char > 127 and RTL_TEXTENCODING_ASCII_US is specified" ); + if (static_cast<unsigned char>(*pStrCopy) > 127) + { + rtl_uString_release(*ppThis); + goto retry; // cancel loop - try again with the converter + } - *pBuffer = *pStr; + *pBuffer = *pStrCopy; pBuffer++; - pStr++; - nLen--; + pStrCopy++; + nLenCopy--; } - while ( nLen ); + while (nLenCopy); if (pInfo != NULL) { *pInfo = 0; } } - else +retry: { rtl_uString* pTemp; rtl_uString* pTemp2 = NULL; _______________________________________________ Libreoffice-commits mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/libreoffice-commits
