The branch, master has been updated via b3ffcf8 lib/util/charset smb_panic() on incorrect use of strlen_m_ext via 7fe9bb8 lib/util/charset Add tests for strlen_m_ext() and convert_string_talloc() via d4f3a38 lib/util/charset Add wrapper to allow testing of strlen_m_ext() via 46db53b lib/util/charset correct calculation of UTF8 character sizes from 0e46e09 s3-samr: make getgrgid() failure a little more visible in _samr_CreateDomAlias().
http://gitweb.samba.org/?p=samba.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit b3ffcf888ca5c72b403e696d556e5af35a95534f Author: Andrew Bartlett <abart...@samba.org> Date: Thu Mar 31 10:32:52 2011 +1100 lib/util/charset smb_panic() on incorrect use of strlen_m_ext This may save a developer some time in the future. Andrew Bartlett Autobuild-User: Andrew Bartlett <abart...@samba.org> Autobuild-Date: Thu Mar 31 02:40:31 CEST 2011 on sn-devel-104 commit 7fe9bb8c8410ce47c6256e2430905988ca3429a0 Author: Andrew Bartlett <abart...@samba.org> Date: Thu Mar 31 10:28:36 2011 +1100 lib/util/charset Add tests for strlen_m_ext() and convert_string_talloc() This covers a few more cases for convert_string_talloc() and introduces tests for strlen_m_ext() across complex multibyte strings. Andrew Bartlett commit d4f3a380e429b9b8a4960a11995846c2b0f8c85d Author: Andrew Bartlett <abart...@samba.org> Date: Thu Mar 31 10:27:41 2011 +1100 lib/util/charset Add wrapper to allow testing of strlen_m_ext() This allows test routines to force particular character sets, and not rely on the smb.conf. Andrew Bartlett commit 46db53b1596cccdd22cf44dc376e39601f01c29c Author: Andrew Bartlett <abart...@samba.org> Date: Thu Mar 31 10:26:08 2011 +1100 lib/util/charset correct calculation of UTF8 character sizes Characters between 0x800 and 0xFFFF are 3 bytes long. 
Andrew Bartlett ----------------------------------------------------------------------- Summary of changes: lib/util/charset/charset.h | 8 +++ lib/util/charset/tests/convert_string.c | 89 ++++++++++++++++++++++++++++++- lib/util/charset/util_str.c | 36 +++++++++++- 3 files changed, 128 insertions(+), 5 deletions(-) Changeset truncated at 500 lines: diff --git a/lib/util/charset/charset.h b/lib/util/charset/charset.h index 7374d6e..82396e1 100644 --- a/lib/util/charset/charset.h +++ b/lib/util/charset/charset.h @@ -111,6 +111,14 @@ struct smb_iconv_handle; #define strupper(s) strupper_m(s) char *strchr_m(const char *s, char c); +/** + * Calculate the number of units (8 or 16-bit, depending on the + * destination charset), that would be needed to convert the input + * string which is expected to be in in src_charset encoding to the + * destination charset (which should be a unicode charset). + */ +size_t strlen_m_ext_handle(struct smb_iconv_handle *ic, + const char *s, charset_t src_charset, charset_t dst_charset); size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset); size_t strlen_m_ext_term(const char *s, charset_t src_charset, charset_t dst_charset); diff --git a/lib/util/charset/tests/convert_string.c b/lib/util/charset/tests/convert_string.c index d57491c..5c7bdcb 100644 --- a/lib/util/charset/tests/convert_string.c +++ b/lib/util/charset/tests/convert_string.c @@ -180,6 +180,25 @@ static bool test_gd_iso8859_cp850(struct torture_context *tctx) (void *)&gd_output.data, &gd_output.length), "conversion from (dos charset) ISO8859-1 to UTF16LE"); torture_assert_data_blob_equal(tctx, gd_output, gd_utf16le, "conversion from (dos charset) ISO8859-1 to UTF16LE"); + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)gd_iso8859_1.data, + CH_DOS, CH_UTF16LE), + gd_output.length / 2, + "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again"); + + 
torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DOS, CH_UTF8, + gd_iso8859_1.data, gd_iso8859_1.length, + (void *)&gd_output.data, &gd_output.length), + "conversion from (dos charset) ISO8859-1 to UTF8"); + torture_assert_data_blob_equal(tctx, gd_output, gd_utf8, "conversion from (dos charset) ISO8859-1 to UTF8"); + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)gd_iso8859_1.data, + CH_DOS, CH_UTF8), + gd_output.length, + "checking strlen_m_ext of conversion from (dos charset) ISO8859-1 to UTF8"); return true; } @@ -292,6 +311,41 @@ static bool test_plato_cp850_utf8(struct torture_context *tctx) torture_assert(tctx, iconv_handle, "creating iconv handle"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UTF16LE, + plato_utf8.data, plato_utf8.length, + (void *)&plato_output.data, &plato_output.length), + "conversion of UTF8 ancient greek to UTF16 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf16le, "conversion from UTF8 to UTF16LE incorrect"); + + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)plato_utf8.data, + CH_UTF8, CH_UTF16LE), + plato_output.length / 2, + "checking strlen_m_ext of conversion of UTF8 to UTF16LE"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_output.data, plato_output.length, + (void *)&plato_output2.data, &plato_output2.length), + "conversion of UTF8 ancient greek to UTF16 failed"); + torture_assert_data_blob_equal(tctx, plato_output2, plato_utf8, "conversion from UTF8 to UTF16LE incorrect"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF8, CH_UTF8, + plato_utf8.data, plato_utf8.length, + (void *)&plato_output.data, &plato_output.length), + "conversion of UTF8 to UTF8"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, + "conversion of UTF8 to UTF8"); + 
torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)plato_utf8.data, + CH_UTF8, CH_UTF8), + plato_output.length, + "checking strlen_m_ext of conversion of UTF8 to UTF8"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_UTF8, CH_DOS, plato_utf8.data, plato_utf8.length, (void *)&plato_output.data, &plato_output.length) == false, @@ -322,8 +376,13 @@ static bool test_plato_cp850_utf8(struct torture_context *tctx) plato_utf16le.data, plato_utf16le.length, (void *)&plato_output.data, &plato_output.length), "conversion of UTF16 ancient greek to unix charset UTF8 failed"); - torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to (unix charset) CP850 incorrect"); - + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to (unix charset) UTF8 incorrect"); + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_UTF16LE, CH_UTF8, + plato_utf16le.data, plato_utf16le.length, + (void *)&plato_output.data, &plato_output.length), + "conversion of UTF16 ancient greek to UTF8 failed"); + torture_assert_data_blob_equal(tctx, plato_output, plato_utf8, "conversion from UTF16LE to UTF8 incorrect"); torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, CH_UTF16LE, CH_DISPLAY, plato_utf16le.data, plato_utf16le.length, @@ -338,6 +397,26 @@ static bool test_plato_cp850_utf8(struct torture_context *tctx) "round trip conversion of UTF16 ancient greek to display charset UTF8 and back again failed"); torture_assert_data_blob_equal(tctx, plato_output2, plato_utf16le, "round trip conversion of UTF16 ancient greek to display charset UTF8 and back again failed"); + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)plato_output.data, + CH_DISPLAY, CH_UTF16LE), + plato_output2.length / 2, + "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to display charset UTF8 and back 
again"); + + torture_assert(tctx, convert_string_talloc_handle(tctx, iconv_handle, + CH_DISPLAY, CH_UTF8, + plato_output.data, plato_output.length, + (void *)&plato_output2.data, &plato_output2.length), + "conversion of display charset UTF8 to UTF8"); + torture_assert_data_blob_equal(tctx, plato_output2, plato_utf8, + "conversion of display charset UTF8 to UTF8"); + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)plato_output.data, + CH_DISPLAY, CH_UTF8), + plato_output2.length, + "checking strlen_m_ext of conversion of display charset UTF8 to UTF8"); return true; } @@ -402,6 +481,12 @@ static bool test_plato_latin_cp850_utf8(struct torture_context *tctx) "round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again failed"); torture_assert_data_blob_equal(tctx, plato_latin_output2, plato_latin_utf16le, "round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again failed"); + torture_assert_int_equal(tctx, + strlen_m_ext_handle(iconv_handle, + (const char *)plato_latin_output.data, + CH_DISPLAY, CH_UTF16LE), + plato_latin_output2.length / 2, + "checking strlen_m_ext of round trip conversion of UTF16 latin charset greek to display charset UTF8 and back again"); return true; } diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c index 30961d0..f36c91e 100644 --- a/lib/util/charset/util_str.c +++ b/lib/util/charset/util_str.c @@ -140,11 +140,29 @@ _PUBLIC_ bool strcsequal(const char *s1,const char *s2) * string which is expected to be in in src_charset encoding to the * destination charset (which should be a unicode charset). 
*/ -_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset) +_PUBLIC_ size_t strlen_m_ext_handle(struct smb_iconv_handle *ic, + const char *s, charset_t src_charset, charset_t dst_charset) { size_t count = 0; - struct smb_iconv_handle *ic = get_iconv_handle(); +#ifdef DEVELOPER + switch (dst_charset) { + case CH_DOS: + case CH_UNIX: + case CH_DISPLAY: + smb_panic("cannot call strlen_m_ext() with a variable dest charset (must be UTF16* or UTF8)"); + default: + break; + } + + switch (src_charset) { + case CH_UTF16LE: + case CH_UTF16BE: + smb_panic("cannot call strlen_m_ext() with a UTF16 src charset (must be DOS, UNIX, DISPLAY or UTF8)"); + default: + break; + } +#endif if (!s) { return 0; } @@ -184,7 +202,7 @@ _PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst count += 1; } else if (c < 0x800) { count += 2; - } else if (c < 0x1000) { + } else if (c < 0x10000) { count += 3; } else { count += 4; @@ -203,6 +221,18 @@ _PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst return count; } +/** + * Calculate the number of units (8 or 16-bit, depending on the + * destination charset), that would be needed to convert the input + * string which is expected to be in in src_charset encoding to the + * destination charset (which should be a unicode charset). + */ +_PUBLIC_ size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset) +{ + struct smb_iconv_handle *ic = get_iconv_handle(); + return strlen_m_ext_handle(ic, s, src_charset, dst_charset); +} + _PUBLIC_ size_t strlen_m_ext_term(const char *s, const charset_t src_charset, const charset_t dst_charset) { -- Samba Shared Repository