This is an automated email from the ASF dual-hosted git repository.
chaokunyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fury.git
The following commit(s) were added to refs/heads/main by this push:
new caf0e48b feat(C++): The use of SIMD accelerated to implement and
optimize utf16 utf8 (#1732)
caf0e48b is described below
commit caf0e48bafb36ab5e1f617e9c02b98be321a4631
Author: PAN <[email protected]>
AuthorDate: Mon Jul 15 12:55:01 2024 +0800
feat(C++): The use of SIMD accelerated to implement and optimize utf16 utf8
(#1732)
<!--
**Thanks for contributing to Fury.**
**If this is your first time opening a PR on fury, you can refer to
[CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).**
Contribution Checklist
- The **Apache Fury (incubating)** community has restrictions on the
naming of pr titles. You can also find instructions in
[CONTRIBUTING.md](https://github.com/apache/fury/blob/main/CONTRIBUTING.md).
- Fury has a strong focus on performance. If the PR you submit will have
an impact on performance, please benchmark it first and provide the
benchmark result here.
-->
## What does this PR do?
Use SIMD acceleration to implement and optimize UTF-16 to UTF-8 conversion.
Adapted for x86, ARM, and RISC-V.
Added 8 tests to verify the function:
<img width="261" alt="fury_cpp_simd_utf_1"
src="https://github.com/user-attachments/assets/029fe6ea-b4be-4e26-85d2-3c5e02e64899">
Efficiency has also improved:
<img width="401" alt="fury_cpp_simd_utf_2"
src="https://github.com/user-attachments/assets/6e86c125-f5a2-46df-b3bd-3d12496e9238">
done.
## Related issues
Closes #1546
<!--
Is there any related issue? Please attach here.
- #xxxx0
- #xxxx1
- #xxxx2
-->
## Does this PR introduce any user-facing change?
<!--
If any user-facing interface changes, please [open an
issue](https://github.com/apache/fury/issues/new/choose) describing the
need to do so and update the document if necessary.
-->
- [x] Does this PR introduce any public API change?
- [ ] Does this PR introduce any binary protocol compatibility change?
---
cpp/fury/util/string_util.cc | 304 +++++++++++++++++++++++++++++++++++++-
cpp/fury/util/string_util.h | 2 +
cpp/fury/util/string_util_test.cc | 200 +++++++++++++++++++++++++
3 files changed, 503 insertions(+), 3 deletions(-)
diff --git a/cpp/fury/util/string_util.cc b/cpp/fury/util/string_util.cc
index 1f57b76f..5413c72a 100644
--- a/cpp/fury/util/string_util.cc
+++ b/cpp/fury/util/string_util.cc
@@ -27,8 +27,37 @@
#include <riscv_vector.h>
#endif
+#include <chrono>
+#include <string>
+
namespace fury {
+// Swap bytes to convert from big endian to little endian
+inline uint16_t swapBytes(uint16_t value) {
+ return (value >> 8) | (value << 8);
+}
+
+inline void utf16ToUtf8(uint16_t code_unit, char *&output) {
+ if (code_unit < 0x80) {
+ *output++ = static_cast<char>(code_unit);
+ } else if (code_unit < 0x800) {
+ *output++ = static_cast<char>(0xC0 | (code_unit >> 6));
+ *output++ = static_cast<char>(0x80 | (code_unit & 0x3F));
+ } else {
+ *output++ = static_cast<char>(0xE0 | (code_unit >> 12));
+ *output++ = static_cast<char>(0x80 | ((code_unit >> 6) & 0x3F));
+ *output++ = static_cast<char>(0x80 | (code_unit & 0x3F));
+ }
+}
+
+inline void utf16SurrogatePairToUtf8(uint16_t high, uint16_t low, char *&utf8)
{
+ uint32_t code_point = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
+ *utf8++ = static_cast<char>((code_point >> 18) | 0xF0);
+ *utf8++ = static_cast<char>(((code_point >> 12) & 0x3F) | 0x80);
+ *utf8++ = static_cast<char>(((code_point >> 6) & 0x3F) | 0x80);
+ *utf8++ = static_cast<char>((code_point & 0x3F) | 0x80);
+}
+
#if defined(__x86_64__) || defined(_M_X64)
bool isLatin(const std::string &str) {
@@ -55,6 +84,90 @@ bool isLatin(const std::string &str) {
return true;
}
+std::string utf16ToUtf8(const std::u16string &utf16, bool is_little_endian) {
+ std::string utf8;
+ utf8.reserve(utf16.size() *
+ 3); // Reserve enough space to avoid frequent reallocations
+
+ const __m256i limit1 = _mm256_set1_epi16(0x80);
+ const __m256i limit2 = _mm256_set1_epi16(0x800);
+ const __m256i surrogate_high_start = _mm256_set1_epi16(0xD800);
+ const __m256i surrogate_high_end = _mm256_set1_epi16(0xDBFF);
+ const __m256i surrogate_low_start = _mm256_set1_epi16(0xDC00);
+ const __m256i surrogate_low_end = _mm256_set1_epi16(0xDFFF);
+
+ char buffer[64]; // Buffer to hold temporary UTF-8 bytes
+ char *output = buffer;
+
+ size_t i = 0;
+ size_t n = utf16.size();
+
+ while (i + 16 <= n) {
+ __m256i in =
+ _mm256_loadu_si256(reinterpret_cast<const __m256i *>(utf16.data() +
i));
+
+ if (!is_little_endian) {
+ in = _mm256_or_si256(
+ _mm256_slli_epi16(in, 8),
+ _mm256_srli_epi16(in, 8)); // Swap bytes for big-endian
+ }
+
+ __m256i mask1 = _mm256_cmpgt_epi16(in, limit1);
+ __m256i mask2 = _mm256_cmpgt_epi16(in, limit2);
+ __m256i high_surrogate_mask =
+ _mm256_and_si256(_mm256_cmpgt_epi16(in, surrogate_high_start),
+ _mm256_cmpgt_epi16(in, surrogate_high_end));
+ __m256i low_surrogate_mask =
+ _mm256_and_si256(_mm256_cmpgt_epi16(in, surrogate_low_start),
+ _mm256_cmpgt_epi16(in, surrogate_low_end));
+
+ if (_mm256_testz_si256(mask1, mask1)) {
+ // All values < 0x80, 1 byte per character
+ for (int j = 0; j < 16; ++j) {
+ *output++ = static_cast<char>(utf16[i + j]);
+ }
+ } else if (_mm256_testz_si256(mask2, mask2)) {
+ // All values < 0x800, 2 bytes per character
+ for (int j = 0; j < 16; ++j) {
+ utf16ToUtf8(utf16[i + j], output);
+ }
+ } else {
+ // Mix of 1, 2, and 3 byte characters
+ for (int j = 0; j < 16; ++j) {
+ if (_mm256_testz_si256(high_surrogate_mask, high_surrogate_mask) &&
+ j + 1 < 16 &&
+ !_mm256_testz_si256(low_surrogate_mask, low_surrogate_mask)) {
+ // Surrogate pair
+ utf16SurrogatePairToUtf8(utf16[i + j], utf16[i + j + 1], output);
+ ++j;
+ } else {
+ utf16ToUtf8(utf16[i + j], output);
+ }
+ }
+ }
+
+ utf8.append(buffer, output - buffer);
+ output = buffer; // Reset output buffer pointer
+ i += 16;
+ }
+
+ // Handle remaining characters
+ while (i < n) {
+ if (i + 1 < n && utf16[i] >= 0xD800 && utf16[i] <= 0xDBFF &&
+ utf16[i + 1] >= 0xDC00 && utf16[i + 1] <= 0xDFFF) {
+ // Surrogate pair
+ utf16SurrogatePairToUtf8(utf16[i], utf16[i + 1], output);
+ ++i;
+ } else {
+ utf16ToUtf8(utf16[i], output);
+ }
+ ++i;
+ }
+ utf8.append(buffer, output - buffer);
+
+ return utf8;
+}
+
#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
bool isLatin(const std::string &str) {
@@ -80,6 +193,77 @@ bool isLatin(const std::string &str) {
return true;
}
+std::string utf16ToUtf8(const std::u16string &utf16, bool is_little_endian) {
+ std::string utf8;
+ utf8.reserve(utf16.size() * 3);
+
+ uint16x8_t limit1 = vdupq_n_u16(0x80);
+ uint16x8_t limit2 = vdupq_n_u16(0x800);
+ uint16x8_t surrogate_high_start = vdupq_n_u16(0xD800);
+ uint16x8_t surrogate_high_end = vdupq_n_u16(0xDBFF);
+ uint16x8_t surrogate_low_start = vdupq_n_u16(0xDC00);
+ uint16x8_t surrogate_low_end = vdupq_n_u16(0xDFFF);
+
+ char buffer[64];
+ char *output = buffer;
+ size_t i = 0;
+ size_t n = utf16.size();
+
+ while (i + 8 <= n) {
+ uint16x8_t in =
+ vld1q_u16(reinterpret_cast<const uint16_t *>(utf16.data() + i));
+ if (!is_little_endian) {
+ in = vorrq_u16(vshlq_n_u16(in, 8),
+ vshrq_n_u16(in, 8)); // Swap bytes for big-endian
+ }
+
+ uint16x8_t mask1 = vcgtq_u16(in, limit1);
+ uint16x8_t mask2 = vcgtq_u16(in, limit2);
+ uint16x8_t high_surrogate_mask = vandq_u16(
+ vcgtq_u16(in, surrogate_high_start), vcltq_u16(in,
surrogate_high_end));
+ uint16x8_t low_surrogate_mask = vandq_u16(
+ vcgtq_u16(in, surrogate_low_start), vcltq_u16(in, surrogate_low_end));
+
+ if (vmaxvq_u16(mask1) == 0) {
+ for (int j = 0; j < 8; ++j) {
+ *output++ = static_cast<char>(utf16[i + j]);
+ }
+ } else if (vmaxvq_u16(mask2) == 0) {
+ for (int j = 0; j < 8; ++j) {
+ utf16ToUtf8(utf16[i + j], output);
+ }
+ } else {
+ for (int j = 0; j < 8; ++j) {
+ if (vmaxvq_u16(high_surrogate_mask) == 0 && j + 1 < 8 &&
+ vmaxvq_u16(low_surrogate_mask) != 0) {
+ utf16SurrogatePairToUtf8(utf16[i + j], utf16[i + j + 1], output);
+ ++j;
+ } else {
+ utf16ToUtf8(utf16[i + j], output);
+ }
+ }
+ }
+
+ utf8.append(buffer, output - buffer);
+ output = buffer;
+ i += 8;
+ }
+
+ while (i < n) {
+ if (i + 1 < n && utf16[i] >= 0xD800 && utf16[i] <= 0xDBFF &&
+ utf16[i + 1] >= 0xDC00 && utf16[i + 1] <= 0xDFFF) {
+ utf16SurrogatePairToUtf8(utf16[i], utf16[i + 1], output);
+ ++i;
+ } else {
+ utf16ToUtf8(utf16[i], output);
+ }
+ ++i;
+ }
+ utf8.append(buffer, output - buffer);
+
+ return utf8;
+}
+
#elif defined(__riscv) && __riscv_vector
bool isLatin(const std::string &str) {
@@ -87,11 +271,11 @@ bool isLatin(const std::string &str) {
size_t len = str.size();
size_t i = 0;
+ auto latin_mask = vmv_v_x_u8m1(0x80, 16);
for (; i + 16 <= len; i += 16) {
auto chars = vle8_v_u8m1(reinterpret_cast<const uint8_t *>(data + i), 16);
- auto mask = vmv_v_x_u8m1(0x80, 16);
- auto result = vand_vv_u8m1(chars, mask, 16);
- if (vmax_v_u8m1(result, 16) != 0) {
+ auto result = vand_vv_u8m1(chars, latin_mask, 16);
+ if (vfirst_m_b8(vmsne_vx_u8m1_b8(result, 0, 16))) {
return false;
}
}
@@ -105,6 +289,82 @@ bool isLatin(const std::string &str) {
return true;
}
+std::string utf16ToUtf8(const std::u16string &utf16, bool is_little_endian) {
+ std::string utf8;
+ utf8.reserve(utf16.size() * 3);
+
+ auto limit1 = vmv_v_x_u16m1(0x80, 8);
+ auto limit2 = vmv_v_x_u16m1(0x800, 8);
+ auto surrogate_high_start = vmv_v_x_u16m1(0xD800, 8);
+ auto surrogate_high_end = vmv_v_x_u16m1(0xDBFF, 8);
+ auto surrogate_low_start = vmv_v_x_u16m1(0xDC00, 8);
+ auto surrogate_low_end = vmv_v_x_u16m1(0xDFFF, 8);
+
+ char buffer[48];
+ char *output = buffer;
+ size_t i = 0;
+ size_t n = utf16.size();
+
+ while (i + 8 <= n) {
+ auto in =
+ vle16_v_u16m1(reinterpret_cast<const uint16_t *>(utf16.data() + i), 8);
+ if (!is_little_endian) {
+ in = vor_vv_u16m1(vsrl_vx_u16m1(in, 8, 8), vsll_vx_u16m1(in, 8, 8), 8);
+ }
+
+ auto mask1 = vmsgt_vx_u16m1(in, 0x80, 8);
+ auto mask2 = vmsgt_vx_u16m1(in, 0x800, 8);
+ auto high_surrogate_mask = vmand_vv_u16m1(vmsgt_vx_u16m1(in, 0xD800, 8),
+ vmslt_vx_u16m1(in, 0xDBFF, 8),
8);
+ auto low_surrogate_mask = vmand_vv_u16m1(vmsgt_vx_u16m1(in, 0xDC00, 8),
+ vmslt_vx_u16m1(in, 0xDFFF, 8), 8);
+
+ if (vmslt_vx_u16m1(mask1, 0, 8)) {
+ for (int j = 0; j < 8; ++j) {
+ *output++ = static_cast<char>(vget_vx_u16m1(in, j));
+ }
+ } else if (vmslt_vx_u16m1(mask2, 0, 8)) {
+ for (int j = 0; j < 8; ++j) {
+ utf16ToUtf8(vget_vx_u16m1(in, j), output);
+ }
+ } else {
+ for (int j = 0; j < 8; ++j) {
+ if (vfirst_m_b8(
+ vmand_vv_b8(high_surrogate_mask,
+ vmsne_vx_u8m1_b8(vmv_v_x_u8m1(0, 8), 0, 8))) &&
+ j + 1 < 8 &&
+ vfirst_m_b8(
+ vmand_vv_b8(low_surrogate_mask,
+ vmsne_vx_u8m1_b8(vmv_v_x_u8m1(0, 8), 0, 8)))) {
+ utf16SurrogatePairToUtf8(vget_vx_u16m1(in, j),
+ vget_vx_u16m1(in, j + 1), output);
+ ++j;
+ } else {
+ utf16ToUtf8(vget_vx_u16m1(in, j), output);
+ }
+ }
+ }
+
+ utf8.append(buffer, output - buffer);
+ output = buffer;
+ i += 8;
+ }
+
+ while (i < n) {
+ if (i + 1 < n && utf16[i] >= 0xD800 && utf16[i] <= 0xDBFF &&
+ utf16[i + 1] >= 0xDC00 && utf16[i + 1] <= 0xDFFF) {
+ utf16SurrogatePairToUtf8(utf16[i], utf16[i + 1], output);
+ ++i;
+ } else {
+ utf16ToUtf8(utf16[i], output);
+ }
+ ++i;
+ }
+ utf8.append(buffer, output - buffer);
+
+ return utf8;
+}
+
#else
bool isLatin(const std::string &str) {
@@ -116,6 +376,44 @@ bool isLatin(const std::string &str) {
return true;
}
+// Fallback implementation without SIMD acceleration
+std::string utf16ToUtf8(const std::u16string &utf16, bool is_little_endian) {
+ std::string utf8;
+ utf8.reserve(utf16.size() *
+ 3); // Reserve enough space to avoid frequent reallocations
+
+ size_t i = 0;
+ size_t n = utf16.size();
+ char buffer[4]; // Buffer to hold temporary UTF-8 bytes
+ char *output = buffer;
+
+ while (i < n) {
+ uint16_t code_unit = utf16[i];
+ if (!is_little_endian) {
+ code_unit = swapBytes(code_unit);
+ }
+ if (i + 1 < n && code_unit >= 0xD800 && code_unit <= 0xDBFF &&
+ utf16[i + 1] >= 0xDC00 && utf16[i + 1] <= 0xDFFF) {
+ // Surrogate pair
+ uint16_t high = code_unit;
+ uint16_t low = utf16[i + 1];
+ if (!is_little_endian) {
+ low = swapBytes(low);
+ }
+ utf16SurrogatePairToUtf8(high, low, output);
+ utf8.append(buffer, output - buffer);
+ output = buffer;
+ ++i;
+ } else {
+ utf16ToUtf8(code_unit, output);
+ utf8.append(buffer, output - buffer);
+ output = buffer;
+ }
+ ++i;
+ }
+ return utf8;
+}
+
#endif
} // namespace fury
diff --git a/cpp/fury/util/string_util.h b/cpp/fury/util/string_util.h
index 0824d1a2..9cb4cc7e 100644
--- a/cpp/fury/util/string_util.h
+++ b/cpp/fury/util/string_util.h
@@ -25,4 +25,6 @@ namespace fury {
bool isLatin(const std::string &str);
+std::string utf16ToUtf8(const std::u16string &utf16, bool is_little_endian);
+
} // namespace fury
diff --git a/cpp/fury/util/string_util_test.cc
b/cpp/fury/util/string_util_test.cc
index 045454db..9b2213b9 100644
--- a/cpp/fury/util/string_util_test.cc
+++ b/cpp/fury/util/string_util_test.cc
@@ -18,7 +18,9 @@
*/
#include <chrono>
+#include <codecvt>
#include <iostream>
+#include <locale>
#include <random>
#include "fury/util/logging.h"
@@ -98,6 +100,204 @@ TEST(StringUtilTest, TestIsLatinLogic) {
EXPECT_FALSE(isLatin("Javaone Keynote\u1234"));
}
+// Generate random UTF-16 string ensuring valid surrogate pairs
+std::u16string generateRandomUTF16String(size_t length) {
+ std::u16string str;
+ std::mt19937 generator(std::random_device{}());
+ std::uniform_int_distribution<uint32_t> distribution(0, 0x10FFFF);
+
+ while (str.size() < length) {
+ uint32_t code_point = distribution(generator);
+
+ if (code_point <= 0xD7FF ||
+ (code_point >= 0xE000 && code_point <= 0xFFFF)) {
+ str.push_back(static_cast<char16_t>(code_point));
+ } else if (code_point >= 0x10000 && code_point <= 0x10FFFF) {
+ code_point -= 0x10000;
+ str.push_back(static_cast<char16_t>((code_point >> 10) + 0xD800));
+ str.push_back(static_cast<char16_t>((code_point & 0x3FF) + 0xDC00));
+ }
+ }
+
+ return str;
+}
+
+// Basic implementation
+
+// Swap bytes to convert from big endian to little endian
+inline uint16_t swapBytes(uint16_t value) {
+ return (value >> 8) | (value << 8);
+}
+
+inline void utf16ToUtf8(uint16_t code_unit, char *&output) {
+ if (code_unit < 0x80) {
+ *output++ = static_cast<char>(code_unit);
+ } else if (code_unit < 0x800) {
+ *output++ = static_cast<char>(0xC0 | (code_unit >> 6));
+ *output++ = static_cast<char>(0x80 | (code_unit & 0x3F));
+ } else {
+ *output++ = static_cast<char>(0xE0 | (code_unit >> 12));
+ *output++ = static_cast<char>(0x80 | ((code_unit >> 6) & 0x3F));
+ *output++ = static_cast<char>(0x80 | (code_unit & 0x3F));
+ }
+}
+
+inline void utf16SurrogatePairToUtf8(uint16_t high, uint16_t low, char *&utf8)
{
+ uint32_t code_point = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
+ *utf8++ = static_cast<char>((code_point >> 18) | 0xF0);
+ *utf8++ = static_cast<char>(((code_point >> 12) & 0x3F) | 0x80);
+ *utf8++ = static_cast<char>(((code_point >> 6) & 0x3F) | 0x80);
+ *utf8++ = static_cast<char>((code_point & 0x3F) | 0x80);
+}
+
+std::string utf16ToUtf8BaseLine(const std::u16string &utf16,
+ bool is_little_endian) {
+ std::string utf8;
+ utf8.reserve(utf16.size() *
+ 3); // Reserve enough space to avoid frequent reallocations
+
+ size_t i = 0;
+ size_t n = utf16.size();
+ char buffer[4]; // Buffer to hold temporary UTF-8 bytes
+ char *output = buffer;
+
+ while (i < n) {
+ uint16_t code_unit = utf16[i];
+ if (!is_little_endian) {
+ code_unit = swapBytes(code_unit);
+ }
+ if (i + 1 < n && code_unit >= 0xD800 && code_unit <= 0xDBFF &&
+ utf16[i + 1] >= 0xDC00 && utf16[i + 1] <= 0xDFFF) {
+ // Surrogate pair
+ uint16_t high = code_unit;
+ uint16_t low = utf16[i + 1];
+ if (!is_little_endian) {
+ low = swapBytes(low);
+ }
+ utf16SurrogatePairToUtf8(high, low, output);
+ utf8.append(buffer, output - buffer);
+ output = buffer;
+ ++i;
+ } else {
+ utf16ToUtf8(code_unit, output);
+ utf8.append(buffer, output - buffer);
+ output = buffer;
+ }
+ ++i;
+ }
+ return utf8;
+}
+
+// Testing Basic Logic
+TEST(UTF16ToUTF8Test, BasicConversion) {
+ std::u16string utf16 = u"Hello, 世界!";
+ std::string utf8 = fury::utf16ToUtf8(utf16, true);
+ ASSERT_EQ(utf8, u8"Hello, 世界!");
+}
+
+// Testing Empty String
+TEST(UTF16ToUTF8Test, EmptyString) {
+ std::u16string utf16 = u"";
+ std::string utf8 = fury::utf16ToUtf8(utf16, true);
+ ASSERT_EQ(utf8, "");
+}
+
+// Testing emoji
+TEST(UTF16ToUTF8Test, SurrogatePairs) {
+ std::u16string utf16 = {0xD83D, 0xDE00}; // 😀 emoji
+ std::string utf8 = fury::utf16ToUtf8(utf16, true);
+ ASSERT_EQ(utf8, "\xF0\x9F\x98\x80");
+}
+
+// Testing Boundary
+TEST(UTF16ToUTF8Test, BoundaryValues) {
+ std::u16string utf16 = {0x0000, 0xFFFF};
+ std::string utf8 = fury::utf16ToUtf8(utf16, true);
+ std::string expected_utf8 = std::string("\x00", 1) + "\xEF\xBF\xBF";
+ ASSERT_EQ(utf8, expected_utf8);
+}
+
+// Testing Special Characters
+TEST(UTF16ToUTF8Test, SpecialCharacters) {
+ std::u16string utf16 = u" \n\t";
+ std::string utf8 = fury::utf16ToUtf8(utf16, true);
+ ASSERT_EQ(utf8, " \n\t");
+}
+
+// Testing LittleEndian
+TEST(UTF16ToUTF8Test, LittleEndian) {
+ std::u16string utf16 = {0x61, 0x62}; // "ab"
+ std::string utf8 = fury::utf16ToUtf8(utf16, true);
+ ASSERT_EQ(utf8, "ab");
+}
+
+// Testing BigEndian
+TEST(UTF16ToUTF8Test, BigEndian) {
+ std::u16string utf16 = {0xFFFE, 0xFFFE};
+ std::string utf8 = fury::utf16ToUtf8(utf16, false);
+ ASSERT_EQ(utf8, "\xEF\xBF\xBE\xEF\xBF\xBE");
+}
+
+// Testing Performance
+TEST(UTF16ToUTF8Test, PerformanceTest) {
+ const size_t num_tests = 1000;
+ const size_t string_length = 1000;
+ // Default little_endian
+ bool is_little_endian = true;
+
+ // Random UTF-16
+ std::vector<std::u16string> test_strings;
+ for (size_t i = 0; i < num_tests; ++i) {
+ test_strings.push_back(generateRandomUTF16String(string_length));
+ }
+
+ // Lib
+ try {
+ auto start_time = std::chrono::high_resolution_clock::now();
+ for (const auto &str : test_strings) {
+ std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
convert;
+ std::string utf8 = convert.to_bytes(str);
+ }
+ auto end_time = std::chrono::high_resolution_clock::now();
+ auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
+ end_time - start_time)
+ .count();
+ FURY_LOG(INFO) << "Standard library Running Time: " << duration << " ns";
+ } catch (const std::exception &e) {
+ FURY_LOG(FATAL) << "Caught exception: " << e.what();
+ }
+
+ // BaseLine
+ try {
+ auto start_time = std::chrono::high_resolution_clock::now();
+ for (const auto &str : test_strings) {
+ std::string utf8 = utf16ToUtf8BaseLine(str, is_little_endian);
+ }
+ auto end_time = std::chrono::high_resolution_clock::now();
+ auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
+ end_time - start_time)
+ .count();
+ FURY_LOG(INFO) << "Baseline Running Time: " << duration << " ns";
+ } catch (const std::exception &e) {
+ FURY_LOG(FATAL) << "Caught exception: " << e.what();
+ }
+
+ // SIMD
+ try {
+ auto start_time = std::chrono::high_resolution_clock::now();
+ for (const auto &str : test_strings) {
+ std::string utf8 = fury::utf16ToUtf8(str, is_little_endian);
+ }
+ auto end_time = std::chrono::high_resolution_clock::now();
+ auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(
+ end_time - start_time)
+ .count();
+ FURY_LOG(INFO) << "SIMD Running Time: " << duration << " ns";
+ } catch (const std::exception &e) {
+ FURY_LOG(FATAL) << "Caught exception: " << e.what();
+ }
+}
+
} // namespace fury
int main(int argc, char **argv) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]