Author: dchapyshev
Date: Sat Aug 27 20:18:18 2016
New Revision: 72476

URL: http://svn.reactos.org/svn/reactos?rev=72476&view=rev
Log:
[RTL]
- Implement the IS_TEXT_UNICODE_REVERSE_ASCII16 and IS_TEXT_UNICODE_REVERSE_STATISTICS flags
- Fix IS_TEXT_UNICODE_STATISTICS

* This fixes 2 tests for the RtlIsTextUnicode function. All rtlstr tests in winetest now pass.

Modified:
    trunk/reactos/sdk/lib/rtl/unicode.c

Modified: trunk/reactos/sdk/lib/rtl/unicode.c
URL: http://svn.reactos.org/svn/reactos/trunk/reactos/sdk/lib/rtl/unicode.c?rev=72476&r1=72475&r2=72476&view=diff
==============================================================================
--- trunk/reactos/sdk/lib/rtl/unicode.c [iso-8859-1] (original)
+++ trunk/reactos/sdk/lib/rtl/unicode.c [iso-8859-1] Sat Aug 27 20:18:18 2016
@@ -1245,6 +1245,12 @@
     const WCHAR *s = buf;
     int i;
     unsigned int flags = MAXULONG, out_flags = 0;
+    UCHAR last_lo_byte = 0;
+    UCHAR last_hi_byte = 0;
+    ULONG hi_byte_diff = 0;
+    ULONG lo_byte_diff = 0;
+    ULONG weight = 3;
+    ULONG lead_byte = 0;
 
     if (len < sizeof(WCHAR))
     {
@@ -1279,19 +1285,47 @@
     if (*s == 0xFEFF) out_flags |= IS_TEXT_UNICODE_SIGNATURE;
     if (*s == 0xFFFE) out_flags |= IS_TEXT_UNICODE_REVERSE_SIGNATURE;
 
+    for (i = 0; i < len; i++)
+    {
+        UCHAR lo_byte = LOBYTE(s[i]);
+        UCHAR hi_byte = HIBYTE(s[i]);
+
+        lo_byte_diff += max(lo_byte, last_lo_byte) - min(lo_byte, last_lo_byte);
+        hi_byte_diff += max(hi_byte, last_hi_byte) - min(hi_byte, last_hi_byte);
+
+        last_lo_byte = lo_byte;
+        last_hi_byte = hi_byte;
+    }
+
+    if (NlsMbCodePageTag)
+    {
+        for (i = 0; i < len; i++)
+        {
+            if (NlsLeadByteInfo[s[i]])
+            {
+                ++lead_byte;
+                ++i;
+            }
+        }
+
+        if (lead_byte)
+        {
+            weight = (len / 2) - 1;
+
+            if (lead_byte < (weight / 3))
+                weight = 3;
+            else if (lead_byte < ((weight * 2) / 3))
+                weight = 2;
+            else
+                weight = 1;
+        }
+    }
+
     /* apply some statistical analysis */
-    if (flags & IS_TEXT_UNICODE_STATISTICS)
-    {
-        int stats = 0;
-
-        /* FIXME: checks only for ASCII characters in the unicode stream */
-        for (i = 0; i < len; i++)
-        {
-            if (s[i] <= 255) stats++;
-        }
-
-        if (stats > len / 2)
-            out_flags |= IS_TEXT_UNICODE_STATISTICS;
+    if ((flags & IS_TEXT_UNICODE_STATISTICS) &&
+        ((weight * hi_byte_diff) < lo_byte_diff))
+    {
+        out_flags |= IS_TEXT_UNICODE_STATISTICS;
     }
 
     /* Check for unicode NULL chars */
@@ -1328,6 +1362,16 @@
                 out_flags |= IS_TEXT_UNICODE_REVERSE_CONTROLS;
                 break;
             }
+        }
+
+        if (hi_byte_diff && !lo_byte_diff)
+        {
+            out_flags |= IS_TEXT_UNICODE_REVERSE_ASCII16;
+        }
+
+        if ((weight * lo_byte_diff) < hi_byte_diff)
+        {
+            out_flags |= IS_TEXT_UNICODE_REVERSE_STATISTICS;
         }
     }
 


Reply via email to