https://git.reactos.org/?p=reactos.git;a=commitdiff;h=e85664a3d8dfb955b56b2f5d27a8ccd85db9454b

commit e85664a3d8dfb955b56b2f5d27a8ccd85db9454b
Author:     Katayama Hirofumi MZ <katayama.hirofumi...@gmail.com>
AuthorDate: Sun Aug 18 22:46:56 2019 +0900
Commit:     GitHub <nore...@github.com>
CommitDate: Sun Aug 18 22:46:56 2019 +0900

    [NOTEPAD] Encoding detection (#1852)
    
    CORE-15548
    In Notepad, if the input file has no BOM, guess the text encoding from its contents.
---
 base/applications/notepad/text.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/base/applications/notepad/text.c b/base/applications/notepad/text.c
index 6e26a7ab474..d22cf4817ee 100644
--- a/base/applications/notepad/text.c
+++ b/base/applications/notepad/text.c
@@ -4,6 +4,7 @@
  *  Copyright 1998,99 Marcel Baur <mb...@g26.ethz.ch>
  *  Copyright 2002 Sylvain Petreolle <spetreo...@yahoo.fr>
  *  Copyright 2002 Andriy Palamarchuk
+ *  Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi...@gmail.com>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -47,6 +48,32 @@ static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, D
     return TRUE;
 }
 
+/*
+ * Guesses the encoding of data that carries no byte-order mark.
+ * Returns ENCODING_UTF16LE, ENCODING_UTF16BE or ENCODING_UTF8 when the matching
+ * check passes, and ENCODING_ANSI otherwise (including inputs of 0 or 1 bytes).
+ */
+ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
+{
+    /* Only tests requested on input are reported back, so ask for the reverse test too. */
+    INT flags = IS_TEXT_UNICODE_STATISTICS | IS_TEXT_UNICODE_REVERSE_STATISTICS;
+
+    if (dwSize <= 1)
+        return ENCODING_ANSI;
+
+    if (IsTextUnicode(pBytes, dwSize, &flags))
+        return ENCODING_UTF16LE;
+
+    if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) && !(flags & IS_TEXT_UNICODE_ILLEGAL_CHARS))
+        return ENCODING_UTF16BE;
+
+    /* Strict UTF-8 conversion yields 0 if the bytes contain an invalid sequence. */
+    if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, NULL, 0))
+        return ENCODING_UTF8;
+
+    return ENCODING_ANSI;
+}
+
 BOOL
 ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING 
*pencFile, int *piEoln)
 {
@@ -98,6 +125,10 @@ ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile,
         encFile = ENCODING_UTF8;
         dwPos += 3;
     }
+    else
+    {
+        encFile = AnalyzeEncoding((const char *)pBytes, dwSize);
+    }
 
     switch(encFile)
     {

Reply via email to