https://git.reactos.org/?p=reactos.git;a=commitdiff;h=0a851eadcb790e4a52bac1e05153d0913e63e896

commit 0a851eadcb790e4a52bac1e05153d0913e63e896
Author:     Katayama Hirofumi MZ <[email protected]>
AuthorDate: Thu Feb 2 10:10:30 2023 +0900
Commit:     GitHub <[email protected]>
CommitDate: Thu Feb 2 10:10:30 2023 +0900

    [NOTEPAD] Speed up notepad loading (#5012)
    
    - Use the EM_GETHANDLE/EM_SETHANDLE messages to get/set the internal buffer handle.
    - Use LocalReAlloc to re-allocate the buffer.
    - Use file mapping to speed up loading.
    - Also pass IS_TEXT_UNICODE_REVERSE_STATISTICS to IsTextUnicode.
    CORE-14641
---
 base/applications/notepad/dialog.c |  21 +--
 base/applications/notepad/main.h   |  15 +-
 base/applications/notepad/text.c   | 326 ++++++++++++++++++++-----------------
 3 files changed, 192 insertions(+), 170 deletions(-)

diff --git a/base/applications/notepad/dialog.c 
b/base/applications/notepad/dialog.c
index 6e45d05798e..06618147121 100644
--- a/base/applications/notepad/dialog.c
+++ b/base/applications/notepad/dialog.c
@@ -419,11 +419,9 @@ BOOL DoCloseFile(VOID)
 
 VOID DoOpenFile(LPCTSTR szFileName)
 {
-    static const TCHAR dotlog[] = _T(".LOG");
     HANDLE hFile;
-    LPTSTR pszText = NULL;
-    DWORD dwTextLen;
     TCHAR log[5];
+    HLOCAL hLocal;
 
     /* Close any files and prompt to save changes */
     if (!DoCloseFile())
@@ -437,21 +435,22 @@ VOID DoOpenFile(LPCTSTR szFileName)
         goto done;
     }
 
-    if (!ReadText(hFile, (LPWSTR *)&pszText, &dwTextLen, &Globals.encFile, 
&Globals.iEoln))
+    /* To make loading file quicker, we use the internal handle of EDIT 
control */
+    hLocal = (HLOCAL)SendMessageW(Globals.hEdit, EM_GETHANDLE, 0, 0);
+    if (!ReadText(hFile, &hLocal, &Globals.encFile, &Globals.iEoln))
     {
         ShowLastError();
         goto done;
     }
-    SetWindowText(Globals.hEdit, pszText);
+    SendMessageW(Globals.hEdit, EM_SETHANDLE, (WPARAM)hLocal, 0);
+    /* No need of EM_SETMODIFY and EM_EMPTYUNDOBUFFER here. EM_SETHANDLE does 
instead. */
 
-    SendMessage(Globals.hEdit, EM_SETMODIFY, FALSE, 0);
-    SendMessage(Globals.hEdit, EM_EMPTYUNDOBUFFER, 0, 0);
     SetFocus(Globals.hEdit);
 
     /*  If the file starts with .LOG, add a time/date at the end and set 
cursor after
-     *  See http://support.microsoft.com/?kbid=260563
+     *  See 
http://web.archive.org/web/20090627165105/http://support.microsoft.com/kb/260563
      */
-    if (GetWindowText(Globals.hEdit, log, ARRAY_SIZE(log)) && !_tcscmp(log, 
dotlog))
+    if (GetWindowText(Globals.hEdit, log, ARRAY_SIZE(log)) && !_tcscmp(log, 
_T(".LOG")))
     {
         static const TCHAR lf[] = _T("\r\n");
         SendMessage(Globals.hEdit, EM_SETSEL, 
GetWindowTextLength(Globals.hEdit), -1);
@@ -471,8 +470,6 @@ VOID DoOpenFile(LPCTSTR szFileName)
 done:
     if (hFile != INVALID_HANDLE_VALUE)
         CloseHandle(hFile);
-    if (pszText)
-        HeapFree(GetProcessHeap(), 0, pszText);
 }
 
 VOID DIALOG_FileNew(VOID)
@@ -590,7 +587,7 @@ DIALOG_FileSaveAs_Hook(HWND hDlg, UINT msg, WPARAM wParam, 
LPARAM lParam)
 
                 hCombo = GetDlgItem(hDlg, ID_EOLN);
                 if (hCombo)
-                    Globals.iEoln = (int) SendMessage(hCombo, CB_GETCURSEL, 0, 
0);
+                    Globals.iEoln = (EOLN)SendMessage(hCombo, CB_GETCURSEL, 0, 
0);
             }
             break;
     }
diff --git a/base/applications/notepad/main.h b/base/applications/notepad/main.h
index ce7fc850877..e2140c2b52b 100644
--- a/base/applications/notepad/main.h
+++ b/base/applications/notepad/main.h
@@ -47,9 +47,12 @@ typedef enum
 // #define MIN_ENCODING   0
 // #define MAX_ENCODING   3
 
-#define EOLN_CRLF           0
-#define EOLN_LF             1
-#define EOLN_CR             2
+typedef enum
+{
+    EOLN_CRLF = 0, /* "\r\n" */
+    EOLN_LF   = 1, /* "\n" */
+    EOLN_CR   = 2  /* "\r" */
+} EOLN; /* End of line (NewLine) type */
 
 typedef struct
 {
@@ -76,7 +79,7 @@ typedef struct
     TCHAR szStatusBarLineCol[MAX_PATH];
 
     ENCODING encFile;
-    int iEoln;
+    EOLN iEoln;
 
     FINDREPLACE find;
     WNDPROC EditProc;
@@ -89,8 +92,8 @@ extern NOTEPAD_GLOBALS Globals;
 VOID SetFileName(LPCTSTR szFileName);
 
 /* from text.c */
-BOOL ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING 
*pencFile, int *piEoln);
-BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING 
encFile, int iEoln);
+BOOL ReadText(HANDLE hFile, HLOCAL *phLocal, ENCODING *pencFile, EOLN *piEoln);
+BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING 
encFile, EOLN iEoln);
 
 /* from settings.c */
 void NOTEPAD_LoadSettingsFromRegistry(void);
diff --git a/base/applications/notepad/text.c b/base/applications/notepad/text.c
index 6a83a89d64f..3ce3a889bfe 100644
--- a/base/applications/notepad/text.c
+++ b/base/applications/notepad/text.c
@@ -4,7 +4,7 @@
  *  Copyright 1998,99 Marcel Baur <[email protected]>
  *  Copyright 2002 Sylvain Petreolle <[email protected]>
  *  Copyright 2002 Andriy Palamarchuk
- *  Copyright 2019 Katayama Hirofumi MZ <[email protected]>
+ *  Copyright 2019-2023 Katayama Hirofumi MZ <[email protected]>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -22,31 +22,7 @@
  */
 
 #include "notepad.h"
-
-static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, 
DWORD dwAppendLen)
-{
-    LPWSTR pszNewText;
-
-    if (dwAppendLen > 0)
-    {
-        if (*ppszText)
-        {
-            pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, 
(*pdwTextLen + dwAppendLen) * sizeof(WCHAR));
-        }
-        else
-        {
-            pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * 
sizeof(WCHAR));
-        }
-
-        if (!pszNewText)
-            return FALSE;
-
-        memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * 
sizeof(WCHAR));
-        *ppszText = pszNewText;
-        *pdwTextLen += dwAppendLen;
-    }
-    return TRUE;
-}
+#include <assert.h>
 
 BOOL IsTextNonZeroASCII(const void *pText, DWORD dwSize)
 {
@@ -63,71 +39,156 @@ BOOL IsTextNonZeroASCII(const void *pText, DWORD dwSize)
 
 ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
 {
-    INT flags = IS_TEXT_UNICODE_STATISTICS;
+    INT flags = IS_TEXT_UNICODE_STATISTICS | 
IS_TEXT_UNICODE_REVERSE_STATISTICS;
 
-    if (dwSize <= 1)
+    if (dwSize <= 1 || IsTextNonZeroASCII(pBytes, dwSize))
         return ENCODING_ANSI;
 
-    if (IsTextNonZeroASCII(pBytes, dwSize))
-    {
-        return ENCODING_ANSI;
-    }
-
     if (IsTextUnicode(pBytes, dwSize, &flags))
-    {
         return ENCODING_UTF16LE;
-    }
 
     if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) && !(flags & 
IS_TEXT_UNICODE_ILLEGAL_CHARS))
-    {
         return ENCODING_UTF16BE;
-    }
 
     /* is it UTF-8? */
     if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, 
NULL, 0))
-    {
         return ENCODING_UTF8;
-    }
 
     return ENCODING_ANSI;
 }
 
+static VOID
+ReplaceNewLines(LPWSTR pszNew, SIZE_T cchNew, LPCWSTR pszOld, SIZE_T cchOld)
+{
+    BOOL bPrevCR = FALSE;
+    SIZE_T ichNew, ichOld;
+
+    for (ichOld = ichNew = 0; ichOld < cchOld; ++ichOld)
+    {
+        WCHAR ch = pszOld[ichOld];
+
+        if (ch == L'\n')
+        {
+            if (!bPrevCR)
+            {
+                pszNew[ichNew++] = L'\r';
+                pszNew[ichNew++] = L'\n';
+            }
+        }
+        else if (ch == '\r')
+        {
+            pszNew[ichNew++] = L'\r';
+            pszNew[ichNew++] = L'\n';
+        }
+        else
+        {
+            pszNew[ichNew++] = ch;
+        }
+
+        bPrevCR = (ch == L'\r');
+    }
+
+    pszNew[ichNew] = UNICODE_NULL;
+    assert(ichNew == cchNew);
+}
+
+static BOOL
+ProcessNewLinesAndNulls(HLOCAL *phLocal, LPWSTR *ppszText, SIZE_T *pcchText, 
EOLN *piEoln)
+{
+    SIZE_T ich, cchText = *pcchText, adwEolnCount[3] = { 0, 0, 0 }, cNonCRLFs;
+    LPWSTR pszText = *ppszText;
+    EOLN iEoln;
+    BOOL bPrevCR = FALSE;
+
+    /* Replace '\0' with SPACE. Count newlines. */
+    for (ich = 0; ich < cchText; ++ich)
+    {
+        WCHAR ch = pszText[ich];
+        if (ch == UNICODE_NULL)
+            pszText[ich] = L' ';
+
+        if (ch == L'\n')
+        {
+            if (bPrevCR)
+            {
+                adwEolnCount[EOLN_CR]--;
+                adwEolnCount[EOLN_CRLF]++;
+            }
+            else
+            {
+                adwEolnCount[EOLN_LF]++;
+            }
+        }
+        else if (ch == '\r')
+        {
+            adwEolnCount[EOLN_CR]++;
+        }
+
+        bPrevCR = (ch == L'\r');
+    }
+
+    /* Choose the newline code */
+    if (adwEolnCount[EOLN_CR] > adwEolnCount[EOLN_CRLF])
+        iEoln = EOLN_CR;
+    else if (adwEolnCount[EOLN_LF] > adwEolnCount[EOLN_CRLF])
+        iEoln = EOLN_LF;
+    else
+        iEoln = EOLN_CRLF;
+
+    cNonCRLFs = adwEolnCount[EOLN_CR] + adwEolnCount[EOLN_LF];
+    if (cNonCRLFs != 0)
+    {
+        /* Allocate a buffer for EM_SETHANDLE */
+        SIZE_T cchNew = cchText + cNonCRLFs;
+        HLOCAL hLocal = LocalAlloc(LMEM_MOVEABLE, (cchNew + 1) * 
sizeof(WCHAR));
+        LPWSTR pszNew = LocalLock(hLocal);
+        if (!pszNew)
+        {
+            LocalFree(hLocal);
+            return FALSE; /* Failure */
+        }
+
+        ReplaceNewLines(pszNew, cchNew, pszText, cchText);
+
+        /* Replace with new data */
+        LocalUnlock(*phLocal);
+        LocalFree(*phLocal);
+        *phLocal = hLocal;
+        *ppszText = pszNew;
+        *pcchText = cchNew;
+    }
+
+    *piEoln = iEoln;
+    return TRUE;
+}
+
 BOOL
-ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING 
*pencFile, int *piEoln)
+ReadText(HANDLE hFile, HLOCAL *phLocal, ENCODING *pencFile, EOLN *piEoln)
 {
-    DWORD dwSize;
-    LPBYTE pBytes = NULL;
-    LPWSTR pszText;
-    LPWSTR pszAllocText = NULL;
-    DWORD dwPos, i;
-    DWORD dwCharCount;
+    PCHAR pBytes = NULL;
+    LPWSTR pszText, pszNewText = NULL;
+    DWORD dwSize, dwPos;
+    SIZE_T i, cchText, cbContent;
     BOOL bSuccess = FALSE;
-    BYTE b = 0;
     ENCODING encFile = ENCODING_ANSI;
-    int iCodePage = 0;
-    WCHAR szCrlf[2] = {'\r', '\n'};
-    DWORD adwEolnCount[3] = {0, 0, 0};
-
-    *ppszText = NULL;
-    *pdwTextLen = 0;
+    UINT iCodePage;
+    HANDLE hMapping = INVALID_HANDLE_VALUE;
+    HLOCAL hNewLocal;
 
     dwSize = GetFileSize(hFile, NULL);
     if (dwSize == INVALID_FILE_SIZE)
         goto done;
 
-    pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2);
-    if (!pBytes)
+    hMapping = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (hMapping == NULL)
         goto done;
 
-    if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL))
+    pBytes = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, dwSize);
+    if (!pBytes)
         goto done;
-    dwPos = 0;
-
-    /* Make sure that there is a NUL character at the end, in any encoding */
-    pBytes[dwSize + 0] = '\0';
-    pBytes[dwSize + 1] = '\0';
 
     /* Look for Byte Order Marks */
+    dwPos = 0;
     if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
     {
         encFile = ENCODING_UTF16LE;
@@ -151,124 +212,85 @@ ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD 
*pdwTextLen, ENCODING *pencFile,
     switch(encFile)
     {
     case ENCODING_UTF16BE:
-        for (i = dwPos; i < dwSize-1; i += 2)
-        {
-            b = pBytes[i+0];
-            pBytes[i+0] = pBytes[i+1];
-            pBytes[i+1] = b;
-        }
-        /* fall through */
-
     case ENCODING_UTF16LE:
+    {
+        /* Re-allocate the buffer for EM_SETHANDLE */
         pszText = (LPWSTR) &pBytes[dwPos];
-        dwCharCount = (dwSize - dwPos) / sizeof(WCHAR);
+        cchText = (dwSize - dwPos) / sizeof(WCHAR);
+        hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR), 
LMEM_MOVEABLE);
+        pszNewText = LocalLock(hNewLocal);
+        if (pszNewText == NULL)
+            goto done;
+
+        *phLocal = hNewLocal;
+        CopyMemory(pszNewText, pszText, cchText * sizeof(WCHAR));
+
+        if (encFile == ENCODING_UTF16BE) /* big endian; Swap bytes */
+        {
+            BYTE tmp, *pb = (LPBYTE)pszNewText;
+            for (i = 0; i < cchText * 2; i += 2)
+            {
+                tmp = pb[i];
+                pb[i] = pb[i + 1];
+                pb[i + 1] = tmp;
+            }
+        }
         break;
+    }
 
     case ENCODING_ANSI:
     case ENCODING_UTF8:
     case ENCODING_UTF8BOM:
-        if (encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM)
-            iCodePage = CP_UTF8;
-        else
-            iCodePage = CP_ACP;
+    {
+        iCodePage = ((encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM) 
? CP_UTF8 : CP_ACP);
 
-        if ((dwSize - dwPos) > 0)
+        /* Get ready for ANSI-to-Wide conversion */
+        cbContent = dwSize - dwPos;
+        cchText = 0;
+        if (cbContent > 0)
         {
-            dwCharCount = MultiByteToWideChar(iCodePage, 0, 
(LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0);
-            if (dwCharCount == 0)
+            cchText = MultiByteToWideChar(iCodePage, 0, &pBytes[dwPos], 
(INT)cbContent, NULL, 0);
+            if (cchText == 0)
                 goto done;
         }
-        else
-        {
-            /* special case for files with no characters (other than BOMs) */
-            dwCharCount = 0;
-        }
 
-        pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 
1) * sizeof(WCHAR));
-        if (!pszAllocText)
+        /* Re-allocate the buffer for EM_SETHANDLE */
+        hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR), 
LMEM_MOVEABLE);
+        pszNewText = LocalLock(hNewLocal);
+        if (!pszNewText)
             goto done;
+        *phLocal = hNewLocal;
 
-        if ((dwSize - dwPos) > 0)
+        /* Do ANSI-to-Wide conversion */
+        if (cbContent > 0)
         {
-            if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], 
dwSize - dwPos, pszAllocText, dwCharCount))
+            if (!MultiByteToWideChar(iCodePage, 0,
+                                     &pBytes[dwPos], (INT)cbContent, 
pszNewText, (INT)cchText))
+            {
                 goto done;
+            }
         }
-
-        pszAllocText[dwCharCount] = '\0';
-        pszText = pszAllocText;
         break;
-    DEFAULT_UNREACHABLE;
     }
 
-    dwPos = 0;
-    for (i = 0; i < dwCharCount; i++)
-    {
-        switch(pszText[i])
-        {
-        case '\r':
-            if ((i < dwCharCount-1) && (pszText[i+1] == '\n'))
-            {
-                i++;
-                adwEolnCount[EOLN_CRLF]++;
-                break;
-            }
-            /* fall through */
-
-        case '\n':
-            if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos))
-                return FALSE;
-            if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf)))
-                return FALSE;
-            dwPos = i + 1;
-
-            if (pszText[i] == '\r')
-                adwEolnCount[EOLN_CR]++;
-            else
-                adwEolnCount[EOLN_LF]++;
-            break;
-
-        case '\0':
-            pszText[i] = ' ';
-            break;
-        }
+    DEFAULT_UNREACHABLE;
     }
 
-    if (!*ppszText && (pszText == pszAllocText))
-    {
-        /* special case; don't need to reallocate */
-        *ppszText = pszAllocText;
-        *pdwTextLen = dwCharCount;
-        pszAllocText = NULL;
-    }
-    else
-    {
-        /* append last remaining text */
-        if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1))
-            return FALSE;
-    }
+    pszNewText[cchText] = UNICODE_NULL;
 
-    /* chose which eoln to use */
-    *piEoln = EOLN_CRLF;
-    if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln])
-        *piEoln = EOLN_LF;
-    if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln])
-        *piEoln = EOLN_CR;
-    *pencFile = encFile;
+    if (!ProcessNewLinesAndNulls(phLocal, &pszNewText, &cchText, piEoln))
+        goto done;
 
+    *pencFile = encFile;
     bSuccess = TRUE;
 
 done:
     if (pBytes)
-        HeapFree(GetProcessHeap(), 0, pBytes);
-    if (pszAllocText)
-        HeapFree(GetProcessHeap(), 0, pszAllocText);
-
-    if (!bSuccess && *ppszText)
-    {
-        HeapFree(GetProcessHeap(), 0, *ppszText);
-        *ppszText = NULL;
-        *pdwTextLen = 0;
-    }
+        UnmapViewOfFile(pBytes);
+    if (hMapping != INVALID_HANDLE_VALUE)
+        CloseHandle(hMapping);
+    if (pszNewText)
+        LocalUnlock(*phLocal);
     return bSuccess;
 }
 
@@ -367,7 +389,7 @@ done:
     return bSuccess;
 }
 
-BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING 
encFile, int iEoln)
+BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING 
encFile, EOLN iEoln)
 {
     WCHAR wcBom;
     LPCWSTR pszLF = L"\n";

Reply via email to