Author: hbelusca
Date: Fri Oct 17 22:08:51 2014
New Revision: 64792

URL: http://svn.reactos.org/svn/reactos?rev=64792&view=rev
Log:
[NTVDM]
- Use a helper function for copying little chunks of memory (1, 2, 4 and 8 
bytes) because profiling ntvdm showed that chunks of 1 and 2 bytes in 
particular were read the most, and calling RtlCopy/MoveMemory for intensively 
copying 1 or 2 bytes was shown to be inefficient. We also don't use 
intrinsics/builtins directly: because the compiler cannot know in advance the 
size of the memory to be copied, it cannot perform the required optimizations. 
It was checked that using the builtin memcpy or memmove of GCC when compiling 
the program in release+full optimization mode just embedded a call to _memcpy, 
and naively using the movsX intrinsics of MSVC does not do the job of "moving" 
memory while taking possible overlaps into account. Therefore, for small 
sizes (1, 2, 4 and 8 bytes), we use copy assignments, whereas for large sizes 
(and for 3, 5, 6 and 7 bytes) we use the regular method of calling 
RtlMoveMemory. We gain ~=10% speed with this optimization.
- Also I use >> and & for dividing by 4 and 2 instead of the regular / and % 
operations because they are not optimized otherwise by default by MSVC (they 
are, however, if you explicitly enable optimizations).

Modified:
    trunk/reactos/subsystems/ntvdm/emulator.c
    trunk/reactos/subsystems/ntvdm/io.c

Modified: trunk/reactos/subsystems/ntvdm/emulator.c
URL: 
http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/ntvdm/emulator.c?rev=64792&r1=64791&r2=64792&view=diff
==============================================================================
--- trunk/reactos/subsystems/ntvdm/emulator.c   [iso-8859-1] (original)
+++ trunk/reactos/subsystems/ntvdm/emulator.c   [iso-8859-1] Fri Oct 17 
22:08:51 2014
@@ -60,6 +60,75 @@
 
 /* PRIVATE FUNCTIONS 
**********************************************************/
 
+static inline VOID
+EmulatorMoveMemory(OUT VOID UNALIGNED *Destination,
+                   IN const VOID UNALIGNED *Source,
+                   IN SIZE_T Length)
+{
+#if 1 // Switch-based variant: the only one that is compiled
+    /*
+     * Copy Length bytes from Source to Destination. Lengths of 1, 2, 4
+     * and 8 bytes make up the bulk of our moves, so each of these is a
+     * single assignment; RtlMoveMemory would be slow for such sizes.
+     */
+    switch (Length)
+    {
+        case 0: // Nothing to copy
+            return;
+
+        case sizeof(UCHAR): // Single byte
+            *(PUCHAR)Destination = *(PUCHAR)Source;
+            return;
+
+        case sizeof(USHORT): // One word
+            *(PUSHORT)Destination = *(PUSHORT)Source;
+            return;
+
+        case sizeof(ULONG): // One dword
+            *(PULONG)Destination = *(PULONG)Source;
+            return;
+
+        case sizeof(ULONGLONG): // One qword
+            *(PULONGLONG)Destination = *(PULONGLONG)Source;
+            return;
+
+        default: // Every other length goes through the generic move routine
+#if defined(__GNUC__)
+            __builtin_memmove(Destination, Source, Length);
+#else
+            RtlMoveMemory(Destination, Source, Length);
+#endif
+    }
+
+#else // Never compiled (see "#if 1" above): variant using __movsX intrinsics
+
+    PUCHAR Dest = (PUCHAR)Destination;
+    PUCHAR Src  = (PUCHAR)Source;
+
+    SIZE_T Count, NewSize = Length;
+
+    /* Move as many whole dwords as possible */
+    Count   = NewSize >> 2; // NewSize / sizeof(ULONG);
+    NewSize = NewSize  & 3; // NewSize % sizeof(ULONG);
+    __movsd(Dest, Src, Count);
+    Dest += Count << 2; // Count * sizeof(ULONG);
+    Src  += Count << 2;
+
+    /* Move one word if at least 2 bytes remain */
+    Count   = NewSize >> 1; // NewSize / sizeof(USHORT);
+    NewSize = NewSize  & 1; // NewSize % sizeof(USHORT);
+    __movsw(Dest, Src, Count);
+    Dest += Count << 1; // Count * sizeof(USHORT);
+    Src  += Count << 1;
+
+    /* Move the last byte, if any */
+    Count   = NewSize; // NewSize / sizeof(UCHAR);
+    // NewSize = NewSize; // NewSize % sizeof(UCHAR);
+    __movsb(Dest, Src, Count);
+
+#endif
+}
+
 VOID WINAPI EmulatorReadMemory(PFAST486_STATE State, ULONG Address, PVOID 
Buffer, ULONG Size)
 {
     UNREFERENCED_PARAMETER(State);
@@ -91,7 +160,7 @@
     }
 
     /* Read the data from the virtual address space and store it in the buffer 
*/
-    RtlCopyMemory(Buffer, REAL_TO_PHYS(Address), Size);
+    EmulatorMoveMemory(Buffer, REAL_TO_PHYS(Address), Size);
 }
 
 VOID WINAPI EmulatorWriteMemory(PFAST486_STATE State, ULONG Address, PVOID 
Buffer, ULONG Size)
@@ -112,7 +181,7 @@
     if ((Address + Size) >= ROM_AREA_START && (Address < ROM_AREA_END)) return;
 
     /* Read the data from the buffer and store it in the virtual address space 
*/
-    RtlCopyMemory(REAL_TO_PHYS(Address), Buffer, Size);
+    EmulatorMoveMemory(REAL_TO_PHYS(Address), Buffer, Size);
 
     /*
      * Check if we modified the VGA memory.

Modified: trunk/reactos/subsystems/ntvdm/io.c
URL: 
http://svn.reactos.org/svn/reactos/trunk/reactos/subsystems/ntvdm/io.c?rev=64792&r1=64791&r2=64792&view=diff
==============================================================================
--- trunk/reactos/subsystems/ntvdm/io.c [iso-8859-1] (original)
+++ trunk/reactos/subsystems/ntvdm/io.c [iso-8859-1] Fri Oct 17 22:08:51 2014
@@ -100,8 +100,7 @@
     }
     else
     {
-        while (Count--)
-            *Buffer++ = IOReadB(Port);
+        while (Count--) *Buffer++ = IOReadB(Port);
     }
 }
 
@@ -196,8 +195,7 @@
     }
     else
     {
-        while (Count--)
-            *Buffer++ = IOReadW(Port);
+        while (Count--) *Buffer++ = IOReadW(Port);
     }
 }
 
@@ -278,8 +276,7 @@
     }
     else
     {
-        while (Count--)
-            *Buffer++ = IOReadD(Port);
+        while (Count--) *Buffer++ = IOReadD(Port);
     }
 }
 
@@ -379,7 +376,7 @@
     }
     else
     {
-        PBYTE Address = (PBYTE)Buffer;
+        PUCHAR Address = (PUCHAR)Buffer;
 
         while (DataCount--)
         {
@@ -388,8 +385,8 @@
             UCHAR NewDataSize = DataSize;
 
             /* Read dword */
-            Count       = NewDataSize / sizeof(ULONG);
-            NewDataSize = NewDataSize % sizeof(ULONG);
+            Count       = NewDataSize >> 2; // NewDataSize / sizeof(ULONG);
+            NewDataSize = NewDataSize  & 3; // NewDataSize % sizeof(ULONG);
             while (Count--)
             {
                 *(PULONG)Address = IOReadD(CurrentPort);
@@ -398,8 +395,8 @@
             }
 
             /* Read word */
-            Count       = NewDataSize / sizeof(USHORT);
-            NewDataSize = NewDataSize % sizeof(USHORT);
+            Count       = NewDataSize >> 1; // NewDataSize / sizeof(USHORT);
+            NewDataSize = NewDataSize  & 1; // NewDataSize % sizeof(USHORT);
             while (Count--)
             {
                 *(PUSHORT)Address = IOReadW(CurrentPort);
@@ -408,17 +405,14 @@
             }
 
             /* Read byte */
-            Count       = NewDataSize / sizeof(UCHAR);
-            NewDataSize = NewDataSize % sizeof(UCHAR);
+            Count       = NewDataSize; // NewDataSize / sizeof(UCHAR);
+            // NewDataSize = NewDataSize % sizeof(UCHAR);
             while (Count--)
             {
                 *(PUCHAR)Address = IOReadB(CurrentPort);
                 CurrentPort += sizeof(UCHAR);
                 Address     += sizeof(UCHAR);
             }
-
-            ASSERT(Count == 0);
-            ASSERT(NewDataSize == 0);
         }
     }
 }
@@ -457,7 +451,7 @@
     }
     else
     {
-        PBYTE Address = (PBYTE)Buffer;
+        PUCHAR Address = (PUCHAR)Buffer;
 
         while (DataCount--)
         {
@@ -466,8 +460,8 @@
             UCHAR NewDataSize = DataSize;
 
             /* Write dword */
-            Count       = NewDataSize / sizeof(ULONG);
-            NewDataSize = NewDataSize % sizeof(ULONG);
+            Count       = NewDataSize >> 2; // NewDataSize / sizeof(ULONG);
+            NewDataSize = NewDataSize  & 3; // NewDataSize % sizeof(ULONG);
             while (Count--)
             {
                 IOWriteD(CurrentPort, *(PULONG)Address);
@@ -476,8 +470,8 @@
             }
 
             /* Write word */
-            Count       = NewDataSize / sizeof(USHORT);
-            NewDataSize = NewDataSize % sizeof(USHORT);
+            Count       = NewDataSize >> 1; // NewDataSize / sizeof(USHORT);
+            NewDataSize = NewDataSize  & 1; // NewDataSize % sizeof(USHORT);
             while (Count--)
             {
                 IOWriteW(CurrentPort, *(PUSHORT)Address);
@@ -486,17 +480,14 @@
             }
 
             /* Write byte */
-            Count       = NewDataSize / sizeof(UCHAR);
-            NewDataSize = NewDataSize % sizeof(UCHAR);
+            Count       = NewDataSize; // NewDataSize / sizeof(UCHAR);
+            // NewDataSize = NewDataSize % sizeof(UCHAR);
             while (Count--)
             {
                 IOWriteB(CurrentPort, *(PUCHAR)Address);
                 CurrentPort += sizeof(UCHAR);
                 Address     += sizeof(UCHAR);
             }
-
-            ASSERT(Count == 0);
-            ASSERT(NewDataSize == 0);
         }
     }
 }


Reply via email to