Author: sir_richard
Date: Sun Mar  4 17:56:00 2012
New Revision: 56000

URL: http://svn.reactos.org/svn/reactos?rev=56000&view=rev
Log:
[MEGAPERF]: This build introduces the following performance boosts:
- Using a dead stack lookaside list for up to 5 dead kernel thread stacks. 
1500% improvement when rapidly destroying/creating threads, such as during 
second stage setup and many winetests.
- Using a free pool page lookaside list for up to 12 free non-paged or paged 
pool pages. 800% improvement when allocating big pages from the pool, as well 
as during pool expansion.
- Using bucketized per-processor and global lookaside lists (reached through
the KPRCB) for small allocations spanning between 1 and 32 pool blocks (units
of POOL_BLOCK_SIZE, pool header included). 1000% improvement when rapidly
allocating/freeing small pool allocations, and 8x reduction in pool
fragmentation.

Modified:
    trunk/reactos/ntoskrnl/io/iomgr/device.c
    trunk/reactos/ntoskrnl/mm/ARM3/expool.c
    trunk/reactos/ntoskrnl/mm/ARM3/miarm.h
    trunk/reactos/ntoskrnl/mm/ARM3/mminit.c
    trunk/reactos/ntoskrnl/mm/ARM3/pool.c
    trunk/reactos/ntoskrnl/mm/ARM3/procsup.c

Modified: trunk/reactos/ntoskrnl/io/iomgr/device.c
URL: 
http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/io/iomgr/device.c?rev=56000&r1=55999&r2=56000&view=diff
==============================================================================
--- trunk/reactos/ntoskrnl/io/iomgr/device.c [iso-8859-1] (original)
+++ trunk/reactos/ntoskrnl/io/iomgr/device.c [iso-8859-1] Sun Mar  4 17:56:00 
2012
@@ -367,7 +367,7 @@
     /* We can't unload unless there's an unload handler */
     if (!DriverObject->DriverUnload)
     {
-        DPRINT1("No DriverUnload function! '%wZ' will not be unloaded!\n", 
&DriverObject->DriverName);
+        DPRINT("No DriverUnload function! '%wZ' will not be unloaded!\n", 
&DriverObject->DriverName);
         return;
     }
 

Modified: trunk/reactos/ntoskrnl/mm/ARM3/expool.c
URL: 
http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/mm/ARM3/expool.c?rev=56000&r1=55999&r2=56000&view=diff
==============================================================================
--- trunk/reactos/ntoskrnl/mm/ARM3/expool.c [iso-8859-1] (original)
+++ trunk/reactos/ntoskrnl/mm/ARM3/expool.c [iso-8859-1] Sun Mar  4 17:56:00 
2012
@@ -1346,7 +1346,7 @@
 {
     ULONG i;
     PPOOL_DESCRIPTOR PoolDesc;
-    
+
     //
     // Assume all failures
     //
@@ -1414,6 +1414,8 @@
     KIRQL OldIrql;
     USHORT BlockSize, i;
     ULONG OriginalType;
+    PKPRCB Prcb = KeGetCurrentPrcb();
+    PGENERAL_LOOKASIDE LookasideList;
 
     //
     // Some sanity checks
@@ -1560,6 +1562,57 @@
     //
     i = (USHORT)((NumberOfBytes + sizeof(POOL_HEADER) + (POOL_BLOCK_SIZE - 1))
                  / POOL_BLOCK_SIZE);
+
+    //
+    // Handle lookaside list optimization for both paged and nonpaged pool
+    //
+    if (i <= MAXIMUM_PROCESSORS)
+    {
+        //
+        // Try popping it from the per-CPU lookaside list
+        //
+        LookasideList = (PoolType == PagedPool) ?
+                         Prcb->PPPagedLookasideList[i - 1].P :
+                         Prcb->PPNPagedLookasideList[i - 1].P;
+        LookasideList->TotalAllocates++;
+        Entry = 
(PPOOL_HEADER)InterlockedPopEntrySList(&LookasideList->ListHead);
+        if (!Entry)
+        {
+            //
+            // We failed, try popping it from the global list
+            //
+            LookasideList = (PoolType == PagedPool) ?
+                             Prcb->PPPagedLookasideList[i - 1].L :
+                             Prcb->PPNPagedLookasideList[i - 1].L;
+            LookasideList->TotalAllocates++;
+            Entry = 
(PPOOL_HEADER)InterlockedPopEntrySList(&LookasideList->ListHead);
+        }
+
+        //
+        // If we were able to pop it, update the accounting and return the block
+        //
+        if (Entry)
+        {
+            LookasideList->AllocateHits++;
+
+            //
+            // Get the real entry, write down its pool type, and track it
+            //
+            Entry--;
+            Entry->PoolType = PoolType + 1;
+            ExpInsertPoolTracker(Tag,
+                                 Entry->BlockSize * POOL_BLOCK_SIZE,
+                                 OriginalType);
+
+            //
+            // Return the pool allocation
+            //
+            Entry->PoolTag = Tag;
+            (POOL_FREE_BLOCK(Entry))->Flink = NULL;
+            (POOL_FREE_BLOCK(Entry))->Blink = NULL;
+            return POOL_FREE_BLOCK(Entry);
+        }
+    }
 
     //
     // Loop in the free lists looking for a block if this size. Start with the
@@ -1902,6 +1955,8 @@
     ULONG Tag;
     BOOLEAN Combined = FALSE;
     PFN_NUMBER PageCount, RealPageCount;
+    PKPRCB Prcb = KeGetCurrentPrcb();
+    PGENERAL_LOOKASIDE LookasideList;
 
     //
     // Check if any of the debug flags are enabled
@@ -2073,6 +2128,40 @@
                          Entry->PoolType - 1);
 
     //
+    // Is this allocation small enough to have come from a lookaside list?
+    //
+    if (BlockSize <= MAXIMUM_PROCESSORS)
+    {
+        //
+        // Try pushing it into the per-CPU lookaside list
+        //
+        LookasideList = (PoolType == PagedPool) ?
+                         Prcb->PPPagedLookasideList[BlockSize - 1].P :
+                         Prcb->PPNPagedLookasideList[BlockSize - 1].P;
+        LookasideList->TotalFrees++;
+        if (ExQueryDepthSList(&LookasideList->ListHead) < LookasideList->Depth)
+        {
+            LookasideList->FreeHits++;
+            InterlockedPushEntrySList(&LookasideList->ListHead, P);
+            return;
+        }
+
+        //
+        // We failed, try to push it into the global lookaside list
+        //
+        LookasideList = (PoolType == PagedPool) ?
+                         Prcb->PPPagedLookasideList[BlockSize - 1].L :
+                         Prcb->PPNPagedLookasideList[BlockSize - 1].L;
+        LookasideList->TotalFrees++;
+        if (ExQueryDepthSList(&LookasideList->ListHead) < LookasideList->Depth)
+        {
+            LookasideList->FreeHits++;
+            InterlockedPushEntrySList(&LookasideList->ListHead, P);
+            return;
+        }
+    }
+
+    //
     // Get the pointer to the next entry
     //
     NextEntry = POOL_BLOCK(Entry, BlockSize);

Modified: trunk/reactos/ntoskrnl/mm/ARM3/miarm.h
URL: 
http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/mm/ARM3/miarm.h?rev=56000&r1=55999&r2=56000&view=diff
==============================================================================
--- trunk/reactos/ntoskrnl/mm/ARM3/miarm.h [iso-8859-1] (original)
+++ trunk/reactos/ntoskrnl/mm/ARM3/miarm.h [iso-8859-1] Sun Mar  4 17:56:00 2012
@@ -598,6 +598,8 @@
 extern PVOID MiSessionPoolEnd;     // 0xBE000000
 extern PVOID MiSessionPoolStart;   // 0xBD000000
 extern PVOID MiSessionViewStart;   // 0xBE000000
+extern ULONG MmMaximumDeadKernelStacks;
+extern SLIST_HEADER MmDeadStackSListHead;
 
 BOOLEAN
 FORCEINLINE

Modified: trunk/reactos/ntoskrnl/mm/ARM3/mminit.c
URL: 
http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/mm/ARM3/mminit.c?rev=56000&r1=55999&r2=56000&view=diff
==============================================================================
--- trunk/reactos/ntoskrnl/mm/ARM3/mminit.c [iso-8859-1] (original)
+++ trunk/reactos/ntoskrnl/mm/ARM3/mminit.c [iso-8859-1] Sun Mar  4 17:56:00 
2012
@@ -2076,6 +2076,9 @@
         KeInitializeEvent(&MmZeroingPageEvent, SynchronizationEvent, FALSE);
         MmZeroingPageThreadActive = FALSE;
 
+        /* Initialize the dead stack S-LIST */
+        InitializeSListHead(&MmDeadStackSListHead);
+
         //
         // Check if this is a machine with less than 19MB of RAM
         //
@@ -2268,18 +2271,21 @@
         {
             /* Set small system */
             MmSystemSize = MmSmallSystem;
+            MmMaximumDeadKernelStacks = 0;
         }
         else if (MmNumberOfPhysicalPages <= ((19 * _1MB) / PAGE_SIZE))
         {
             /* Set small system and add 100 pages for the cache */
             MmSystemSize = MmSmallSystem;
             MmSystemCacheWsMinimum += 100;
+            MmMaximumDeadKernelStacks = 2;
         }
         else
         {
             /* Set medium system and add 400 pages for the cache */
             MmSystemSize = MmMediumSystem;
             MmSystemCacheWsMinimum += 400;
+            MmMaximumDeadKernelStacks = 5;
         }
 
         /* Check for less than 24MB */

Modified: trunk/reactos/ntoskrnl/mm/ARM3/pool.c
URL: 
http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/mm/ARM3/pool.c?rev=56000&r1=55999&r2=56000&view=diff
==============================================================================
--- trunk/reactos/ntoskrnl/mm/ARM3/pool.c [iso-8859-1] (original)
+++ trunk/reactos/ntoskrnl/mm/ARM3/pool.c [iso-8859-1] Sun Mar  4 17:56:00 2012
@@ -27,6 +27,10 @@
 ULONG MmSpecialPoolTag;
 ULONG MmConsumedPoolPercentage;
 BOOLEAN MmProtectFreedNonPagedPool;
+SLIST_HEADER MiNonPagedPoolSListHead;
+ULONG MiNonPagedPoolSListMaximum = 4;
+SLIST_HEADER MiPagedPoolSListHead;
+ULONG MiPagedPoolSListMaximum = 8;
 
 /* PRIVATE FUNCTIONS **********************************************************/
 
@@ -276,6 +280,34 @@
     PMMFREE_POOL_ENTRY FreeEntry, FirstEntry;
     PMMPTE PointerPte;
     PAGED_CODE();
+
+    //
+    // Initialize the pool S-LISTs as well as their maximum count. In general,
+    // we'll allow 8 times the default on a 2GB system, and two times the default
+    // on a 1GB system.
+    //
+    InitializeSListHead(&MiPagedPoolSListHead);
+    InitializeSListHead(&MiNonPagedPoolSListHead);
+    if (MmNumberOfPhysicalPages >= ((2 * _1GB) /PAGE_SIZE))
+    {
+        MiNonPagedPoolSListMaximum *= 8;
+        MiPagedPoolSListMaximum *= 8;
+    }
+    else if (MmNumberOfPhysicalPages >= (_1GB /PAGE_SIZE))
+    {
+        MiNonPagedPoolSListMaximum *= 2;
+        MiPagedPoolSListMaximum *= 2;
+    }
+
+    //
+    // However if debugging options for the pool are enabled, turn off the S-LIST
+    // to reduce the risk of messing things up even more
+    //
+    if (MmProtectFreedNonPagedPool)
+    {
+        MiNonPagedPoolSListMaximum = 0;
+        MiPagedPoolSListMaximum = 0;
+    }
 
     //
     // We keep 4 lists of free pages (4 lists help avoid contention)
@@ -411,6 +443,15 @@
     if ((PoolType & BASE_POOL_TYPE_MASK) == PagedPool)
     {
         //
+        // If only one page is being requested, try to grab it from the S-LIST
+        //
+        if ((SizeInPages == 1) && (ExQueryDepthSList(&MiPagedPoolSListHead)))
+        {
+            BaseVa = InterlockedPopEntrySList(&MiPagedPoolSListHead);
+            if (BaseVa) return BaseVa;
+        }
+
+        //
         // Lock the paged pool mutex
         //
         KeAcquireGuardedMutex(&MmPagedPoolMutex);
@@ -611,6 +652,15 @@
     }
 
     //
+    // If only one page is being requested, try to grab it from the S-LIST
+    //
+    if ((SizeInPages == 1) && (ExQueryDepthSList(&MiNonPagedPoolSListHead)))
+    {
+        BaseVa = InterlockedPopEntrySList(&MiNonPagedPoolSListHead);
+        if (BaseVa) return BaseVa;
+    }
+
+    //
     // Allocations of less than 4 pages go into their individual buckets
     //
     i = SizeInPages - 1;
@@ -861,9 +911,16 @@
         while (!RtlTestBit(MmPagedPoolInfo.EndOfPagedPoolBitmap, End)) End++;
 
         //
-        // Now calculate the total number of pages this allocation spans
+        // Now calculate the total number of pages this allocation spans. If it's
+        // only one page, add it to the S-LIST instead of freeing it
         //
         NumberOfPages = End - i + 1;
+        if ((NumberOfPages == 1) &&
+            (ExQueryDepthSList(&MiPagedPoolSListHead) < 
MiPagedPoolSListMaximum))
+        {
+            InterlockedPushEntrySList(&MiPagedPoolSListHead, StartingVa);
+            return 1;
+        }
 
         /* Delete the actual pages */
         PointerPte = MmPagedPoolInfo.FirstPteForPagedPool + i;
@@ -898,10 +955,18 @@
     }
 
     //
-    // Get the first PTE and its corresponding PFN entry
+    // Get the first PTE and its corresponding PFN entry. If this is also the
+    // last PTE, meaning that this allocation was only for one page, push it into
+    // the S-LIST instead of freeing it
     //
     StartPte = PointerPte = MiAddressToPte(StartingVa);
     StartPfn = Pfn1 = MiGetPfnEntry(PointerPte->u.Hard.PageFrameNumber);
+    if ((Pfn1->u3.e1.EndOfAllocation == 1) &&
+        (ExQueryDepthSList(&MiNonPagedPoolSListHead) < 
MiNonPagedPoolSListMaximum))
+    {
+        InterlockedPushEntrySList(&MiNonPagedPoolSListHead, StartingVa);
+        return 1;
+    }
 
     //
     // Loop until we find the last PTE

Modified: trunk/reactos/ntoskrnl/mm/ARM3/procsup.c
URL: 
http://svn.reactos.org/svn/reactos/trunk/reactos/ntoskrnl/mm/ARM3/procsup.c?rev=56000&r1=55999&r2=56000&view=diff
==============================================================================
--- trunk/reactos/ntoskrnl/mm/ARM3/procsup.c [iso-8859-1] (original)
+++ trunk/reactos/ntoskrnl/mm/ARM3/procsup.c [iso-8859-1] Sun Mar  4 17:56:00 
2012
@@ -19,6 +19,8 @@
 
 ULONG MmProcessColorSeed = 0x12345678;
 PMMWSL MmWorkingSetList;
+ULONG MmMaximumDeadKernelStacks = 5;
+SLIST_HEADER MmDeadStackSListHead;
 
 /* PRIVATE FUNCTIONS **********************************************************/
 
@@ -235,6 +237,19 @@
     PointerPte--;
 
     //
+    // If this is a small stack, just push the stack onto the dead stack S-LIST
+    //
+    if (!GuiStack)
+    {
+        if (ExQueryDepthSList(&MmDeadStackSListHead) < 
MmMaximumDeadKernelStacks)
+        {
+            Pfn1 = MiGetPfnEntry(PointerPte->u.Hard.PageFrameNumber);
+            InterlockedPushEntrySList(&MmDeadStackSListHead, 
&Pfn1->u1.NextStackPfn);
+            return;
+        }
+    }
+
+    //
     // Calculate pages used
     //
     StackPages = BYTES_TO_PAGES(GuiStack ?
@@ -303,6 +318,7 @@
     KIRQL OldIrql;
     PFN_NUMBER PageFrameIndex;
     ULONG i;
+    PMMPFN Pfn1;
 
     //
     // Calculate pages needed
@@ -318,6 +334,21 @@
     }
     else
     {
+        //
+        // If the dead stack S-LIST has a stack on it, use it instead of allocating
+        // new system PTEs for this stack
+        //
+        if (ExQueryDepthSList(&MmDeadStackSListHead))
+        {
+            Pfn1 = (PMMPFN)InterlockedPopEntrySList(&MmDeadStackSListHead);
+            if (Pfn1)
+            {
+                PointerPte = Pfn1->PteAddress;
+                BaseAddress = MiPteToAddress(++PointerPte);
+                return BaseAddress;
+            }
+        }
+
         //
         // We'll allocate 12K and that's it
         //


Reply via email to