After further investigation I found that the order in which the CMyClass instances are destroyed matters. delete[] destroys them in reverse order, while the vector destroys them in forward order.
I could emulate both ways by using placement new: #include <cassert> #include <vector> int SINGLE_ALLOC_SIZE = 21200; int NUMBER_OF_ALLOCS = 21200 * 4; class CMyClass { public: CMyClass() { lpData = new char[SINGLE_ALLOC_SIZE]; assert(lpData); }; ~CMyClass() { delete[] lpData; }; public: char* lpData; }; void strat1() { printf("strat1 start\n"); CMyClass* lpList = new CMyClass[NUMBER_OF_ALLOCS]; printf("after alloc. starting freeing\n"); delete[] lpList; printf("end\n"); } void strat2() { printf("strat2 start\n"); { std::vector<CMyClass> lpList(NUMBER_OF_ALLOCS); printf("after alloc. starting freeing\n"); } printf("end\n"); } void strat3() { printf("strat3 start\n"); void* block = malloc(NUMBER_OF_ALLOCS * sizeof(CMyClass)); for (size_t i = 0; i < NUMBER_OF_ALLOCS; ++i) { void* address = (char*)block + i * sizeof(CMyClass); new (address) CMyClass(); } printf("after alloc. starting freeing\n"); for (size_t i = 0; i < NUMBER_OF_ALLOCS; ++i) { CMyClass* instance = (CMyClass*)((char*)block + (NUMBER_OF_ALLOCS - i - 1) * sizeof(CMyClass)); instance->~CMyClass(); } free(block); printf("end\n"); } void strat4() { printf("strat4 start\n"); void* block = malloc(NUMBER_OF_ALLOCS * sizeof(CMyClass)); for (size_t i = 0; i < NUMBER_OF_ALLOCS; ++i) { void* address = (char*)block + i * sizeof(CMyClass); new (address) CMyClass(); } printf("after alloc. starting freeing\n"); for (size_t i = 0; i < NUMBER_OF_ALLOCS; ++i) { CMyClass* instance = (CMyClass*)((char*)block + i * sizeof(CMyClass)); instance->~CMyClass(); } free(block); printf("end\n"); } int main() { do { strat1(); strat2(); strat3(); strat4(); } while (1); } From: gdal-dev <gdal-dev-boun...@lists.osgeo.org> On Behalf Of Uhrig, Stefan via gdal-dev Sent: Thursday, March 21, 2024 2:17 PM To: gdal-dev@lists.osgeo.org Subject: Re: [gdal-dev] Experience with slowness of free() on Windows with lots of allocations? I was curious and gave it a try. 
I also saw the bad performance on deallocations, but surprisingly the usage of a std::vector in the outer loop speeds things up considerably. I could still see a peak memory usage of 1.8GiB, so it does not seem as if the compiler optimized anything away. <snip>
_______________________________________________ gdal-dev mailing list gdal-dev@lists.osgeo.org https://lists.osgeo.org/mailman/listinfo/gdal-dev