Hi, We're having high latency issues when using CURL in our product. We've reduced the problem to the piece of code below. Github project: https://github.com/maor-avni-hpe/curlmultipoc
Our product performs thousands of HTTP requests to Azure/AWS from a machine running on Azure/AWS. The transfer sizes vary from 128K to several MBs. We're using the multi API. The attached program accepts 5 arguments: * URL * Chunk size to download in K * Total GB to download * Batch Size * Maximum Open Handles * The program requests <batch size> downloads, each transferring <chunk size>, until reaching the total GB. * The program makes sure that no more than <max open handles> are requested from CURL. * When a download completes, its handle is removed and a new one is added until the batch is completed. The program is executed on a VM running on Azure, downloading a file from an Azure storage account, to make sure that the network is as predictable as possible. The file downloaded is a 10GB file, although we always request the same first few bytes. We ran it both on Linux and Windows. Linux: Debian 12.8, curl 7.88 Windows: Server 2022, curl 7.88, curl 8.10 The results below are for downloading 5GB using 128K chunks: * single unlimited batch, 50 handles: Linux: 7s, Windows: 11s. * single unlimited batch, 100 handles: Linux: 6s, Windows 11s. * 50 per batch, 50 handles: Linux 12s, Windows 15s. * 100 per batch, 50 handles: Linux 9s, Windows 14s. * 100 per batch, 100 handles: Linux 36s, Windows 290s. Logging the remaining FDs after each poll shows that when running 100 handles there is one handle that takes longer to finish. Our product works in the 100/100 configuration. Under load we get very high latency and low data rates. This test explains why we get those slowdowns. Is there a reasonable explanation for this slowdown? Is it our usage of the curl API? What can be done to improve the performance? 
#include <iostream> #include <list> #include <set> #include <string> #include <thread> #include <vector> #include <curl/curl.h> struct Chunk { std::string data; size_t size; }; size_t WriteCallback(void* contents, size_t size, size_t nmemb, void* userp) { size_t totalSize = size * nmemb; Chunk* chunk = (Chunk*)userp; // chunk->data.append((char *)contents, totalSize); chunk->size += totalSize; // sleep for 0.1ms return totalSize; } CURL* createCurlHandle(long start, long end, Chunk* chunk, std::string url) { CURL* curl = curl_easy_init(); if (curl) { std::string range = std::to_string(start) + "-" + std::to_string(end); curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 3L); curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L); curl_easy_setopt(curl, CURLOPT_RANGE, range.c_str()); curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteCallback); curl_easy_setopt(curl, CURLOPT_WRITEDATA, chunk); curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, 128L * 1024L); curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L); // Enable SSL verification curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 2L); // Verify the host } return curl; } int main(int argc, char* argv[]) { if (argc != 6) { std::cerr << "Usage: " << argv[0] << " <URL> <Chunk Size> <Total GB> <Batch Size> <Number of Concurrent Connections>" << std::endl; return 1; } std::string url = argv[1]; long chunkSize = std::stol(argv[2]) * 1024; long totalGB = std::stol(argv[3]); int batchSize = std::stoi(argv[4]); int numConnections = std::stoi(argv[5]); // Initialize CURL curl_global_init(CURL_GLOBAL_DEFAULT); // Get the total size of the file CURL* curl = curl_easy_init(); double fileSize = 0.0; if (curl) { curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); curl_easy_setopt(curl, CURLOPT_HEADER, 1L); curl_easy_setopt(curl, CURLOPT_FILETIME, 1L); curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L); 
curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 3L); curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1L); curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, 128L * 1024L); curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); // Enable SSL verification curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L); // Verify the host CURLcode res = curl_easy_perform(curl); if (res == CURLE_OK) { curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &fileSize); } else { // get the http status code long http_code = 0; curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code); std::cerr << "Failed to get file size: " << curl_easy_strerror(res) << " HTTP code: " << http_code << std::endl; return 1; } curl_easy_cleanup(curl); } long long numOfChunks = (long long)totalGB * 1024 * 1024 * 1024 / chunkSize; long long totalBatches; if (batchSize == 0) { batchSize = numOfChunks; totalBatches = 1; } else totalBatches = numOfChunks / batchSize; std::vector<Chunk> chunks; std::set<CURL*> handles; std::cout << "chunks: " << numOfChunks << std::endl; CURLM* multi_handle = curl_multi_init(); // curl_multi_setopt(multi_handle, CURLMOPT_MAXCONNECTS, 0); // curl_multi_setopt(multi_handle, CURLMOPT_MAX_TOTAL_CONNECTIONS, 1); // curl_multi_setopt(multi_handle, CURLMOPT_PIPELINING, 0L); // measure time taken to download clock_t outerStart, outerEnd; outerStart = clock(); std::cout << outerStart << std::endl; int still_running = 0; int maxHandles = 0; int maxPolledFd = 0; long long currentBatch = 0; do { do { // Add new connections while (handles.size() < numConnections && chunks.size() < batchSize) { long start = 0; long end = chunkSize - 1; if (end >= fileSize) end = fileSize - 1; Chunk chunk; auto handle = createCurlHandle(start, end, &chunk, url); handles.insert(handle); curl_multi_add_handle(multi_handle, handle); chunks.push_back(chunk); } if (handles.size() > maxHandles) { maxHandles = handles.size(); } // Perform connections CURLMcode mc = curl_multi_perform(multi_handle, &still_running); // Wait for 
activity, timeout or "nothing" int currentPolledFd = 0; if (still_running) { // measure call time in milliseconds clock_t start, end; start = clock(); mc = curl_multi_poll(multi_handle, NULL, 0, 1000, nullptr); end = clock(); double time_taken = double(end - start) / double(CLOCKS_PER_SEC); //if (time_taken > 0.1) //std::cout << "Time taken to poll: " << time_taken << " seconds. Still running: " << still_running << std::endl; if (mc != CURLM_OK) { std::cerr << "curl_multi_poll failed: " << curl_multi_strerror(mc) << std::endl; } if (currentPolledFd > maxPolledFd) { maxPolledFd = currentPolledFd; } } int handlesBeforeClean = handles.size(); while (CURLMsg* msg = curl_multi_info_read(multi_handle, &still_running)) { if (msg->msg == CURLMSG_DONE) { // std::cout << "http request done" << std::endl; CURL* handle = msg->easy_handle; CURLcode result = msg->data.result; if (result != CURLE_OK) { std::cerr << "Download failed: " << curl_easy_strerror(result) << std::endl; } handles.erase(handle); curl_multi_remove_handle(multi_handle, handle); curl_easy_cleanup(handle); } } int handlesAfterClean = handles.size(); // print handles before clean, after clean, and max on one line, but only if there isn't any change from previous iteration. 
// std::cout << "Handles before clean: " << handlesBeforeClean << " Handles after clean: " << handlesAfterClean << " Max handles: " << maxHandles << std::endl; } while (!(!still_running && handles.empty() && chunks.size() >= batchSize)); ++currentBatch; chunks.clear(); } while (currentBatch < totalBatches); outerEnd = clock(); std::cout << outerEnd << std::endl; double time_taken = double(outerEnd - outerStart) / double(CLOCKS_PER_SEC); std::cout << "Time taken to download: " << time_taken << " seconds" << std::endl; // calculate download speed double downloadSpeed = (fileSize / 1024) / time_taken; std::cout << "Download speed: " << downloadSpeed << " KB/s" << std::endl; std::cout << "max polled fd: " << maxPolledFd << std::endl; curl_multi_cleanup(multi_handle); curl_global_cleanup(); // Combine chunks std::string result; for (const auto& chunk : chunks) { result.append(chunk.data); } // Output the result std::cout << "Downloaded data: " << result << std::endl; return 0; } Maor Avni -- Unsubscribe: https://lists.haxx.se/mailman/listinfo/curl-library Etiquette: https://curl.se/mail/etiquette.html