maskit commented on pull request #7719:
URL: https://github.com/apache/trafficserver/pull/7719#issuecomment-826531511


   > That could explain why the utilization is high, but I don't think it is 
the problem here.
   
   I think it is, because erasing random entries is not a right thing to do. We 
shouldn't cut corners.
   
   I found that `find()` (or an equivalent that is internally called inside 
`erase()`) is actually not that fast and it's waste of time if we don't use the 
found entry. The code below is not a very fair comparison, but test1 is the 
original, test2 is the proposed approach, and test3 is another approach. 
Performance of test2 and test3 are pretty similar but test3 erases old entries 
first. Would you try it out?
   ```cpp
   #include <unordered_map>
   #include <random>
   #include <ctime>
   #include <iostream>
   
   constexpr int N = 20000000;
   
   int randomKey()
   {
       return std::rand() / (RAND_MAX / 1000000);
   }
   
   void test1()
   {
       std::unordered_map<int, int> map;
       int miss = 0;
       map.reserve(1000);
       int prev;
       for (int i = 0; i < N; ++i) {
           if (map.size() > 1000) {
               map.erase(prev);
           }
           map.insert({prev = randomKey(), 1});
   
           if (map.find(randomKey()) == map.end()) {
               ++miss;
           }
       }
       std::cout << miss << std::endl;
   }
   void test2()
   {
       std::unordered_map<int, int> map;
       int miss = 0;
       map.reserve(1000);
   
       for (int i = 0; i < N; ++i) {
           if (map.size() > 1000) {
               map.erase(map.begin());
           }
           map.insert({randomKey(), 1});
   
           if (map.find(randomKey()) == map.end()) {
               ++miss;
           }
       }
       std::cout << miss << std::endl;
   }
   void test3()
   {
       std::unordered_map<int, int> map1;
       std::unordered_map<int, int> map2;
       std::unordered_map<int, int> *current_map = &map1;
       std::unordered_map<int, int> *old_map = &map2;
       int miss = 0;
       map1.reserve(500);
       map2.reserve(500);
   
       for (int i = 0; i < N; ++i) {
           if (current_map->size() > 500) {
               old_map->clear();
               std::swap(current_map, old_map);
           }
           current_map->insert({randomKey(), 1});
   
           auto k = randomKey();
           if (current_map->find(k) == current_map->end()) {
               if (old_map->find(k) == old_map->end()) {
                   ++miss;
               }
           }
   
       }
       std::cout << miss << std::endl;
   }
   
   int main(int argc, char **argv) {
       std::srand(std::time(0));
       test1();
       test2();
       test3();
       return 0;
   }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to