D4503: util: teach lrucachedict to enforce a max total cost

2018-09-12 Thread indygreg (Gregory Szorc)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHG842cd0bdda75: util: teach lrucachedict to enforce a max 
total cost (authored by indygreg, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D4503?vs=10822=10942

REVISION DETAIL
  https://phab.mercurial-scm.org/D4503

AFFECTED FILES
  contrib/perf.py
  mercurial/util.py
  tests/test-lrucachedict.py

CHANGE DETAILS

diff --git a/tests/test-lrucachedict.py b/tests/test-lrucachedict.py
--- a/tests/test-lrucachedict.py
+++ b/tests/test-lrucachedict.py
@@ -133,6 +133,22 @@
 for key in ('a', 'b', 'c', 'd'):
 self.assertEqual(d[key], 'v%s' % key)
 
+d = util.lrucachedict(4, maxcost=42)
+d.insert('a', 'va', cost=5)
+d.insert('b', 'vb', cost=4)
+d.insert('c', 'vc', cost=3)
+dc = d.copy()
+self.assertEqual(dc.maxcost, 42)
+self.assertEqual(len(dc), 3)
+
+# Max cost can be lowered as part of copy.
+dc = d.copy(maxcost=10)
+self.assertEqual(dc.maxcost, 10)
+self.assertEqual(len(dc), 2)
+self.assertEqual(dc.totalcost, 7)
+self.assertIn('b', dc)
+self.assertIn('c', dc)
+
 def testcopydecreasecapacity(self):
 d = util.lrucachedict(5)
 d.insert('a', 'va', cost=4)
@@ -217,5 +233,93 @@
 d['a'] = 'va'
 self.assertEqual(d.popoldest(), ('b', 'vb'))
 
+def testmaxcost(self):
+# Item cost is zero by default.
+d = util.lrucachedict(6, maxcost=10)
+d['a'] = 'va'
+d['b'] = 'vb'
+d['c'] = 'vc'
+d['d'] = 'vd'
+self.assertEqual(len(d), 4)
+self.assertEqual(d.totalcost, 0)
+
+d.clear()
+
+# Insertion to exact cost threshold works without eviction.
+d.insert('a', 'va', cost=6)
+d.insert('b', 'vb', cost=4)
+
+self.assertEqual(len(d), 2)
+self.assertEqual(d['a'], 'va')
+self.assertEqual(d['b'], 'vb')
+
+# Inserting a new element with 0 cost works.
+d['c'] = 'vc'
+self.assertEqual(len(d), 3)
+
+# Inserting a new element with cost putting us above high
+# water mark evicts oldest single item.
+d.insert('d', 'vd', cost=1)
+self.assertEqual(len(d), 3)
+self.assertEqual(d.totalcost, 5)
+self.assertNotIn('a', d)
+for key in ('b', 'c', 'd'):
+self.assertEqual(d[key], 'v%s' % key)
+
+# Inserting a new element with enough room for just itself
+# evicts all items before.
+d.insert('e', 've', cost=10)
+self.assertEqual(len(d), 1)
+self.assertEqual(d.totalcost, 10)
+self.assertIn('e', d)
+
+# Inserting a new element with cost greater than threshold
+# still retains that item.
+d.insert('f', 'vf', cost=11)
+self.assertEqual(len(d), 1)
+self.assertEqual(d.totalcost, 11)
+self.assertIn('f', d)
+
+# Inserting a new element will evict the last item since it is
+# too large.
+d['g'] = 'vg'
+self.assertEqual(len(d), 1)
+self.assertEqual(d.totalcost, 0)
+self.assertIn('g', d)
+
+d.clear()
+
+d.insert('a', 'va', cost=7)
+d.insert('b', 'vb', cost=3)
+self.assertEqual(len(d), 2)
+
+# Replacing a value with smaller cost won't result in eviction.
+d.insert('b', 'vb2', cost=2)
+self.assertEqual(len(d), 2)
+
+# Replacing a value with a higher cost will evict when threshold
+# exceeded.
+d.insert('b', 'vb3', cost=4)
+self.assertEqual(len(d), 1)
+self.assertNotIn('a', d)
+
+def testmaxcostcomplex(self):
+d = util.lrucachedict(100, maxcost=100)
+d.insert('a', 'va', cost=9)
+d.insert('b', 'vb', cost=21)
+d.insert('c', 'vc', cost=7)
+d.insert('d', 'vc', cost=50)
+self.assertEqual(d.totalcost, 87)
+
+# Inserting new element should free multiple elements so we hit
+# low water mark.
+d.insert('e', 'vd', cost=25)
+self.assertEqual(len(d), 3)
+self.assertNotIn('a', d)
+self.assertNotIn('b', d)
+self.assertIn('c', d)
+self.assertIn('d', d)
+self.assertIn('e', d)
+
 if __name__ == '__main__':
 silenttestrunner.main(__name__)
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -1240,16 +1240,23 @@
 Items in the cache can be inserted with an optional "cost" value. This is
 simply an integer that is specified by the caller. The cache can be queried
 for the total cost of all items presently in the cache.
+
+The cache can also define a maximum cost. If a cache insertion would
+cause the total cost of the cache to go beyond the maximum cost limit,
+nodes will be evicted to make room for the new code. This can be used
+

D4503: util: teach lrucachedict to enforce a max total cost

2018-09-06 Thread indygreg (Gregory Szorc)
indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Now that lrucachedict entries can have a numeric cost associated
  with them and we can easily pop the oldest item in the cache, it
  now becomes relatively trivial to implement support for enforcing
  a high water mark on the total cost of items in the cache.
  
  This commit teaches lrucachedict instances to have a max cost
  associated with them. When items are inserted, we pop old items
  until enough "cost" frees up to make room for the new item.
  
  This feature is close to zero cost when not used (modulo the insertion
  regressed introduced by the previous commit):
  
  $ ./hg perflrucachedict --size 4 --gets 100 --sets 100 --mixed 100
  ! gets
  ! wall 0.607444 comb 0.61 user 0.61 sys 0.00 (best of 17)
  ! wall 0.601653 comb 0.60 user 0.60 sys 0.00 (best of 17)
  ! inserts
  ! wall 0.678261 comb 0.68 user 0.68 sys 0.00 (best of 14)
  ! wall 0.685042 comb 0.68 user 0.68 sys 0.00 (best of 15)
  ! sets
  ! wall 0.808770 comb 0.80 user 0.80 sys 0.00 (best of 13)
  ! wall 0.834241 comb 0.83 user 0.83 sys 0.00 (best of 12)
  ! mixed
  ! wall 0.782441 comb 0.78 user 0.78 sys 0.00 (best of 13)
  ! wall 0.803804 comb 0.80 user 0.80 sys 0.00 (best of 13)
  
  $ hg perflrucachedict --size 1000 --gets 100 --sets 100 --mixed 
100
  ! init
  ! wall 0.006952 comb 0.01 user 0.01 sys 0.00 (best of 418)
  ! gets
  ! wall 0.613350 comb 0.61 user 0.61 sys 0.00 (best of 17)
  ! wall 0.617415 comb 0.62 user 0.62 sys 0.00 (best of 17)
  ! inserts
  ! wall 0.701270 comb 0.70 user 0.70 sys 0.00 (best of 15)
  ! wall 0.700516 comb 0.70 user 0.70 sys 0.00 (best of 15)
  ! sets
  ! wall 0.825720 comb 0.83 user 0.83 sys 0.00 (best of 13)
  ! wall 0.837946 comb 0.84 user 0.83 sys 0.01 (best of 12)
  ! mixed
  ! wall 0.821644 comb 0.82 user 0.82 sys 0.00 (best of 13)
  ! wall 0.850559 comb 0.85 user 0.85 sys 0.00 (best of 12)
  
  I reckon the slight slowdown on insert is due to added if checks.
  
  For caches with total cost limiting enabled:
  
  $ hg perflrucachedict --size 4 --gets 100 --sets 100 --mixed 100 
--costlimit 100
  ! gets w/ cost limit
  ! wall 0.598737 comb 0.59 user 0.59 sys 0.00 (best of 17)
  ! inserts w/ cost limit
  ! wall 1.694282 comb 1.70 user 1.70 sys 0.00 (best of 6)
  ! mixed w/ cost limit
  ! wall 1.157655 comb 1.15 user 1.15 sys 0.00 (best of 9)
  
  $ hg perflrucachedict --size 1000 --gets 100 --sets 100 --mixed 
100 --costlimit 1
  ! gets w/ cost limit
  ! wall 0.598526 comb 0.60 user 0.60 sys 0.00 (best of 17)
  ! inserts w/ cost limit
  ! wall 37.838315 comb 37.84 user 37.84 sys 0.00 (best of 3)
  ! mixed w/ cost limit
  ! wall 18.060198 comb 18.06 user 18.06 sys 0.00 (best of 3)
  
  $ hg perflrucachedict --size 1000 --gets 100 --sets 100 --mixed 
100 --costlimit 1 --mixedgetfreq 90
  ! gets w/ cost limit
  ! wall 0.600024 comb 0.60 user 0.60 sys 0.00 (best of 17)
  ! inserts w/ cost limit
  ! wall 37.154547 comb 37.12 user 37.12 sys 0.00 (best of 3)
  ! mixed w/ cost limit
  ! wall 4.381602 comb 4.38 user 4.37 sys 0.01 (best of 3)
  
  The functions we're benchmarking are slightly different, which could
  move numbers by a few milliseconds. But the slowdown on insert is too
  great to be explained by that. The slowness is due to insert heavy
  operations needing to call popoldest() repeatedly when the cache is
  at capacity. The next commit will address this.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D4503

AFFECTED FILES
  contrib/perf.py
  mercurial/util.py
  tests/test-lrucachedict.py

CHANGE DETAILS

diff --git a/tests/test-lrucachedict.py b/tests/test-lrucachedict.py
--- a/tests/test-lrucachedict.py
+++ b/tests/test-lrucachedict.py
@@ -129,6 +129,22 @@
 for key in ('a', 'b', 'c', 'd'):
 self.assertEqual(d[key], 'v%s' % key)
 
+d = util.lrucachedict(4, maxcost=42)
+d.insert('a', 'va', cost=5)
+d.insert('b', 'vb', cost=4)
+d.insert('c', 'vc', cost=3)
+dc = d.copy()
+self.assertEqual(dc.maxcost, 42)
+self.assertEqual(len(dc), 3)
+
+# Max cost can be lowered as part of copy.
+dc = d.copy(maxcost=10)
+self.assertEqual(dc.maxcost, 10)
+self.assertEqual(len(dc), 2)
+self.assertEqual(dc.totalcost, 7)
+self.assertIn('b', dc)
+self.assertIn('c', dc)
+
 def testcopydecreasecapacity(self):
 d = util.lrucachedict(5)
 d.insert('a', 'va', cost=4)
@@ -213,5 +229,93 @@
 d['a'] = 'va'