This patch enhances the virtual memory mapping code to track both
linear (linear_map()) and non-linear (mmap()) virtual memory mappings.

It does so by adding a simple struct, vma_range, and a vma_range_set
collection that stores all mappings. It then modifies the
mmu::find_hole() implementation to search vma_range_set instead of
vma_list for the next hole. That way it can avoid the collisions
described in issue #1135.

Fixes #1135

Signed-off-by: Waldemar Kozaczuk <[email protected]>
---
 core/mmu.cc         | 76 +++++++++++++++++++++++++++++++++++++++++----
 include/osv/mmu.hh  | 31 ++++++++++++++++++
 include/osv/prio.hh |  1 +
 3 files changed, 102 insertions(+), 6 deletions(-)
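
A note for reviewers (this part is not applied by git am): the minimal
standalone sketch below models the idea behind the patch - wrap both kinds
of mappings in a cheap tagged range, keep all ranges in one std::set
ordered by start address, and walk adjacent pairs of ranges to find a
hole, the same shape as the new find_hole(). The fake_linear_vma/fake_vma
types and the simplified find_hole() here are hypothetical stand-ins for
the real linear_vma/vma classes; the edge markers, rwlock, and huge-page
alignment logic of the real code are omitted.

    // Standalone model of the vma_range idea (hypothetical names, not OSv code);
    // compile with: g++ -std=c++11 vma_range_sketch.cc && ./a.out
    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <iostream>
    #include <set>

    // Hypothetical stand-ins for mmu::linear_vma and mmu::vma
    struct fake_linear_vma { uintptr_t v_start_, v_end_; };
    struct fake_vma        { uintptr_t start_, end_;     };

    // Non-owning tagged wrapper: one element type for both mapping kinds
    struct range {
        const void* p;
        bool linear;
        range(const fake_linear_vma* v) : p(v), linear(true)  {}
        range(const fake_vma* v)        : p(v), linear(false) {}
        uintptr_t start() const {
            return linear ? static_cast<const fake_linear_vma*>(p)->v_start_
                          : static_cast<const fake_vma*>(p)->start_;
        }
        uintptr_t end() const {
            return linear ? static_cast<const fake_linear_vma*>(p)->v_end_
                          : static_cast<const fake_vma*>(p)->end_;
        }
    };

    // Order ranges by start address, as vma_range_compare does
    struct by_start {
        bool operator()(const range& a, const range& b) const {
            return a.start() < b.start();
        }
    };

    // Walk adjacent pairs of ranges looking for a gap of at least `size`
    // at or after `start`
    uintptr_t find_hole(const std::set<range, by_start>& rs,
                        uintptr_t start, uintptr_t size)
    {
        // First range starting at or after `start`, then step back to its
        // predecessor so the pair (p, n) brackets the requested address
        auto p = std::lower_bound(rs.begin(), rs.end(), start,
            [](const range& r, uintptr_t a) { return r.start() < a; });
        if (p != rs.begin()) {
            --p;
        }
        for (auto n = std::next(p); n != rs.end(); p = n++) {
            if (start >= p->end() && start + size <= n->start()) {
                return start;      // the requested address fits as-is
            }
            if (p->end() >= start && n->start() - p->end() >= size) {
                return p->end();   // shift the hole to just after p
            }
        }
        return 0;                  // no hole found
    }

    int main()
    {
        fake_vma        heap{0x10000, 0x20000};         // an mmap()-ed vma
        fake_linear_vma kernel{0x40000000, 0x40200000}; // a linear mapping
        std::set<range, by_start> rs{&heap, &kernel};
        // A 0x1000-byte hole requested at 0x10000 would collide with `heap`
        // (the bug class of #1135); the walk shifts it to heap's end instead
        assert(find_hole(rs, 0x10000, 0x1000) == 0x20000);
        std::cout << std::hex << find_hole(rs, 0x10000, 0x1000) << std::endl;
        return 0;
    }

The real patch additionally keeps vma_range_set under its own rwlock and
seeds it with the 0x0 and 0x800000000000 edge markers, so the walk also
sees the gaps at both ends of the allocatable area.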

diff --git a/core/mmu.cc b/core/mmu.cc
index a479e5c4..80ce6b63 100644
--- a/core/mmu.cc
+++ b/core/mmu.cc
@@ -47,6 +47,17 @@ extern const char text_start[], text_end[];
 
 namespace mmu {
 
+struct vma_range_compare {
+    bool operator()(const vma_range& a, const vma_range& b) {
+        return a.start() < b.start();
+    }
+};
+
+// Set of all vma ranges - both linear and non-linear ones
+__attribute__((init_priority((int)init_prio::vma_range_set)))
+std::set<vma_range, vma_range_compare> vma_range_set;
+rwlock_t vma_range_set_mutex;
+
 struct linear_vma_compare {
     bool operator()(const linear_vma* a, const linear_vma* b) {
         return a->_virt_addr < b->_virt_addr;
@@ -66,6 +77,9 @@ public:
     }
 };
 
+constexpr uintptr_t lower_vma_limit = 0x0;
+constexpr uintptr_t upper_vma_limit = 0x800000000000;
+
 typedef boost::intrusive::set<vma,
                               bi::compare<vma_compare>,
                               bi::member_hook<vma,
@@ -78,9 +92,15 @@ struct vma_list_type : vma_list_base {
     vma_list_type() {
         // insert markers for the edges of allocatable area
         // simplifies searches
-        insert(*new anon_vma(addr_range(0, 0), 0, 0));
-        uintptr_t e = 0x800000000000;
-        insert(*new anon_vma(addr_range(e, e), 0, 0));
+        auto lower_edge = new anon_vma(addr_range(lower_vma_limit, lower_vma_limit), 0, 0);
+        insert(*lower_edge);
+        auto upper_edge = new anon_vma(addr_range(upper_vma_limit, upper_vma_limit), 0, 0);
+        insert(*upper_edge);
+
+        WITH_LOCK(vma_range_set_mutex.for_write()) {
+            vma_range_set.insert(vma_range(lower_edge));
+            vma_range_set.insert(vma_range(upper_edge));
+        }
     }
 };
 
@@ -954,27 +974,41 @@ static error protect(const void *addr, size_t size, unsigned int perm)
     return no_error();
 }
 
+class vma_range_addr_compare {
+public:
+    bool operator()(const vma_range& x, uintptr_t y) const { return x.start() < y; }
+    bool operator()(uintptr_t x, const vma_range& y) const { return x < y.start(); }
+};
+
 uintptr_t find_hole(uintptr_t start, uintptr_t size)
 {
     bool small = size < huge_page_size;
     uintptr_t good_enough = 0;
 
-    // FIXME: use lower_bound or something
-    auto p = vma_list.begin();
+    SCOPE_LOCK(vma_range_set_mutex.for_read());
+    // Find the last vma range that starts before the start parameter, or the 1st one if none does
+    auto p = std::lower_bound(vma_range_set.begin(), vma_range_set.end(), start, vma_range_addr_compare());
+    if (p != vma_range_set.begin()) {
+        --p;
+    }
     auto n = std::next(p);
-    while (n != vma_list.end()) {
+    while (n != vma_range_set.end() && n->start() <= upper_vma_limit) { // we only go up to the upper mmap vma limit
+        // See if desired hole fits between p and n vmas
         if (start >= p->end() && start + size <= n->start()) {
             return start;
         }
+        // See if shifting start to the end of p makes the desired hole fit between p and n
         if (p->end() >= start && n->start() - p->end() >= size) {
             good_enough = p->end();
             if (small) {
                 return good_enough;
             }
+            // See if a huge-page-aligned hole fits between p and n
             if (n->start() - align_up(good_enough, huge_page_size) >= size) {
                 return align_up(good_enough, huge_page_size);
             }
         }
+        // If nothing worked, move to the next pair in the set
         p = n;
         ++n;
     }
@@ -999,6 +1033,9 @@ ulong evacuate(uintptr_t start, uintptr_t end)
                 memory::stats::on_jvm_heap_free(size);
             }
             vma_list.erase(dead);
+            WITH_LOCK(vma_range_set_mutex.for_write()) {
+                vma_range_set.erase(vma_range(&dead));
+            }
             delete &dead;
         }
     }
@@ -1140,6 +1177,9 @@ uintptr_t allocate(vma *v, uintptr_t start, size_t size, bool search)
     v->set(start, start+size);
 
     vma_list.insert(*v);
+    WITH_LOCK(vma_range_set_mutex.for_write()) {
+        vma_range_set.insert(vma_range(v));
+    }
 
     return start;
 }
@@ -1493,6 +1533,9 @@ void anon_vma::split(uintptr_t edge)
     vma* n = new anon_vma(addr_range(edge, _range.end()), _perm, _flags);
     set(_range.start(), edge);
     vma_list.insert(*n);
+    WITH_LOCK(vma_range_set_mutex.for_write()) {
+        vma_range_set.insert(vma_range(n));
+    }
 }
 
 error anon_vma::sync(uintptr_t start, uintptr_t end)
@@ -1600,6 +1643,9 @@ jvm_balloon_vma::~jvm_balloon_vma()
     // for a dangling mapping representing a balloon that was already moved
     // out.
     vma_list.erase(*this);
+    WITH_LOCK(vma_range_set_mutex.for_write()) {
+        vma_range_set.erase(vma_range(this));
+    }
     assert(!(_real_flags & mmap_jvm_balloon));
     mmu::map_anon(addr(), size(), _real_flags, _real_perm);
 
@@ -1667,6 +1713,9 @@ ulong map_jvm(unsigned char* jvm_addr, size_t size, size_t align, balloon_ptr b)
                    // Since we will change its position in the tree, for the sake of future
                     // lookups we need to reinsert it.
                     vma_list.erase(*jvma);
+                    WITH_LOCK(vma_range_set_mutex.for_write()) {
+                        vma_range_set.erase(vma_range(jvma));
+                    }
                     if (jvma->start() < start) {
                         assert(jvma->partial() >= (jvma->end() - start));
                         jvma->set(jvma->start(), start);
@@ -1675,11 +1724,17 @@ ulong map_jvm(unsigned char* jvm_addr, size_t size, size_t align, balloon_ptr b)
                         jvma->set(end, jvma->end());
                     }
                     vma_list.insert(*jvma);
+                    WITH_LOCK(vma_range_set_mutex.for_write()) {
+                        vma_range_set.insert(vma_range(jvma));
+                    }
                 } else {
                    // Note how v and jvma are different. This is because this one,
                     // we will delete.
                     auto& v = *i--;
                     vma_list.erase(v);
+                    WITH_LOCK(vma_range_set_mutex.for_write()) {
+                        vma_range_set.erase(vma_range(&v));
+                    }
                    // Finish the move. In practice, it will temporarily remap an
                     // anon mapping here, but this should be rare. Let's not
                     // complicate the code to optimize it. There are no
@@ -1692,6 +1747,9 @@ ulong map_jvm(unsigned char* jvm_addr, size_t size, size_t align, balloon_ptr b)
 
         evacuate(start, start + size);
         vma_list.insert(*vma);
+        WITH_LOCK(vma_range_set_mutex.for_write()) {
+            vma_range_set.insert(vma_range(vma));
+        }
         return vma->size();
     }
     return 0;
@@ -1753,6 +1811,9 @@ void file_vma::split(uintptr_t edge)
     vma *n = _file->mmap(addr_range(edge, _range.end()), _flags, _perm, off).release();
     set(_range.start(), edge);
     vma_list.insert(*n);
+    WITH_LOCK(vma_range_set_mutex.for_write()) {
+        vma_range_set.insert(vma_range(n));
+    }
 }
 
 error file_vma::sync(uintptr_t start, uintptr_t end)
@@ -1903,6 +1964,9 @@ void linear_map(void* _virt, phys addr, size_t size, const char* name,
     WITH_LOCK(linear_vma_set_mutex.for_write()) {
        linear_vma_set.insert(_vma);
     }
+    WITH_LOCK(vma_range_set_mutex.for_write()) {
+       vma_range_set.insert(vma_range(_vma));
+    }
 }
 
 void free_initial_memory_range(uintptr_t addr, size_t size)
diff --git a/include/osv/mmu.hh b/include/osv/mmu.hh
index f4bdaa84..60ad4a89 100644
--- a/include/osv/mmu.hh
+++ b/include/osv/mmu.hh
@@ -90,6 +90,37 @@ public:
     boost::intrusive::set_member_hook<> _vma_list_hook;
 };
 
+struct vma_range {
+    const void* _vma;
+    bool _is_linear;
+
+    vma_range(const linear_vma* v) {
+       _vma = v;
+       _is_linear = true;
+    }
+
+    vma_range(const vma* v) {
+       _vma = v;
+       _is_linear = false;
+    }
+
+    uintptr_t start() const {
+       if (_is_linear) {
+          return static_cast<const linear_vma*>(_vma)->v_start();
+       } else {
+          return static_cast<const vma*>(_vma)->start();
+       }
+    }
+
+    uintptr_t end() const {
+       if (_is_linear) {
+          return static_cast<const linear_vma*>(_vma)->v_end();
+       } else {
+          return static_cast<const vma*>(_vma)->end();
+       }
+    }
+};
+
 class anon_vma : public vma {
 public:
     anon_vma(addr_range range, unsigned perm, unsigned flags);
diff --git a/include/osv/prio.hh b/include/osv/prio.hh
index 23adc08f..6deb0afd 100644
--- a/include/osv/prio.hh
+++ b/include/osv/prio.hh
@@ -16,6 +16,7 @@ enum {
     cpus,
     fpranges,
     pt_root,
+    vma_range_set,
     linear_vma_set,
     mempool,
     routecache,
-- 
2.31.1
