On Thu, Mar 31, 2022 at 6:36 AM Commit Bot <[email protected]> wrote:
> From: Waldemar Kozaczuk <[email protected]> > Committer: Waldemar Kozaczuk <[email protected]> > Branch: master > > mmap: avoid collisions with linear map > > This patch enhances virtual memory mapping code to track both > linear (linear_map) and non-linear (mmap()) virtual memory mappings. > > It does so by adding simple struct - vma_range and vma_range_set > collection to store all mappings. It then modifies mmu::find_hole() > implementation to use vma_range_set instead of vma_list to find next hole. > That way it can avoid collisions described in the issue #1135. > > Fixes #1135 > > Signed-off-by: Waldemar Kozaczuk <[email protected]> > > --- > diff --git a/core/mmu.cc b/core/mmu.cc > --- a/core/mmu.cc > +++ b/core/mmu.cc > @@ -47,6 +47,17 @@ extern const char text_start[], text_end[]; > > namespace mmu { > > +struct vma_range_compare { > + bool operator()(const vma_range& a, const vma_range& b) { > + return a.start() < b.start(); > + } > +}; > + > +//Set of all vma ranges - both linear and non-linear ones > +__attribute__((init_priority((int)init_prio::vma_range_set))) > +std::set<vma_range, vma_range_compare> vma_range_set; > +rwlock_t vma_range_set_mutex; > Do we still need both vma_range_set and vma_list? Why? 
> + > struct linear_vma_compare { > bool operator()(const linear_vma* a, const linear_vma* b) { > return a->_virt_addr < b->_virt_addr; > @@ -66,6 +77,9 @@ class vma_compare { > } > }; > > +constexpr uintptr_t lower_vma_limit = 0x0; > +constexpr uintptr_t upper_vma_limit = 0x800000000000; > + > typedef boost::intrusive::set<vma, > bi::compare<vma_compare>, > bi::member_hook<vma, > @@ -78,9 +92,15 @@ struct vma_list_type : vma_list_base { > vma_list_type() { > // insert markers for the edges of allocatable area > // simplifies searches > - insert(*new anon_vma(addr_range(0, 0), 0, 0)); > - uintptr_t e = 0x800000000000; > - insert(*new anon_vma(addr_range(e, e), 0, 0)); > + auto lower_edge = new anon_vma(addr_range(lower_vma_limit, > lower_vma_limit), 0, 0); > + insert(*lower_edge); > + auto upper_edge = new anon_vma(addr_range(upper_vma_limit, > upper_vma_limit), 0, 0); > + insert(*upper_edge); > + > + WITH_LOCK(vma_range_set_mutex.for_write()) { > + vma_range_set.insert(vma_range(lower_edge)); > + vma_range_set.insert(vma_range(upper_edge)); > + } > } > }; > > @@ -954,27 +974,41 @@ static error protect(const void *addr, size_t size, > unsigned int perm) > return no_error(); > } > > +class vma_range_addr_compare { > +public: > + bool operator()(const vma_range& x, uintptr_t y) const { return > x.start() < y; } > + bool operator()(uintptr_t x, const vma_range& y) const { return x < > y.start(); } > +}; > > uintptr_t find_hole(uintptr_t start, uintptr_t size) > { > bool small = size < huge_page_size; > uintptr_t good_enough = 0; > > - // FIXME: use lower_bound or something > - auto p = vma_list.begin(); > + SCOPE_LOCK(vma_range_set_mutex.for_read()); > Unfortunately I'm very rusty with this area of the code, so I have a question that might be stupid: How come we didn't need a mutex here before, but need one now? Could adding a mutex here be a problem - namely that maybe we need to call some of this code from non-preemptable code and this will now break? 
> + //Find first vma range which starts before the start parameter or is > the 1st one > + auto p = std::lower_bound(vma_range_set.begin(), vma_range_set.end(), > start, vma_range_addr_compare()); > + if (p != vma_range_set.begin()) { > + --p; > + } > auto n = std::next(p); > - while (n != vma_list.end()) { > + while (n->start() <= upper_vma_limit) { //we only go up to the upper > mmap vma limit > + //See if desired hole fits between p and n vmas > if (start >= p->end() && start + size <= n->start()) { > return start; > } > + //See if shifting start to the end of p makes desired hole fit > between p and n > if (p->end() >= start && n->start() - p->end() >= size) { > good_enough = p->end(); > if (small) { > return good_enough; > } > + //See if huge hole fits between p and n > if (n->start() - align_up(good_enough, huge_page_size) >= > size) { > return align_up(good_enough, huge_page_size); > } > } > + //If nothing worked move next in the list > p = n; > ++n; > } > @@ -999,6 +1033,9 @@ ulong evacuate(uintptr_t start, uintptr_t end) > memory::stats::on_jvm_heap_free(size); > } > vma_list.erase(dead); > + WITH_LOCK(vma_range_set_mutex.for_write()) { > + vma_range_set.erase(vma_range(&dead)); > + } > delete &dead; > } > } > @@ -1140,6 +1177,9 @@ uintptr_t allocate(vma *v, uintptr_t start, size_t > size, bool search) > v->set(start, start+size); > > vma_list.insert(*v); > + WITH_LOCK(vma_range_set_mutex.for_write()) { > + vma_range_set.insert(vma_range(v)); > + } > > return start; > } > @@ -1493,6 +1533,9 @@ void anon_vma::split(uintptr_t edge) > vma* n = new anon_vma(addr_range(edge, _range.end()), _perm, _flags); > set(_range.start(), edge); > vma_list.insert(*n); > + WITH_LOCK(vma_range_set_mutex.for_write()) { > + vma_range_set.insert(vma_range(n)); > + } > } > > error anon_vma::sync(uintptr_t start, uintptr_t end) > @@ -1600,6 +1643,9 @@ jvm_balloon_vma::~jvm_balloon_vma() > // for a dangling mapping representing a balloon that was already > moved > // out. 
> vma_list.erase(*this); > + WITH_LOCK(vma_range_set_mutex.for_write()) { > + vma_range_set.erase(vma_range(this)); > + } > assert(!(_real_flags & mmap_jvm_balloon)); > mmu::map_anon(addr(), size(), _real_flags, _real_perm); > > @@ -1667,6 +1713,9 @@ ulong map_jvm(unsigned char* jvm_addr, size_t size, > size_t align, balloon_ptr b) > // Since we will change its position in the tree, for > the sake of future > // lookups we need to reinsert it. > vma_list.erase(*jvma); > + WITH_LOCK(vma_range_set_mutex.for_write()) { > + vma_range_set.erase(vma_range(jvma)); > + } > if (jvma->start() < start) { > assert(jvma->partial() >= (jvma->end() - start)); > jvma->set(jvma->start(), start); > @@ -1675,11 +1724,17 @@ ulong map_jvm(unsigned char* jvm_addr, size_t > size, size_t align, balloon_ptr b) > jvma->set(end, jvma->end()); > } > vma_list.insert(*jvma); > + WITH_LOCK(vma_range_set_mutex.for_write()) { > + vma_range_set.insert(vma_range(jvma)); > + } > } else { > // Note how v and jvma are different. This is because > this one, > // we will delete. > auto& v = *i--; > vma_list.erase(v); > + WITH_LOCK(vma_range_set_mutex.for_write()) { > + vma_range_set.erase(vma_range(&v)); > + } > // Finish the move. In practice, it will temporarily > remap an > // anon mapping here, but this should be rare. Let's > not > // complicate the code to optimize it. 
There are no > @@ -1692,6 +1747,9 @@ ulong map_jvm(unsigned char* jvm_addr, size_t size, > size_t align, balloon_ptr b) > > evacuate(start, start + size); > vma_list.insert(*vma); > + WITH_LOCK(vma_range_set_mutex.for_write()) { > + vma_range_set.insert(vma_range(vma)); > + } > return vma->size(); > } > return 0; > @@ -1753,6 +1811,9 @@ void file_vma::split(uintptr_t edge) > vma *n = _file->mmap(addr_range(edge, _range.end()), _flags, _perm, > off).release(); > set(_range.start(), edge); > vma_list.insert(*n); > + WITH_LOCK(vma_range_set_mutex.for_write()) { > + vma_range_set.insert(vma_range(n)); > + } > } > > error file_vma::sync(uintptr_t start, uintptr_t end) > @@ -1903,6 +1964,9 @@ void linear_map(void* _virt, phys addr, size_t size, > const char* name, > WITH_LOCK(linear_vma_set_mutex.for_write()) { > linear_vma_set.insert(_vma); > } > + WITH_LOCK(vma_range_set_mutex.for_write()) { > + vma_range_set.insert(vma_range(_vma)); > + } > } > > void free_initial_memory_range(uintptr_t addr, size_t size) > diff --git a/include/osv/mmu.hh b/include/osv/mmu.hh > --- a/include/osv/mmu.hh > +++ b/include/osv/mmu.hh > @@ -90,6 +90,37 @@ public: > boost::intrusive::set_member_hook<> _vma_list_hook; > }; > > +struct vma_range { > + const void* _vma; > + bool _is_linear; > + > + vma_range(const linear_vma* v) { > + _vma = v; > + _is_linear = true; > + } > + > + vma_range(const vma* v) { > + _vma = v; > + _is_linear = false; > + } > + > + uintptr_t start() const { > + if (_is_linear) { > + return static_cast<const linear_vma*>(_vma)->v_start(); > + } else { > + return static_cast<const vma*>(_vma)->start(); > + } > + } > + > + uintptr_t end() const { > + if (_is_linear) { > + return static_cast<const linear_vma*>(_vma)->v_end(); > + } else { > + return static_cast<const vma*>(_vma)->end(); > + } > + } > +}; > + > class anon_vma : public vma { > public: > anon_vma(addr_range range, unsigned perm, unsigned flags); > diff --git a/include/osv/prio.hh b/include/osv/prio.hh > --- 
a/include/osv/prio.hh > +++ b/include/osv/prio.hh > @@ -16,6 +16,7 @@ enum { > cpus, > fpranges, > pt_root, > + vma_range_set, > linear_vma_set, > mempool, > routecache, > > -- > You received this message because you are subscribed to the Google Groups > "OSv Development" group. > To unsubscribe from this group and stop receiving emails from it, send an > email to [email protected]. > To view this discussion on the web visit > https://groups.google.com/d/msgid/osv-dev/000000000000ea29e705db7b5e8f%40google.com > . > -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To view this discussion on the web visit https://groups.google.com/d/msgid/osv-dev/CANEVyjuhQU51K2srp%3Db9sJJiyW5urLKupht37OV%2BvohGBNGq6w%40mail.gmail.com.
