On Thu, Mar 31, 2022 at 6:36 AM Commit Bot <[email protected]> wrote:

> From: Waldemar Kozaczuk <[email protected]>
> Committer: Waldemar Kozaczuk <[email protected]>
> Branch: master
>
> mmap: avoid collisions with linear map
>
> This patch enhances virtual memory mapping code to track both
> linear (linear_map) and non-linear (mmap()) virtual memory mappings.
>
> It does so by adding a simple struct - vma_range - and a vma_range_set
> collection to store all mappings. It then modifies the mmu::find_hole()
> implementation to use vma_range_set instead of vma_list to find the next
> hole. That way it avoids the collisions described in issue #1135.
>
> Fixes #1135
>
> Signed-off-by: Waldemar Kozaczuk <[email protected]>
>
> ---
> diff --git a/core/mmu.cc b/core/mmu.cc
> --- a/core/mmu.cc
> +++ b/core/mmu.cc
> @@ -47,6 +47,17 @@ extern const char text_start[], text_end[];
>
>  namespace mmu {
>
> +struct vma_range_compare {
> +    bool operator()(const vma_range& a, const vma_range& b) {
> +        return a.start() < b.start();
> +    }
> +};
> +
> +// Set of all vma ranges - both linear and non-linear ones
> +__attribute__((init_priority((int)init_prio::vma_range_set)))
> +std::set<vma_range, vma_range_compare> vma_range_set;
> +rwlock_t vma_range_set_mutex;
>

Do we still need both vma_range_set and vma_list? Why?
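
To make the new data structure concrete for other readers, here is a
minimal, self-contained sketch of the idea as I read the patch:
vma_range_set is just an ordered index of [start, end) ranges that spans
both kinds of mappings. All types and names below are simplified stand-ins
of my own, not the patch's:

    #include <cstdint>
    #include <set>

    // Toy stand-in for vma_range: just a [start, end) pair.
    struct range { uintptr_t start, end; };

    // Order ranges by start address, as vma_range_compare does in the patch.
    struct by_start {
        bool operator()(const range& a, const range& b) const {
            return a.start < b.start;
        }
    };

    int main() {
        std::set<range, by_start> ranges;        // stand-in for vma_range_set
        ranges.insert({0x1000, 0x2000});         // an mmap()-style vma
        ranges.insert({0x40000000, 0x40200000}); // a linear_map()-style vma
        // Both kinds now take part in the same ordered search, so a hole
        // search over this set cannot hand out an address that overlaps
        // a linear mapping.
        return 0;
    }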


> +
>  struct linear_vma_compare {
>      bool operator()(const linear_vma* a, const linear_vma* b) {
>          return a->_virt_addr < b->_virt_addr;
> @@ -66,6 +77,9 @@ class vma_compare {
>      }
>  };
>
> +constexpr uintptr_t lower_vma_limit = 0x0;
> +constexpr uintptr_t upper_vma_limit = 0x800000000000;
> +
>  typedef boost::intrusive::set<vma,
>                                bi::compare<vma_compare>,
>                                bi::member_hook<vma,
> @@ -78,9 +92,15 @@ struct vma_list_type : vma_list_base {
>      vma_list_type() {
>          // insert markers for the edges of allocatable area
>          // simplifies searches
>
> -        insert(*new anon_vma(addr_range(0, 0), 0, 0));
> -        uintptr_t e = 0x800000000000;
> -        insert(*new anon_vma(addr_range(e, e), 0, 0));
> +        auto lower_edge = new anon_vma(addr_range(lower_vma_limit, lower_vma_limit), 0, 0);
> +        insert(*lower_edge);
> +        auto upper_edge = new anon_vma(addr_range(upper_vma_limit, upper_vma_limit), 0, 0);
> +        insert(*upper_edge);
> +
> +        WITH_LOCK(vma_range_set_mutex.for_write()) {
> +            vma_range_set.insert(vma_range(lower_edge));
> +            vma_range_set.insert(vma_range(upper_edge));
> +        }
>      }
>  };
>
> @@ -954,27 +974,41 @@ static error protect(const void *addr, size_t size, unsigned int perm)
>      return no_error();
>  }
>
> +class vma_range_addr_compare {
> +public:
> +    bool operator()(const vma_range& x, uintptr_t y) const { return x.start() < y; }
> +    bool operator()(uintptr_t x, const vma_range& y) const { return x < y.start(); }
> +};
> +
>  uintptr_t find_hole(uintptr_t start, uintptr_t size)
>  {
>      bool small = size < huge_page_size;
>      uintptr_t good_enough = 0;
>
> -    // FIXME: use lower_bound or something
> -    auto p = vma_list.begin();
> +    SCOPE_LOCK(vma_range_set_mutex.for_read());
>

Unfortunately I'm very rusty with this area of the code, so I have a
question that might be stupid:

How come we didn't need a mutex here before, but need one now?
Could adding a mutex here be a problem - namely, might some of this code
need to be called from non-preemptable contexts, which this lock would
now break?
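
For what it's worth, here is how I read the locking discipline the patch
introduces, modeled with standard C++ so it compiles outside OSv
(std::shared_mutex stands in for OSv's rwlock_t; all names here are mine):
find_hole() takes only the read side, and every insert/erase takes the
write side. Whether the read side is safe from non-preemptable contexts is
exactly the open question above.

    #include <mutex>
    #include <set>
    #include <shared_mutex>

    std::set<int> ranges;             // stand-in for vma_range_set
    std::shared_mutex ranges_mutex;   // stand-in for vma_range_set_mutex

    // Reader path - models SCOPE_LOCK(vma_range_set_mutex.for_read()).
    bool contains(int v) {
        std::shared_lock<std::shared_mutex> lock(ranges_mutex);
        return ranges.count(v) != 0;
    }

    // Writer path - models WITH_LOCK(vma_range_set_mutex.for_write()).
    void add(int v) {
        std::unique_lock<std::shared_mutex> lock(ranges_mutex);
        ranges.insert(v);
    }

    int main() { add(42); return contains(42) ? 0 : 1; }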


> +    // Find first vma range which starts before the start parameter or is the 1st one
> +    auto p = std::lower_bound(vma_range_set.begin(), vma_range_set.end(), start, vma_range_addr_compare());
> +    if (p != vma_range_set.begin()) {
> +        --p;
> +    }
>      auto n = std::next(p);
> -    while (n != vma_list.end()) {
> +    while (n->start() <= upper_vma_limit) { // we only go up to the upper mmap vma limit
> +        // See if desired hole fits between p and n vmas
>          if (start >= p->end() && start + size <= n->start()) {
>              return start;
>          }
> +        // See if shifting start to the end of p makes desired hole fit between p and n
>          if (p->end() >= start && n->start() - p->end() >= size) {
>              good_enough = p->end();
>              if (small) {
>                  return good_enough;
>              }
> +            // See if huge hole fits between p and n
>              if (n->start() - align_up(good_enough, huge_page_size) >= size) {
>                  return align_up(good_enough, huge_page_size);
>              }
>          }
> +        // If nothing worked move next in the list
>          p = n;
>          ++n;
>      }
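
Since the new find_hole() is the heart of the change, here is a
compile-able distillation of the search it performs, with simplified
types (range, by_start and find_hole_demo are names of my own, not the
patch's). Note how the sentinel entries at lower_vma_limit and
upper_vma_limit guarantee that p and n always exist, so the real code
needs no begin()/end() special-casing:

    #include <cstdint>
    #include <iterator>
    #include <set>

    struct range { uintptr_t start, end; };

    // Heterogeneous comparator, playing the role of vma_range_compare
    // plus vma_range_addr_compare in the patch.
    struct by_start {
        using is_transparent = void;
        bool operator()(const range& a, const range& b) const { return a.start < b.start; }
        bool operator()(const range& a, uintptr_t b) const { return a.start < b; }
        bool operator()(uintptr_t a, const range& b) const { return a < b.start; }
    };

    // First-fit hole search over the ordered range set (small-page case
    // only; the huge-page alignment branch is omitted for brevity).
    uintptr_t find_hole_demo(const std::set<range, by_start>& rs,
                             uintptr_t start, uintptr_t size)
    {
        auto p = rs.lower_bound(start); // first range starting at or after start
        if (p != rs.begin()) { --p; }   // step back to the range before start
        for (auto n = std::next(p); n != rs.end(); p = n++) {
            if (start >= p->end && start + size <= n->start) {
                return start;           // the requested spot itself is free
            }
            if (p->end >= start && n->start - p->end >= size) {
                return p->end;          // first hole after start that fits
            }
        }
        return 0;                       // no hole found
    }

    int main() {
        // Sentinels at both edges, one real mapping in between (64-bit only).
        std::set<range, by_start> rs{
            {0x0, 0x0}, {0x1000, 0x3000}, {0x800000000000, 0x800000000000}};
        return find_hole_demo(rs, 0x1000, 0x1000) == 0x3000 ? 0 : 1;
    }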
> @@ -999,6 +1033,9 @@ ulong evacuate(uintptr_t start, uintptr_t end)
>                  memory::stats::on_jvm_heap_free(size);
>              }
>              vma_list.erase(dead);
> +            WITH_LOCK(vma_range_set_mutex.for_write()) {
> +                vma_range_set.erase(vma_range(&dead));
> +            }
>              delete &dead;
>          }
>      }
> @@ -1140,6 +1177,9 @@ uintptr_t allocate(vma *v, uintptr_t start, size_t size, bool search)
>      v->set(start, start+size);
>
>      vma_list.insert(*v);
> +    WITH_LOCK(vma_range_set_mutex.for_write()) {
> +        vma_range_set.insert(vma_range(v));
> +    }
>
>      return start;
>  }
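
One maintainability thought: the patch now requires every
vma_list.insert()/erase() to be mirrored into vma_range_set at half a
dozen call sites. A small pair of helpers could make it harder to forget
one side - hypothetical sketch with toy types (names are mine, untested
against the real vma classes):

    #include <set>

    // Hypothetical wrapper keeping two indexes in sync; in the real code
    // this would wrap vma_list + vma_range_set and their locks.
    struct indexes {
        std::set<int> primary;    // stand-in for vma_list
        std::set<int> secondary;  // stand-in for vma_range_set

        void insert_both(int v) {
            primary.insert(v);
            secondary.insert(v);  // the mirrored insert cannot be forgotten
        }
        void erase_both(int v) {
            primary.erase(v);
            secondary.erase(v);
        }
    };

    int main() {
        indexes idx;
        idx.insert_both(42);
        return idx.primary.count(42) == 1 && idx.secondary.count(42) == 1 ? 0 : 1;
    }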
> @@ -1493,6 +1533,9 @@ void anon_vma::split(uintptr_t edge)
>      vma* n = new anon_vma(addr_range(edge, _range.end()), _perm, _flags);
>      set(_range.start(), edge);
>      vma_list.insert(*n);
> +    WITH_LOCK(vma_range_set_mutex.for_write()) {
> +        vma_range_set.insert(vma_range(n));
> +    }
>  }
>
>  error anon_vma::sync(uintptr_t start, uintptr_t end)
> @@ -1600,6 +1643,9 @@ jvm_balloon_vma::~jvm_balloon_vma()
>      // for a dangling mapping representing a balloon that was already moved
>      // out.
>      vma_list.erase(*this);
> +    WITH_LOCK(vma_range_set_mutex.for_write()) {
> +        vma_range_set.erase(vma_range(this));
> +    }
>      assert(!(_real_flags & mmap_jvm_balloon));
>      mmu::map_anon(addr(), size(), _real_flags, _real_perm);
>
> @@ -1667,6 +1713,9 @@ ulong map_jvm(unsigned char* jvm_addr, size_t size, size_t align, balloon_ptr b)
>                      // Since we will change its position in the tree, for the sake of future
>                      // lookups we need to reinsert it.
>                      vma_list.erase(*jvma);
> +                    WITH_LOCK(vma_range_set_mutex.for_write()) {
> +                        vma_range_set.erase(vma_range(jvma));
> +                    }
>                      if (jvma->start() < start) {
>                          assert(jvma->partial() >= (jvma->end() - start));
>                          jvma->set(jvma->start(), start);
> @@ -1675,11 +1724,17 @@ ulong map_jvm(unsigned char* jvm_addr, size_t size, size_t align, balloon_ptr b)
>                          jvma->set(end, jvma->end());
>                      }
>                      vma_list.insert(*jvma);
> +                    WITH_LOCK(vma_range_set_mutex.for_write()) {
> +                        vma_range_set.insert(vma_range(jvma));
> +                    }
>                  } else {
>                      // Note how v and jvma are different. This is because this one,
>                      // we will delete.
>                      auto& v = *i--;
>                      vma_list.erase(v);
> +                    WITH_LOCK(vma_range_set_mutex.for_write()) {
> +                        vma_range_set.erase(vma_range(&v));
> +                    }
>                      // Finish the move. In practice, it will temporarily remap an
>                      // anon mapping here, but this should be rare. Let's not
>                      // complicate the code to optimize it. There are no
> @@ -1692,6 +1747,9 @@ ulong map_jvm(unsigned char* jvm_addr, size_t size, size_t align, balloon_ptr b)
>
>          evacuate(start, start + size);
>          vma_list.insert(*vma);
> +        WITH_LOCK(vma_range_set_mutex.for_write()) {
> +            vma_range_set.insert(vma_range(vma));
> +        }
>          return vma->size();
>      }
>      return 0;
> @@ -1753,6 +1811,9 @@ void file_vma::split(uintptr_t edge)
>      vma *n = _file->mmap(addr_range(edge, _range.end()), _flags, _perm, off).release();
>      set(_range.start(), edge);
>      vma_list.insert(*n);
> +    WITH_LOCK(vma_range_set_mutex.for_write()) {
> +        vma_range_set.insert(vma_range(n));
> +    }
>  }
>
>  error file_vma::sync(uintptr_t start, uintptr_t end)
> @@ -1903,6 +1964,9 @@ void linear_map(void* _virt, phys addr, size_t size, const char* name,
>      WITH_LOCK(linear_vma_set_mutex.for_write()) {
>         linear_vma_set.insert(_vma);
>      }
> +    WITH_LOCK(vma_range_set_mutex.for_write()) {
> +       vma_range_set.insert(vma_range(_vma));
> +    }
>  }
>
>  void free_initial_memory_range(uintptr_t addr, size_t size)
> diff --git a/include/osv/mmu.hh b/include/osv/mmu.hh
> --- a/include/osv/mmu.hh
> +++ b/include/osv/mmu.hh
> @@ -90,6 +90,37 @@ public:
>      boost::intrusive::set_member_hook<> _vma_list_hook;
>  };
>
> +struct vma_range {
> +    const void* _vma;
> +    bool _is_linear;
> +
> +    vma_range(const linear_vma* v) {
> +       _vma = v;
> +       _is_linear = true;
> +    }
> +
> +    vma_range(const vma* v) {
> +       _vma = v;
> +       _is_linear = false;
> +    }
> +
> +    uintptr_t start() const {
> +       if (_is_linear) {
> +          return static_cast<const linear_vma*>(_vma)->v_start();
> +       } else {
> +          return static_cast<const vma*>(_vma)->start();
> +       }
> +    }
> +
> +    uintptr_t end() const {
> +       if (_is_linear) {
> +          return static_cast<const linear_vma*>(_vma)->v_end();
> +       } else {
> +          return static_cast<const vma*>(_vma)->end();
> +       }
> +    }
> +};
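
A small design observation, not a request for change: the explicit
_is_linear flag is a hand-rolled tagged union. If C++17 were acceptable,
std::variant would express the same thing with compile-time-checked
dispatch. Purely illustrative sketch, with toy stand-in types of my own:

    #include <cstdint>
    #include <type_traits>
    #include <variant>

    // Toy stand-ins for linear_vma and vma (fields and names are mine).
    struct linear_vma_t {
        uintptr_t vs, ve;
        uintptr_t v_start() const { return vs; }
        uintptr_t v_end() const { return ve; }
    };
    struct vma_t {
        uintptr_t s, e;
        uintptr_t start() const { return s; }
        uintptr_t end() const { return e; }
    };

    struct vma_range_v {
        std::variant<const linear_vma_t*, const vma_t*> _vma;

        uintptr_t start() const {
            return std::visit([](auto* v) {
                // Dispatch on the stored pointer type instead of a bool tag.
                if constexpr (std::is_same_v<std::decay_t<decltype(v)>,
                                             const linear_vma_t*>) {
                    return v->v_start();
                } else {
                    return v->start();
                }
            }, _vma);
        }
    };

    int main() {
        linear_vma_t l{0x1000, 0x2000};
        vma_range_v r{&l};
        return r.start() == 0x1000 ? 0 : 1;
    }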
> +
>  class anon_vma : public vma {
>  public:
>      anon_vma(addr_range range, unsigned perm, unsigned flags);
> diff --git a/include/osv/prio.hh b/include/osv/prio.hh
> --- a/include/osv/prio.hh
> +++ b/include/osv/prio.hh
> @@ -16,6 +16,7 @@ enum {
>      cpus,
>      fpranges,
>      pt_root,
> +    vma_range_set,
>      linear_vma_set,
>      mempool,
>      routecache,
>
