https://bugs.kde.org/show_bug.cgi?id=514297

--- Comment #37 from Mark Wielaard <[email protected]> ---
Global comment. The "guard code" is guarded (pun intended) by
different things.  #ifdef VKI_MADV_GUARD_INSTALL and
#if defined(VGO_linux).

It would be good to just use one.  I would suggest to use
#if defined(VGO_linux).  And probably have a bit more code
behind it, because it isn't necessary/used on other OSes.

I think you also need to handle the NSegment hasGuardPages in
maybe_merge_nsegments. Everywhere it returns True it should have a
s1->hasGuardPages |= s2->hasGuardPages. And in split_nsegment_at where
you have to see if hasGuardPages is true whether it holds for both parts or
only
one.

> From b8e100769c54745db0fd327ef43a0eb5766df965 Mon Sep 17 00:00:00 2001
> From: Martin Cermak <[email protected]>
> Date: Tue, 24 Mar 2026 12:49:16 +0100
> Subject: [PATCH] Bug 514297 -  Track madvise MADV_GUARD_INSTALL in address
>  space manager
> 
> Track madvise MADV_GUARD_INSTALL in address space manager

Could use a bit more comments. You have good commit messages in the
split commit, I would add some of those to explain what goes where.

> https://bugs.kde.org/show_bug.cgi?id=514297
> 
> diff --git a/.gitignore b/.gitignore
> index 7b3b1a626..fb458647a 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -944,6 +944,7 @@
>  /memcheck/tests/lsframe2
>  /memcheck/tests/Makefile
>  /memcheck/tests/Makefile.in
> +/memcheck/tests/madv_guard
>  /memcheck/tests/mallinfo
>  /memcheck/tests/mallinfo2
>  /memcheck/tests/malloc1

Good.

> diff --git a/NEWS b/NEWS
> index ff78f4b2b..15636fe97 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -94,6 +94,7 @@ are not entered into bugzilla tend to get forgotten about 
> or ignored.
>  514094  readlink("/proc/self/exe") overwrites buffer beyond its return value
>  514206  Assertion '!sr_isError(sr)' failed - mmap fd points to an open
>          descriptor to a PCI device
> +514297  Track madvise MADV_GUARD_INSTALL in address space manager
>  514343  Add a valgrind.h macro VALGRIND_REPLACES_MALLOC
>  514596  Add SSE4.1 BLENDPD instruction for x86 32 bit
>  514613  Unclosed leak_summary/still_reachable tag in xml output

OK.

> diff --git a/coregrind/m_addrinfo.c b/coregrind/m_addrinfo.c
> index 32d2fd4c9..f134c1c3f 100644
> --- a/coregrind/m_addrinfo.c
> +++ b/coregrind/m_addrinfo.c
> @@ -228,7 +228,12 @@ void VG_(describe_addr) ( DiEpoch ep, Addr a, 
> /*OUT*/AddrInfo* ai )
>        if (tid != VG_INVALID_THREADID) {
>           /* Should be below stack pointer, as if it is >= SP, it
>              will have been described as StackPos_stacked above. */
> -         stackPos = StackPos_below_stack_ptr;
> +         const NSegment *seg = VG_(am_find_nsegment) (a);
> +         if (seg->hasGuardPages && VG_(is_guarded)(a) ) {
> +            stackPos = StackPos_guard_page;
> +         } else {
> +            stackPos = StackPos_below_stack_ptr;
> +         }

Code looks good. But the comment before it should be updated.

>        } else {
>           /* Try to find a stack with guard page containing a.
>              For this, check if a is in a page mapped without r, w and x. */

If this else part is triggered it should probably also be updated?

         const NSegment *seg = VG_(am_find_nsegment) (a);
         if (seg != NULL && seg->kind == SkAnonC
             && !seg->hasR && !seg->hasW && !seg->hasX) {

                ^ ( ... || (seg->hasGuardPages && VG_(is_guarded)(a))

            /* This looks a plausible guard page. Check if a is close to        
               the start of stack (lowest byte). */
            tid = find_tid_with_stack_containing (VG_PGROUNDUP(a+1));
            if (tid != VG_INVALID_THREADID)
               stackPos = StackPos_guard_page;
         }

I am surprised this code is never triggered.

> diff --git a/coregrind/m_aspacemgr/aspacemgr-common.c 
> b/coregrind/m_aspacemgr/aspacemgr-common.c
> index 379ad773d..95473ff6a 100644
> --- a/coregrind/m_aspacemgr/aspacemgr-common.c
> +++ b/coregrind/m_aspacemgr/aspacemgr-common.c
> @@ -324,6 +324,12 @@ Int ML_(am_fcntl) ( Int fd, Int cmd, Addr arg )
>     return sr_isError(res) ? -1 : sr_Res(res);
>  }
>  
> +Int ML_(am_lseek) ( Int fd, vki_off_t off, Int whence )
> +{
> +   SysRes res = VG_(do_syscall3)(__NR_lseek, fd, off, whence);
> +   return sr_isError(res) ? -1 : sr_Res(res);
> +}
> +
>  /* Get the dev, inode and mode info for a file descriptor, if
>     possible.  Returns True on success. */
>  Bool ML_(am_get_fd_d_i_m)( Int fd,

OK. Some extra helper code.

> diff --git a/coregrind/m_aspacemgr/aspacemgr-linux.c 
> b/coregrind/m_aspacemgr/aspacemgr-linux.c
> index ae9846e54..fbdead911 100644
> --- a/coregrind/m_aspacemgr/aspacemgr-linux.c
> +++ b/coregrind/m_aspacemgr/aspacemgr-linux.c
> @@ -297,6 +297,11 @@
>  static NSegment nsegments[VG_N_SEGMENTS];
>  static Int      nsegments_used = 0;
>  
> +/* bookkeeping for madvise() guard pages, bug 514297 */
> +static UInt     VG_N_GUARDS;
> +static Addr     *guardpages;
> +static Int      nguardpages_used = 0;

Wrap in #if defined(VGO_linux) even though the file is called
aspacemgr-linux.c it is actually used for Linux, Darwin, Solaris and
FreeBSD.

>  #define Addr_MIN ((Addr)0)
>  #define Addr_MAX ((Addr)(-1ULL))
>  
> @@ -473,21 +478,21 @@ static void show_nsegment ( Int logLevel, Int segNo, 
> const NSegment* seg )
>        case SkFree:
>           VG_(debugLog)(
>              logLevel, "aspacem",
> -            "%3d: %s %010lx-%010lx %s\n",
> +            "%3d: %s %010lx-%010lx %s (%s)\n",
>              segNo, show_SegKind(seg->kind),
> -            seg->start, seg->end, len_buf
> +            seg->start, seg->end, len_buf, seg->hasGuardPages ? "G" : "g"
>           );
>           break;
>  
>        case SkAnonC: case SkAnonV: case SkShmC:
>           VG_(debugLog)(
>              logLevel, "aspacem",
> -            "%3d: %s %010lx-%010lx %s %c%c%c%c%c\n",
> +            "%3d: %s %010lx-%010lx %s %c%c%c%c%c (%s)\n",
>              segNo, show_SegKind(seg->kind),
>              seg->start, seg->end, len_buf,
>              seg->hasR ? 'r' : '-', seg->hasW ? 'w' : '-', 
>              seg->hasX ? 'x' : '-', seg->hasT ? 'T' : '-',
> -            seg->isCH ? 'H' : '-'
> +            seg->isCH ? 'H' : '-', seg->hasGuardPages ? "G" : "g"
>           );
>           break;

Likewise, NSegment doesn't have an hasGuardPages field when not on linux.

> @@ -495,27 +500,29 @@ static void show_nsegment ( Int logLevel, Int segNo, 
> const NSegment* seg )
>           VG_(debugLog)(
>              logLevel, "aspacem",
>              "%3d: %s %010lx-%010lx %s %c%c%c%c%c d=0x%03llx "
> -            "i=%-7llu o=%-7lld (%d,%d)\n",
> +            "i=%-7llu o=%-7lld (%d,%d) (%s)\n",
>              segNo, show_SegKind(seg->kind),
>              seg->start, seg->end, len_buf,
>              seg->hasR ? 'r' : '-', seg->hasW ? 'w' : '-', 
>              seg->hasX ? 'x' : '-', seg->hasT ? 'T' : '-', 
>              seg->isCH ? 'H' : '-',
>              seg->dev, seg->ino, seg->offset,
> -            ML_(am_segname_get_seqnr)(seg->fnIdx), seg->fnIdx
> +            ML_(am_segname_get_seqnr)(seg->fnIdx), seg->fnIdx,
> +            seg->hasGuardPages ? "G" : "g"
>           );
>           break;
>  
>        case SkResvn:
>           VG_(debugLog)(
>              logLevel, "aspacem",
> -            "%3d: %s %010lx-%010lx %s %c%c%c%c%c %s\n",
> +            "%3d: %s %010lx-%010lx %s %c%c%c%c%c %s (%s)\n",
>              segNo, show_SegKind(seg->kind),
>              seg->start, seg->end, len_buf,
>              seg->hasR ? 'r' : '-', seg->hasW ? 'w' : '-', 
>              seg->hasX ? 'x' : '-', seg->hasT ? 'T' : '-', 
>              seg->isCH ? 'H' : '-',
> -            show_ShrinkMode(seg->smode)
> +            show_ShrinkMode(seg->smode),
> +            seg->hasGuardPages ? "G" : "g"
>           );
>           break;

Likewise.

> @@ -1061,6 +1068,203 @@ void ML_(am_do_sanity_check)( void )
>     AM_SANITY_CHECK;
>  }
>  
> +/*-----------------------------------------------------------------*/
> +/*---                                                           ---*/
> +/*--- Low level access / modification of the guardpages array.  ---*/
> +/*---                                                           ---*/
> +/*-----------------------------------------------------------------*/

All this should probably be #if defined(VGO_linux)

> +static void guard_page_install ( Addr addr ) {
> +   /* Note that this only installs guard pages into the
> +      guardpages array.  But it doesn't flag hasGuardPages
> +      for segments having guard pages.
> +      That's handled in guard_pages_install() below. */
> +   Addr addr_aligned = addr & ~(VKI_PAGE_SIZE - 1);
> +   if (nguardpages_used >= VG_N_GUARDS) {
> +      VG_(debugLog)(0,"aspacem",
> +                    "ERROR: nguardpages_used limit reached\n");
> +      return;
> +   }

This is probably more fatal, we shouldn't just return here.
For max threads we do:

   VG_(printf)("Use --max-threads=INT to specify a larger number of threads\n"
               "and rerun valgrind\n");
   VG_(core_panic)("Max number of threads is too low");

> +   // bisect
> +   Int mid = 0,
> +       lo = 0,
> +       hi = nguardpages_used - 1;
> +   while (True) {
> +      if (lo > hi) {
> +         break;
> +      } else {
> +         mid = (lo + hi) / 2;
> +         if (addr_aligned < guardpages[mid]) { hi = mid - 1; continue; }
> +         if (addr_aligned > guardpages[mid]) { lo = mid + 1; continue; }
> +         if (addr_aligned == guardpages[mid]) return; // already there

Can that actually happen? Can you call madvise GUARD_INSTALL on the
same page multiple times?

> +      }
> +   }
> +   // merge in
> +   for (Int i=nguardpages_used; i > lo; i--)
> +      guardpages[i] = guardpages[i-1];
> +   guardpages[lo] = addr_aligned;
> +   nguardpages_used++;
> +}

OK.

> +inline static void guard_pages_install ( Addr addr, SizeT len ) {
> +   Int iLo = find_nsegment_idx(addr);
> +   Int iHi = find_nsegment_idx(addr + len - 1);
> +
> +   // Record the new guard pages in the guardpages array
> +   Int pages = (len - 1) / VKI_PAGE_SIZE + 1;
> +   for (Int i=0; i < pages; i++)
> +      guard_page_install(addr + i * VKI_PAGE_SIZE);
> +
> +   // These 5 should be guaranteed by find_nsegment_idx.
> +   aspacem_assert(0 <= iLo && iLo < nsegments_used);
> +   aspacem_assert(0 <= iHi && iHi < nsegments_used);
> +   aspacem_assert(iLo <= iHi);
> +   aspacem_assert(nsegments[iLo].start <= addr );
> +   aspacem_assert(nsegments[iHi].end   >= addr + len - 1 );

Nice.

> +   // Flag the new guardpages in the nsegments array
> +   for (Int i = iLo; i <= iHi; i++)
> +      nsegments[i].hasGuardPages = True;
> +}

OK.

> +static void guard_page_remove ( Addr addr ) {
> +   /* Note that this only removes guard pages from the
> +      guardpages array.  But it doesn't unflag hasGuardPages
> +      for segments not having any guard pages any more.
> +      That's handled in guard_pages_remove() below. */
> +   Addr addr_aligned = addr & ~(VKI_PAGE_SIZE - 1);
> +   // search
> +   Int mid = 0,
> +       lo = 0,
> +       hi = nguardpages_used - 1;
> +   while (True) {
> +      if (lo > hi) {
> +         VG_(message)(Vg_UserMsg,
> +                      "Removal failed, expected address not found\n");
> +         return;

Should this be an aspacem_assert ?

> +      } else {
> +         mid = (lo + hi) / 2;
> +         if (addr_aligned < guardpages[mid]) { hi = mid - 1; continue; }
> +         if (addr_aligned > guardpages[mid]) { lo = mid + 1; continue; }
> +         if (addr_aligned == guardpages[mid]) break;
> +      }
> +   }
> +   // remove
> +   for(Int i=mid; i<nguardpages_used; i++)
> +      guardpages[i] = guardpages[i+1];
> +   nguardpages_used--;
> +}

OK.

> +inline static void guard_pages_remove ( Addr addr, SizeT len ) {
> +   Int iLo = find_nsegment_idx(addr);
> +   Int iHi = find_nsegment_idx(addr + len - 1);
> +   Bool guardPageSeen __attribute__((unused));

Why unused?

> +   // Reflect the guard pages removal in the guardpages array
> +   Int pages = (len - 1) / VKI_PAGE_SIZE + 1;
> +   for (Int i=0; i < pages; i++)
> +      if (VG_(is_guarded)(addr + i * VKI_PAGE_SIZE))
> +         guard_page_remove (addr + i * VKI_PAGE_SIZE);

Interesting. So you can remove guard pages that were never installed?

> +   // These 5 should be guaranteed by find_nsegment_idx.
> +   aspacem_assert(0 <= iLo && iLo < nsegments_used);
> +   aspacem_assert(0 <= iHi && iHi < nsegments_used);
> +   aspacem_assert(iLo <= iHi);
> +   aspacem_assert(nsegments[iLo].start <= addr );
> +   aspacem_assert(nsegments[iHi].end   >= addr + len - 1 );

Nice again.

> +   // Unflag segments not having any guard pages any more
> +   for (Int i = iLo; i <= iHi; i++) { 
> +      Addr aLo = nsegments[i].start;
> +      Addr aHi = nsegments[i].end;
> +      guardPageSeen = False;
> +      for (Int j = 0; j < nguardpages_used; j++) {
> +         if ((guardpages[j] >= aLo) && (guardpages[j] <= aHi))
> +            guardPageSeen = True;
> +      }
> +      if (guardPageSeen == False)
> +          nsegments[i].hasGuardPages = False;

I think this can be simplified. If start and end completely fall
between addr and addr + len, it doesn't have guard pages. Only when
end is larger than addr + len do we have to check (per page_size) if
there are still guardpages left.

> +   }
> +}
> +
> +static void show_guard_pages (void) {
> +   for (Int i=0; i<nguardpages_used; i++)
> +      VG_(debugLog)(0,"aspacem","guard page: seg=%d id=%d addr=0x%lx\n",
> +                    find_nsegment_idx(guardpages[i]), i, guardpages[i]);
> +   VG_(am_show_nsegments)(0, "aspacem");
> +}

OK, used in am_notify_madv_guard.
Maybe also #if defined(VGO_linux) ?

> +#if defined(VGO_linux)
> +static Bool is_guarded_sanity ( Addr addr )
> +{
> +   static Int      VG_(cl_pagemap_fd) = -1;
> +   if (LIKELY(VG_(cl_pagemap_fd) != -1)) {
> +      // do nothing
> +   } else {
> +      VG_(cl_pagemap_fd) = sr_Res(ML_(am_open)("/proc/self/pagemap", 
> VKI_O_RDONLY, 0 ));
> +         if(VG_(cl_pagemap_fd) == -1)
> +            ML_(am_barf)("I/O error on /proc/self/pagemap");
> +      VG_(cl_pagemap_fd) = VG_(safe_fd)(VG_(cl_pagemap_fd));
> +   }
> +   Addr addr_page_aligned = addr & ~(VKI_PAGE_SIZE - 1);
> +   vki_off_t offset = ((vki_uint64_t)addr_page_aligned / VKI_PAGE_SIZE) * 
> sizeof(vki_uint64_t);
> +   Int ret = ML_(am_lseek) (VG_(cl_pagemap_fd), offset, VKI_SEEK_SET);
> +   if (ret == -1)
> +      ML_(am_barf)("failed lseek in pagemap\n");
> +   // https://docs.kernel.org/admin-guide/mm/pagemap.html
> +   vki_uint64_t entry; // one 64-bit value for each virtual page
> +   ret = ML_(am_read) (VG_(cl_pagemap_fd), &entry, sizeof(vki_uint64_t));
> +   if (ret == -1)
> +      ML_(am_barf)("failed reading pagemap\n");
> +   if (((entry >> 58) & 1) == 1) {
> +      VG_(debugLog)(1, "aspacem",
> +                    "madvise guard page hit at addr 0x%010lx\n", addr);
> +      return True;
> +   }
> +   return False;
> +}
> +#endif

ML_(am_barf) is probably too strong here. It is just a sanity check.
Just warn once and set VG_(cl_pagemap_fd) to -2 or something, and
check that so we don't try again? Also probably easier to do all
warning in this function. So give it a is_guarded boolean to check
against. Then you don't need to check and warn in the callers.

> +Bool VG_(is_guarded) ( Addr addr ) {
> +   Addr addr_aligned = addr & ~(VKI_PAGE_SIZE - 1);
> +   Int mid, 
> +       lo = 0,
> +       hi = nguardpages_used - 1;
> +   while (True) {
> +      if (lo > hi) {
> +         if (LIKELY(VG_(clo_sanity_level) < 3)) {
> +            /* do nothing */
> +         } else {
> +            if (is_guarded_sanity ( addr ) == True)
> +               VG_(debugLog)(1, "aspacem",
> +                             "VG_(is_guarded)() failed (not guarded 
> 0x%lx)\n",
> +                             addr);
> +         }
> +         return False;
> +      }
> +      mid = (lo + hi) / 2;
> +      if (addr_aligned < guardpages[mid]) { hi = mid - 1; continue; }
> +      if (addr_aligned > guardpages[mid]) { lo = mid + 1; continue; }
> +      if (LIKELY(VG_(clo_sanity_level) < 3)) {
> +         /* do nothing */
> +      } else {
> +         if (is_guarded_sanity ( addr ) == False)
> +            VG_(debugLog)(1, "aspacem",
> +                          "VG_(is_guarded)() failed (guarded 0x%lx)\n",
> +                          addr);
> +      }
> +      return True;
> +   }
> +}

It is good to have this as a generic function, but the implementation
should probably be guarded by +#if defined(VGO_linux) with an #else
return False;

> +Bool VG_(is_guarded_addr_len) ( Addr addr, SizeT len ) {
> +   Int pages = (len - 1) / VKI_PAGE_SIZE + 1;
> +   for (Int i=0; i < pages; i++)
> +      if(VG_(is_guarded)(addr + i * VKI_PAGE_SIZE))
> +         return True;
> +   return False;
> +}

Should this be static? What about len == 0? Probably cannot happen,
but then vg_assert?

>  /*-----------------------------------------------------------------*/
>  /*---                                                           ---*/
> @@ -1233,10 +1437,11 @@ ULong VG_(am_get_anonsize_total)( void )
>     belong to in order to be considered "valid".
>  */
>  static
> -Bool is_valid_for( UInt kinds, Addr start, SizeT len, UInt prot )
> +Bool is_valid_for( UInt kinds, Addr start, SizeT len, UInt prot, Bool madv )
>  {
>     Int  i, iLo, iHi;
>     Bool needR, needW, needX;
> +   Bool needGuardPageCheck = False;
>  
>     if (len == 0)
>        return True; /* somewhat dubious case */
> @@ -1267,11 +1472,22 @@ Bool is_valid_for( UInt kinds, Addr start, SizeT len, 
> UInt prot )
>             && (needW ? nsegments[i].hasW : True)
>             && (needX ? nsegments[i].hasX : True) ) {
>           /* ok */
> +#if defined(VGO_linux)
> +           if ( nsegments[i].hasGuardPages ) {
> +              needGuardPageCheck = True;
> +           }
> +#endif
>        } else {
>           return False;
>        }
>     }
>  
> +#if defined(VKI_MADV_GUARD_INSTALL)
> +   if (madv && needGuardPageCheck && VG_(is_guarded)(start)) {
> +      return False;
> +   }
> +#endif
> +
>     return True;
>  }

Pick #if defined(VGO_linux) in both cases.  Do you really need the
Bool madv? A guarded page is not valid for any kind, except when
checking for VKI_PROT_NONE (so no read/write/execute) flag set. So I
think you can use needGuardPageCheck = (prot == VKI_PROT_NONE) here.

> @@ -1283,7 +1499,7 @@ Bool VG_(am_is_valid_for_client)( Addr start, SizeT len,
>  {
>     const UInt kinds = SkFileC | SkAnonC | SkShmC;
>  
> -   return is_valid_for(kinds, start, len, prot);
> +   return is_valid_for(kinds, start, len, prot, True);
>  }

See above, do you need to pass True here?

>  /* Variant of VG_(am_is_valid_for_client) which allows free areas to
> @@ -1291,11 +1507,11 @@ Bool VG_(am_is_valid_for_client)( Addr start, SizeT 
> len,
>     considers reservations to be allowable, since from the client's
>     point of view they don't exist. */
>  Bool VG_(am_is_valid_for_client_or_free_or_resvn)
> -   ( Addr start, SizeT len, UInt prot )
> +   ( Addr start, SizeT len, UInt prot, Bool madv )
>  {
>     const UInt kinds = SkFileC | SkAnonC | SkShmC | SkFree | SkResvn;
>  
> -   return is_valid_for(kinds, start, len, prot);
> +   return is_valid_for(kinds, start, len, prot, madv);
>  }

And here. Need to pass madv?

>  /* Checks if a piece of memory consists of either free or reservation
> @@ -1304,7 +1520,7 @@ Bool VG_(am_is_free_or_resvn)( Addr start, SizeT len )
>  {
>     const UInt kinds = SkFree | SkResvn;
>  
> -   return is_valid_for(kinds, start, len, 0);
> +   return is_valid_for(kinds, start, len, 0, True);
>  }

And here.

>  
> @@ -1312,7 +1528,7 @@ Bool VG_(am_is_valid_for_valgrind) ( Addr start, SizeT 
> len, UInt prot )
>  {
>     const UInt kinds = SkFileV | SkAnonV;
>  
> -   return is_valid_for(kinds, start, len, prot);
> +   return is_valid_for(kinds, start, len, prot, True);
>  }
>  

And here.

> @@ -1969,6 +2185,19 @@ Addr VG_(am_startup) ( Addr sp_at_startup )
>  
>     VG_(am_show_nsegments)(2, "With contents of /proc/self/maps");
>  
> +   /* With glibc upstream commit a6fbe36b7f31 and others, on x86_64,
> +      a new madvise(MADV_GUARD_INSTALL ... ) guard page is installed for
> +      each new thread. In the future, MADV_GUARD_INSTALL is likely to
> +      be used with DSOs supporting multiple kernel page sizes.  A rough
> +      estimation of max madvise guard page count is Nthreads + 3 * DSOcnt.
> +      Madvise guard pages are tracked in the guardpages array below. The
> +      array size is set via --max-guard-pages or --max-threads: */
> +   VG_N_GUARDS =
> +      (VG_(clo_max_guard_pages) == MAX_GUARDS_DEFAULT)
> +         ? VG_(clo_max_threads)
> +         : VG_(clo_max_guard_pages);
> +   guardpages = VG_(calloc)("aspacem.guardpages", VG_N_GUARDS, sizeof(Addr));

Note that this might give unexpected results if someone does
--max-threads=12 --max-guard-pages=500 (assuming MAX_GUARDS_DEFAULT 500).

>     AM_SANITY_CHECK;
>     return suggested_clstack_end;
>  }
> @@ -2434,6 +2663,42 @@ Bool VG_(am_notify_mprotect)( Addr start, SizeT len, 
> UInt prot )
>     return needDiscard;
>  }
>  
> +/* Notifiy aspacem about madvise(MADV_GUARD_INSTALL), bug 514297 */
> +Bool VG_(am_notify_madv_guard)( Addr start, SizeT len, Bool install )
> +{
> +   aspacem_assert(VG_IS_PAGE_ALIGNED(start));
> +   aspacem_assert(VG_IS_PAGE_ALIGNED(len));
> +
> +   if (len == 0)
> +      return False;
> +
> +   if (install) {
> +      VG_(debugLog)(1, "aspacem",
> +                    "installing guard pages (addr=0x%lx, len=0x%lx)\n",
> +                    start, len);
> +      guard_pages_install(start, len);
> +   } else {
> +      VG_(debugLog)(1, "aspacem",
> +                    "removing guard pages (addr=0x%lx, len=0x%lx)\n",
> +                    start, len);
> +      guard_pages_remove(start, len);
> +   }

OK, generic function, but should guard_pages_install and
guard_pages_remove be #if defined(VGO_linux) ?

> +   if (VG_(clo_sanity_level) >= 3)
> +      show_guard_pages(); 
> +
> +   AM_SANITY_CHECK;

Should show_guard_pages () be moved into AM_SANIY_CHECK ?

> +   if (VG_(clo_sanity_level) >= 3)
> +      VG_(debugLog)(0, "aspacem",
> +                    "madv_guard sanity_level 3 checks passed\n");
> +
> +
> +   // The return val determines whether translations will be discarded.
> +   // That is supposed to happen when guard page is installed, but not
> +   // otherwise.
> +   return install;
> +}
>  
>  /* Notifies aspacem that an munmap completed successfully.  The
>     segment array is updated accordingly.  As with
> @@ -2480,6 +2745,10 @@ Bool VG_(am_notify_munmap)( Addr start, SizeT len )
>  #endif
>     add_segment( &seg );
>  
> +   /* Unmapping drops guard pages (if present) */
> +   if(VG_(is_guarded_addr_len)( start, len ))
> +      guard_pages_remove( start, len );

Wrap in #if defined(VGO_linux) ?

>     /* Unmapping could create two adjacent free segments, so a preen is
>        needed.  add_segment() will do that, so no need to here. */
>     AM_SANITY_CHECK;
> @@ -2980,7 +3249,7 @@ SysRes am_munmap_both_wrk ( /*OUT*/Bool* need_discard,
>  
>     if (forClient) {
>        if (!VG_(am_is_valid_for_client_or_free_or_resvn)
> -            ( start, len, VKI_PROT_NONE ))
> +            ( start, len, VKI_PROT_NONE, False ))
>           goto eINVAL;

You are passing in VKI_PROT_NONE here, so I don't think you need to
add False for madv checks. See above.

>     } else {
>        if (!VG_(am_is_valid_for_valgrind)
> @@ -3747,6 +4016,41 @@ static void parse_procselfmaps (
>  
>     if (record_gap && gapStart < Addr_MAX)
>        (*record_gap) ( gapStart, Addr_MAX - gapStart + 1 );
> +
> +   // Iterate over guard pages
> +   for (i = 0; i<nguardpages_used; i++) {
> +      // Check if every guard page in V's evidence has respective
> +      // record in kernel's evidence.
> +      if (!is_guarded_sanity(guardpages[i])) {
> +         VG_(debugLog)(0, "Valgrind:",
> +                          "FATAL: failed guard page sanity check at address 
> 0x%lx.\n", guardpages[i]);
> +         ML_(am_exit)(1);
> +      }
> +      // Make sure that every guard page belongs to a segment
> +      // flagged with hasGuardPages.
> +      if(nsegments[find_nsegment_idx(guardpages[i])].hasGuardPages == False) 
> {
> +         VG_(debugLog)(0, "Valgrind:",
> +                          "FATAL: failed guard page sanity check2 at address 
> 0x%lx.\n", guardpages[i]);
> +         ML_(am_exit)(1);
> +      }
> +   }

Wrap in #if defined(VGO_linux).

> +   // Iterate over segments.  For each segment flagged with hasGuardPages
> +   // make sure that it actually contains at least one guard page.
> +   for (i = 0; i < nsegments_used; i++) {
> +      if (nsegments[i].hasGuardPages == True) {
> +         Bool guardPageSeen = False;
> +         for (Addr a = nsegments[i].start; a < nsegments[i].end; a += 
> VKI_PAGE_SIZE) {
> +            if (VG_(is_guarded)(a) == True)
> +               guardPageSeen = True;
> +         }
> +         if (guardPageSeen == False) {
> +            VG_(debugLog)(0, "Valgrind:",
> +                          "FATAL: no guard page found for segment %d\n", i);
> +            ML_(am_exit)(1);
> +         }
> +      }
> +   }

Same.

>  }
>  
>  /*------END-procmaps-parser-for-Linux----------------------------*/
> diff --git a/coregrind/m_aspacemgr/priv_aspacemgr.h 
> b/coregrind/m_aspacemgr/priv_aspacemgr.h
> index 338b3545b..634ba4271 100644
> --- a/coregrind/m_aspacemgr/priv_aspacemgr.h
> +++ b/coregrind/m_aspacemgr/priv_aspacemgr.h
> @@ -51,7 +51,7 @@
>                                   // VG_(mk_SysRes_Error)
>                                   // VG_(mk_SysRes_Success)
>  
> -#include "pub_core_options.h"    // VG_(clo_sanity_level)
> +#include "pub_core_options.h"    // VG_(clo_sanity_level), 
> VG_(clo_max_guard_pages)
>  
>  #if defined(VGO_freebsd)
>  #include "pub_core_libcproc.h"   // VG_(sysctlbyname)
> @@ -62,6 +62,10 @@
>  #include "pub_core_mach.h"       // macos support
>  #endif
>  
> +#if defined(VGO_linux)
> +#include "../pub_core_libcfile.h" // VG_(safe_fd)
> +#include "pub_tool_mallocfree.h"  // VG_(calloc)
> +#endif

See other comment. include pub_core for both.

>  /* --------------- Implemented in aspacemgr-common.c ---------------*/
>  
> @@ -119,6 +123,7 @@ extern void   ML_(am_close) ( Int fd );
>  extern Int    ML_(am_read)  ( Int fd, void* buf, Int count);
>  extern Int    ML_(am_readlink) ( const HChar* path, HChar* buf, UInt bufsiz 
> );
>  extern Int    ML_(am_fcntl) ( Int fd, Int cmd, Addr arg );
> +extern Int    ML_(am_lseek) ( Int fd, vki_off_t off, Int whence );

Ack. new util function.

>  /* Get the dev, inode and mode info for a file descriptor, if
>     possible.  Returns True on success. */
> diff --git a/coregrind/m_main.c b/coregrind/m_main.c
> index 5cf629f77..c473f2d3a 100644
> --- a/coregrind/m_main.c
> +++ b/coregrind/m_main.c
> @@ -252,8 +252,10 @@ static void usage_NORETURN ( int need_help )
>  "                  recovered by stack scanning [5]\n"
>  "    --resync-filter=no|yes|verbose [yes on MacOS, no on other OSes]\n"
>  "              attempt to avoid expensive address-space-resync operations\n"
> -"    --max-threads=<number>    maximum number of threads that valgrind can\n"
> -"                              handle [%d]\n"
> +"    --max-threads=<number>     maximum number of threads that valgrind 
> can\n"
> +"                               handle [%d]\n"
> +"    --max-guard-pages=<number> maximum number of madvise guard pages that\n"
> +"                               valgrind can handle [%d]\n"
>  "\n";
>  
>     const HChar usage2[] =
> @@ -379,7 +381,8 @@ static void usage_NORETURN ( int need_help )
>                    VG_(clo_vgdb_poll)         /* int */,
>                    VG_(vgdb_prefix_default)() /* char* */,
>                    N_SECTORS_DEFAULT          /* int */,
> -                  MAX_THREADS_DEFAULT        /* int */
> +                  MAX_THREADS_DEFAULT        /* int */,
> +                  MAX_GUARDS_DEFAULT         /* int */
>                 );

OK.

>     if (need_help > 1 && VG_(details).name) {
>        VG_(printf)("  user options for %s:\n", VG_(details).name);
> @@ -513,6 +516,8 @@ static void process_option (Clo_Mode mode,
>  
>     // Set up VG_(clo_max_threads); needed for VG_(tl_pre_clo_init)
>     else if VG_INT_CLOM(cloE, arg, "--max-threads", VG_(clo_max_threads)) {}
> +   // Set up VG_(clo_max_guard_pages); needed for aspacemgr init
> +   else if VG_INT_CLOM(cloE, arg, "--max-guard-pages", 
> VG_(clo_max_guard_pages)) {}
>  
>     // Set up VG_(clo_sim_hints). This is needed a.o. for an inner
>     // running in an outer, to have "no-inner-prefix" enabled
> @@ -1333,8 +1338,8 @@ Int valgrind_main ( Int argc, HChar **argv, HChar 
> **envp )
>     /* Start the debugging-log system ASAP.  First find out how many
>        "-d"s were specified.  This is a pre-scan of the command line.  Also
>        get --profile-heap=yes, --core-redzone-size, --redzone-size
> -      --aspace-minaddr which are needed by the time we start up dynamic
> -      memory management.  */
> +      --aspace-minaddr, --max-guard-pages  which are needed by the time
> +      we start up dynamic memory management.  */
>     loglevel = 0;
>     for (i = 1; i < argc; i++) {
>        const HChar* tmp_str;
> @@ -1355,6 +1360,8 @@ Int valgrind_main ( Int argc, HChar **argv, HChar 
> **envp )
>                                                     &errmsg))
>              VG_(fmsg_bad_option)(argv[i], "%s\n", errmsg);
>        }
> +      if VG_INT_CLOM(cloE, argv[i], "--max-threads", VG_(clo_max_threads)) {}
> +      if VG_INT_CLOM(cloE, argv[i], "--max-guard-pages", 
> VG_(clo_max_guard_pages)) {}
>     }
>  
>     /* ... and start the debug logger.  Now we can safely emit logging

OK.

> diff --git a/coregrind/m_options.c b/coregrind/m_options.c
> index 612ef8d8b..faf095b21 100644
> --- a/coregrind/m_options.c
> +++ b/coregrind/m_options.c
> @@ -188,6 +188,7 @@ Bool   VG_(clo_keep_debuginfo) = False;
>  Bool   VG_(clo_show_emwarns)   = False;
>  Word   VG_(clo_max_stackframe) = 2000000;
>  UInt   VG_(clo_max_threads)    = MAX_THREADS_DEFAULT;
> +UInt   VG_(clo_max_guard_pages) = MAX_GUARDS_DEFAULT;
>  Word   VG_(clo_main_stacksize) = 0; /* use client's rlimit.stack */
>  Word   VG_(clo_valgrind_stacksize) = VG_DEFAULT_STACK_ACTIVE_SZB;
>  Bool   VG_(clo_wait_for_gdb)   = False;

OK.

> diff --git a/coregrind/m_syswrap/priv_syswrap-generic.h 
> b/coregrind/m_syswrap/priv_syswrap-generic.h
> index ec38236ea..becadeac9 100644
> --- a/coregrind/m_syswrap/priv_syswrap-generic.h
> +++ b/coregrind/m_syswrap/priv_syswrap-generic.h
> @@ -101,6 +101,9 @@ ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len );
>  extern void 
>  ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot );
>  
> +extern void
> +ML_(notify_core_and_tool_of_madv_guard) ( Addr a, SizeT len, Bool install );
> +

OK. Generic, even though implementation currently only for linux.

>  extern void
>  ML_(pre_mem_read_sockaddr) ( ThreadId tid, const HChar *description,
>                               struct vki_sockaddr *sa, UInt salen );
> diff --git a/coregrind/m_syswrap/syswrap-amd64-linux.c 
> b/coregrind/m_syswrap/syswrap-amd64-linux.c
> index c31a44d44..e8904d7b5 100644
> --- a/coregrind/m_syswrap/syswrap-amd64-linux.c
> +++ b/coregrind/m_syswrap/syswrap-amd64-linux.c
> @@ -503,7 +503,7 @@ static SyscallTableEntry syscall_table[] = {
>     GENX_(__NR_mremap,            sys_mremap),         // 25 
>     GENX_(__NR_msync,             sys_msync),          // 26 
>     GENXY(__NR_mincore,           sys_mincore),        // 27 
> -   GENX_(__NR_madvise,           sys_madvise),        // 28 
> +   GENXY(__NR_madvise,           sys_madvise),        // 28 
>     LINX_(__NR_shmget,            sys_shmget),         // 29 
>  
>     LINXY(__NR_shmat,             sys_shmat),          // 30 
> diff --git a/coregrind/m_syswrap/syswrap-arm-linux.c 
> b/coregrind/m_syswrap/syswrap-arm-linux.c
> index dae613e1e..c05000c15 100644
> --- a/coregrind/m_syswrap/syswrap-arm-linux.c
> +++ b/coregrind/m_syswrap/syswrap-arm-linux.c
> @@ -813,7 +813,7 @@ static SyscallTableEntry syscall_main_table[] = {
>     LINX_(__NR_setfsgid32,        sys_setfsgid),       // 216
>     LINX_(__NR_pivot_root,        sys_pivot_root),     // 217
>     GENXY(__NR_mincore,           sys_mincore),        // 218
> -   GENX_(__NR_madvise,           sys_madvise),        // 219
> +   GENXY(__NR_madvise,           sys_madvise),        // 219
>  
>     GENXY(__NR_getdents64,        sys_getdents64),     // 220
>     LINXY(__NR_fcntl64,           sys_fcntl64),        // 221
> diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c 
> b/coregrind/m_syswrap/syswrap-arm64-linux.c
> index 8191b26d0..4b3fa4a49 100644
> --- a/coregrind/m_syswrap/syswrap-arm64-linux.c
> +++ b/coregrind/m_syswrap/syswrap-arm64-linux.c
> @@ -781,7 +781,7 @@ static SyscallTableEntry syscall_main_table[] = {
>     GENX_(__NR_mlockall,          sys_mlockall),          // 230
>     LINX_(__NR_munlockall,        sys_munlockall),        // 231
>     GENXY(__NR_mincore,           sys_mincore),           // 232
> -   GENX_(__NR_madvise,           sys_madvise),           // 233
> +   GENXY(__NR_madvise,           sys_madvise),           // 233
>     LINX_(__NR_remap_file_pages,  sys_remap_file_pages),  // 234
>     LINX_(__NR_mbind,             sys_mbind),             // 235
>     LINXY(__NR_get_mempolicy,     sys_get_mempolicy),     // 236

OK, for all linux variants.

> diff --git a/coregrind/m_syswrap/syswrap-darwin.c 
> b/coregrind/m_syswrap/syswrap-darwin.c
> index 69360a5dd..7ff01ed0e 100644
> --- a/coregrind/m_syswrap/syswrap-darwin.c
> +++ b/coregrind/m_syswrap/syswrap-darwin.c
> @@ -11628,7 +11628,7 @@ const SyscallTableEntry ML_(syscall_table)[] = {
>     _____(VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(72)),    // old vadvise
>     GENXY(__NR_munmap,      sys_munmap), 
>     GENXY(__NR_mprotect,    sys_mprotect), 
> -   GENX_(__NR_madvise,     sys_madvise), 
> +   GENXY(__NR_madvise,     sys_madvise), 
>     _____(VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(76)),    // old vhangup
>     _____(VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(77)),    // old vlimit
>     GENXY(__NR_mincore,     sys_mincore),

Might be left as just GENX_.

> diff --git a/coregrind/m_syswrap/syswrap-freebsd.c 
> b/coregrind/m_syswrap/syswrap-freebsd.c
> index d81e91673..015aee1f0 100644
> --- a/coregrind/m_syswrap/syswrap-freebsd.c
> +++ b/coregrind/m_syswrap/syswrap-freebsd.c
> @@ -7488,7 +7488,7 @@ const SyscallTableEntry ML_(syscall_table)[] = {
>     // freebsd11 vadvise                                    72
>     GENXY(__NR_munmap,           sys_munmap),            // 73
>     GENXY(__NR_mprotect,         sys_mprotect),          // 74
> -   GENX_(__NR_madvise,          sys_madvise),           // 75
> +   GENXY(__NR_madvise,          sys_madvise),           // 75
>  
>     // obsol vhangup                                        76
>     // obsol vlimit                                         77

Likewise.

> diff --git a/coregrind/m_syswrap/syswrap-generic.c 
> b/coregrind/m_syswrap/syswrap-generic.c
> index 85d584e4c..cce8d36a2 100644
> --- a/coregrind/m_syswrap/syswrap-generic.c
> +++ b/coregrind/m_syswrap/syswrap-generic.c
> @@ -109,12 +109,21 @@ Bool ML_(valid_client_addr)(Addr start, SizeT size, 
> ThreadId tid,
>                                     const HChar *syscallname)
>  {
>     Bool ret;
> +   Bool madv = True;
>  
>     if (size == 0)
>        return True;
>  
> +   /* Munmap may remove a guard page per man 2 madvise.
> +      More syscalls might need handling here, such as
> +      possibly mmap() with MAP_FIXED, mremap() or
> +      mprotect. */
> +   if (VG_(strcmp)(syscallname, "munmap") == 0) {
> +      madv = False;
> +   }
> +
>     ret = VG_(am_is_valid_for_client_or_free_or_resvn)
> -            (start,size,VKI_PROT_NONE);
> +            (start,size,VKI_PROT_NONE, madv);

Since passing in VKI_PROT_NONE I don't think you need to pass madv
here. Which would also get rid if the special syscallname checks
above.

>     if (0)
>        VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
> @@ -270,7 +279,18 @@ ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT 
> len, Int prot )
>                                   "ML_(notify_core_and_tool_of_mprotect)" );
>  }
>  
> -
> +void
> +ML_(notify_core_and_tool_of_madv_guard) ( Addr a, SizeT len, Bool install )
> +{
> +   page_align_addr_and_len(&a, &len);
> +   if (install) {
> +      if (VG_(am_notify_madv_guard)( a, len, True ))
> +         VG_(discard_translations)( a, (ULong)len,
> +                                    
> "ML_(notify_core_and_tool_of_madv_guard)" );
> +   } else {
> +      VG_(am_notify_madv_guard)(a, len, False);
> +   }
> +}

OK.

>  #if HAVE_MREMAP
>  /* Expand (or shrink) an existing mapping, potentially moving it at
> @@ -3112,15 +3132,24 @@ PRE(sys_madvise)
>                          ARG1, ARG2, SARG3);
>     PRE_REG_READ3(long, "madvise",
>                   unsigned long, start, vki_size_t, length, int, advice);
> -   /* Ugly hack to try to bypass the problem of guard pages not being
> -      understood by valgrind aspace manager.
> -      By making the syscall fail, we expect glibc to fallback
> -      on implementing guard pages with mprotect PROT_NONE to ensure
> -      the valgrind address space manager is not confused wrongly
> -      believing the guard page is rw. */
> +}

OK.

> +POST(sys_madvise)
> +{
>  #ifdef VKI_MADV_GUARD_INSTALL
> -   if (ARG3 == VKI_MADV_GUARD_INSTALL)
> -      SET_STATUS_Failure( VKI_EINVAL );
> +   if (ARG3 == VKI_MADV_GUARD_INSTALL) {
> +      // SET_STATUS_Failure( VKI_EINVAL );
> +      Addr a    = ARG1;
> +      SizeT len = ARG2;
> +      ML_(notify_core_and_tool_of_madv_guard)(a, len, True);
> +   }
> +#endif
> +#ifdef VKI_MADV_GUARD_REMOVE
> +   if (ARG3 == VKI_MADV_GUARD_REMOVE) {
> +      Addr a    = ARG1;
> +      SizeT len = ARG2;
> +      ML_(notify_core_and_tool_of_madv_guard)(a, len, False);
> +   }
>  #endif
>  }

Wrap the whole POST in #if defined(VGO_linux). Then it is only valid
XY for linux and other OSes can keep using X_.

> @@ -4737,7 +4766,6 @@ POST(sys_munmap)
>  {
>     Addr  a   = ARG1;
>     SizeT len = ARG2;
> -
>     ML_(notify_core_and_tool_of_munmap)( a, len );
>  }

Spurious removal of empty line.

> diff --git a/coregrind/m_syswrap/syswrap-linux.c 
> b/coregrind/m_syswrap/syswrap-linux.c
> index 6853e5e94..dfe1b528f 100644
> --- a/coregrind/m_syswrap/syswrap-linux.c
> +++ b/coregrind/m_syswrap/syswrap-linux.c
> @@ -6352,7 +6352,8 @@ PRE(sys_openat)
>                      int, dirfd, const char *, pathname, int, flags);
>     }
>  
> -   PRE_MEM_RASCIIZ( "openat(pathname)", ARG2 );
> +   if (ML_(safe_to_deref)( (void*)(Addr)ARG2, 1 ))
> +      PRE_MEM_RASCIIZ( "openat(pathname)", ARG2 );
>  
>     // check that we are not trying to open the client exe for writing
>     if ((ARG3 & VKI_O_WRONLY) ||

Lets get rid of this here and file a bug to investigate all
PRE_MEM_RASCIIZ calls/macros.

> diff --git a/coregrind/m_syswrap/syswrap-mips32-linux.c 
> b/coregrind/m_syswrap/syswrap-mips32-linux.c
> index d6e651139..c30f162e2 100644
> --- a/coregrind/m_syswrap/syswrap-mips32-linux.c
> +++ b/coregrind/m_syswrap/syswrap-mips32-linux.c
> @@ -980,7 +980,7 @@ static SyscallTableEntry syscall_main_table[] = {
>     PLAXY (__NR_fstat64,                sys_fstat64),                 // 215
>     LINX_ (__NR_pivot_root,             sys_pivot_root),              // 216
>     GENXY (__NR_mincore,                sys_mincore),                 // 217
> -   GENX_ (__NR_madvise,                sys_madvise),                 // 218
> +   GENXY (__NR_madvise,                sys_madvise),                 // 218
>     GENXY (__NR_getdents64,             sys_getdents64),              // 219
>     LINXY (__NR_fcntl64,                sys_fcntl64),                 // 220
>     //..
> diff --git a/coregrind/m_syswrap/syswrap-mips64-linux.c 
> b/coregrind/m_syswrap/syswrap-mips64-linux.c
> index 6930a924d..027aef4e8 100644
> --- a/coregrind/m_syswrap/syswrap-mips64-linux.c
> +++ b/coregrind/m_syswrap/syswrap-mips64-linux.c
> @@ -495,7 +495,7 @@ static SyscallTableEntry syscall_main_table[] = {
>     GENX_ (__NR_mremap, sys_mremap),
>     GENX_ (__NR_msync, sys_msync),
>     GENXY (__NR_mincore, sys_mincore),
> -   GENX_ (__NR_madvise, sys_madvise),
> +   GENXY (__NR_madvise, sys_madvise),
>     LINX_ (__NR_shmget, sys_shmget),
>     LINXY (__NR_shmat, sys_shmat),
>     LINXY (__NR_shmctl, sys_shmctl),
> diff --git a/coregrind/m_syswrap/syswrap-nanomips-linux.c 
> b/coregrind/m_syswrap/syswrap-nanomips-linux.c
> index c42f16166..dcdd6d1a4 100644
> --- a/coregrind/m_syswrap/syswrap-nanomips-linux.c
> +++ b/coregrind/m_syswrap/syswrap-nanomips-linux.c
> @@ -760,7 +760,7 @@ static SyscallTableEntry syscall_main_table[] = {
>     GENX_ (__NR_mlockall,               sys_mlockall),
>     LINX_ (__NR_munlockall,             sys_munlockall),
>     GENXY (__NR_mincore,                sys_mincore),
> -   GENX_ (__NR_madvise,                sys_madvise),
> +   GENXY (__NR_madvise,                sys_madvise),
>     LINX_ (__NR_mbind,                  sys_mbind),
>     LINXY (__NR_get_mempolicy,          sys_get_mempolicy),
>     LINX_ (__NR_set_mempolicy,          sys_set_mempolicy),
> diff --git a/coregrind/m_syswrap/syswrap-ppc32-linux.c 
> b/coregrind/m_syswrap/syswrap-ppc32-linux.c
> index 78bf2de68..f4418749c 100644
> --- a/coregrind/m_syswrap/syswrap-ppc32-linux.c
> +++ b/coregrind/m_syswrap/syswrap-ppc32-linux.c
> @@ -861,7 +861,7 @@ static SyscallTableEntry syscall_table[] = {
>     GENXY(__NR_getdents64,        sys_getdents64),        // 202
>     LINX_(__NR_pivot_root,        sys_pivot_root),        // 203
>     LINXY(__NR_fcntl64,           sys_fcntl64),           // 204
> -   GENX_(__NR_madvise,           sys_madvise),           // 205
> +   GENXY(__NR_madvise,           sys_madvise),           // 205
>     GENXY(__NR_mincore,           sys_mincore),           // 206
>     LINX_(__NR_gettid,            sys_gettid),            // 207
>     LINXY(__NR_tkill,             sys_tkill),             // 208 */Linux
> diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c 
> b/coregrind/m_syswrap/syswrap-ppc64-linux.c
> index 3689e8a9d..33390d122 100644
> --- a/coregrind/m_syswrap/syswrap-ppc64-linux.c
> +++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c
> @@ -846,7 +846,7 @@ static SyscallTableEntry syscall_table[] = {
>     LINX_(__NR_pivot_root,        sys_pivot_root),         // 203
>     LINXY(__NR_fcntl64,           sys_fcntl64),            // 204 !!!!?? 
> 32bit only */
>  
> -   GENX_(__NR_madvise,           sys_madvise),            // 205
> +   GENXY(__NR_madvise,           sys_madvise),            // 205
>     GENXY(__NR_mincore,           sys_mincore),            // 206
>     LINX_(__NR_gettid,            sys_gettid),             // 207
>     LINXY(__NR_tkill,             sys_tkill),              // 208
> diff --git a/coregrind/m_syswrap/syswrap-riscv64-linux.c 
> b/coregrind/m_syswrap/syswrap-riscv64-linux.c
> index 4d0ab7916..4f0c0737a 100644
> --- a/coregrind/m_syswrap/syswrap-riscv64-linux.c
> +++ b/coregrind/m_syswrap/syswrap-riscv64-linux.c
> @@ -539,7 +539,7 @@ static SyscallTableEntry syscall_main_table[] = {
>     GENX_(__NR_mlockall, sys_mlockall),                             /* 230 */
>     LINX_(__NR_munlockall, sys_munlockall),                         /* 231 */
>     GENXY(__NR_mincore, sys_mincore),                               /* 232 */
> -   GENX_(__NR_madvise, sys_madvise),                               /* 233 */
> +   GENXY(__NR_madvise, sys_madvise),                               /* 233 */
>     LINX_(__NR_remap_file_pages, sys_remap_file_pages),             /* 234 */
>     LINX_(__NR_mbind, sys_mbind),                                   /* 235 */
>     LINXY(__NR_get_mempolicy, sys_get_mempolicy),                   /* 236 */
> diff --git a/coregrind/m_syswrap/syswrap-s390x-linux.c 
> b/coregrind/m_syswrap/syswrap-s390x-linux.c
> index dcec560d7..50afe56e5 100644
> --- a/coregrind/m_syswrap/syswrap-s390x-linux.c
> +++ b/coregrind/m_syswrap/syswrap-s390x-linux.c
> @@ -673,7 +673,7 @@ static SyscallTableEntry syscall_table[] = {
>     LINX_(__NR_setfsgid, sys_setfsgid),                                // 216
>     LINX_(__NR_pivot_root, sys_pivot_root),                            // 217
>     GENXY(__NR_mincore, sys_mincore),                                  // 218
> -   GENX_(__NR_madvise,  sys_madvise),                                 // 219
> +   GENXY(__NR_madvise,  sys_madvise),                                 // 219
>  
>     GENXY(__NR_getdents64,  sys_getdents64),                           // 220
>     GENX_(221, sys_ni_syscall), /* unimplemented (by the kernel) */    // 221
> diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c 
> b/coregrind/m_syswrap/syswrap-x86-linux.c
> index af6679747..610515e34 100644
> --- a/coregrind/m_syswrap/syswrap-x86-linux.c
> +++ b/coregrind/m_syswrap/syswrap-x86-linux.c
> @@ -1422,7 +1422,7 @@ static SyscallTableEntry syscall_table[] = {
>     LINX_(__NR_setfsgid32,        sys_setfsgid),       // 216
>     LINX_(__NR_pivot_root,        sys_pivot_root),     // 217
>     GENXY(__NR_mincore,           sys_mincore),        // 218
> -   GENX_(__NR_madvise,           sys_madvise),        // 219
> +   GENXY(__NR_madvise,           sys_madvise),        // 219
>  
>     GENXY(__NR_getdents64,        sys_getdents64),     // 220
>     LINXY(__NR_fcntl64,           sys_fcntl64),        // 221

OK, all linux go GENXY.

> diff --git a/coregrind/pub_core_aspacemgr.h b/coregrind/pub_core_aspacemgr.h
> index 070fa5495..fdc670513 100644
> --- a/coregrind/pub_core_aspacemgr.h
> +++ b/coregrind/pub_core_aspacemgr.h
> @@ -90,7 +90,7 @@ extern Bool VG_(am_is_valid_for_valgrind)
>     considers reservations to be allowable, since from the client's
>     point of view they don't exist. */
>  extern Bool VG_(am_is_valid_for_client_or_free_or_resvn)
> -   ( Addr start, SizeT len, UInt prot );
> +   ( Addr start, SizeT len, UInt prot, Bool madv );

No need to add Bool madv. See above.

>  /* Checks if a piece of memory consists of either free or reservation
>     segments. */
> @@ -119,6 +119,14 @@ extern void VG_(am_show_nsegments) ( Int logLevel, const 
> HChar* who );
>  extern Bool VG_(am_do_sync_check) ( const HChar* fn, 
>                                      const HChar* file, Int line );
>  
> +/* VG_(is_guarded) checks if address is part of a madvise
> +   MADV_GUARD_INSTALL guard page */
> +extern Bool VG_(is_guarded) ( Addr addr );

OK generic, just implemented for linux for now.

> +/* VG_(is_guarded_addr_len) checks if interval of addresses
> +   containa MADV_GUARD_INSTALL guard page */
> +extern Bool VG_(is_guarded_addr_len) ( Addr addr, SizeT len );

This is only used inside aspacemgr-linux.c so could probably just be
static there.

>  //--------------------------------------------------------------
>  // Functions pertaining to the central query-notify mechanism
>  // used to handle mmap/munmap/mprotect resulting from client
> @@ -187,6 +195,10 @@ extern Bool VG_(am_notify_client_shmat)( Addr a, SizeT 
> len, UInt prot );
>     range. */
>  extern Bool VG_(am_notify_mprotect)( Addr start, SizeT len, UInt prot );
>  
> +/* Bug 514297: Notifies aspacem about madvise(MADV_GUARD_*) */
> +extern Bool VG_(am_notify_madv_guard) ( Addr start, SizeT len, Bool install 
> );
> +

OK generic, just implemented for linux for now.

>  /* Notifies aspacem that an munmap completed successfully.  The
>     segment array is updated accordingly.  As with
>     VG_(am_notify_mprotect), we merely record the given info, and don't
> diff --git a/coregrind/pub_core_options.h b/coregrind/pub_core_options.h
> index 6462ae90c..f6b84ea8c 100644
> --- a/coregrind/pub_core_options.h
> +++ b/coregrind/pub_core_options.h
> @@ -326,6 +326,10 @@ extern Word VG_(clo_main_stacksize);
>  #define MAX_THREADS_DEFAULT 500
>  extern UInt VG_(clo_max_threads);
>  
> +/* The maximum number of madvise guard pages we support. */
> +#define MAX_GUARDS_DEFAULT 500
> +extern UInt VG_(clo_max_guard_pages);
> +
>  /* If the same IP is found twice in a backtrace in a sequence of max
>     VG_(clo_merge_recursive_frames) frames, then the recursive call
>     is merged in the backtrace.

OK.

> diff --git a/include/pub_tool_aspacemgr.h b/include/pub_tool_aspacemgr.h
> index 2f4775d6e..cefd595af 100644
> --- a/include/pub_tool_aspacemgr.h
> +++ b/include/pub_tool_aspacemgr.h
> @@ -113,6 +113,9 @@ typedef
>  #if defined(VGO_freebsd)
>        Bool    isFF;     // True --> is a fixed file mapping
>        Bool    ignore_offset; // True --> we can't work out segment offset
> +#endif
> +#if defined(VGO_linux)
> +      Bool    hasGuardPages; // True --> contains guard page (bug 514297)
>  #endif
>     }
>     NSegment;

OK.

> diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
> index 1ce689bc7..c6ebe52b2 100644
> --- a/memcheck/tests/Makefile.am
> +++ b/memcheck/tests/Makefile.am
> @@ -88,6 +88,8 @@ dist_noinst_SCRIPTS = \
>       filter_xml_leak \
>       filter_strchr \
>       filter_varinfo3 \
> +     filter_madv_guard \
> +     filter_madv_guard_stderr \
>       filter_memcheck \
>       filter_malloc_free \
>       filter_sendmsg \
> @@ -270,6 +272,7 @@ EXTRA_DIST = \
>       long_namespace_xml.vgtest long_namespace_xml.stdout.exp \
>       long_namespace_xml.stderr.exp long_namespace_xml.stderr.exp-freebsd \
>       long-supps.vgtest long-supps.supp \
> +     madv_guard.stderr.exp madv_guard.stdout.exp madv_guard.vgtest \
>       mallinfo.stderr.exp mallinfo.vgtest \
>       mallinfo2.stderr.exp mallinfo2.vgtest \
>       malloc_free_fill.vgtest \
> @@ -530,6 +533,7 @@ check_PROGRAMS = \
>       leak-tree \
>       leak-segv-jmp \
>       long-supps \
> +     madv_guard \
>       mallinfo \
>       mallinfo2 \
>       malloc_free_fill \
> diff --git a/memcheck/tests/filter_madv_guard 
> b/memcheck/tests/filter_madv_guard
> new file mode 100755
> index 000000000..5b89af707
> --- /dev/null
> +++ b/memcheck/tests/filter_madv_guard
> @@ -0,0 +1,3 @@
> +#! /bin/sh
> +
> +sed 's/0x[0-f][0-f]*/0x.../g'
> diff --git a/memcheck/tests/filter_madv_guard_stderr 
> b/memcheck/tests/filter_madv_guard_stderr
> new file mode 100755
> index 000000000..59b3408ae
> --- /dev/null
> +++ b/memcheck/tests/filter_madv_guard_stderr
> @@ -0,0 +1,3 @@
> +#! /bin/sh
> +
> +grep -o 'aspacem madv_guard sanity_level 3 checks passed'

As you already said, this should move to memcheck/tests/linux.

> diff --git a/memcheck/tests/madv_guard.c b/memcheck/tests/madv_guard.c
> new file mode 100644
> index 000000000..948220f2a
> --- /dev/null
> +++ b/memcheck/tests/madv_guard.c
> @@ -0,0 +1,91 @@
> +#define _GNU_SOURCE
> +#include <stdio.h>
> +#include <unistd.h>
> +#include <sys/mman.h>
> +#include <stdint.h>
> +#include <fcntl.h>
> +#include <string.h>
> +#include <errno.h>
> +
> +int fd = -1; // for /proc/self/pagemap
> +size_t ps = 0; // page size
> +
> +void check_addr(char *p)
> +{
> +    bool rv = true;
> +
> +    // check page aligned-ness
> +    if ((uintptr_t)p % ps != 0)
> +        perror("page not aligned!\n");
> +
> +    // https://docs.kernel.org/admin-guide/mm/pagemap.html
> +    off_t offset = ((uintptr_t)p / ps) * sizeof(uint64_t);
> +    uint64_t entry; // one 64-bit value for each virtual page
> +    if (lseek(fd, offset, SEEK_SET) == (off_t)-1)
> +        perror("lseek /proc/self/pagemap failed\n");
> +
> +    read(fd, &entry, sizeof(entry));
> +    // Bit 58 pte is a guard region (since 6.15) (see madvise (2) man page)
> +    printf("page 0x%lx, is guardpage: %d, present: %d\n",
> +           p, ((entry >> 58) & 1), ((entry >> 63) & 1));
> +
> +    // attempt accessing the address
> +    if ((open(p, O_RDONLY) == -1) && errno == EFAULT)
> +        // EFAULT means that we've hit guarded region
> +        printf("accessing address 0x%lx failed\n", p);
> +    else if ((open(p, O_RDONLY) == -1) && errno == ENOENT)
> +        // ENOENT means no such file or directory, and that's OK here
> +        // we didn't hit a guarded region, we consider this a pass
> +        printf("accessing address 0x%lx passed\n", p);
> +    else
> +        // We never open a valid file here, so this can't happen!
> +        printf("the impossible happened, open() succeeded!\n");
> +
> +}
> +
> +void install_guardpage(char *p)
> +{
> +    printf("Installing guard page, addr=0x%lx\n", p);
> +    int rv = madvise(p, ps, MADV_GUARD_INSTALL);
> +    if (rv != 0)
> +        perror("madvise failed\n");
> +}
> +
> +void remove_guardpage(char *p)
> +{
> +    printf("Removing guard page, addr=0x%lx\n", p);
> +    int rv = madvise(p, ps, MADV_GUARD_REMOVE);
> +    if (rv != 0)
> +        perror("madvise failed\n");
> +}
> +
> +int main()
> +{
> +    fd = open("/proc/self/pagemap", O_RDONLY);
> +    if (fd < 0)
> +        perror("open /proc/self/pagemap failed\n");
> +
> +    ps = sysconf(_SC_PAGESIZE);
> +
> +    char *p = mmap(NULL, ps, PROT_READ | PROT_WRITE, MAP_PRIVATE | 
> MAP_ANONYMOUS, -1, 0);
> +
> +    printf("force page-in by printing this: %d\n", p[0]);
> +
> +    check_addr(p);
> +    install_guardpage(p);
> +    check_addr(p);
> +    remove_guardpage(p);
> +    // The page-in below isn't needed from the persp of checking whether 
> we've
> +    // hit a guard page or not.  It affects whether the page is present 
> though,
> +    // so it does impact the expected stdout.log so to speak.
> +    printf("force page-in by printing this: %d\n", p[0]);
> +    check_addr(p);
> +    install_guardpage(p);
> +    check_addr(p);
> +    // A valid way to nuke a guard page is to unmap it (per man 2 madvise)
> +    munmap(p, ps);
> +    check_addr(p);
> +
> +    close(fd);
> +    return 0;
> +}
> diff --git a/memcheck/tests/madv_guard.stderr.exp 
> b/memcheck/tests/madv_guard.stderr.exp
> new file mode 100644
> index 000000000..35f375ed6
> --- /dev/null
> +++ b/memcheck/tests/madv_guard.stderr.exp
> @@ -0,0 +1,3 @@
> +aspacem madv_guard sanity_level 3 checks passed
> +aspacem madv_guard sanity_level 3 checks passed
> +aspacem madv_guard sanity_level 3 checks passed

See above, I think these messages shouldn't really be there.
It is OK to just check madv_guard.stderr is empty here.

> diff --git a/memcheck/tests/madv_guard.stdout.exp 
> b/memcheck/tests/madv_guard.stdout.exp
> new file mode 100644
> index 000000000..2e59a8bf5
> --- /dev/null
> +++ b/memcheck/tests/madv_guard.stdout.exp
> @@ -0,0 +1,15 @@
> +force page-in by printing this: 0
> +page 0x..., is guardpage: 0, present: 1
> +accessing address 0x... passed
> +Installing guard page, addr=0x...
> +page 0x..., is guardpage: 1, present: 0
> +accessing address 0x... failed
> +Removing guard page, addr=0x...
> +force page-in by printing this: 0
> +page 0x..., is guardpage: 0, present: 1
> +accessing address 0x... passed
> +Installing guard page, addr=0x...
> +page 0x..., is guardpage: 1, present: 0
> +accessing address 0x... failed
> +page 0x..., is guardpage: 0, present: 0
> +accessing address 0x... failed

OK.

> diff --git a/memcheck/tests/madv_guard.vgtest 
> b/memcheck/tests/madv_guard.vgtest
> new file mode 100644
> index 000000000..bcafae199
> --- /dev/null
> +++ b/memcheck/tests/madv_guard.vgtest
> @@ -0,0 +1,4 @@
> +prog: madv_guard
> +vgopts: --sanity-level=3
> +stdout_filter: filter_madv_guard
> +stderr_filter: filter_madv_guard_stderr
> diff --git a/none/tests/cmdline1.stdout.exp b/none/tests/cmdline1.stdout.exp
> index 5d3ea0569..0541412fa 100644
> --- a/none/tests/cmdline1.stdout.exp
> +++ b/none/tests/cmdline1.stdout.exp
> @@ -163,8 +163,10 @@ usage: valgrind [options] prog-and-args
>                    recovered by stack scanning [5]
>      --resync-filter=no|yes|verbose [yes on MacOS, no on other OSes]
>                attempt to avoid expensive address-space-resync operations
> -    --max-threads=<number>    maximum number of threads that valgrind can
> -                              handle [500]
> +    --max-threads=<number>     maximum number of threads that valgrind can
> +                               handle [500]
> +    --max-guard-pages=<number> maximum number of madvise guard pages that
> +                               valgrind can handle [500]
>  
>    user options for Nulgrind:
>      (none)
> diff --git a/none/tests/cmdline2.stdout.exp b/none/tests/cmdline2.stdout.exp
> index 2ed4fc02e..7490a573f 100644
> --- a/none/tests/cmdline2.stdout.exp
> +++ b/none/tests/cmdline2.stdout.exp
> @@ -163,8 +163,10 @@ usage: valgrind [options] prog-and-args
>                    recovered by stack scanning [5]
>      --resync-filter=no|yes|verbose [yes on MacOS, no on other OSes]
>                attempt to avoid expensive address-space-resync operations
> -    --max-threads=<number>    maximum number of threads that valgrind can
> -                              handle [500]
> +    --max-threads=<number>     maximum number of threads that valgrind can
> +                               handle [500]
> +    --max-guard-pages=<number> maximum number of madvise guard pages that
> +                               valgrind can handle [500]
>  
>    user options for Nulgrind:
>      (none)

OK.

-- 
You are receiving this mail because:
You are watching all bug changes.

Reply via email to