> + del_page_from_lru_list(page, lruvec,
> page_off_lru(page));
>
> if (unlikely(PageCompound(page))) {
> spin_unlock_irq(>lru_lock);
> --
> 2.28.0.681.g6f77f65b4e-goog
--
Michal Hocko
SUSE Labs
nr_pages = thp_nr_pages(page);
> nr_moved += nr_pages;
> if (PageActive(page))
> workingset_age_nonresident(lruvec, nr_pages);
> --
> 2.28.0.681.g6f77f65b4e-goog
--
Michal Hocko
SUSE Labs
On Thu 17-09-20 11:16:55, Vijay Balakrishna wrote:
>
>
> On 9/17/2020 10:52 AM, Michal Hocko wrote:
> > On Thu 17-09-20 10:27:16, Vijay Balakrishna wrote:
> > >
> > >
> > > On 9/17/2020 2:28 AM, Michal Hocko wrote:
> > >
yze this.
In any case your stress test is overprovisioning your Normal zone and
increased min_free_kbytes just papers over the sizing problem.
--
Michal Hocko
SUSE Labs
On Thu 17-09-20 10:27:16, Vijay Balakrishna wrote:
>
>
> On 9/17/2020 2:28 AM, Michal Hocko wrote:
> > On Wed 16-09-20 23:39:39, Vijay Balakrishna wrote:
> > > set_recommended_min_free_kbytes need to honor min_free_kbytes set by the
> > > user. Post start-of
nding too much time reclaiming memory.
> >
> > The auto tuned value should never reach such a low value to cause
> > problems.
>
> The auto tuned value is incorrect post hotplug memory operation, in our use
> case memory hot add occurs very early during boot.
Define incorrect. What are the actual values? Have you tried to increase
the value manually after the hotplug?
--
Michal Hocko
SUSE Labs
o calculated min_free_kbytes >=
user_min_free_kbytes.
Except for value clamping when the value is reduced and this likely
needs fixing. But set_recommended_min_free_kbytes should be fine.
--
Michal Hocko
SUSE Labs
On Thu 17-09-20 11:28:06, Michal Hocko wrote:
> On Wed 16-09-20 23:39:39, Vijay Balakrishna wrote:
> > set_recommended_min_free_kbytes need to honor min_free_kbytes set by the
> > user. Post start-of-day THP enable or memory hotplug operations can
> > lose user spec
* When a new child is created while the hierarchy is under oom,
> - * mem_cgroup_oom_lock() may not be called. Watch for underflow.
> + * mem_cgroup_oom_trylock() may not be called. Watch for underflow.
>*/
> spin_lock(_oom_lock);
> for_each_mem_cgroup_tree(iter, memcg)
> --
> 2.19.1
--
Michal Hocko
SUSE Labs
ally care.
> if (user_min_free_kbytes >= 0)
> - pr_info("raising min_free_kbytes from %d to %lu to help
> transparent hugepage allocations\n",
> + pr_info("raising min_free_kbytes from %d to %d to help
> transparent hugepage allocations\n",
> min_free_kbytes, recommended_min);
>
> min_free_kbytes = recommended_min;
> --
> 2.28.0
>
--
Michal Hocko
SUSE Labs
,6 +858,7 @@ int __ref online_pages(unsigned long pfn, unsigned long
> nr_pages,
> zone_pcp_update(zone);
>
> init_per_zone_wmark_min();
> + khugepaged_min_free_kbytes_update();
>
> kswapd_run(nid);
> kcompactd_run(nid);
> @@ -1600,6 +1602,7 @@ static int __ref __offline_pages(unsigned long
> start_pfn,
> pgdat_resize_unlock(zone->zone_pgdat, );
>
> init_per_zone_wmark_min();
> + khugepaged_min_free_kbytes_update();
>
> if (!populated_zone(zone)) {
> zone_pcp_reset(zone);
> --
> 2.28.0
--
Michal Hocko
SUSE Labs
On Tue 15-09-20 08:48:08, Vijay Balakrishna wrote:
>
>
> On 9/15/2020 1:18 AM, Michal Hocko wrote:
> > On Mon 14-09-20 09:57:02, Vijay Balakrishna wrote:
> > >
> > >
> > > On 9/14/2020 7:33 AM, Michal Hocko wrote:
> > > > On Thu 10-09-20 13
>
> Signed-off-by: Chunxin Zang
> Signed-off-by: Muchun Song
Acked-by: Michal Hocko
> ---
> changelogs in v5:
> 1) v4 patch used wrong branch, very apologies about that.
>
> changelogs in v4:
> changelogs in v3:
> 1) Fix some descr
hink that this
should better be fatal_signal_pending because that would make sure that
the userspace will not see an incomplete operation. This is a general
practice for other bail outs as well.
> After that, you can add:
>
> Acked-by: Chris Down
--
Michal Hocko
SUSE Labs
On Tue 15-09-20 15:09:59, Mateusz Nosek wrote:
>
>
> On 9/14/2020 4:22 PM, Michal Hocko wrote:
> > On Mon 14-09-20 12:06:54, mateusznos...@gmail.com wrote:
> > > From: Mateusz Nosek
> > >
> > > Most operations from '__alloc_pages_may_oom' do not requ
pace.
>
> Signed-off-by: Chunxin Zang
> Signed-off-by: Muchun Song
Acked-by: Michal Hocko
> ---
>
> changelogs in v4:
> changelogs in v3:
> 1) Fix some descriptive problems pointed out by Michal Hocko.
> v2 named: mm/vmscan: fix infin
functional change introduced by this patch
>
> Suggested-by: David Hildenbrand
> Signed-off-by: Laurent Dufour
Acked-by: Michal Hocko
> ---
> arch/ia64/mm/init.c| 6 +++---
> include/linux/mm.h | 2 +-
> include/linux/mmzone.h | 11 ---
> mm/memo
d=dimm4,slot=4 \
> -object memory-backend-ram,id=memdimm5,size=134217728 -device
> pc-dimm,node=1,memdev=memdimm5,id=dimm5,slot=5 \
> -object memory-backend-ram,id=memdimm6,size=134217728 -device
> pc-dimm,node=1,memdev=memdimm6,id=dimm6,slot=6 \
>
> Fixes: 4fbce6339
oesn't clear memmap
> + * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
>* because this is done early in section_activate()
>*/
> if (!(pfn & (pageblock_nr_pages - 1))) {
> @@ -6137,7 +6137,7 @@ void __meminit __weak memmap_init(unsigned long size,
> int nid,
> if (end_pfn > start_pfn) {
> size = end_pfn - start_pfn;
> memmap_init_zone(size, nid, zone, start_pfn,
> - MEMMAP_EARLY, NULL);
> + MEMINIT_EARLY, NULL);
> }
> }
> }
> --
> 2.28.0
>
--
Michal Hocko
SUSE Labs
26bdf2..6b2b5d420510 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -704,6 +704,9 @@ void drop_slab_node(int nid)
> do {
> struct mem_cgroup *memcg = NULL;
>
> + if (signal_pending(current))
> + return;
> +
> freed = 0;
> memcg = mem_cgroup_iter(NULL, NULL, NULL);
> do {
> --
> 2.11.0
>
--
Michal Hocko
SUSE Labs
On Mon 14-09-20 09:57:02, Vijay Balakrishna wrote:
>
>
> On 9/14/2020 7:33 AM, Michal Hocko wrote:
> > On Thu 10-09-20 13:47:39, Vijay Balakrishna wrote:
> > > When memory is hotplug added or removed the min_free_kbytes must be
> > > recalculated based
On Mon 14-09-20 23:02:15, Chunxin Zang wrote:
> On Mon, Sep 14, 2020 at 9:47 PM Michal Hocko wrote:
>
> > On Mon 14-09-20 21:25:59, Chunxin Zang wrote:
> > > On Mon, Sep 14, 2020 at 5:30 PM Michal Hocko wrote:
> > >
> > > > The subject is misleading
or the v2 values depending on which one is currently
> in use.
>
> Signed-off-by: Waiman Long
Acked-by: Michal Hocko
Thanks!
> ---
> mm/memcontrol.c | 24 +---
> 1 file changed, 13 insertions(+), 11 deletions(-)
>
> diff --git a/mm/memcontrol.
easier to
understand if you did
/* calculate swap excess capacity from memsw limit*/
unsigned long memsw = READ_ONCE(memcg->memsw.max) - max;
max += min (memsw, total_swap_pages);
> + }
> }
> return max;
> }
> --
> 2.18.1
--
Michal Hocko
SUSE Labs
es(unsigned long pfn, unsigned long
> nr_pages,
> zone_pcp_update(zone);
>
> init_per_zone_wmark_min();
> + khugepaged_min_free_kbytes_update();
>
> kswapd_run(nid);
> kcompactd_run(nid);
> @@ -1600,6 +1602,7 @@ static int __ref __offline_pages(unsigned long
> start_pfn,
> pgdat_resize_unlock(zone->zone_pgdat, );
>
> init_per_zone_wmark_min();
> + khugepaged_min_free_kbytes_update();
>
> if (!populated_zone(zone)) {
> zone_pcp_reset(zone);
> --
> 2.28.0
>
--
Michal Hocko
SUSE Labs
mp; __GFP_NOFAIL)) {
> *did_some_progress = 1;
>
> /*
> @@ -4004,8 +4008,7 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int
> order,
> page = __alloc_pages_cpuset_fallback(gfp_mask, order,
> ALLOC_NO_WATERMARKS, ac);
> }
> -out:
> - mutex_unlock(_lock);
> +
> return page;
> }
>
> --
> 2.20.1
>
--
Michal Hocko
SUSE Labs
On Mon 14-09-20 21:25:59, Chunxin Zang wrote:
> On Mon, Sep 14, 2020 at 5:30 PM Michal Hocko wrote:
>
> > The subject is misleading because this patch doesn't fix an infinite
> > loop, right? It just allows the userspace to interrupt the operation.
> >
> >
>
is so large.
> While at it, also document which page counters are used in v1 and/or v2.
>
> Signed-off-by: Waiman Long
Acked-by: Michal Hocko
> ---
> include/linux/memcontrol.h | 13 -
> mm/memcontrol.c| 3 ---
> 2 files changed, 8 insertions(+), 8 de
On Mon 14-09-20 19:46:36, Muchun Song wrote:
> On Mon, Sep 14, 2020 at 6:32 PM Michal Hocko wrote:
> >
> > On Mon 14-09-20 17:43:42, Muchun Song wrote:
> > > On Mon, Sep 14, 2020 at 5:18 PM Michal Hocko wrote:
> > > >
> > > > On Mon 14-09-20 12:02
hat making v1 vs. v2 distinction here makes the code more
obvious. But I do not think your code is correct for v1. In a default
state it would lead to max = PAGE_COUNTER_MAX which is not something
we want, right?
instead you want
max += min(READ_ONCE(memcg->memsw.max), total_swap_pages);
> }
> return max;
> }
> --
> 2.18.1
>
--
Michal Hocko
SUSE Labs
gt; was not removed at that time. Remove the obsolete enum charge_type now.
>
> Signed-off-by: Waiman Long
> Acked-by: Johannes Weiner
> Reviewed-by: Shakeel Butt
Acked-by: Michal Hocko
> ---
> mm/memcontrol.c | 8
> 1 file changed, 8 deletions(-)
>
On Fri 11-09-20 12:09:52, David Hildenbrand wrote:
> On 11.09.20 11:12, Michal Hocko wrote:
> > On Fri 11-09-20 10:09:07, David Hildenbrand wrote:
> > [...]
> >> Consider two cases:
> >>
> >> 1. Hot(un)plugging huge DIMMs: many (not all!) use cases want
ray")
>
> Signed-off-by: Miaohe Lin
Other than that
Acked-by: Michal Hocko
> ---
> mm/memcontrol.c | 6 +++---
> 1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 75cd1a1e66c8..a57aa0f42d40 100644
> --- a
On Mon 14-09-20 17:43:42, Muchun Song wrote:
> On Mon, Sep 14, 2020 at 5:18 PM Michal Hocko wrote:
> >
> > On Mon 14-09-20 12:02:33, Muchun Song wrote:
> > > On Sun, Sep 13, 2020 at 8:42 AM Andrew Morton
> > > wrote:
> > > >
> > > > On Sa
256K, 512K) 2 ||
>
> In the while loop, we can check whether the TASK_KILLABLE signal is set,
> if so, we should break the loop.
I would make it explicit that this is not fixing the above scenario. It
just helps to cancel to operatio
K((u64)page_counter_read(>memory)),
> @@ -6415,7 +6417,7 @@ static int memory_events_local_show(struct seq_file *m,
> void *v)
> static int memory_stat_show(struct seq_file *m, void *v)
> {
> struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
> - char *buf;
> + const char *buf;
>
> buf = memory_stat_format(memcg);
> if (!buf)
> --
> 2.20.1
--
Michal Hocko
SUSE Labs
which makes
it more than 100B per line. I strongly suspect we are not able to use
that storage up.
--
Michal Hocko
SUSE Labs
On Mon 14-09-20 10:51:06, Laurent Dufour wrote:
> Le 14/09/2020 à 10:49, Michal Hocko a écrit :
[...]
> > /*
> > * Memory initialization context, use to differentiate memory added by
> > * the platform statically or via memory hotplug interface.
> >
e because the
callpath can be called with locks held etc...
> Since the number of memory blocks managed could be high, the messages are
> rate limited.
>
> As a consequence, link_mem_sections() has no status to report anymore.
>
> Signed-off-by: Laurent Dufour
> Cc: David Hilden
re clear!
> Fixes: 4fbce633910e ("mm/memory_hotplug.c: make
> register_mem_sect_under_node() a callback of walk_memory_range()")
> Signed-off-by: Laurent Dufour
> Cc: sta...@vger.kernel.org
> Cc: Greg Kroah-Hartman
> Cc: "Rafael J. Wysocki"
> Cc: Andrew
added by
* the platform statically or via memory hotplug interface.
*/
enum meminit_context {
MEMINIT_EARLY,
MEMINIT_HOTPLUG
}
--
Michal Hocko
SUSE Labs
e to focus on something that
we can mis^Wdesign with exising and forming usecases in mind that would
get rid of all the cruft that we know it doesn't work (removable would
be another one.
I am definitely not going to insist and I appreciate you are trying to
clean this up. That is highly appreciated of course.
--
Michal Hocko
SUSE Labs
rs. I believe we need
a completely new interface which would effectively deprecate the
existing one. One could still chose to use the old interface but new
usecases would use the new one ideally.
I have brought that up earlier already without much follow up
(http://lkml.kernel.org/r/20200619120704.gd12...@dhcp22.suse.cz)
--
Michal Hocko
SUSE Labs
0811] acpi_bus_attach+0x60/0x1c0
> kernel: [0.760811] acpi_bus_scan+0x33/0x70
> kernel: [0.760811] acpi_scan_init+0xea/0x21b
> kernel: [0.760811] acpi_init+0x2f1/0x33c
> kernel: [0.760811] do_one_initcall+0x46/0x1f4
Is there any actual usecase for a configuration like this? What is the
point to statically define additional memory like this when the same can
be achieved on the same command line?
--
Michal Hocko
SUSE Labs
On Thu 10-09-20 15:39:00, Oscar Salvador wrote:
> On Thu, Sep 10, 2020 at 02:48:47PM +0200, Michal Hocko wrote:
> > > Is there any actual usecase for a configuration like this? What is the
> > > point to statically define additional memory like this when the same can
> >
On Thu 10-09-20 14:49:28, David Hildenbrand wrote:
> On 10.09.20 14:47, Michal Hocko wrote:
> > On Thu 10-09-20 14:03:48, Oscar Salvador wrote:
> >> On Thu, Sep 10, 2020 at 01:35:32PM +0200, Laurent Dufour wrote:
> >>
> >>> That points has
On Thu 10-09-20 15:51:07, Michal Hocko wrote:
> On Thu 10-09-20 15:39:00, Oscar Salvador wrote:
> > On Thu, Sep 10, 2020 at 02:48:47PM +0200, Michal Hocko wrote:
[...]
> > > Forgot to ask one more thing. Who is going to online that memory when
> > > userspace is not ru
On Thu 10-09-20 14:47:56, Michal Hocko wrote:
> On Thu 10-09-20 14:03:48, Oscar Salvador wrote:
> > On Thu, Sep 10, 2020 at 01:35:32PM +0200, Laurent Dufour wrote:
> >
> > > That points has been raised by David, quoting him here:
> > >
> > > >
On Thu 10-09-20 13:35:32, Laurent Dufour wrote:
> Le 10/09/2020 à 13:12, Michal Hocko a écrit :
> > On Thu 10-09-20 09:51:39, Laurent Dufour wrote:
> > > Le 10/09/2020 à 09:23, Michal Hocko a écrit :
> > > > On Wed 09-09-20 18:07:15, Laurent Dufour wrote:
> >
On Thu 10-09-20 09:51:39, Laurent Dufour wrote:
> Le 10/09/2020 à 09:23, Michal Hocko a écrit :
> > On Wed 09-09-20 18:07:15, Laurent Dufour wrote:
> > > Le 09/09/2020 à 12:59, Michal Hocko a écrit :
> > > > On Wed 09-09-20 11:21:58, Laurent Dufour wrote:
> > [.
ote:
> > On Wed, 2020-09-09 at 09:04 +0200, Michal Hocko wrote:
> >> On Tue 08-09-20 10:41:10, Rik van Riel wrote:
> >>> On Tue, 2020-09-08 at 16:35 +0200, Michal Hocko wrote:
> >>>
> >>>> A global knob is insufficient. 1G pages will become a
On Wed 09-09-20 18:07:15, Laurent Dufour wrote:
> Le 09/09/2020 à 12:59, Michal Hocko a écrit :
> > On Wed 09-09-20 11:21:58, Laurent Dufour wrote:
[...]
> > > For the point a, using the enum allows to know in
> > > register_mem_sect_under_node() if the link op
simple pr_err? We will get a backtrace.
Interesting but not really that useful because there are only few code
paths this can trigger from. Registers dump? Not really useful here.
Taint flag, probably useful because follow up problems might give us a
hint that this might be related. People tend to p
On Wed 09-09-20 09:19:16, Rik van Riel wrote:
> On Wed, 2020-09-09 at 09:04 +0200, Michal Hocko wrote:
> > On Tue 08-09-20 10:41:10, Rik van Riel wrote:
> > > On Tue, 2020-09-08 at 16:35 +0200, Michal Hocko wrote:
> > >
> > > > A global knob is in
on. But as I've already said. It
doesn't seem that we are in a need to fix any practical problem here.
High limit implementation has changed quite a lot recently. I would
rather see it settled for a while and see how it behaves in wider
variety of workloads before changing the implementation again.
--
Michal Hocko
SUSE Labs
waving.
Having something solid is absolutely necessary for a big change like
this.
--
Michal Hocko
SUSE Labs
th EBUSY.
> This commit also changes drain_all_pages() to not trust reading pcp->count
> during
> drain for page isolation - I believe that could be racy and lead to missing
> some
> cpu's to drain. If others agree, this can be separated and potentially
> backported.
>
>
On Wed 09-09-20 11:21:58, Laurent Dufour wrote:
> Le 09/09/2020 à 11:09, Michal Hocko a écrit :
> > On Wed 09-09-20 09:48:59, Laurent Dufour wrote:
> > > Le 09/09/2020 à 09:40, Michal Hocko a écrit :
[...]
> > > > > In
> > > > > that case, the sys
On Wed 09-09-20 09:48:59, Laurent Dufour wrote:
> Le 09/09/2020 à 09:40, Michal Hocko a écrit :
> > [reposting because the malformed cc list confused my email client]
> >
> > On Tue 08-09-20 19:08:35, Laurent Dufour wrote:
> > > In register_mem_sect_under_n
end_pfn, false);
> }
>
> return error;
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index e9d5ab5d3ca0..28028db8364a 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1080,7 +1080,8 @@ int __ref add_memory_resource(int nid, struct resource
> *res)
> }
>
> /* link memory sections under this node.*/
> - ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1));
> + ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1),
> + true);
> BUG_ON(ret);
>
> /* create new memmap entry */
> --
> 2.28.0
--
Michal Hocko
SUSE Labs
On Tue 08-09-20 12:58:59, Roman Gushchin wrote:
> On Tue, Sep 08, 2020 at 11:09:25AM -0400, Zi Yan wrote:
> > On 7 Sep 2020, at 3:20, Michal Hocko wrote:
> >
> > > On Fri 04-09-20 14:10:45, Roman Gushchin wrote:
> > >> On Fri, Sep 04, 2020 at
On Tue 08-09-20 10:41:10, Rik van Riel wrote:
> On Tue, 2020-09-08 at 16:35 +0200, Michal Hocko wrote:
>
> > A global knob is insufficient. 1G pages will become a very precious
> > resource as it requires a pre-allocation (reservation). So it really
> > has
> > to
/*
> > +* We failed to charge even after retries, give oom_reaper or
> > +* other process a chance to make some free pages.
> > +*
> > +* On non-preemptive, Non-SMP system, this is critical, else
> > +* we keep retrying with no success, forever.
> > +*/
> > + cond_resched();
> > +
> > /*
> > * keep retrying as long as the memcg oom killer is able to make
> > * a forward progress or bypass the charge if the oom killer
> > --
> > 2.17.1
> >
> >
--
Michal Hocko
SUSE Labs
itly. Also, 1GB THP is allocated from a reserved CMA
> region (although I had alloc_contig_pages as a fallback, which can be removed
> in next version), so users need to add hugepage_cma=nG kernel parameter to
> enable 1GB THP allocation. If a finer control is necessary, we can add
> a new MADV_HUGEPAGE_1GB for 1GB THP.
A global knob is insufficient. 1G pages will become a very precious
resource as it requires a pre-allocation (reservation). So it really has
to be an opt-in and the question is whether there is also some sort of
access control needed.
--
Michal Hocko
SUSE Labs
is highly subjective.
> Signed-off-by: Alex Shi
> Cc: Andrew Morton
> Cc: Johannes Weiner
> Cc: Michal Hocko
> Cc: Vladimir Davydov
> Cc: linux-kernel@vger.kernel.org
> Cc: linux...@kvack.org
> Cc: cgro...@vger.kernel.org
> ---
> include/linux/page_count
On Tue 08-09-20 16:12:07, Alex Shi wrote:
> After commit 0a31bc97c80c3f mm: memcontrol: rewrite uncharge API, no one
> using MEM_CGROUP_CHARGE_TYPE_xxx, let's remove them.
>
> Signed-off-by: Alex Shi
> Cc: Johannes Weiner
> Cc: Michal Hocko
> Cc: Vladimir Davydov
>
gned long start_pfn,
> unsigned long end_pfn,
> BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
> BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));
>
> - for (pfn = start_pfn;
> - pfn < end_pfn;
> - pfn += pageblock_nr_pages) {
> + for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
> page = __first_valid_page(pfn, pageblock_nr_pages);
> if (page) {
> if (set_migratetype_isolate(page, migratetype, flags)) {
> --
> 2.25.1
>
--
Michal Hocko
SUSE Labs
ated_cb() fails.
>
> Fixes: c52e75935f8d ("mm: remove extra drain pages on pcp list")
>
> Signed-off-by: Pavel Tatashin
> Cc: sta...@vger.kernel.org
> Acked-by: David Rientjes
> Acked-by: Vlastimil Babka
Already acked the mmotm version but let's add it here as
this way, so again the extra
> traversal is going to be overhead overhead.
Again this just leads to tricky code. Just look at how easy it was to
break this by removing something that looked clearly a duplicate call.
It is true that memory isolation usage is limited to only few usecasaes
but I would strongly prefer to make the semantic clear so that we do not
repeat this regressions.
--
Michal Hocko
SUSE Labs
On Fri 04-09-20 14:10:45, Roman Gushchin wrote:
> On Fri, Sep 04, 2020 at 09:42:07AM +0200, Michal Hocko wrote:
[...]
> > An explicit opt-in sounds much more appropriate to me as well. If we go
> > with a specific API then I would not make it 1GB pages specific. Why
> > canno
* PageBuddy on freed pages on other zones.
>*/
> + ret = test_pages_isolated(start_pfn, end_pfn, MEMORY_OFFLINE);
> if (ret)
> drain_all_pages(zone);
> } while (ret);
Looks ok
--
Michal Hocko
SUSE Labs
cator" - which is wrong in all but PCP
> > cases (and there only in one possible race?).
> >
>
> It's a two-line hack which fixes a bug in -stable kernels, so I'm
> inclined to proceed with it anyway. We can undo it later on as part of
> a better fix, OK?
Agreed. http://lkml.kernel.org/r/20200904070235.ga15...@dhcp22.suse.cz
for reference.
--
Michal Hocko
SUSE Labs
On Thu 03-09-20 21:24:00, Yu Zhao wrote:
> On Thu, Sep 03, 2020 at 10:28:32AM +0200, Michal Hocko wrote:
> > On Mon 31-08-20 11:50:41, Yu Zhao wrote:
> > [...]
> > > @@ -1860,16 +1859,11 @@ static unsigned noinline_for_stack
> > > move_pages_to_lru(struct lruve
e_pages explicitly checks for
> > + * PageBuddy on freed pages on other zones.
> > + */
> > +if (ret)
> > +drain_all_pages(zone);
> > +} while (test_pages_isolated(start_pfn, end_pfn, MEMORY_OFFLINE));
>
> I think we have to do
>
> ret = test_pages_isolated()
> if (ret)
Yes.
--
Michal Hocko
SUSE Labs
On Thu 03-09-20 09:25:27, Roman Gushchin wrote:
> On Thu, Sep 03, 2020 at 09:32:54AM +0200, Michal Hocko wrote:
> > On Wed 02-09-20 14:06:12, Zi Yan wrote:
> > > From: Zi Yan
> > >
> > > Hi all,
> > >
> > > This patchset adds support for 1
On Thu 03-09-20 20:31:04, David Hildenbrand wrote:
> On 03.09.20 20:23, Pavel Tatashin wrote:
> > On Thu, Sep 3, 2020 at 2:20 PM David Hildenbrand wrote:
> >>
> >> On 03.09.20 08:38, Michal Hocko wrote:
[...]
> >>> diff --git a/mm/page_isolation.c b/mm/pa
nr_pages = thp_nr_pages(page);
> nr_moved += nr_pages;
> if (PageActive(page))
> workingset_age_nonresident(lruvec, nr_pages);
> --
> 2.28.0.402.g5ffc5be6b7-goog
>
--
Michal Hocko
SUSE Labs
mm/pgtable-generic.c | 56 ++
> mm/rmap.c | 289 --
> mm/swap.c | 31 +
> mm/swap_slots.c | 2 +
> mm/swapfile.c | 8 +-
> mm/userfaultfd.c | 2 +-
> mm/util.c | 16 +-
> mm/vmscan.c | 58 +-
> mm/vmstat.c | 8 +
> 50 files changed, 2270 insertions(+), 349 deletions(-)
> create mode 100644 include/linux/pagechain.h
>
> --
> 2.28.0
>
--
Michal Hocko
SUSE Labs
.
> + */
> + if (ret)
> + drain_all_pages(zone);
> } while (ret);
>
> /* Ok, all of our target is isolated.
> --
> 2.25.1
>
--
Michal Hocko
SUSE Labs
On Wed 02-09-20 19:51:45, Vlastimil Babka wrote:
> On 9/2/20 5:13 PM, Michal Hocko wrote:
> > On Wed 02-09-20 16:55:05, Vlastimil Babka wrote:
> >> On 9/2/20 4:26 PM, Pavel Tatashin wrote:
> >> > On Wed, Sep 2, 2020 at 10:08 AM Michal Hocko wrote:
> >> >
On Wed 02-09-20 16:55:05, Vlastimil Babka wrote:
> On 9/2/20 4:26 PM, Pavel Tatashin wrote:
> > On Wed, Sep 2, 2020 at 10:08 AM Michal Hocko wrote:
> >>
> >> >
> >> > Thread#1 - continue
> >> > free_unref_page_commit
> &
On Wed 02-09-20 08:53:49, Pavel Tatashin wrote:
> On Wed, Sep 2, 2020 at 7:32 AM Michal Hocko wrote:
> >
> > On Wed 02-09-20 11:53:00, Vlastimil Babka wrote:
> > > >> > > Thread #2: ccs killer kthread
> > > >> > >css_killed_w
On Wed 02-09-20 16:01:17, Michal Hocko wrote:
> [Cc Mel and Vlastimil - I am still rummaging]
>
> On Tue 01-09-20 08:46:15, Pavel Tatashin wrote:
> > There is a race during page offline that can lead to infinite loop:
> > a page never ends up on a buddy list and __
raced with another thread that was adding pages to
> + * pcp list.
> + */
> + if (ret)
> + drain_all_pages(zone);
> } while (ret);
>
> /* Ok, all of our target is isolated.
> --
> 2.25.1
>
--
Michal Hocko
SUSE Labs
e sure that we
> + * drain again, because when we isolated range we might
> + * have raced with another thread that was adding pages to
> + * pcp list.
> + */
> + if (ret)
> + drain_all_pages(zone);
> } while (ret);
>
> /* Ok, all of our target is isolated.
> --
> 2.25.1
>
--
Michal Hocko
SUSE Labs
g that
> > we should focus on when debugging.
>
> I was hitting this issue:
> mm/memory_hotplug: drain per-cpu pages again during memory offline
> https://lore.kernel.org/lkml/20200901124615.137200-1-pasha.tatas...@soleen.com
I have noticed the patch but didn't have time to think it through (have
been few days off and catching up with emails). Will give it a higher
priority.
--
Michal Hocko
SUSE Labs
n across reboots. Once the information is not needed we
> hot-add that memory and use it during runtime, before shutdown we
> hot-remove the 2G, save the program state on it, and do the reboot.
I still do not get it. So what does guarantee that the memory is
offlineable in the first place? Also what is the difference between
offlining and simply shutting the system down so that the memory is not
used in the first place. In other words what kind of difference
hotremove makes?
--
Michal Hocko
SUSE Labs
already.
Maybe I have forgoten but why do we take hotplug locks in this path in
the first place? Memory hotplug notifier takes slab_mutex so this
shouldn't be really needed.
--
Michal Hocko
SUSE Labs
age to catch the specific pfn and what is it used for?
start_isolate_page_range and scan_movable_pages should fail if there is
any memory that cannot be migrated permanently. This is something that
we should focus on when debugging.
--
Michal Hocko
SUSE Labs
is applied.
>
> Fixes: 44a70adec910 ("mm, oom_adj: make sure processes sharing mm have same
> view of oom_score_adj")
> Reported-by: Tim Murray
> Debugged-by: Minchan Kim
> Suggested-by: Michal Hocko
> Signed-off-by: Suren Baghdasaryan
Acked-by: Michal Hocko
>
gement helps maybe that could be
done automagically after many consecutive failures.
Just my 2c
--
Michal Hocko
SUSE Labs
> > Add cond_resched() at the upper shrink_node_memcgs() to solve this
> > issue, and any other possible issue like meomry.min protection.
> >
> > Suggested-by: Michal Hocko
> > Signed-off-by: Xunlei Pang
>
> This generally makes sense to me but really should ha
will get a scheduling point for each memcg in the
reclaimed hierarchy without any dependency on the reclaimable memory in
that memcg thus making it more predictable.
"
> Suggested-by: Michal Hocko
> Signed-off-by: Xunlei Pang
Acked-by: Michal Hocko
Thanks!
> ---
> mm/vmscan.c
onfiguration. Sorry about
that.
Thanks for bearing with me.
--
Michal Hocko
SUSE Labs
On Wed 26-08-20 20:21:39, xunlei wrote:
> On 2020/8/26 下午8:07, Michal Hocko wrote:
> > On Wed 26-08-20 20:00:47, xunlei wrote:
> >> On 2020/8/26 下午7:00, Michal Hocko wrote:
> >>> On Wed 26-08-20 18:41:18, xunlei wrote:
> >>>> On 2020/8/26 下午4:11, Mich
On Wed 26-08-20 20:00:47, xunlei wrote:
> On 2020/8/26 下午7:00, Michal Hocko wrote:
> > On Wed 26-08-20 18:41:18, xunlei wrote:
> >> On 2020/8/26 下午4:11, Michal Hocko wrote:
> >>> On Wed 26-08-20 15:27:02, Xunlei Pang wrote:
> >>>> We've met
On Wed 26-08-20 18:41:18, xunlei wrote:
> On 2020/8/26 下午4:11, Michal Hocko wrote:
> > On Wed 26-08-20 15:27:02, Xunlei Pang wrote:
> >> We've met softlockup with "CONFIG_PREEMPT_NONE=y", when
> >> the target memcg doesn't have any reclaimable memory.
&
);
+ cond_resched();
+
if (mem_cgroup_below_min(memcg)) {
/*
* Hard protection.
This should catch both cases. I even have a vague recollection that
somebody has proposed something in that direction but I cannot remember
what has happened with that patch.
--
Michal Hocko
SUSE Labs
cannot say I would care deeply about naming. Consistency looks nice
but if there is a preference to keep the lock then I will not object.
--
Michal Hocko
SUSE Labs
RK) being
> quite rare, the regression is gone after the change is applied.
>
> Fixes: 44a70adec910 ("mm, oom_adj: make sure processes sharing mm have same
> view of oom_score_adj")
> Reported-by: Tim Murray
> Debugged-by: Minchan Kim
> Suggested-by: Michal Hocko
901 - 1000 of 20557 matches
Mail list logo