Hi Anton,

> diff --git a/mm/vmpressure.c b/mm/vmpressure.c
> new file mode 100644
> index 0000000..7922503


> +struct vmpressure_event {
> +     struct eventfd_ctx *efd;
> +     enum vmpressure_levels level;
> +     struct list_head node;
> +};
> +
> +static bool vmpressure_event(struct vmpressure *vmpr,
> +                          unsigned long s, unsigned long r)
> +{
> +     struct vmpressure_event *ev;
> +     int level = vmpressure_calc_level(vmpressure_win, s, r);
> +     bool signalled = 0;
> +
> +     mutex_lock(&vmpr->events_lock);
> +
> +     list_for_each_entry(ev, &vmpr->events, node) {
> +             if (level >= ev->level) {
> +                     eventfd_signal(ev->efd, 1);
> +                     signalled++;
> +             }
> +     }
> +
> +     mutex_unlock(&vmpr->events_lock);
> +
> +     return signalled;
> +}
> +
> +static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
> +{
> +     struct cgroup *cg = vmpr_to_css(vmpr)->cgroup->parent;
> +
> +     if (!cg)
> +             return NULL;
> +     return cg_to_vmpr(cg);
> +}

Unfortunately, "parent" in memcg has different meanings for information
propagation purposes, depending on the value of the "use_hierarchy" flag.
That flag is slated for deprecation, but still...

I suggest you use the helper mem_cgroup_parent, which will already give
you the right parent (either the immediate parent or root) with all of
that taken into account.

> +
> +static int vmpressure_register_level(struct cgroup *cg, struct cftype *cft,
> +                                  struct eventfd_ctx *eventfd,
> +                                  const char *args)
> +{
> +     struct vmpressure *vmpr = cg_to_vmpr(cg);
> +     struct vmpressure_event *ev;
> +     int lvl;
> +
> +     for (lvl = 0; lvl < VMPRESSURE_NUM_LEVELS; lvl++) {
> +             if (!strcmp(vmpressure_str_levels[lvl], args))
> +                     break;
> +     }
> +
> +     if (lvl >= VMPRESSURE_NUM_LEVELS)
> +             return -EINVAL;
> +
> +     ev = kzalloc(sizeof(*ev), GFP_KERNEL);
> +     if (!ev)
> +             return -ENOMEM;
> +
> +     ev->efd = eventfd;
> +     ev->level = lvl;
> +
> +     mutex_lock(&vmpr->events_lock);
> +     list_add(&ev->node, &vmpr->events);
> +     mutex_unlock(&vmpr->events_lock);
> +
> +     return 0;
> +}
> +
> +static void vmpressure_unregister_level(struct cgroup *cg, struct cftype 
> *cft,
> +                                     struct eventfd_ctx *eventfd)
> +{
> +     struct vmpressure *vmpr = cg_to_vmpr(cg);
> +     struct vmpressure_event *ev;
> +
> +     mutex_lock(&vmpr->events_lock);
> +     list_for_each_entry(ev, &vmpr->events, node) {
> +             if (ev->efd != eventfd)
> +                     continue;
> +             list_del(&ev->node);
> +             kfree(ev);
> +             break;
> +     }
> +     mutex_unlock(&vmpr->events_lock);
> +}
> +
> +static struct cftype vmpressure_cgroup_files[] = {
> +     {
> +             .name = "pressure_level",
> +             .read = vmpressure_read_level,
> +             .register_event = vmpressure_register_level,
> +             .unregister_event = vmpressure_unregister_level,
> +     },
> +     {},
> +};
> +

> +
> +void __init enable_pressure_cgroup(void)
> +{
> +     WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys,
> +                                vmpressure_cgroup_files));
> +}

There is no functionality discovery going on here, and this is not
conditional on anything. Wouldn't it be better, then, to just add the register +
read functions to memcontrol.c and add the files to the memcontrol cftype?

> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 88c5fed..34f09b9 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1982,6 +1982,10 @@ static void shrink_zone(struct zone *zone, struct 
> scan_control *sc)
>                       }
>                       memcg = mem_cgroup_iter(root, memcg, &reclaim);
>               } while (memcg);
> +
> +             vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
> +                        sc->nr_scanned - nr_scanned, nr_reclaimed);
> +
>       } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
>                                        sc->nr_scanned - nr_scanned, sc));
>  }
> @@ -2167,6 +2171,8 @@ static unsigned long do_try_to_free_pages(struct 
> zonelist *zonelist,
>               count_vm_event(ALLOCSTALL);
>  
>       do {
> +             vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
> +                             sc->priority);
>               sc->nr_scanned = 0;
>               aborted_reclaim = shrink_zones(zonelist, sc);
>  
The vmscan part seems okay to me.




--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to