[PATCH] dax: Kconfig: add depends on !FS_DAX_LIMITED for ARCH_HAS_PMEM_API

2022-12-08 Thread Qi Zheng
dax_flush() has a non-empty implementation when CONFIG_ARCH_HAS_PMEM_API
is selected. If CONFIG_FS_DAX_LIMITED were ever selected together with
CONFIG_ARCH_HAS_PMEM_API, the dax_flush() call in dax_writeback_one()
would panic, since it assumes the pfn is backed by a struct page by
default:

dax_flush(dax_dev, page_address(pfn_to_page(pfn)), count * PAGE_SIZE);

Instead of fixing this, it is better to declare in Kconfig that pmem
does not support CONFIG_FS_DAX_LIMITED for now.
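
For context, the shape of dax_flush() that makes this matter is roughly
the following (a simplified sketch of drivers/dax/super.c, not the exact
code): only the ARCH_HAS_PMEM_API variant actually dereferences the
address, so only that combination can trip over a pfn without a struct
page.

#ifdef CONFIG_ARCH_HAS_PMEM_API
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
{
	/* writes back the cache lines behind @addr */
	arch_wb_cache_pmem(addr, size);
}
#else
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
{
	/* no-op: a bogus page_address() result is never touched */
}
#endif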

Signed-off-by: Qi Zheng 
---
BTW, it seems that DCSSBLK is currently the only user of
CONFIG_FS_DAX_LIMITED, yet it forces filesystem DAX to support the case
where no struct page is required, which complicates the code. Is it
possible to remove DCSSBLK or change it to also require struct page?

 lib/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/Kconfig b/lib/Kconfig
index a7cd6605cc6c..6989ad3fea99 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -672,6 +672,7 @@ config ARCH_NO_SG_CHAIN
 
 config ARCH_HAS_PMEM_API
bool
+   depends on !FS_DAX_LIMITED
 
 config MEMREGION
bool
-- 
2.20.1




[PATCH] sched/fair: Remove the redundant critical section

2020-11-10 Thread Qi Zheng
Now there is nothing in the critical section, so remove it.
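
For context, a paraphrased sketch (not the exact code) of the tail of
do_sched_cfs_slack_timer() as it stands before this patch; the final
lock/unlock pair no longer protects anything:

	/* ... runtime is computed earlier, under cfs_b->lock ... */
	raw_spin_unlock_irqrestore(&cfs_b->lock, flags);

	if (!runtime)
		return;

	distribute_cfs_runtime(cfs_b);

	/* empty critical section, removed below */
	raw_spin_lock_irqsave(&cfs_b->lock, flags);
	raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
}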

Signed-off-by: Qi Zheng 
---
 kernel/sched/fair.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 52cacfc62922..06c4f3430e95 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5114,9 +5114,6 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
return;
 
distribute_cfs_runtime(cfs_b);
-
-   raw_spin_lock_irqsave(&cfs_b->lock, flags);
-   raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
 }
 
 /*
-- 
2.25.1



Re: [PATCH] sched/deadline: Replace rq_of_dl_rq(dl_rq_of_se(dl_se)) with task_rq(dl_task_of(dl_se))

2020-10-14 Thread Qi Zheng
On 2020/10/13 11:48 PM, Peter Zijlstra wrote:
> On Tue, Oct 13, 2020 at 10:31:40PM +0800, Qi Zheng wrote:
>> The rq is already obtained in the dl_rq_of_se() function:
>> struct task_struct *p = dl_task_of(dl_se);
>> struct rq *rq = task_rq(p);
>> So there is no need to do extra conversion.
>>
>> Signed-off-by: Qi Zheng 
>> ---
>> kernel/sched/deadline.c | 4 ++--
>> 1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
>> index 6d93f4518734..f64e577f6aba 100644
>> --- a/kernel/sched/deadline.c
>> +++ b/kernel/sched/deadline.c
>> @@ -1152,7 +1152,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
>> static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
>> {
>> struct task_struct *p = dl_task_of(dl_se);
>> - struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
>> + struct rq *rq = task_rq(p);
>>
>> if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
>> dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
>> @@ -1498,7 +1498,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
>> replenish_dl_entity(dl_se, pi_se);
>> } else if ((flags & ENQUEUE_RESTORE) &&
>> dl_time_before(dl_se->deadline,
>> - rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
>> + rq_clock(task_rq(dl_task_of(dl_se))))) {
>> setup_new_dl_entity(dl_se);
>> }
>
> Consider where we're going:
>
> https://lkml.kernel.org/r/20200807095051.385985-1-juri.le...@redhat.com
>
> then a dl_entity is no longer immediately a task and the above is no
> longer true.
>

Thanks for your reply. I saw in the patch below that dl_rq_of_se()
has been changed to rq_of_dl_se(), so the above change is no longer needed.

[RFC PATCH v2 4/6] sched/deadline: Introduce deadline servers

In addition, when is the SCHED_DEADLINE server infrastructure expected
to be merged into mainline? It looks great!



[PATCH] sched/deadline: Replace rq_of_dl_rq(dl_rq_of_se(dl_se)) with task_rq(dl_task_of(dl_se))

2020-10-13 Thread Qi Zheng
The rq is already obtained in the dl_rq_of_se() function:
struct task_struct *p = dl_task_of(dl_se);
struct rq *rq = task_rq(p);
So there is no need to do extra conversion.
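
For reference, the helpers involved look roughly like this (paraphrased
from kernel/sched/deadline.c at the time), which is why the composed
rq_of_dl_rq(dl_rq_of_se(dl_se)) is just a round trip through &rq->dl:

static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
{
	return container_of(dl_se, struct task_struct, dl);
}

static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
{
	struct task_struct *p = dl_task_of(dl_se);
	struct rq *rq = task_rq(p);

	return &rq->dl;
}

static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
{
	return container_of(dl_rq, struct rq, dl);
}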

Signed-off-by: Qi Zheng 
---
 kernel/sched/deadline.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 6d93f4518734..f64e577f6aba 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1152,7 +1152,7 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
 {
struct task_struct *p = dl_task_of(dl_se);
-   struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));
+   struct rq *rq = task_rq(p);
 
if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
@@ -1498,7 +1498,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
replenish_dl_entity(dl_se, pi_se);
} else if ((flags & ENQUEUE_RESTORE) &&
  dl_time_before(dl_se->deadline,
-rq_clock(rq_of_dl_rq(dl_rq_of_se(dl_se))))) {
+rq_clock(task_rq(dl_task_of(dl_se))))) {
setup_new_dl_entity(dl_se);
}
 
-- 
2.25.1



Re: [PATCH] of/fdt: Remove duplicate check in early_init_dt_scan_memory()

2020-08-17 Thread Qi Zheng

On 2020/8/18 2:21 AM, Rob Herring wrote:

On Fri, Aug 14, 2020 at 4:57 PM Qi Zheng  wrote:


When the first property lookup returns a non-NULL reg, reg ends up
being checked against NULL twice. So restructure the code.


I prefer the way it was. I'm sure the compiler is smart enough to
throw out the 2nd check. Plus, 'linux,usable-memory' being present is
the exception, so usually 'reg' will be NULL.


I got it, and thanks for your review.
Please ignore this patch.




Signed-off-by: Qi Zheng 
---
  drivers/of/fdt.c | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 4602e467ca8b..f54412c00642 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1002,10 +1002,11 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
 return 0;

 reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
-   if (reg == NULL)
+   if (reg == NULL) {
 reg = of_get_flat_dt_prop(node, "reg", &l);
-   if (reg == NULL)
-   return 0;
+   if (reg == NULL)
+   return 0;
+   }

 endp = reg + (l / sizeof(__be32));
 hotpluggable = of_get_flat_dt_prop(node, "hotpluggable", NULL);
--
2.25.1



[PATCH] of/fdt: Remove duplicate check in early_init_dt_scan_memory()

2020-08-14 Thread Qi Zheng
When the first property lookup returns a non-NULL reg, reg ends up
being checked against NULL twice. So restructure the code.
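
To make the redundancy concrete, the current flow is (taken from the
removed lines below):

	reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
	if (reg == NULL)			/* first NULL check */
		reg = of_get_flat_dt_prop(node, "reg", &l);
	if (reg == NULL)			/* evaluated again even when the
						 * first lookup succeeded */
		return 0;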

Signed-off-by: Qi Zheng 
---
 drivers/of/fdt.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 4602e467ca8b..f54412c00642 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1002,10 +1002,11 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
return 0;
 
reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
-   if (reg == NULL)
+   if (reg == NULL) {
reg = of_get_flat_dt_prop(node, "reg", &l);
-   if (reg == NULL)
-   return 0;
+   if (reg == NULL)
+   return 0;
+   }
 
endp = reg + (l / sizeof(__be32));
hotpluggable = of_get_flat_dt_prop(node, "hotpluggable", NULL);
-- 
2.25.1



Re: [PATCH] sched/core: add unlikely in group_has_capacity()

2020-08-11 Thread Qi Zheng

On 2020/8/7 10:47 AM, Qi Zheng wrote:

Yeah, because of the following two points, I also think
the probability is 0%:
a) the sd is protected by the RCU lock, and load_balance()
    runs between rcu_read_lock() and rcu_read_unlock().
b) the sgs is a local variable.

So in group_classify(), the env->sd->imbalance_pct and
the sgs will not be changed. May I remove the duplicate check
from group_has_capacity() and resubmit a patch?

Yours,
Qi Zheng

On 2020/8/6 10:45 PM, Ingo Molnar wrote:


* Qi Zheng  wrote:


1. The group_has_capacity() function is only called in
    group_classify().
2. Before group_has_capacity() is called,
    group_is_overloaded() first evaluates the following
    formula; if it holds, group_classify() directly
    returns group_overloaded.

(sgs->group_capacity * imbalance_pct) <
 (sgs->group_runnable * 100)

Therefore, when group_has_capacity() is called, the
probability that the above formula holds is very small. Hint
the compiler about that.

Signed-off-by: Qi Zheng 
---
  kernel/sched/fair.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ba8f230feb9..9074fd5e23b2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8234,8 +8234,8 @@ group_has_capacity(unsigned int imbalance_pct, struct sg_lb_stats *sgs)

  if (sgs->sum_nr_running < sgs->group_weight)
  return true;
-    if ((sgs->group_capacity * imbalance_pct) <
-    (sgs->group_runnable * 100))
+    if (unlikely((sgs->group_capacity * imbalance_pct) <
+    (sgs->group_runnable * 100)))
  return false;


Isn't the probability that this second check will match around 0%?

I.e. wouldn't the right fix be to remove the duplicate check from
group_has_capacity(), because it's already been checked in
group_classify()? Maybe while leaving a comment in place?

Thanks,

Ingo



Hi,

As Valentin and I discussed in the patch below, simply removing the
check may not be completely harmless.

[PATCH]sched/fair: Remove the duplicate check from
group_has_capacity() :
-   if ((sgs->group_capacity * imbalance_pct) <
-   (sgs->group_runnable * 100))
-   return false;


If sum_nr_running < group_weight, we won't evaluate it.
If sum_nr_running > group_weight, we either won't call into
  group_has_capacity() or we'll have checked it already in
  group_overloaded().
But in the case of sum_nr_running == group_weight, we can
run to this check.

Although I also think significant capacity pressure is unlikely in
the == case, I'm not sure whether there are some special scenarios,
such as some CPUs in sg->cpumask no longer being active, or other
scenarios?

So adding the unlikely() in group_has_capacity() may be the safest
way.

Add Valentin Schneider .

Yours,
Qi Zheng


Re: [PATCH] sched/fair: Remove the duplicate check from group_has_capacity()

2020-08-11 Thread Qi Zheng

On 2020/8/12 4:16 AM, Valentin Schneider wrote:


On 11/08/20 14:12, Qi Zheng wrote:

On 2020/8/11 8:48 PM, Valentin Schneider wrote:

On 11/08/20 12:44, Qi Zheng wrote:

In fact, at the beginning, I added unlikely() here to hint the compiler:

-   if ((sgs->group_capacity * imbalance_pct) <
-   (sgs->group_runnable * 100))
+   if (unlikely((sgs->group_capacity * imbalance_pct) <
+   (sgs->group_runnable * 100)))

The corresponding patch is as follows:

[PATCH]sched/core: add unlikely in group_has_capacity()

Do you think it is necessary?


The "unlikely" approach has the benefit of keeping all corner cases in
place. I was tempted to say it could still make sense to get rid of the
extra check entirely, given that it has an impact only when:

- sum_nr_running == group_weight
- group capacity has been noticeably reduced

If sum_nr_running < group_weight, we won't evaluate it.
If sum_nr_running > group_weight, we either won't call into
group_has_capacity() or we'll have checked it already in
group_overloaded().

That said, it does make very much sense to check it in that ==
case. Vincent might have a different take on this, but right now I'd say
the unlikely approach is the safest one of the two.



So what should I do next? Do I resubmit a patch with unlikely() or
add your email to the old patch([PATCH]sched/core: add unlikely in
group_has_capacity())? Or continue to wait for suggestions from
other maintainers?


I guess you can add a reply to the original thread where you had the
unlikely() to point out *removing* the check isn't 100% harmless.

Vincent might want to have a look at it, but AFAIA he's on holidays ATM.



Okay, I will reply to the old patch and add your email to it.
Thanks for your comments.

Yours,
Qi Zheng


Re: [PATCH] sched/fair: Remove the duplicate check from group_has_capacity()

2020-08-11 Thread Qi Zheng

On 2020/8/11 8:48 PM, Valentin Schneider wrote:


On 11/08/20 12:44, Qi Zheng wrote:

On 2020/8/11 6:38 PM, Valentin Schneider wrote:


On 11/08/20 04:39, Qi Zheng wrote:

On 2020/8/11 2:33 AM, Valentin Schneider wrote:


On 10/08/20 02:00, Qi Zheng wrote:

1. The group_has_capacity() function is only called in
  group_classify().
2. The following inequality has already been checked in
  group_is_overloaded() which was also called in
  group_classify().

 (sgs->group_capacity * imbalance_pct) <
   (sgs->group_runnable * 100)



Consider group_is_overloaded() returns false because of the first
condition:

   if (sgs->sum_nr_running <= sgs->group_weight)
   return false;

then group_has_capacity() would be the first place where the group_runnable
vs group_capacity comparison would be done.

Now in that specific case we'll actually only check it if

 sgs->sum_nr_running == sgs->group_weight

and the only case where the runnable vs capacity check can fail here is if
there's significant capacity pressure going on. TBH this capacity pressure
could be happening even when there are fewer tasks than CPUs, so I'm not
sure how intentional that corner case is.


Maybe some CPUs in sg->cpumask are no longer active in the == case,
which causes the significant capacity pressure?



That can only happen in that short window between deactivating a CPU and
not having rebuilt the sched_domains yet, which sounds quite elusive.



In fact, at the beginning, I added unlikely() here to hint the compiler:

-   if ((sgs->group_capacity * imbalance_pct) <
-   (sgs->group_runnable * 100))
+   if (unlikely((sgs->group_capacity * imbalance_pct) <
+   (sgs->group_runnable * 100)))

The corresponding patch is as follows:

   [PATCH]sched/core: add unlikely in group_has_capacity()

Do you think it is necessary?


The "unlikely" approach has the benefit of keeping all corner cases in
place. I was tempted to say it could still make sense to get rid of the
extra check entirely, given that it has an impact only when:

- sum_nr_running == group_weight
- group capacity has been noticeably reduced

If sum_nr_running < group_weight, we won't evaluate it.
If sum_nr_running > group_weight, we either won't call into
   group_has_capacity() or we'll have checked it already in
   group_overloaded().

That said, it does make very much sense to check it in that ==
case. Vincent might have a different take on this, but right now I'd say
the unlikely approach is the safest one of the two.



So what should I do next? Do I resubmit a patch with unlikely() or
add your email to the old patch([PATCH]sched/core: add unlikely in
group_has_capacity())? Or continue to wait for suggestions from
other maintainers?


Re: [PATCH] sched/fair: Remove the duplicate check from group_has_capacity()

2020-08-11 Thread Qi Zheng

On 2020/8/11 6:38 PM, Valentin Schneider wrote:


On 11/08/20 04:39, Qi Zheng wrote:

On 2020/8/11 2:33 AM, Valentin Schneider wrote:


On 10/08/20 02:00, Qi Zheng wrote:

1. The group_has_capacity() function is only called in
 group_classify().
2. The following inequality has already been checked in
 group_is_overloaded() which was also called in
 group_classify().

(sgs->group_capacity * imbalance_pct) <
  (sgs->group_runnable * 100)



Consider group_is_overloaded() returns false because of the first
condition:

  if (sgs->sum_nr_running <= sgs->group_weight)
  return false;

then group_has_capacity() would be the first place where the group_runnable
vs group_capacity comparison would be done.

Now in that specific case we'll actually only check it if

sgs->sum_nr_running == sgs->group_weight

and the only case where the runnable vs capacity check can fail here is if
there's significant capacity pressure going on. TBH this capacity pressure
could be happening even when there are fewer tasks than CPUs, so I'm not
sure how intentional that corner case is.


Maybe some CPUs in sg->cpumask are no longer active in the == case,
which causes the significant capacity pressure?



That can only happen in that short window between deactivating a CPU and
not having rebuilt the sched_domains yet, which sounds quite elusive.



In fact, at the beginning, I added unlikely() here to hint the compiler:

-   if ((sgs->group_capacity * imbalance_pct) <
-   (sgs->group_runnable * 100))
+   if (unlikely((sgs->group_capacity * imbalance_pct) <
+   (sgs->group_runnable * 100)))

The corresponding patch is as follows:

[PATCH]sched/core: add unlikely in group_has_capacity()

Do you think it is necessary?


Re: [PATCH] sched/fair: Remove the duplicate check from group_has_capacity()

2020-08-10 Thread Qi Zheng

On 2020/8/11 2:33 AM, Valentin Schneider wrote:


On 10/08/20 02:00, Qi Zheng wrote:

1. The group_has_capacity() function is only called in
group_classify().
2. The following inequality has already been checked in
group_is_overloaded() which was also called in
group_classify().

   (sgs->group_capacity * imbalance_pct) <
 (sgs->group_runnable * 100)



Consider group_is_overloaded() returns false because of the first
condition:

 if (sgs->sum_nr_running <= sgs->group_weight)
 return false;

then group_has_capacity() would be the first place where the group_runnable
vs group_capacity comparison would be done.

Now in that specific case we'll actually only check it if

   sgs->sum_nr_running == sgs->group_weight

and the only case where the runnable vs capacity check can fail here is if
there's significant capacity pressure going on. TBH this capacity pressure
could be happening even when there are fewer tasks than CPUs, so I'm not
sure how intentional that corner case is.


Maybe some CPUs in sg->cpumask are no longer active in the == case,
which causes the significant capacity pressure?




For the

 sgs->sum_nr_running > sgs->group_weight

case I agree with your patch, there just is that oddity at the == case.


So just remove the duplicate check from group_has_capacity().

Signed-off-by: Qi Zheng 
---
  kernel/sched/fair.c | 4 
  1 file changed, 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ba8f230feb9..a41903fb327a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8234,10 +8234,6 @@ group_has_capacity(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
   if (sgs->sum_nr_running < sgs->group_weight)
   return true;

-   if ((sgs->group_capacity * imbalance_pct) <
-   (sgs->group_runnable * 100))
-   return false;
-
   if ((sgs->group_capacity * 100) >
   (sgs->group_util * imbalance_pct))
   return true;


[PATCH] sched/fair: Remove the duplicate check from group_has_capacity()

2020-08-09 Thread Qi Zheng
1. The group_has_capacity() function is only called in
   group_classify().
2. The following inequality has already been checked in
   group_is_overloaded() which was also called in
   group_classify().

(sgs->group_capacity * imbalance_pct) <
(sgs->group_runnable * 100)

So just remove the duplicate check from group_has_capacity().
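
For readers of the archive, the call structure in question is roughly
the following (paraphrased from kernel/sched/fair.c of that era; the
intermediate group types are elided as comments):

static inline bool
group_is_overloaded(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
{
	if (sgs->sum_nr_running <= sgs->group_weight)
		return false;

	if ((sgs->group_capacity * 100) <
			(sgs->group_util * imbalance_pct))
		return true;

	if ((sgs->group_capacity * imbalance_pct) <
			(sgs->group_runnable * 100))
		return true;	/* the inequality discussed above */

	return false;
}

static inline enum group_type
group_classify(unsigned int imbalance_pct, struct sched_group *group,
	       struct sg_lb_stats *sgs)
{
	if (group_is_overloaded(imbalance_pct, sgs))
		return group_overloaded;

	/* imbalanced/asym/misfit cases elided */

	if (!group_has_capacity(imbalance_pct, sgs))
		return group_fully_busy;

	return group_has_spare;
}

Note that group_has_capacity() is only reached after group_is_overloaded()
has returned false, which (as the discussion earlier in this thread notes)
leaves only the sum_nr_running == group_weight case where the inequality
gets evaluated a second time.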

Signed-off-by: Qi Zheng 
---
 kernel/sched/fair.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ba8f230feb9..a41903fb327a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8234,10 +8234,6 @@ group_has_capacity(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
if (sgs->sum_nr_running < sgs->group_weight)
return true;
 
-   if ((sgs->group_capacity * imbalance_pct) <
-   (sgs->group_runnable * 100))
-   return false;
-
if ((sgs->group_capacity * 100) >
(sgs->group_util * imbalance_pct))
return true;
-- 
2.25.1



Re: [PATCH] sched/core: add unlikely in group_has_capacity()

2020-08-06 Thread Qi Zheng

Yeah, because of the following two points, I also think
the probability is 0%:
a) the sd is protected by the RCU lock, and load_balance()
   runs between rcu_read_lock() and rcu_read_unlock().
b) the sgs is a local variable.

So in group_classify(), the env->sd->imbalance_pct and
the sgs will not be changed. May I remove the duplicate check
from group_has_capacity() and resubmit a patch?

Yours,
Qi Zheng

On 2020/8/6 10:45 PM, Ingo Molnar wrote:


* Qi Zheng  wrote:


1. The group_has_capacity() function is only called in
group_classify().
2. Before group_has_capacity() is called,
group_is_overloaded() first evaluates the following
formula; if it holds, group_classify() directly
returns group_overloaded.

(sgs->group_capacity * imbalance_pct) <
 (sgs->group_runnable * 100)

Therefore, when group_has_capacity() is called, the
probability that the above formula holds is very small. Hint
the compiler about that.

Signed-off-by: Qi Zheng 
---
  kernel/sched/fair.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ba8f230feb9..9074fd5e23b2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8234,8 +8234,8 @@ group_has_capacity(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
if (sgs->sum_nr_running < sgs->group_weight)
return true;
  
-	if ((sgs->group_capacity * imbalance_pct) <
-   (sgs->group_runnable * 100))
+   if (unlikely((sgs->group_capacity * imbalance_pct) <
+   (sgs->group_runnable * 100)))
return false;


Isn't the probability that this second check will match around 0%?

I.e. wouldn't the right fix be to remove the duplicate check from
group_has_capacity(), because it's already been checked in
group_classify()? Maybe while leaving a comment in place?

Thanks,

Ingo



Re: [PATCH] sched/fair: Fix the logic about active_balance in load_balance()

2020-08-03 Thread Qi Zheng

Hi Dietmar,

I understand, thank you for your review and very detailed explanation.

Yours,
Qi Zheng

On 2020/8/3 3:36 PM, Dietmar Eggemann wrote:

On 02/08/2020 06:51, Qi Zheng wrote:

I think the unbalanced scenario here should be the one where we need
to do an active balance but it is not actually done. So fix it.

Signed-off-by: Qi Zheng 
---
  kernel/sched/fair.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ba8f230feb9..6d8c53718b67 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9710,7 +9710,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
} else
sd->nr_balance_failed = 0;
  
-	if (likely(!active_balance) || voluntary_active_balance(&env)) {
+   if (likely(!active_balance) && voluntary_active_balance(&env)) {
/* We were unbalanced, so reset the balancing interval */
sd->balance_interval = sd->min_interval;
} else {



Active balance is potentially already been done when we reach this code.

See 'if (need_active_balance(&env))' and 'if (!busiest->active_balance)'
further up.

Here we only reset sd->balance_interval in case:
(A) the last load balance wasn't an active one
(B) the reason for the active load balance was:
 (1) asym packing
 (2) capacity of src_cpu is reduced compared to the one of dst_cpu
 (3) misfit handling

(B) is done so as not to unnecessarily increase the balance interval, see
commit 46a745d90585 ("sched/fair: Fix unnecessary increase of balance
interval").



[PATCH] sched/fair: Fix the logic about active_balance in load_balance()

2020-08-01 Thread Qi Zheng
I think the unbalanced scenario here should be the one where we need
to do an active balance but it is not actually done. So fix it.

Signed-off-by: Qi Zheng 
---
 kernel/sched/fair.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ba8f230feb9..6d8c53718b67 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9710,7 +9710,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
} else
sd->nr_balance_failed = 0;
 
-   if (likely(!active_balance) || voluntary_active_balance(&env)) {
+   if (likely(!active_balance) && voluntary_active_balance(&env)) {
/* We were unbalanced, so reset the balancing interval */
sd->balance_interval = sd->min_interval;
} else {
-- 
2.25.1



[PATCH] sched/core: add unlikely in group_has_capacity()

2020-07-30 Thread Qi Zheng
1. The group_has_capacity() function is only called in
   group_classify().
2. Before group_has_capacity() is called,
   group_is_overloaded() first evaluates the following
   formula; if it holds, group_classify() directly
   returns group_overloaded.

(sgs->group_capacity * imbalance_pct) <
(sgs->group_runnable * 100)

Therefore, when group_has_capacity() is called, the
probability that the above formula holds is very small. Hint
the compiler about that.

Signed-off-by: Qi Zheng 
---
 kernel/sched/fair.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2ba8f230feb9..9074fd5e23b2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8234,8 +8234,8 @@ group_has_capacity(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
if (sgs->sum_nr_running < sgs->group_weight)
return true;
 
-   if ((sgs->group_capacity * imbalance_pct) <
-   (sgs->group_runnable * 100))
+   if (unlikely((sgs->group_capacity * imbalance_pct) <
+   (sgs->group_runnable * 100)))
return false;
 
if ((sgs->group_capacity * 100) >
-- 
2.25.1



[PATCH] sched/fair: Fix comment in newidle_balance()

2020-06-10 Thread Qi Zheng
The function is now called newidle_balance() rather than idle_balance(), so update the comments.

Signed-off-by: Qi Zheng 
---
 kernel/sched/fair.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0ed04d2a8959..7f9c3245c967 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10434,7 +10434,7 @@ static inline void nohz_newidle_balance(struct rq *this_rq) { }
 #endif /* CONFIG_NO_HZ_COMMON */
 
 /*
- * idle_balance is called by schedule() if this_cpu is about to become
+ * newidle_balance() is called by schedule() if this_cpu is about to become
  * idle. Attempts to pull tasks from other CPUs.
  *
  * Returns:
@@ -10452,8 +10452,8 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 
update_misfit_status(NULL, this_rq);
/*
-* We must set idle_stamp _before_ calling idle_balance(), such that we
-* measure the duration of idle_balance() as idle time.
+* We must set idle_stamp _before_ calling newidle_balance(), such that
+* we measure the duration of newidle_balance() as idle time.
 */
this_rq->idle_stamp = rq_clock(this_rq);
 
-- 
2.25.1



[PATCH v2] of/fdt: Remove redundant kbasename function call

2020-05-28 Thread Qi Zheng
For versions 1 to 3 of the device tree, this is the full node path
as a zero-terminated string, starting with "/". Even so, the following
check will not hold, since the node name has already been processed
by fdt_get_name():

*pathp == '/'

For version 16 and later, this is the node unit name only
(or an empty string for the root node), so the check still
does not hold.

So the kbasename() call is redundant; just remove it.
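
As an illustration (based on libfdt's documented behaviour, not part of
the patch), fdt_get_name() already returns only the node name, so the
check removed below can never be true:

	pathp = fdt_get_name(blob, offset, NULL);
	/*
	 * pathp is e.g. "memory@80000000" for a subnode, or "" for the
	 * root node; never a path beginning with '/'. For pre-v16 blobs
	 * fdt_get_name() itself strips everything up to the last '/'.
	 */
	if (*pathp == '/')			/* never true */
		pathp = kbasename(pathp);	/* dead code, removed here */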

Signed-off-by: Qi Zheng 
---

Change in v2:
remove another kbasename() also.

 drivers/of/fdt.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 38619e9ef6b2..4602e467ca8b 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -643,8 +643,6 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
 offset = fdt_next_node(blob, offset, &depth)) {
 
pathp = fdt_get_name(blob, offset, NULL);
-   if (*pathp == '/')
-   pathp = kbasename(pathp);
rc = it(offset, pathp, depth, data);
}
return rc;
@@ -671,8 +669,6 @@ int __init of_scan_flat_dt_subnodes(unsigned long parent,
int rc;
 
pathp = fdt_get_name(blob, node, NULL);
-   if (*pathp == '/')
-   pathp = kbasename(pathp);
rc = it(node, pathp, data);
if (rc)
return rc;
-- 
2.25.1



Re: [PATCH] of/fdt: Remove redundant kbasename function call

2020-05-28 Thread Qi Zheng

Hi Rob,

Thanks for your review.
I will send you a patch of v2 later.

Yours,
Qi Zheng

On 2020/5/28 2:27 AM, Rob Herring wrote:

On Tue, May 12, 2020 at 11:49:09PM +0800, Qi Zheng wrote:

For versions 1 to 3 of the device tree, this is the full node path
as a zero-terminated string, starting with "/". Even so, the following
check will not hold, since the node name has already been processed
by fdt_get_name():

*pathp == '/'

For version 16 and later, this is the node unit name only
(or an empty string for the root node), so the check still
does not hold.

So the kbasename() call is redundant; just remove it.


There's 2 occurrences of this. Can you remove the other one too.

Rob



[PATCH] dt/platform: Fix comment in of_dev_lookup()

2020-05-24 Thread Qi Zheng
The function is of_dev_lookup() rather than of_devname_lookup(), so fix the comment.

Signed-off-by: Qi Zheng 
---
 drivers/of/platform.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 3371e4a06248..3627fee60215 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -291,7 +291,7 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
 #endif /* CONFIG_ARM_AMBA */
 
 /**
- * of_devname_lookup() - Given a device node, lookup the preferred Linux name
+ * of_dev_lookup() - Given a device node, lookup the preferred Linux name
  */
 static const struct of_dev_auxdata *of_dev_lookup(const struct of_dev_auxdata *lookup,
 struct device_node *np)
-- 
2.25.1



Re: [PATCH] of/fdt: Remove redundant kbasename function call

2020-05-13 Thread Qi Zheng

On 2020/5/12 11:49 PM, Qi Zheng wrote:

For versions 1 to 3 of the device tree, this is the full node path
as a zero-terminated string, starting with "/". Even so, the following
check will not hold, since the node name has already been processed
by fdt_get_name():

*pathp == '/'

For version 16 and later, this is the node unit name only
(or an empty string for the root node), so the check still
does not hold.

So the kbasename() call is redundant; just remove it.

Signed-off-by: Qi Zheng 
---
  drivers/of/fdt.c | 2 --
  1 file changed, 2 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 8a8e07a8f03d..ea31b2ae8474 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -643,8 +643,6 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
 offset = fdt_next_node(blob, offset, &depth)) {
  
  		pathp = fdt_get_name(blob, offset, NULL);
-   if (*pathp == '/')
-   pathp = kbasename(pathp);
rc = it(offset, pathp, depth, data);
}
return rc;



add Rob Herring .


[PATCH] of/fdt: Remove redundant kbasename function call

2020-05-12 Thread Qi Zheng
For versions 1 to 3 of the device tree, this is the full node path
as a zero-terminated string, starting with "/". Even so, the following
check will not hold, since the node name has already been processed
by fdt_get_name():

*pathp == '/'

For version 16 and later, this is the node unit name only
(or an empty string for the root node), so the check still
does not hold.

So the kbasename() call is redundant; just remove it.

Signed-off-by: Qi Zheng 
---
 drivers/of/fdt.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 8a8e07a8f03d..ea31b2ae8474 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -643,8 +643,6 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
 offset = fdt_next_node(blob, offset, &depth)) {
 
pathp = fdt_get_name(blob, offset, NULL);
-   if (*pathp == '/')
-   pathp = kbasename(pathp);
rc = it(offset, pathp, depth, data);
}
return rc;
-- 
2.25.1



[PATCH] kobject: documentation: Fix erroneous function example in kobject doc.

2020-05-04 Thread Qi Zheng
Update the prototypes of some functions listed in the kobject
document, since they have changed.
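
As a minimal usage sketch of the updated prototypes (a hypothetical
module modeled loosely on samples/kobject/kobject-example.c, not part
of this patch):

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/module.h>
#include <linux/sysfs.h>

static ssize_t example_show(struct kobject *kobj,
			    struct kobj_attribute *attr, char *buf)
{
	return sprintf(buf, "example\n");
}

static struct kobj_attribute example_attribute =
	__ATTR(example, 0444, example_show, NULL);

static struct kobject *example_kobj;

static int __init example_init(void)
{
	/* kobject_create_and_add(const char *name, struct kobject *parent) */
	example_kobj = kobject_create_and_add("kobject_example", kernel_kobj);
	if (!example_kobj)
		return -ENOMEM;

	/* sysfs_create_file(struct kobject *kobj, const struct attribute *attr) */
	return sysfs_create_file(example_kobj, &example_attribute.attr);
}

static void __exit example_exit(void)
{
	kobject_put(example_kobj);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");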

Signed-off-by: Qi Zheng 
---
 Documentation/core-api/kobject.rst | 26 ++
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/Documentation/core-api/kobject.rst b/Documentation/core-api/kobject.rst
index 1f62d4d7d966..c3ac2963283b 100644
--- a/Documentation/core-api/kobject.rst
+++ b/Documentation/core-api/kobject.rst
@@ -80,11 +80,11 @@ what is the pointer to the containing structure?  You must avoid tricks
 (such as assuming that the kobject is at the beginning of the structure)
 and, instead, use the container_of() macro, found in ::
 
-container_of(pointer, type, member)
+container_of(ptr, type, member)
 
 where:
 
-  * ``pointer`` is the pointer to the embedded kobject,
+  * ``ptr`` is the pointer to the embedded kobject,
   * ``type`` is the type of the containing structure, and
   * ``member`` is the name of the structure field to which ``pointer`` points.
 
@@ -140,7 +140,7 @@ the name of the kobject, call kobject_rename()::
 
 int kobject_rename(struct kobject *kobj, const char *new_name);
 
-kobject_rename does not perform any locking or have a solid notion of
+kobject_rename() does not perform any locking or have a solid notion of
 what names are valid so the caller must provide their own sanity checking
 and serialization.
 
@@ -222,17 +222,17 @@ ksets, show and store functions, and other details.  This is the one
 exception where a single kobject should be created.  To create such an
 entry, use the function::
 
-struct kobject *kobject_create_and_add(char *name, struct kobject *parent);
+struct kobject *kobject_create_and_add(const char *name, struct kobject *parent);
 
 This function will create a kobject and place it in sysfs in the location
 underneath the specified parent kobject.  To create simple attributes
 associated with this kobject, use::
 
-int sysfs_create_file(struct kobject *kobj, struct attribute *attr);
+int sysfs_create_file(struct kobject *kobj, const struct attribute *attr);
 
 or::
 
-int sysfs_create_group(struct kobject *kobj, struct attribute_group *grp);
+int sysfs_create_group(struct kobject *kobj, const struct attribute_group *grp);
 
 Both types of attributes used here, with a kobject that has been created
 with the kobject_create_and_add(), can be of type kobj_attribute, so no
@@ -300,8 +300,10 @@ kobj_type::
 void (*release)(struct kobject *kobj);
 const struct sysfs_ops *sysfs_ops;
 struct attribute **default_attrs;
+const struct attribute_group **default_groups;
 const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
 const void *(*namespace)(struct kobject *kobj);
+void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid);
 };
 
 This structure is used to describe a particular type of kobject (or, more
@@ -352,12 +354,12 @@ created and never declared statically or on the stack.  To create a new
 kset use::
 
   struct kset *kset_create_and_add(const char *name,
-   struct kset_uevent_ops *u,
-   struct kobject *parent);
+   const struct kset_uevent_ops *uevent_ops,
+   struct kobject *parent_kobj);
 
 When you are finished with the kset, call::
 
-  void kset_unregister(struct kset *kset);
+  void kset_unregister(struct kset *k);
 
 to destroy it.  This removes the kset from sysfs and decrements its reference
 count.  When the reference count goes to zero, the kset will be released.
@@ -371,9 +373,9 @@ If a kset wishes to control the uevent operations of the kobjects
 associated with it, it can use the struct kset_uevent_ops to handle it::
 
   struct kset_uevent_ops {
-  int (*filter)(struct kset *kset, struct kobject *kobj);
-  const char *(*name)(struct kset *kset, struct kobject *kobj);
-  int (*uevent)(struct kset *kset, struct kobject *kobj,
+  int (* const filter)(struct kset *kset, struct kobject *kobj);
+  const char *(* const name)(struct kset *kset, struct kobject *kobj);
+  int (* const uevent)(struct kset *kset, struct kobject *kobj,
 struct kobj_uevent_env *env);
   };
 
-- 
2.25.1