[PATCH] net/mlx5: fix kfree mismatch in indir_table.c

2021-04-04 Thread Xiaoming Ni
Memory allocated by kvzalloc() should be freed by kvfree().

Fixes: 34ca65352ddf2 ("net/mlx5: E-Switch, Indirect table infrastructur")
Signed-off-by: Xiaoming Ni 
---
 .../net/ethernet/mellanox/mlx5/core/esw/indir_table.c  | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c 
b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
index 6f6772bf61a2..3da7becc1069 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c
@@ -248,7 +248,7 @@ static int mlx5_esw_indir_table_rule_get(struct 
mlx5_eswitch *esw,
 err_ethertype:
kfree(rule);
 out:
-   kfree(rule_spec);
+   kvfree(rule_spec);
return err;
 }
 
@@ -328,7 +328,7 @@ static int mlx5_create_indir_recirc_group(struct 
mlx5_eswitch *esw,
e->recirc_cnt = 0;
 
 out:
-   kfree(in);
+   kvfree(in);
return err;
 }
 
@@ -347,7 +347,7 @@ static int mlx5_create_indir_fwd_group(struct mlx5_eswitch 
*esw,
 
spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
if (!spec) {
-   kfree(in);
+   kvfree(in);
return -ENOMEM;
}
 
@@ -371,8 +371,8 @@ static int mlx5_create_indir_fwd_group(struct mlx5_eswitch 
*esw,
}
 
 err_out:
-   kfree(spec);
-   kfree(in);
+   kvfree(spec);
+   kvfree(in);
return err;
 }
 
-- 
2.27.0



[PATCH resend 3/4] nfc: fix memory leak in llcp_sock_connect()

2021-03-24 Thread Xiaoming Ni
In llcp_sock_connect(), use kmemdup to allocate memory for
 "llcp_sock->service_name". The memory is not released in the sock_unlink
label of the subsequent failure branch.
As a result, memory leakage occurs.

fix CVE-2020-25672

Fixes: d646960f7986 ("NFC: Initial LLCP support")
Reported-by: "kiyin(尹亮)" 
Link: https://www.openwall.com/lists/oss-security/2020/11/01/1
Cc:  #v3.3
Signed-off-by: Xiaoming Ni 
---
 net/nfc/llcp_sock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 9e2799ee1595..59172614b249 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -746,6 +746,8 @@ static int llcp_sock_connect(struct socket *sock, struct 
sockaddr *_addr,
 
 sock_unlink:
nfc_llcp_sock_unlink(>connecting_sockets, sk);
+   kfree(llcp_sock->service_name);
+   llcp_sock->service_name = NULL;
 
 sock_llcp_release:
nfc_llcp_put_ssap(local, llcp_sock->ssap);
-- 
2.27.0



[PATCH resend 0/4] nfc: fix Resource leakage and endless loop

2021-03-24 Thread Xiaoming Ni
fix Resource leakage and endless loop in net/nfc/llcp_sock.c,
 reported by "kiyin(尹亮)".

Link: https://www.openwall.com/lists/oss-security/2020/11/01/1

Xiaoming Ni (4):
  nfc: fix refcount leak in llcp_sock_bind()
  nfc: fix refcount leak in llcp_sock_connect()
  nfc: fix memory leak in llcp_sock_connect()
  nfc: Avoid endless loops caused by repeated llcp_sock_connect()

 net/nfc/llcp_sock.c | 10 ++
 1 file changed, 10 insertions(+)

-- 
2.27.0



[PATCH resend 1/4] nfc: fix refcount leak in llcp_sock_bind()

2021-03-24 Thread Xiaoming Ni
nfc_llcp_local_get() is invoked in llcp_sock_bind(),
but nfc_llcp_local_put() is not invoked in subsequent failure branches.
As a result, refcount leakage occurs.
To fix it, add calling nfc_llcp_local_put().

fix CVE-2020-25670
Fixes: c7aa12252f51 ("NFC: Take a reference on the LLCP local pointer when
 creating a socket")
Reported-by: "kiyin(尹亮)" 
Link: https://www.openwall.com/lists/oss-security/2020/11/01/1
Cc:  #v3.6
Signed-off-by: Xiaoming Ni 
---
 net/nfc/llcp_sock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index d257ed3b732a..68832ee4b9f8 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -108,11 +108,13 @@ static int llcp_sock_bind(struct socket *sock, struct 
sockaddr *addr, int alen)
  llcp_sock->service_name_len,
  GFP_KERNEL);
if (!llcp_sock->service_name) {
+   nfc_llcp_local_put(llcp_sock->local);
ret = -ENOMEM;
goto put_dev;
}
llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
+   nfc_llcp_local_put(llcp_sock->local);
kfree(llcp_sock->service_name);
llcp_sock->service_name = NULL;
ret = -EADDRINUSE;
-- 
2.27.0



[PATCH resend 2/4] nfc: fix refcount leak in llcp_sock_connect()

2021-03-24 Thread Xiaoming Ni
nfc_llcp_local_get() is invoked in llcp_sock_connect(),
but nfc_llcp_local_put() is not invoked in subsequent failure branches.
As a result, refcount leakage occurs.
To fix it, add calling nfc_llcp_local_put().

fix CVE-2020-25671
Fixes: c7aa12252f51 ("NFC: Take a reference on the LLCP local pointer when
creating a socket")
Reported-by: "kiyin(尹亮)" 
Link: https://www.openwall.com/lists/oss-security/2020/11/01/1
Cc:  #v3.6
Signed-off-by: Xiaoming Ni 
---
 net/nfc/llcp_sock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 68832ee4b9f8..9e2799ee1595 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -704,6 +704,7 @@ static int llcp_sock_connect(struct socket *sock, struct 
sockaddr *_addr,
llcp_sock->local = nfc_llcp_local_get(local);
llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
+   nfc_llcp_local_put(llcp_sock->local);
ret = -ENOMEM;
goto put_dev;
}
@@ -748,6 +749,7 @@ static int llcp_sock_connect(struct socket *sock, struct 
sockaddr *_addr,
 
 sock_llcp_release:
nfc_llcp_put_ssap(local, llcp_sock->ssap);
+   nfc_llcp_local_put(llcp_sock->local);
 
 put_dev:
nfc_put_device(dev);
-- 
2.27.0



[PATCH resend 4/4] nfc: Avoid endless loops caused by repeated llcp_sock_connect()

2021-03-24 Thread Xiaoming Ni
When sock_wait_state() returns -EINPROGRESS, "sk->sk_state" is
 LLCP_CONNECTING. In this case, llcp_sock_connect() is repeatedly invoked,
 nfc_llcp_sock_link() will add sk to local->connecting_sockets twice.
 sk->sk_node->next will point to itself, that will make an endless loop
 and hang-up the system.
To fix it, check whether sk->sk_state is LLCP_CONNECTING in
 llcp_sock_connect() to avoid repeated invoking.

Fixes: b4011239a08e ("NFC: llcp: Fix non blocking sockets connections")
Reported-by: "kiyin(尹亮)" 
Link: https://www.openwall.com/lists/oss-security/2020/11/01/1
Cc:  #v3.11
Signed-off-by: Xiaoming Ni 
---
 net/nfc/llcp_sock.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 59172614b249..a3b46f03 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -673,6 +673,10 @@ static int llcp_sock_connect(struct socket *sock, struct 
sockaddr *_addr,
ret = -EISCONN;
goto error;
}
+   if (sk->sk_state == LLCP_CONNECTING) {
+   ret = -EINPROGRESS;
+   goto error;
+   }
 
dev = nfc_get_device(addr->dev_idx);
if (dev == NULL) {
-- 
2.27.0



Re: [PATCH 4/4] nfc: Avoid endless loops caused by repeated llcp_sock_connect()(Internet mail)

2021-03-04 Thread Xiaoming Ni

On 2021/3/3 17:28, kiyin(尹亮) wrote:

Hi xiaoming,
   the path can only fix the endless loop problem. it can't fix the meaningless 
llcp_sock->service_name problem.
   if we set llcp_sock->service_name to meaningless string, the connect will be failed. and 
sk->sk_state will not be LLCP_CONNECTED. then we can call llcp_sock_connect() many times. that 
leaks everything: llcp_sock->dev, llcp_sock->local, llcp_sock->ssap, 
llcp_sock->service_name...


I didn't find the code to modify sk->sk_state after a connect failure. 
Can you provide guidance?


Based on my understanding of the current code:
After llcp_sock_connect() is invoked using the meaningless service_name 
as the parameter, sk->sk_state is set to LLCP_CONNECTING. After that, no 
corresponding service responds to the request because the service_name 
is meaningless, the value of sk->sk_state remains unchanged.
Therefore, when llcp_sock_connect() is invoked again, resources such as 
llcp_sock->service_name are not repeatedly applied because sk_state is 
set to LLCP_CONNECTING.


In this way, the repeated invoking of llcp_sock_connect() does not 
repeatedly leak resources.


Thanks
Xiaoming Ni





-Original Message-
From: Xiaoming Ni [mailto:nixiaom...@huawei.com]
Sent: Wednesday, March 3, 2021 2:17 PM
To: linux-kernel@vger.kernel.org; kiyin(尹亮) ;
sta...@vger.kernel.org; gre...@linuxfoundation.org; sa...@linux.intel.com;
linvi...@tuxdriver.com; da...@davemloft.net; k...@kernel.org;
m...@pengutronix.de; ste...@datenfreihafen.org;
matthieu.bae...@tessares.net; net...@vger.kernel.org
Cc: nixiaom...@huawei.com; wang...@huawei.com; xiaoqi...@huawei.com
Subject: [PATCH 4/4] nfc: Avoid endless loops caused by repeated
llcp_sock_connect()(Internet mail)

When sock_wait_state() returns -EINPROGRESS, "sk->sk_state" is
LLCP_CONNECTING. In this case, llcp_sock_connect() is repeatedly invoked,
  nfc_llcp_sock_link() will add sk to local->connecting_sockets twice.
  sk->sk_node->next will point to itself, that will make an endless loop  and
hang-up the system.
To fix it, check whether sk->sk_state is LLCP_CONNECTING in
  llcp_sock_connect() to avoid repeated invoking.

fix CVE-2020-25673
Fixes: b4011239a08e ("NFC: llcp: Fix non blocking sockets connections")
Reported-by: "kiyin(尹亮)" 
Link: https://www.openwall.com/lists/oss-security/2020/11/01/1
Cc:  #v3.11
Signed-off-by: Xiaoming Ni 
---
  net/nfc/llcp_sock.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index
59172614b249..a3b46f03 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -673,6 +673,10 @@ static int llcp_sock_connect(struct socket *sock,
struct sockaddr *_addr,
ret = -EISCONN;
goto error;
}
+   if (sk->sk_state == LLCP_CONNECTING) {
+   ret = -EINPROGRESS;
+   goto error;
+   }

dev = nfc_get_device(addr->dev_idx);
if (dev == NULL) {
--
2.27.0







[PATCH 1/4] nfc: fix refcount leak in llcp_sock_bind()

2021-03-03 Thread Xiaoming Ni
nfc_llcp_local_get() is invoked in llcp_sock_bind(),
but nfc_llcp_local_put() is not invoked in subsequent failure branches.
As a result, refcount leakage occurs.
To fix it, add calling nfc_llcp_local_put().

fix CVE-2020-25670
Fixes: c7aa12252f51 ("NFC: Take a reference on the LLCP local pointer when
 creating a socket")
Reported-by: "kiyin(尹亮)" 
Link: https://www.openwall.com/lists/oss-security/2020/11/01/1
Cc:  #v3.6
Signed-off-by: Xiaoming Ni 
---
 net/nfc/llcp_sock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index d257ed3b732a..68832ee4b9f8 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -108,11 +108,13 @@ static int llcp_sock_bind(struct socket *sock, struct 
sockaddr *addr, int alen)
  llcp_sock->service_name_len,
  GFP_KERNEL);
if (!llcp_sock->service_name) {
+   nfc_llcp_local_put(llcp_sock->local);
ret = -ENOMEM;
goto put_dev;
}
llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
+   nfc_llcp_local_put(llcp_sock->local);
kfree(llcp_sock->service_name);
llcp_sock->service_name = NULL;
ret = -EADDRINUSE;
-- 
2.27.0



[PATCH 3/4] nfc: fix memory leak in llcp_sock_connect()

2021-03-03 Thread Xiaoming Ni
In llcp_sock_connect(), use kmemdup to allocate memory for
 "llcp_sock->service_name". The memory is not released in the sock_unlink
label of the subsequent failure branch.
As a result, memory leakage occurs.

fix CVE-2020-25672

Fixes: d646960f7986 ("NFC: Initial LLCP support")
Reported-by: "kiyin(尹亮)" 
Link: https://www.openwall.com/lists/oss-security/2020/11/01/1
Cc:  #v3.3
Signed-off-by: Xiaoming Ni 
---
 net/nfc/llcp_sock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 9e2799ee1595..59172614b249 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -746,6 +746,8 @@ static int llcp_sock_connect(struct socket *sock, struct 
sockaddr *_addr,
 
 sock_unlink:
nfc_llcp_sock_unlink(>connecting_sockets, sk);
+   kfree(llcp_sock->service_name);
+   llcp_sock->service_name = NULL;
 
 sock_llcp_release:
nfc_llcp_put_ssap(local, llcp_sock->ssap);
-- 
2.27.0



[PATCH 0/4] nfc: fix Resource leakage and endless loop

2021-03-03 Thread Xiaoming Ni
fix Resource leakage and endless loop in net/nfc/llcp_sock.c,
 reported by "kiyin(尹亮)".

Link: https://www.openwall.com/lists/oss-security/2020/11/01/1



Xiaoming Ni (4):
  nfc: fix refcount leak in llcp_sock_bind()
  nfc: fix refcount leak in llcp_sock_connect()
  nfc: fix memory leak in llcp_sock_connect()
  nfc: Avoid endless loops caused by repeated llcp_sock_connect()

 net/nfc/llcp_sock.c | 10 ++
 1 file changed, 10 insertions(+)

-- 
2.27.0



[PATCH 2/4] nfc: fix refcount leak in llcp_sock_connect()

2021-03-03 Thread Xiaoming Ni
nfc_llcp_local_get() is invoked in llcp_sock_connect(),
but nfc_llcp_local_put() is not invoked in subsequent failure branches.
As a result, refcount leakage occurs.
To fix it, add calling nfc_llcp_local_put().

fix CVE-2020-25671
Fixes: c7aa12252f51 ("NFC: Take a reference on the LLCP local pointer when
creating a socket")
Reported-by: "kiyin(尹亮)" 
Link: https://www.openwall.com/lists/oss-security/2020/11/01/1
Cc:  #v3.6
Signed-off-by: Xiaoming Ni 
---
 net/nfc/llcp_sock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 68832ee4b9f8..9e2799ee1595 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -704,6 +704,7 @@ static int llcp_sock_connect(struct socket *sock, struct 
sockaddr *_addr,
llcp_sock->local = nfc_llcp_local_get(local);
llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
+   nfc_llcp_local_put(llcp_sock->local);
ret = -ENOMEM;
goto put_dev;
}
@@ -748,6 +749,7 @@ static int llcp_sock_connect(struct socket *sock, struct 
sockaddr *_addr,
 
 sock_llcp_release:
nfc_llcp_put_ssap(local, llcp_sock->ssap);
+   nfc_llcp_local_put(llcp_sock->local);
 
 put_dev:
nfc_put_device(dev);
-- 
2.27.0



[PATCH 4/4] nfc: Avoid endless loops caused by repeated llcp_sock_connect()

2021-03-03 Thread Xiaoming Ni
When sock_wait_state() returns -EINPROGRESS, "sk->sk_state" is
 LLCP_CONNECTING. In this case, llcp_sock_connect() is repeatedly invoked,
 nfc_llcp_sock_link() will add sk to local->connecting_sockets twice.
 sk->sk_node->next will point to itself, that will make an endless loop
 and hang-up the system.
To fix it, check whether sk->sk_state is LLCP_CONNECTING in
 llcp_sock_connect() to avoid repeated invoking.

fix CVE-2020-25673
Fixes: b4011239a08e ("NFC: llcp: Fix non blocking sockets connections")
Reported-by: "kiyin(尹亮)" 
Link: https://www.openwall.com/lists/oss-security/2020/11/01/1
Cc:  #v3.11
Signed-off-by: Xiaoming Ni 
---
 net/nfc/llcp_sock.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 59172614b249..a3b46f03 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -673,6 +673,10 @@ static int llcp_sock_connect(struct socket *sock, struct 
sockaddr *_addr,
ret = -EISCONN;
goto error;
}
+   if (sk->sk_state == LLCP_CONNECTING) {
+   ret = -EINPROGRESS;
+   goto error;
+   }
 
dev = nfc_get_device(addr->dev_idx);
if (dev == NULL) {
-- 
2.27.0



Re: [PATCH] futex: fix dead code in attach_to_pi_owner()

2021-02-25 Thread Xiaoming Ni

On 2021/2/25 16:25, Greg KH wrote:

On Mon, Feb 22, 2021 at 08:53:52PM +0800, Xiaoming Ni wrote:

From: Thomas Gleixner 

The handle_exit_race() function is defined in commit c158b461306df82
  ("futex: Cure exit race"), which never returns -EBUSY. This results
in a small piece of dead code in the attach_to_pi_owner() function:

int ret = handle_exit_race(uaddr, uval, p); /* Never return -EBUSY */
...
if (ret == -EBUSY)
*exiting = p; /* dead code */

The return value -EBUSY is added to handle_exit_race() in upsteam
commit ac31c7ff8624409 ("futex: Provide distinct return value when
owner is exiting"). This commit was incorporated into v4.9.255, before
the function handle_exit_race() was introduced, whitout Modify
handle_exit_race().

To fix dead code, extract the change of handle_exit_race() from
commit ac31c7ff8624409 ("futex: Provide distinct return value when owner
  is exiting"), re-incorporated.

mainline:
ac31c7ff8624 futex: Provide distinct return value when owner is exiting



Fixes: c158b461306df82 ("futex: Cure exit race")


stable linux-4.9.y
9c3f39860367 futex: Cure exit race
c27f392040e2 futex: Provide distinct return value when owner is exiting


Cc: sta...@vger.kernel.org # 4.9.258-rc1
Signed-off-by: Xiaoming Ni 
---
  kernel/futex.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)


What is the git commit id of this patch in Linus's tree?

Also, what kernel tree(s) is this supposed to go to?

thanks,

greg k-h
.

Sorry, the commit id c158b461306df82 in the patch does not exist in the 
linux-stable repository.

The commit ID is from linux-stable-rc.

I corrected the commit id in a subsequent email, and added a branch 
label. 
https://lore.kernel.org/lkml/20210224100923.51315-1-nixiaom...@huawei.com/


Sorry, I forgot to use "--in-reply-to=" when I sent the update patch.

This issue occurs only in the linux-4.9.y branch v4.9.258

Thanks
xiaoming Ni






Re: [PATCH stable-rc queue/4.9 1/1] futex: Provide distinct return value when owner is exiting

2021-02-24 Thread Xiaoming Ni

On 2021/2/24 15:47, Greg KH wrote:

On Wed, Feb 24, 2021 at 09:41:01AM +0800, Xiaoming Ni wrote:

On 2021/2/23 21:00, Greg KH wrote:

On Mon, Feb 22, 2021 at 10:11:37PM +0800, Xiaoming Ni wrote:

On 2021/2/22 20:09, Greg KH wrote:

On Mon, Feb 22, 2021 at 06:54:06PM +0800, Xiaoming Ni wrote:

On 2021/2/22 18:16, Greg KH wrote:

On Mon, Feb 22, 2021 at 03:03:28PM +0800, Xiaoming Ni wrote:

From: Thomas Gleixner

commit ac31c7ff8624409ba3c4901df9237a616c187a5d upstream.

This commit is already in the 4.9 tree.  If the backport was incorrect,
say that here, and describe what went wrong and why this commit fixes
it.

Also state what commit this fixes as well, otherwise this changelog just
looks like it is being applied again to the tree, which doesn't make
much sense.

thanks,

greg k-h
.


I wrote a cover for it. but forgot to adjust the title of the cover:

https://lore.kernel.org/lkml/20210222070328.102384-1-nixiaom...@huawei.com/


I found a dead code in the queue/4.9 branch of the stable-rc repository.

2021-02-03:
commit c27f392040e2f6 ("futex: Provide distinct return value when
owner is exiting")
The function handle_exit_race does not exist. Therefore, the
change in handle_exit_race() is ignored in the patch round.

2021-02-22:
commit e55cb811e612 ("futex: Cure exit race")
Define the handle_exit_race() function,
but no branch in the function returns EBUSY.
As a result, dead code occurs in the attach_to_pi_owner():

int ret = handle_exit_race(uaddr, uval, p);
...
if (ret == -EBUSY)
*exiting = p; /* dead code */

To fix the dead code, modify the commit e55cb811e612 ("futex: Cure exit
race"),
or install a patch to incorporate the changes in handle_exit_race().

I am unfamiliar with the processing of the stable-rc queue branch,
and I cannot find the patch mail of the current branch in
https://lore.kernel.org/lkml/?q=%22futex%3A+Cure+exit+race%22
Therefore, I re-integrated commit ac31c7ff8624 ("futex: Provide distinct
return value when owner is exiting").
And wrote a cover (but forgot to adjust the title of the cover):

https://lore.kernel.org/lkml/20210222070328.102384-1-nixiaom...@huawei.com/


So this is a "fixup" patch, right?

Please clearly label it as such in your patch description and resend
this as what is here I can not apply at all.

thanks,

greg k-h
.


Thank you for your guidance.
I have updated the patch description and resent the patch based on
v4.9.258-rc1
https://lore.kernel.org/lkml/20210222125352.110124-1-nixiaom...@huawei.com/


Can you please try 4.9.258 and let me know if this is still needed or
not?

thanks,

greg k-h
.


The dead code problem still exists in V4.9.258. No conflict occurs during my
patch integration. Do I need to correct the version number marked in the cc
table in the patch and resend the patch?


Please do.

thanks,

greg k-h
.


I have resend the patch based on v4.9.258.
link:
https://lore.kernel.org/lkml/20210224100923.51315-1-nixiaom...@huawei.com/

Thanks

Xiaoming Ni




[PATCH 4.9.258] futex: fix dead code in attach_to_pi_owner()

2021-02-24 Thread Xiaoming Ni
The handle_exit_race() function is defined in commit 9c3f39860367
 ("futex: Cure exit race"), which never returns -EBUSY. This results
in a small piece of dead code in the attach_to_pi_owner() function:

int ret = handle_exit_race(uaddr, uval, p); /* Never return -EBUSY */
...
if (ret == -EBUSY)
*exiting = p; /* dead code */

The return value -EBUSY is added to handle_exit_race() in upsteam
commit ac31c7ff8624409 ("futex: Provide distinct return value when
owner is exiting"). This commit was incorporated into v4.9.255, before
the function handle_exit_race() was introduced, whitout Modify
handle_exit_race().

To fix dead code, extract the change of handle_exit_race() from
commit ac31c7ff8624409 ("futex: Provide distinct return value when owner
 is exiting"), re-incorporated.

Fixes: 9c3f39860367 ("futex: Cure exit race")
Cc: sta...@vger.kernel.org # v4.9.258
Signed-off-by: Xiaoming Ni 
---
 kernel/futex.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index b65dbb5d60bb..0fd785410150 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1207,11 +1207,11 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
u32 uval2;
 
/*
-* If the futex exit state is not yet FUTEX_STATE_DEAD, wait
-* for it to finish.
+* If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
+* caller that the alleged owner is busy.
 */
if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
-   return -EAGAIN;
+   return -EBUSY;
 
/*
 * Reread the user space value to handle the following situation:
-- 
2.27.0



Re: [PATCH stable-rc queue/4.9 1/1] futex: Provide distinct return value when owner is exiting

2021-02-23 Thread Xiaoming Ni

On 2021/2/23 21:00, Greg KH wrote:

On Mon, Feb 22, 2021 at 10:11:37PM +0800, Xiaoming Ni wrote:

On 2021/2/22 20:09, Greg KH wrote:

On Mon, Feb 22, 2021 at 06:54:06PM +0800, Xiaoming Ni wrote:

On 2021/2/22 18:16, Greg KH wrote:

On Mon, Feb 22, 2021 at 03:03:28PM +0800, Xiaoming Ni wrote:

From: Thomas Gleixner

commit ac31c7ff8624409ba3c4901df9237a616c187a5d upstream.

This commit is already in the 4.9 tree.  If the backport was incorrect,
say that here, and describe what went wrong and why this commit fixes
it.

Also state what commit this fixes as well, otherwise this changelog just
looks like it is being applied again to the tree, which doesn't make
much sense.

thanks,

greg k-h
.


I wrote a cover for it. but forgot to adjust the title of the cover:

https://lore.kernel.org/lkml/20210222070328.102384-1-nixiaom...@huawei.com/


I found a dead code in the queue/4.9 branch of the stable-rc repository.

2021-02-03:
commit c27f392040e2f6 ("futex: Provide distinct return value when
   owner is exiting")
The function handle_exit_race does not exist. Therefore, the
change in handle_exit_race() is ignored in the patch round.

2021-02-22:
commit e55cb811e612 ("futex: Cure exit race")
Define the handle_exit_race() function,
but no branch in the function returns EBUSY.
As a result, dead code occurs in the attach_to_pi_owner():

int ret = handle_exit_race(uaddr, uval, p);
...
if (ret == -EBUSY)
*exiting = p; /* dead code */

To fix the dead code, modify the commit e55cb811e612 ("futex: Cure exit
race"),
or install a patch to incorporate the changes in handle_exit_race().

I am unfamiliar with the processing of the stable-rc queue branch,
and I cannot find the patch mail of the current branch in
https://lore.kernel.org/lkml/?q=%22futex%3A+Cure+exit+race%22
Therefore, I re-integrated commit ac31c7ff8624 ("futex: Provide distinct
   return value when owner is exiting").
   And wrote a cover (but forgot to adjust the title of the cover):

https://lore.kernel.org/lkml/20210222070328.102384-1-nixiaom...@huawei.com/


So this is a "fixup" patch, right?

Please clearly label it as such in your patch description and resend
this as what is here I can not apply at all.

thanks,

greg k-h
.


Thank you for your guidance.
I have updated the patch description and resent the patch based on
v4.9.258-rc1
https://lore.kernel.org/lkml/20210222125352.110124-1-nixiaom...@huawei.com/


Can you please try 4.9.258 and let me know if this is still needed or
not?

thanks,

greg k-h
.

The dead code problem still exists in V4.9.258. No conflict occurs 
during my patch integration. Do I need to correct the version number 
marked in the cc table in the patch and resend the patch?


Thanks
Xiaoming Ni


Re: [PATCH stable-rc queue/4.9 1/1] futex: Provide distinct return value when owner is exiting

2021-02-22 Thread Xiaoming Ni

On 2021/2/22 20:09, Greg KH wrote:

On Mon, Feb 22, 2021 at 06:54:06PM +0800, Xiaoming Ni wrote:

On 2021/2/22 18:16, Greg KH wrote:

On Mon, Feb 22, 2021 at 03:03:28PM +0800, Xiaoming Ni wrote:

From: Thomas Gleixner

commit ac31c7ff8624409ba3c4901df9237a616c187a5d upstream.

This commit is already in the 4.9 tree.  If the backport was incorrect,
say that here, and describe what went wrong and why this commit fixes
it.

Also state what commit this fixes as well, otherwise this changelog just
looks like it is being applied again to the tree, which doesn't make
much sense.

thanks,

greg k-h
.


I wrote a cover for it. but forgot to adjust the title of the cover:

https://lore.kernel.org/lkml/20210222070328.102384-1-nixiaom...@huawei.com/


I found a dead code in the queue/4.9 branch of the stable-rc repository.

2021-02-03:
commit c27f392040e2f6 ("futex: Provide distinct return value when
  owner is exiting")
The function handle_exit_race does not exist. Therefore, the
change in handle_exit_race() is ignored in the patch round.

2021-02-22:
commit e55cb811e612 ("futex: Cure exit race")
Define the handle_exit_race() function,
but no branch in the function returns EBUSY.
As a result, dead code occurs in the attach_to_pi_owner():

int ret = handle_exit_race(uaddr, uval, p);
...
if (ret == -EBUSY)
*exiting = p; /* dead code */

To fix the dead code, modify the commit e55cb811e612 ("futex: Cure exit
race"),
or install a patch to incorporate the changes in handle_exit_race().

I am unfamiliar with the processing of the stable-rc queue branch,
and I cannot find the patch mail of the current branch in
https://lore.kernel.org/lkml/?q=%22futex%3A+Cure+exit+race%22
Therefore, I re-integrated commit ac31c7ff8624 ("futex: Provide distinct
  return value when owner is exiting").
  And wrote a cover (but forgot to adjust the title of the cover):

https://lore.kernel.org/lkml/20210222070328.102384-1-nixiaom...@huawei.com/


So this is a "fixup" patch, right?

Please clearly label it as such in your patch description and resend
this as what is here I can not apply at all.

thanks,

greg k-h
.


Thank you for your guidance.
I have updated the patch description and resent the patch based on 
v4.9.258-rc1

https://lore.kernel.org/lkml/20210222125352.110124-1-nixiaom...@huawei.com/

Thanks
Xiaoming Ni




[PATCH] futex: fix dead code in attach_to_pi_owner()

2021-02-22 Thread Xiaoming Ni
From: Thomas Gleixner 

The handle_exit_race() function is defined in commit c158b461306df82
 ("futex: Cure exit race"), which never returns -EBUSY. This results
in a small piece of dead code in the attach_to_pi_owner() function:

int ret = handle_exit_race(uaddr, uval, p); /* Never return -EBUSY */
...
if (ret == -EBUSY)
*exiting = p; /* dead code */

The return value -EBUSY is added to handle_exit_race() in upsteam
commit ac31c7ff8624409 ("futex: Provide distinct return value when
owner is exiting"). This commit was incorporated into v4.9.255, before
the function handle_exit_race() was introduced, whitout Modify
handle_exit_race().

To fix dead code, extract the change of handle_exit_race() from
commit ac31c7ff8624409 ("futex: Provide distinct return value when owner
 is exiting"), re-incorporated.

Fixes: c158b461306df82 ("futex: Cure exit race")
Cc: sta...@vger.kernel.org # 4.9.258-rc1
Signed-off-by: Xiaoming Ni 
---
 kernel/futex.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index b65dbb5d60bb..0fd785410150 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1207,11 +1207,11 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
u32 uval2;
 
/*
-* If the futex exit state is not yet FUTEX_STATE_DEAD, wait
-* for it to finish.
+* If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
+* caller that the alleged owner is busy.
 */
if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
-   return -EAGAIN;
+   return -EBUSY;
 
/*
 * Reread the user space value to handle the following situation:
-- 
2.27.0



Re: [PATCH stable-rc queue/4.9 1/1] futex: Provide distinct return value when owner is exiting

2021-02-22 Thread Xiaoming Ni

On 2021/2/22 18:16, Greg KH wrote:

On Mon, Feb 22, 2021 at 03:03:28PM +0800, Xiaoming Ni wrote:

From: Thomas Gleixner

commit ac31c7ff8624409ba3c4901df9237a616c187a5d upstream.

This commit is already in the 4.9 tree.  If the backport was incorrect,
say that here, and describe what went wrong and why this commit fixes
it.

Also state what commit this fixes as well, otherwise this changelog just
looks like it is being applied again to the tree, which doesn't make
much sense.

thanks,

greg k-h
.


I wrote a cover for it. but forgot to adjust the title of the cover:

https://lore.kernel.org/lkml/20210222070328.102384-1-nixiaom...@huawei.com/


I found a dead code in the queue/4.9 branch of the stable-rc repository.

2021-02-03:
commit c27f392040e2f6 ("futex: Provide distinct return value when
 owner is exiting")
The function handle_exit_race does not exist. Therefore, the
change in handle_exit_race() is ignored in the patch round.

2021-02-22:
commit e55cb811e612 ("futex: Cure exit race")
Define the handle_exit_race() function,
but no branch in the function returns EBUSY.
As a result, dead code occurs in the attach_to_pi_owner():

int ret = handle_exit_race(uaddr, uval, p);
...
if (ret == -EBUSY)
*exiting = p; /* dead code */

To fix the dead code, modify the commit e55cb811e612 ("futex: Cure exit 
race"),

or install a patch to incorporate the changes in handle_exit_race().

I am unfamiliar with the processing of the stable-rc queue branch,
and I cannot find the patch mail of the current branch in
https://lore.kernel.org/lkml/?q=%22futex%3A+Cure+exit+race%22
Therefore, I re-integrated commit ac31c7ff8624 ("futex: Provide distinct
 return value when owner is exiting").
 And wrote a cover (but forgot to adjust the title of the cover):

https://lore.kernel.org/lkml/20210222070328.102384-1-nixiaom...@huawei.com/

Thanks
Xiaoming Ni




[PATCH stable-rc queue/4.9 0/1] repatch

2021-02-21 Thread Xiaoming Ni
I found a dead code in the queue/4.9 branch of the stable-rc repository.

2021-02-03:
commit c27f392040e2f6 ("futex: Provide distinct return value when
 owner is exiting")
The function handle_exit_race does not exist. Therefore, the
change in handle_exit_race() is ignored in the patch round.

2021-02-22:
commit e55cb811e612 ("futex: Cure exit race")
Define the handle_exit_race() function,
but no branch in the function returns EBUSY.
As a result, dead code occurs in the attach_to_pi_owner():

int ret = handle_exit_race(uaddr, uval, p);
...
if (ret == -EBUSY)
*exiting = p; /* dead code */

To fix the dead code, modify the commit e55cb811e612 ("futex: Cure exit race"), 
or install a patch to incorporate the changes in handle_exit_race().

I am unfamiliar with the processing of the stable-rc queue branch,
and I cannot find the patch mail of the current branch in
https://lore.kernel.org/lkml/?q=%22futex%3A+Cure+exit+race%22
Therefore, I re-integrated commit ac31c7ff8624 ("futex: Provide distinct
 return value when owner is exiting").

-

Thomas Gleixner (1):
  futex: Provide distinct return value when owner is exiting

 kernel/futex.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

-- 
2.27.0



[PATCH stable-rc queue/4.9 1/1] futex: Provide distinct return value when owner is exiting

2021-02-21 Thread Xiaoming Ni
From: Thomas Gleixner 

commit ac31c7ff8624409ba3c4901df9237a616c187a5d upstream.

attach_to_pi_owner() returns -EAGAIN for various cases:

 - Owner task is exiting
 - Futex value has changed

The caller drops the held locks (hash bucket, mmap_sem) and retries the
operation. In case of the owner task exiting this can result in a live
lock.

As a preparatory step for seperating those cases, provide a distinct return
value (EBUSY) for the owner exiting case.

No functional change.

Signed-off-by: Thomas Gleixner 
Reviewed-by: Ingo Molnar 
Acked-by: Peter Zijlstra (Intel) 
Link: https://lkml.kernel.org/r/20191106224556.935606...@linutronix.de

[nixiaoming: Modify handle_exit_race() to avoid dead code.]
Cc: sta...@vger.kernel.org # queue/4.9
Signed-off-by: Xiaoming Ni 
---
 kernel/futex.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index b65dbb5d60bb..0fd785410150 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1207,11 +1207,11 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval,
u32 uval2;
 
/*
-* If the futex exit state is not yet FUTEX_STATE_DEAD, wait
-* for it to finish.
+* If the futex exit state is not yet FUTEX_STATE_DEAD, tell the
+* caller that the alleged owner is busy.
 */
if (tsk && tsk->futex_state != FUTEX_STATE_DEAD)
-   return -EAGAIN;
+   return -EBUSY;
 
/*
 * Reread the user space value to handle the following situation:
-- 
2.27.0



[PATCH v4] proc_sysctl: fix oops caused by incorrect command parameters.

2021-01-18 Thread Xiaoming Ni
The process_sysctl_arg() does not check whether val is empty before
 invoking strlen(val). If the command line parameter () is incorrectly
 configured and val is empty, oops is triggered.

For example:
  "hung_task_panic=1" is incorrectly written as "hung_task_panic", oops is
  triggered. The call stack is as follows:
Kernel command line:  hung_task_panic
..
Call trace:
__pi_strlen+0x10/0x98
parse_args+0x278/0x344
do_sysctl_args+0x8c/0xfc
kernel_init+0x5c/0xf4
ret_from_fork+0x10/0x30

To fix it, check whether "val" is empty when "phram" is a sysctl field.
Error codes are returned in the failure branch, and error logs are
generated by parse_args().

Fixes: 3db978d480e2843 ("kernel/sysctl: support setting sysctl parameters
 from kernel command line")
Cc: sta...@kernel.org # v5.8-rc1+
Signed-off-by: Xiaoming Ni 

-
v4: According to Vlastimil Babka's recommendations
  add check len == 0, and cc stable
v3: https://lore.kernel.org/lkml/20210112033155.91502-1-nixiaom...@huawei.com/
  Return -EINVAL, When phram is the sysctl field and val is empty.

v2: https://lore.kernel.org/lkml/20210108023339.55917-1-nixiaom...@huawei.com/
  Added log output of the failure branch based on the review comments of Kees 
Cook.

v1: https://lore.kernel.org/lkml/20201224074256.117413-1-nixiaom...@huawei.com/

-
---
 fs/proc/proc_sysctl.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 317899222d7f..d2018f70d1fa 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1770,6 +1770,12 @@ static int process_sysctl_arg(char *param, char *val,
return 0;
}
 
+   if (!val)
+   return -EINVAL;
+   len = strlen(val);
+   if (len == 0)
+   return -EINVAL;
+
/*
 * To set sysctl options, we use a temporary mount of proc, look up the
 * respective sys/ file and write to it. To avoid mounting it when no
@@ -1811,7 +1817,6 @@ static int process_sysctl_arg(char *param, char *val,
file, param, val);
goto out;
}
-   len = strlen(val);
wret = kernel_write(file, val, len, );
if (wret < 0) {
err = wret;
-- 
2.27.0



Re: [PATCH v3] proc_sysctl: fix oops caused by incorrect command parameters.

2021-01-16 Thread Xiaoming Ni

On 2021/1/12 19:42, Vlastimil Babka wrote:

On 1/12/21 8:24 AM, Michal Hocko wrote:


If we're going to do a separate "patch: make process_sysctl_arg()
return an errno instead of 0" then fine, we can discuss that.  But it's
conceptually a different work from fixing this situation.
.


However, are the logs generated by process_sysctl_arg() clearer and more
accurate than parse_args()? Should the logs generated by
process_sysctl_arg() be deleted?


I think the individual logs are very useful and should be retained.


Yes, other sysfs specific error messages are likely useful. I just fail
to see why a missing value should be handled here when there is an
existing handling in the caller. Not sure whether a complete shadow
reporting in process_sysctl_arg is a deliberate decision or not.
Vlastimil?


Yes, it's a way to have more useful sysctl-specific reports than the generic
ones. And I think I was inspired by some other existing code, but don't remember
exactly. The options are:

1) the current sysctl-specific reports, return 0 as the values are only consumed
2) be silent and return error, invent new error codes to have generic report be
more useful for sysctl, but inevitably lose some nuances anyway
3) a mix where 2) is used for situations where generic report is sufficient
enough, 1) where not

Patch v2 went with option 1), v3 with option 3). I think it's down to
preferences. I would personally go with v2 and message similar to the existing
ones, i.e.:

"Failed to set sysctl parameter '%s': no value given\n"

Also we seem to be silently doing nothing when strlen(val) == 0, i.e.
"hung_task_panic=" was passed. Worth reporting the same error.

But v3 is fine with me as well. The generic error message works. We could just
add "if (!len) return -EINVAL" below the strlen() call.

Also please Cc: stable.


Anyway one way or the other, all I care about is to have a reporting in
place because this shouldn't be a silent failure.




The current v2 is already in the linux-next branch and throws a new 
error: 
https://lore.kernel.org/lkml/cb54e349-7147-0a1f-a349-1e16ba603...@infradead.org/


This bug has been mentioned in the previous discussion and has been 
fixed in the current v3 patch. 
https://lore.kernel.org/linux-fsdevel/20210149.20A58E1@keescook/


What am I supposed to do now?
- Resend V3?
- Rewrite a new fix patch based on the current code of linux-next.
- Develop a new V4 patch: Use V2 to discuss how to use the Patch4 
solution. 
https://lore.kernel.org/linux-fsdevel/bc098af4-c0cd-212e-d09d-46d617d0a...@huawei.com/#t


Thanks
Xiaoming Ni






Re: [PATCH v3] proc_sysctl: fix oops caused by incorrect command parameters.

2021-01-11 Thread Xiaoming Ni

On 2021/1/12 12:33, Andrew Morton wrote:

On Tue, 12 Jan 2021 11:31:55 +0800 Xiaoming Ni  wrote:


The process_sysctl_arg() does not check whether val is empty before
  invoking strlen(val). If the command line parameter () is incorrectly
  configured and val is empty, oops is triggered.

--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1770,6 +1770,9 @@ static int process_sysctl_arg(char *param, char *val,
return 0;
}
  
+	if (!val)

+   return -EINVAL;
+


I think v2 (return 0) was preferable.  Because all the other error-out
cases in process_sysctl_arg() also do a `return 0'.


https://lore.kernel.org/lkml/bc098af4-c0cd-212e-d09d-46d617d0a...@huawei.com/

patch4:
+++ b/fs/proc/proc_sysctl.c
@@ -1757,6 +1757,9 @@ static int process_sysctl_arg(char *param, 
char *val,

loff_t pos = 0;
ssize_t wret;

+   if (!val)
+   return 0;
+
if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) {
param += sizeof("sysctl") - 1;

Is this the version you're talking about?



If we're going to do a separate "patch: make process_sysctl_arg()
return an errno instead of 0" then fine, we can discuss that.  But it's
conceptually a different work from fixing this situation.
.

However, are the logs generated by process_sysctl_arg() clearer and more 
accurate than parse_args()? Should the logs generated by 
process_sysctl_arg() be deleted?


Thanks
Xiaoming Ni



[PATCH v3] proc_sysctl: fix oops caused by incorrect command parameters.

2021-01-11 Thread Xiaoming Ni
The process_sysctl_arg() does not check whether val is empty before
 invoking strlen(val). If the command line parameter () is incorrectly
 configured and val is empty, oops is triggered.

For example:
  "hung_task_panic=1" is incorrectly written as "hung_task_panic", oops is
  triggered. The call stack is as follows:
Kernel command line:  hung_task_panic
..
Call trace:
__pi_strlen+0x10/0x98
parse_args+0x278/0x344
do_sysctl_args+0x8c/0xfc
kernel_init+0x5c/0xf4
ret_from_fork+0x10/0x30

To fix it, check whether "val" is empty when "phram" is a sysctl field.
Error codes are returned in the failure branch, and error logs are
generated by parse_args().

Fixes: 3db978d480e2843 ("kernel/sysctl: support setting sysctl parameters
 from kernel command line")
Signed-off-by: Xiaoming Ni 

-
v3:
  Return -EINVAL, When phram is the sysctl field and val is empty.

v2: https://lore.kernel.org/lkml/20210108023339.55917-1-nixiaom...@huawei.com/
  Added log output of the failure branch based on the review comments of Kees 
Cook.

v1: https://lore.kernel.org/lkml/20201224074256.117413-1-nixiaom...@huawei.com/

-
---
 fs/proc/proc_sysctl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 317899222d7f..d493a50058a5 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1770,6 +1770,9 @@ static int process_sysctl_arg(char *param, char *val,
return 0;
}
 
+   if (!val)
+   return -EINVAL;
+
/*
 * To set sysctl options, we use a temporary mount of proc, look up the
 * respective sys/ file and write to it. To avoid mounting it when no
-- 
2.27.0



Re: [PATCH v2] proc_sysctl: fix oops caused by incorrect command parameters.

2021-01-10 Thread Xiaoming Ni

On 2021/1/9 9:50, Andrew Morton wrote:

On Fri, 8 Jan 2021 21:10:25 +0100 Michal Hocko  wrote:


Why would that matter? A missing value is clearly a error path and it
should be reported.


This test is in the correct place. I think it's just a question of the
return values.


I was probably not clear. The test for val is at the right place. I
would just expect -EINVAL and have the generic code to report.


It does seem a bit screwy that process_sysctl_arg() returns zero in all
situations (parse_args() is set up to handle an error return from it).
But this patch is consistent with all the other error handling in
process_sysctl_arg().
.




Set the kernel startup parameter to "nosmp nokaslr hung_task_panic"
and test the startup logs of different patches.

patch1:
+++ b/fs/proc/proc_sysctl.c
	@@ -1757,6 +1757,11 @@ static int process_sysctl_arg(char *param, char 
*val,

loff_t pos = 0;
ssize_t wret;

+   if (!val) {
	+   pr_err("Missing param value! Expected 
'%s=...value...'\n", param);

+   return 0;
+   }
+
if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 
0) {
param += sizeof("sysctl") - 1;

sysctl log for patch1:
Missing param value! Expected 'nosmp=...value...'
Missing param value! Expected 'nokaslr=...value...'
Missing param value! Expected 'hung_task_panic=...value...'

patch2:
+++ b/fs/proc/proc_sysctl.c
@@ -1756,6 +1756,8 @@ static int process_sysctl_arg(char *param, char 
*val,
int err;
loff_t pos = 0;
ssize_t wret;
+   if (!val)
+   return -EINVAL;

if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 
0) {
param += sizeof("sysctl") - 1;

sysctl log for patch2:
Setting sysctl args: `' invalid for parameter `nosmp'
Setting sysctl args: `' invalid for parameter `nokaslr'
Setting sysctl args: `' invalid for parameter `hung_task_panic'

patch3:
+++ b/fs/proc/proc_sysctl.c
@@ -1770,6 +1770,9 @@ static int process_sysctl_arg(char *param, char 
*val,
return 0;
}

+   if (!val)
+   return -EINVAL;
+
/*
 * To set sysctl options, we use a temporary mount of 
proc, look up the
 * respective sys/ file and write to it. To avoid 
mounting it when no

sysctl log for patch3:
Setting sysctl args: `' invalid for parameter `hung_task_panic'

patch4:
+++ b/fs/proc/proc_sysctl.c
@@ -1757,6 +1757,9 @@ static int process_sysctl_arg(char *param, char 
*val,
loff_t pos = 0;
ssize_t wret;

+   if (!val)
+   return 0;
+
if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 
0) {
param += sizeof("sysctl") - 1;

sysctl log for patch3:
no log

When process_sysctl_arg() is called, the param parameter may not be the 
sysctl parameter.


Patch3 or patch4, which is better?

Thanks
Xiaoming Ni



Re: [PATCH v2] proc_sysctl: fix oops caused by incorrect command parameters.

2021-01-10 Thread Xiaoming Ni

On 2021/1/9 17:10, Andy Shevchenko wrote:



On Friday, January 8, 2021, Xiaoming Ni <mailto:nixiaom...@huawei.com>> wrote:


The process_sysctl_arg() does not check whether val is empty before
  invoking strlen(val). If the command line parameter () is incorrectly
  configured and val is empty, oops is triggered.

For example, "hung_task_panic=1" is incorrectly written as
"hung_task_panic".

log:


Can you drop redundant (not significant) lines from the log to avoid 
noisy commit message?



ok,
Thank you for your advice.
I will update it in v3 patch.

Thanks

Xiaoming Ni.



Re: [PATCH v2] proc_sysctl: fix oops caused by incorrect command parameters.

2021-01-08 Thread Xiaoming Ni

On 2021/1/8 17:21, Michal Hocko wrote:

On Fri 08-01-21 10:33:39, Xiaoming Ni wrote:

The process_sysctl_arg() does not check whether val is empty before
  invoking strlen(val). If the command line parameter () is incorrectly
  configured and val is empty, oops is triggered.

For example, "hung_task_panic=1" is incorrectly written as "hung_task_panic".

log:
Kernel command line:  hung_task_panic

[000n] user address but active_mm is swapper
Internal error: Oops: 9605 [#1] SMP
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.1 #1
Hardware name: linux,dummy-virt (DT)
pstate: 4005 (nZcv daif -PAN -UAO -TCO BTYPE=--)
pc : __pi_strlen+0x10/0x98
lr : process_sysctl_arg+0x1e4/0x2ac
sp : ffc01104bd40
x29: ffc01104bd40 x28: 
x27: ff80c0a4691e x26: ffc0102a7c8c
x25:  x24: ffc01104be80
x23: ff80c22f0b00 x22: ff80c02e28c0
x21: ffc0109f9000 x20: 
x19: ffc0107c08de x18: 0003
x17: ffc01105d000 x16: 0054
x15:  x14: 3030253078413830
x13:  x12: 
x11: 0101010101010101 x10: 0005
x9 : 0003 x8 : ff80c0980c08
x7 :  x6 : 0002
x5 : ff80c0235000 x4 : ff810f7c7ee0
x3 : 043a x2 : 00bdcc4ebacf1a54
x1 :  x0 : 
Call trace:
 __pi_strlen+0x10/0x98
 parse_args+0x278/0x344
 do_sysctl_args+0x8c/0xfc
 kernel_init+0x5c/0xf4
 ret_from_fork+0x10/0x30
Code: b200c3eb 927cec01 f2400c07 54000301 (a8c10c22)

Fixes: 3db978d480e2843 ("kernel/sysctl: support setting sysctl parameters
  from kernel command line")
Signed-off-by: Xiaoming Ni 


Thanks for catching this!


-
v2:
Added log output of the failure branch based on the review comments of Kees 
Cook.
v1: https://lore.kernel.org/lkml/20201224074256.117413-1-nixiaom...@huawei.com/
-
---
  fs/proc/proc_sysctl.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 317899222d7f..dc1a56515e86 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1757,6 +1757,11 @@ static int process_sysctl_arg(char *param, char *val,
loff_t pos = 0;
ssize_t wret;
  
+	if (!val) {

+   pr_err("Missing param value! Expected '%s=...value...'\n", 
param);
+   return 0;
I may need to move the validation code for val to the end of the 
validation code for param to prevent non-sysctl arguments from 
triggering the current print.

Or delete the print and keep it silent for a little better performance.
Which is better?



+   }


Shouldn't you return an error here? Also my understanding is that
parse_args is responsible for reporting the error.


All exception branches in process_sysctl_arg record logs and return 0.
Do I need to keep the same processing in the new branch?



+
if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) {
param += sizeof("sysctl") - 1;
  
--

2.27.0




Thanks
Xiaoming Ni


[PATCH v2] proc_sysctl: fix oops caused by incorrect command parameters.

2021-01-07 Thread Xiaoming Ni
The process_sysctl_arg() does not check whether val is empty before
 invoking strlen(val). If the command line parameter () is incorrectly
 configured and val is empty, oops is triggered.

For example, "hung_task_panic=1" is incorrectly written as "hung_task_panic".

log:
Kernel command line:  hung_task_panic

[000n] user address but active_mm is swapper
Internal error: Oops: 9605 [#1] SMP
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.1 #1
Hardware name: linux,dummy-virt (DT)
pstate: 4005 (nZcv daif -PAN -UAO -TCO BTYPE=--)
pc : __pi_strlen+0x10/0x98
lr : process_sysctl_arg+0x1e4/0x2ac
sp : ffc01104bd40
x29: ffc01104bd40 x28: 
x27: ff80c0a4691e x26: ffc0102a7c8c
x25:  x24: ffc01104be80
x23: ff80c22f0b00 x22: ff80c02e28c0
x21: ffc0109f9000 x20: 
x19: ffc0107c08de x18: 0003
x17: ffc01105d000 x16: 0054
x15:  x14: 3030253078413830
x13:  x12: 
x11: 0101010101010101 x10: 0005
x9 : 0003 x8 : ff80c0980c08
x7 :  x6 : 0002
x5 : ff80c0235000 x4 : ff810f7c7ee0
x3 : 043a x2 : 00bdcc4ebacf1a54
x1 :  x0 : 
Call trace:
 __pi_strlen+0x10/0x98
 parse_args+0x278/0x344
 do_sysctl_args+0x8c/0xfc
 kernel_init+0x5c/0xf4
 ret_from_fork+0x10/0x30
Code: b200c3eb 927cec01 f2400c07 54000301 (a8c10c22)

Fixes: 3db978d480e2843 ("kernel/sysctl: support setting sysctl parameters
 from kernel command line")
Signed-off-by: Xiaoming Ni 

-
v2:
   Added log output of the failure branch based on the review comments of Kees 
Cook.
v1: https://lore.kernel.org/lkml/20201224074256.117413-1-nixiaom...@huawei.com/
-
---
 fs/proc/proc_sysctl.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 317899222d7f..dc1a56515e86 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1757,6 +1757,11 @@ static int process_sysctl_arg(char *param, char *val,
loff_t pos = 0;
ssize_t wret;
 
+   if (!val) {
+   pr_err("Missing param value! Expected '%s=...value...'\n", 
param);
+   return 0;
+   }
+
if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) {
param += sizeof("sysctl") - 1;
 
-- 
2.27.0



Re: [PATCH] proc_sysclt: fix oops caused by incorrect command parameters.

2021-01-06 Thread Xiaoming Ni

On 2021/1/7 7:46, Kees Cook wrote:

subject typo: "sysclt" -> "sysctl"

On Thu, Dec 24, 2020 at 03:42:56PM +0800, Xiaoming Ni wrote:

The process_sysctl_arg() does not check whether val is empty before
  invoking strlen(val). If the command line parameter () is incorrectly
  configured and val is empty, oops is triggered.

For example, "hung_task_panic=1" is incorrectly written as "hung_task_panic".

log:
Kernel command line:  hung_task_panic

[000n] user address but active_mm is swapper
Internal error: Oops: 9605 [#1] SMP
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.1 #1
Hardware name: linux,dummy-virt (DT)
pstate: 4005 (nZcv daif -PAN -UAO -TCO BTYPE=--)
pc : __pi_strlen+0x10/0x98
lr : process_sysctl_arg+0x1e4/0x2ac
sp : ffc01104bd40
x29: ffc01104bd40 x28: 
x27: ff80c0a4691e x26: ffc0102a7c8c
x25:  x24: ffc01104be80
x23: ff80c22f0b00 x22: ff80c02e28c0
x21: ffc0109f9000 x20: 
x19: ffc0107c08de x18: 0003
x17: ffc01105d000 x16: 0054
x15:  x14: 3030253078413830
x13:  x12: 
x11: 0101010101010101 x10: 0005
x9 : 0003 x8 : ff80c0980c08
x7 :  x6 : 0002
x5 : ff80c0235000 x4 : ff810f7c7ee0
x3 : 043a x2 : 00bdcc4ebacf1a54
x1 :  x0 : 
Call trace:
 __pi_strlen+0x10/0x98
 parse_args+0x278/0x344
 do_sysctl_args+0x8c/0xfc
 kernel_init+0x5c/0xf4
 ret_from_fork+0x10/0x30
Code: b200c3eb 927cec01 f2400c07 54000301 (a8c10c22)

Fixes: 3db978d480e2843 ("kernel/sysctl: support setting sysctl parameters
  from kernel command line")
Signed-off-by: Xiaoming Ni 
---
  fs/proc/proc_sysctl.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 317899222d7f..4516411a2b44 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1757,6 +1757,9 @@ static int process_sysctl_arg(char *param, char *val,
loff_t pos = 0;
ssize_t wret;
  
+	if (!val)

+   return 0;
+
if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) {
param += sizeof("sysctl") - 1;


Otherwise, yeah, this is a good test to add. I would make it more
verbose, though:

if (!val) {
pr_err("Missing param value! Expected '%s=...value...'\n", 
param);
return 0;
}


Yes, it's better to add log output.
Thank you for your review.
Do I need to send V2 patch based on review comments?

Thanks
Xiaoming Ni



ping //Re: [PATCH] proc_sysclt: fix oops caused by incorrect command parameters.

2021-01-03 Thread Xiaoming Ni

ping


On 2020/12/24 15:42, Xiaoming Ni wrote:

The process_sysctl_arg() does not check whether val is empty before
  invoking strlen(val). If the command line parameter () is incorrectly
  configured and val is empty, oops is triggered.

For example, "hung_task_panic=1" is incorrectly written as "hung_task_panic".

log:
Kernel command line:  hung_task_panic

[000n] user address but active_mm is swapper
Internal error: Oops: 9605 [#1] SMP
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.1 #1
Hardware name: linux,dummy-virt (DT)
pstate: 4005 (nZcv daif -PAN -UAO -TCO BTYPE=--)
pc : __pi_strlen+0x10/0x98
lr : process_sysctl_arg+0x1e4/0x2ac
sp : ffc01104bd40
x29: ffc01104bd40 x28: 
x27: ff80c0a4691e x26: ffc0102a7c8c
x25:  x24: ffc01104be80
x23: ff80c22f0b00 x22: ff80c02e28c0
x21: ffc0109f9000 x20: 
x19: ffc0107c08de x18: 0003
x17: ffc01105d000 x16: 0054
x15:  x14: 3030253078413830
x13:  x12: 
x11: 0101010101010101 x10: 0005
x9 : 0003 x8 : ff80c0980c08
x7 :  x6 : 0002
x5 : ff80c0235000 x4 : ff810f7c7ee0
x3 : 043a x2 : 00bdcc4ebacf1a54
x1 :  x0 : 
Call trace:
 __pi_strlen+0x10/0x98
 parse_args+0x278/0x344
 do_sysctl_args+0x8c/0xfc
 kernel_init+0x5c/0xf4
 ret_from_fork+0x10/0x30
Code: b200c3eb 927cec01 f2400c07 54000301 (a8c10c22)

Fixes: 3db978d480e2843 ("kernel/sysctl: support setting sysctl parameters
  from kernel command line")
Signed-off-by: Xiaoming Ni 
---
  fs/proc/proc_sysctl.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 317899222d7f..4516411a2b44 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1757,6 +1757,9 @@ static int process_sysctl_arg(char *param, char *val,
loff_t pos = 0;
ssize_t wret;
  
+	if (!val)

+   return 0;
+
if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) {
param += sizeof("sysctl") - 1;
  





[PATCH] proc_sysclt: fix oops caused by incorrect command parameters.

2020-12-23 Thread Xiaoming Ni
The process_sysctl_arg() does not check whether val is empty before
 invoking strlen(val). If the command line parameter () is incorrectly
 configured and val is empty, oops is triggered.

For example, "hung_task_panic=1" is incorrectly written as "hung_task_panic".

log:
Kernel command line:  hung_task_panic

[000n] user address but active_mm is swapper
Internal error: Oops: 9605 [#1] SMP
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.10.1 #1
Hardware name: linux,dummy-virt (DT)
pstate: 4005 (nZcv daif -PAN -UAO -TCO BTYPE=--)
pc : __pi_strlen+0x10/0x98
lr : process_sysctl_arg+0x1e4/0x2ac
sp : ffc01104bd40
x29: ffc01104bd40 x28: 
x27: ff80c0a4691e x26: ffc0102a7c8c
x25:  x24: ffc01104be80
x23: ff80c22f0b00 x22: ff80c02e28c0
x21: ffc0109f9000 x20: 
x19: ffc0107c08de x18: 0003
x17: ffc01105d000 x16: 0054
x15:  x14: 3030253078413830
x13:  x12: 
x11: 0101010101010101 x10: 0005
x9 : 0003 x8 : ff80c0980c08
x7 :  x6 : 0002
x5 : ff80c0235000 x4 : ff810f7c7ee0
x3 : 043a x2 : 00bdcc4ebacf1a54
x1 :  x0 : 
Call trace:
 __pi_strlen+0x10/0x98
 parse_args+0x278/0x344
 do_sysctl_args+0x8c/0xfc
 kernel_init+0x5c/0xf4
 ret_from_fork+0x10/0x30
Code: b200c3eb 927cec01 f2400c07 54000301 (a8c10c22)

Fixes: 3db978d480e2843 ("kernel/sysctl: support setting sysctl parameters
 from kernel command line")
Signed-off-by: Xiaoming Ni 
---
 fs/proc/proc_sysctl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 317899222d7f..4516411a2b44 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1757,6 +1757,9 @@ static int process_sysctl_arg(char *param, char *val,
loff_t pos = 0;
ssize_t wret;
 
+   if (!val)
+   return 0;
+
if (strncmp(param, "sysctl", sizeof("sysctl") - 1) == 0) {
param += sizeof("sysctl") - 1;
 
-- 
2.27.0



Re: [PATCH] powerpc:Don't print raw EIP/LR hex values in dump_stack() and show_regs()

2020-12-22 Thread Xiaoming Ni

On 2020/12/22 1:12, Segher Boessenkool wrote:

On Mon, Dec 21, 2020 at 04:42:23PM +, David Laight wrote:

From: Segher Boessenkool

Sent: 21 December 2020 16:32

On Mon, Dec 21, 2020 at 04:17:21PM +0100, Christophe Leroy wrote:

Le 21/12/2020 à 04:27, Xiaoming Ni a écrit :

Since the commit 2b0e86cc5de6 ("powerpc/fsl_booke/32: implement KASLR
infrastructure"), the powerpc system is ready to support KASLR.
To reduces the risk of invalidating address randomization, don't print the
EIP/LR hex values in dump_stack() and show_regs().



I think your change is not enough to hide EIP address, see below a dump
with you patch, you get "Faulting instruction address: 0xc03a0c14"


As far as I can see the patch does nothing to the GPR printout.  Often
GPRs contain code addresses.  As one example, the LR is moved via a GPR
(often GPR0, but not always) for storing on the stack.

So this needs more work.


If the dump_stack() is from an oops you need the real EIP value
on order to stand any chance of making headway.


Or at least the function name + offset, yes.


When the system is healthy, only symbols and offsets are printed,
Output address and symbol + offset when the system is dying
Does this meet both debugging and security requirements?
For example:

+static void __show_regs_ip_lr(const char *flag, unsigned long addr)
+{
+ if (system_going_down()) { /* panic oops reboot */
+ pr_cont("%s["REG"] %pS", flag, addr, (void *)addr);
+ } else {
+ pr_cont("%s%pS", flag, (void *)addr);
+ }
+}
+
 static void __show_regs(struct pt_regs *regs)
 {
int i, trap;

-   printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
-  regs->nip, regs->link, regs->ctr);
+ __show_regs_ip_lr("NIP: ", regs->nip);
+ __show_regs_ip_lr(" LR: ", regs->link);
+ pr_cont(" CTR: "REG"\n", regs->ctr);
printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
   regs, regs->trap, print_tainted(), init_utsname()->release);
printk("MSR:  "REG" ", regs->msr);





Otherwise you might just as well just print 'borked - tough luck'.


Yes.  ASLR is a house of cards.  But that isn't constructive wrt this
patch :-)


Segher
.



Thanks
Xiaoming Ni


[PATCH] powerpc:Don't print raw EIP/LR hex values in dump_stack() and show_regs()

2020-12-20 Thread Xiaoming Ni
Since the commit 2b0e86cc5de6 ("powerpc/fsl_booke/32: implement KASLR
infrastructure"), the powerpc system is ready to support KASLR.
To reduces the risk of invalidating address randomization, don't print the
EIP/LR hex values in dump_stack() and show_regs().

This patch follows x86 and arm64's lead:
commit a25ffd3a6302a6 ("arm64: traps: Don't print stack or raw
 PC/LR values in backtraces")
commit bb5e5ce545f203 ("x86/dumpstack: Remove kernel text
 addresses from stack dump")

Signed-off-by: Xiaoming Ni 
---
 arch/powerpc/kernel/process.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index a66f435dabbf..913cf1ea702e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1455,8 +1455,8 @@ static void __show_regs(struct pt_regs *regs)
 {
int i, trap;
 
-   printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
-  regs->nip, regs->link, regs->ctr);
+   printk("NIP: %pS LR: %pS CTR: "REG"\n",
+  (void *)regs->nip, (void *)regs->link, regs->ctr);
printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
   regs, regs->trap, print_tainted(), init_utsname()->release);
printk("MSR:  "REG" ", regs->msr);
@@ -1493,8 +1493,8 @@ static void __show_regs(struct pt_regs *regs)
 * above info out without failing
 */
if (IS_ENABLED(CONFIG_KALLSYMS)) {
-   printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
-   printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
+   printk("NIP %pS\n", (void *)regs->nip);
+   printk("LR %pS\n", (void *)regs->link);
}
 }
 
@@ -2160,8 +2160,8 @@ void show_stack(struct task_struct *tsk, unsigned long 
*stack,
newsp = stack[0];
ip = stack[STACK_FRAME_LR_SAVE];
if (!firstframe || ip != lr) {
-   printk("%s["REG"] ["REG"] %pS",
-   loglvl, sp, ip, (void *)ip);
+   printk("%s ["REG"] %pS",
+   loglvl, sp, (void *)ip);
ret_addr = ftrace_graph_ret_addr(current,
_idx, ip, stack);
if (ret_addr != ip)
-- 
2.27.0



[PATCH v2 0/4] panic: Add new API in_panic_state()

2020-12-19 Thread Xiaoming Ni
For some features (such as hang_task, ledtrig-activity, ledtrig-heartbeat)
different processing logics need to be performed based on whether the
current system is in panic state:
1: Register hook for panic_notifier_list.
2. Assign a value to the global variable in the hook function.
3. Determine whether the system is in panic state based on the
  global variable and perform different processing.
Duplicate code snippets exist, and the timing judgment is relatively lag.
Therefore, consider extracting the new API: bool in_panic_state(void).



v2: Rename api to in_panic_state as recommended by Pavel Machek, Tetsuo
 Handa, Randy Dunlap.

v1: https://lore.kernel.org/lkml/20201218114406.61906-1-nixiaom...@huawei.com/
  API name: is_being_panic


Xiaoming Ni (4):
  panic: Add new API in_panic_state()
  hung_task: Replace "did_panic" with in_panic_state()
  leds:trigger:ledtrig-activity Replace "panic_detected" with
in_panic_state()
  leds:trigger:ledtrig-heartbeat: Replace "panic_heartbeats" with
in_panic_state()

 drivers/leds/trigger/ledtrig-activity.c  | 19 +--
 drivers/leds/trigger/ledtrig-heartbeat.c | 19 +--
 include/linux/kernel.h   |  1 +
 kernel/hung_task.c   | 17 +
 kernel/panic.c   |  6 ++
 5 files changed, 10 insertions(+), 52 deletions(-)

-- 
2.27.0



[PATCH v2 4/4] leds:trigger:ledtrig-heartbeat: Replace "panic_heartbeats" with in_panic_state()

2020-12-19 Thread Xiaoming Ni
Replace the global variable "panic_heartbeats" with in_panic_state()

Signed-off-by: Xiaoming Ni 
---
 drivers/leds/trigger/ledtrig-heartbeat.c | 19 +--
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c 
b/drivers/leds/trigger/ledtrig-heartbeat.c
index 36b6709afe9f..f24d64cf0a62 100644
--- a/drivers/leds/trigger/ledtrig-heartbeat.c
+++ b/drivers/leds/trigger/ledtrig-heartbeat.c
@@ -19,8 +19,6 @@
 #include 
 #include "../leds.h"
 
-static int panic_heartbeats;
-
 struct heartbeat_trig_data {
struct led_classdev *led_cdev;
unsigned int phase;
@@ -39,7 +37,7 @@ static void led_heartbeat_function(struct timer_list *t)
 
led_cdev = heartbeat_data->led_cdev;
 
-   if (unlikely(panic_heartbeats)) {
+   if (unlikely(in_panic_state())) {
led_set_brightness_nosleep(led_cdev, LED_OFF);
return;
}
@@ -169,28 +167,15 @@ static int heartbeat_reboot_notifier(struct 
notifier_block *nb,
return NOTIFY_DONE;
 }
 
-static int heartbeat_panic_notifier(struct notifier_block *nb,
-unsigned long code, void *unused)
-{
-   panic_heartbeats = 1;
-   return NOTIFY_DONE;
-}
-
 static struct notifier_block heartbeat_reboot_nb = {
.notifier_call = heartbeat_reboot_notifier,
 };
 
-static struct notifier_block heartbeat_panic_nb = {
-   .notifier_call = heartbeat_panic_notifier,
-};
-
 static int __init heartbeat_trig_init(void)
 {
int rc = led_trigger_register(_led_trigger);
 
if (!rc) {
-   atomic_notifier_chain_register(_notifier_list,
-  _panic_nb);
register_reboot_notifier(_reboot_nb);
}
return rc;
@@ -199,8 +184,6 @@ static int __init heartbeat_trig_init(void)
 static void __exit heartbeat_trig_exit(void)
 {
unregister_reboot_notifier(_reboot_nb);
-   atomic_notifier_chain_unregister(_notifier_list,
-_panic_nb);
led_trigger_unregister(_led_trigger);
 }
 
-- 
2.27.0



[PATCH v2 3/4] leds:trigger:ledtrig-activity Replace "panic_detected" with in_panic_state()

2020-12-19 Thread Xiaoming Ni
Replace the global variable "panic_detected" with in_panic_state()

Signed-off-by: Xiaoming Ni 
---
 drivers/leds/trigger/ledtrig-activity.c | 19 +--
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/drivers/leds/trigger/ledtrig-activity.c 
b/drivers/leds/trigger/ledtrig-activity.c
index 14ba7faaed9e..9675c9348516 100644
--- a/drivers/leds/trigger/ledtrig-activity.c
+++ b/drivers/leds/trigger/ledtrig-activity.c
@@ -17,8 +17,6 @@
 #include 
 #include "../leds.h"
 
-static int panic_detected;
-
 struct activity_data {
struct timer_list timer;
struct led_classdev *led_cdev;
@@ -47,7 +45,7 @@ static void led_activity_function(struct timer_list *t)
if (test_and_clear_bit(LED_BLINK_BRIGHTNESS_CHANGE, 
_cdev->work_flags))
led_cdev->blink_brightness = led_cdev->new_blink_brightness;
 
-   if (unlikely(panic_detected)) {
+   if (unlikely(in_panic_state())) {
/* full brightness in case of panic */
led_set_brightness_nosleep(led_cdev, 
led_cdev->blink_brightness);
return;
@@ -226,28 +224,15 @@ static int activity_reboot_notifier(struct notifier_block 
*nb,
return NOTIFY_DONE;
 }
 
-static int activity_panic_notifier(struct notifier_block *nb,
-   unsigned long code, void *unused)
-{
-   panic_detected = 1;
-   return NOTIFY_DONE;
-}
-
 static struct notifier_block activity_reboot_nb = {
.notifier_call = activity_reboot_notifier,
 };
 
-static struct notifier_block activity_panic_nb = {
-   .notifier_call = activity_panic_notifier,
-};
-
 static int __init activity_init(void)
 {
int rc = led_trigger_register(_led_trigger);
 
if (!rc) {
-   atomic_notifier_chain_register(_notifier_list,
-  _panic_nb);
register_reboot_notifier(_reboot_nb);
}
return rc;
@@ -256,8 +241,6 @@ static int __init activity_init(void)
 static void __exit activity_exit(void)
 {
unregister_reboot_notifier(_reboot_nb);
-   atomic_notifier_chain_unregister(_notifier_list,
-_panic_nb);
led_trigger_unregister(_led_trigger);
 }
 
-- 
2.27.0



[PATCH v2 2/4] hung_task: Replace "did_panic" with in_panic_state()

2020-12-19 Thread Xiaoming Ni
Replace the global variable "did_panic" with in_panic_state()

Signed-off-by: Xiaoming Ni 
---
 kernel/hung_task.c | 17 +
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index bb2e3e15c84c..2747cd6dd35e 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -50,7 +50,6 @@ unsigned long __read_mostly 
sysctl_hung_task_check_interval_secs;
 
 int __read_mostly sysctl_hung_task_warnings = 10;
 
-static int __read_mostly did_panic;
 static bool hung_task_show_lock;
 static bool hung_task_call_panic;
 static bool hung_task_show_all_bt;
@@ -72,18 +71,6 @@ unsigned int __read_mostly 
sysctl_hung_task_all_cpu_backtrace;
 unsigned int __read_mostly sysctl_hung_task_panic =
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
 
-static int
-hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
-{
-   did_panic = 1;
-
-   return NOTIFY_DONE;
-}
-
-static struct notifier_block panic_block = {
-   .notifier_call = hung_task_panic,
-};
-
 static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
unsigned long switch_count = t->nvcsw + t->nivcsw;
@@ -223,7 +210,7 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
 * If the system crashed already then all bets are off,
 * do not report extra hung tasks:
 */
-   if (test_taint(TAINT_DIE) || did_panic)
+   if (test_taint(TAINT_DIE) || unlikely(in_panic_state()))
return;
 
hung_task_show_lock = false;
@@ -347,8 +334,6 @@ static int watchdog(void *dummy)
 
 static int __init hung_task_init(void)
 {
-   atomic_notifier_chain_register(_notifier_list, _block);
-
/* Disable hung task detector on suspend */
pm_notifier(hungtask_pm_notify, 0);
 
-- 
2.27.0



[PATCH v2 1/4] panic: Add new API in_panic_state()

2020-12-19 Thread Xiaoming Ni
For some features (such as hang_task, ledtrig-activity, ledtrig-heartbeat)
different processing logics need to be performed based on whether the
current system is in panic state:
1: Register hook for panic_notifier_list.
2. Assign a value to the global variable in the hook function.
3. Determine whether the system is in panic state based on the
  global variable and perform different processing.
Duplicate code snippets exist, and the timing judgment is relatively lag.
Therefore, consider extracting the new API: bool in_panic_state(void).

Signed-off-by: Xiaoming Ni 
---
 include/linux/kernel.h | 1 +
 kernel/panic.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f7902d8c1048..c9a9078157b6 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -167,6 +167,7 @@ void __might_fault(const char *file, int line);
 static inline void might_fault(void) { }
 #endif
 
+extern bool in_panic_state(void);
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
 __printf(1, 2)
diff --git a/kernel/panic.c b/kernel/panic.c
index 332736a72a58..351627883a04 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -125,6 +125,12 @@ void __weak crash_smp_send_stop(void)
 
 atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
 
+bool in_panic_state(void)
+{
+   return (atomic_read(_cpu) != PANIC_CPU_INVALID);
+}
+EXPORT_SYMBOL(in_panic_state);
+
 /*
  * A variant of panic() called from NMI context. We return if we've already
  * panicked on this CPU. If another CPU already panicked, loop in
-- 
2.27.0



Re: [PATCH 2/4] hung_task: Replace "did_panic" with is_be_panic()

2020-12-19 Thread Xiaoming Ni

On 2020/12/19 1:06, Randy Dunlap wrote:

On 12/18/20 6:36 AM, Tetsuo Handa wrote:

On 2020/12/18 21:59, Pavel Machek wrote:

On Fri 2020-12-18 19:44:04, Xiaoming Ni wrote:
Plus.. is_being_panic is not really english. "is_paniccing" would be
closer...?


Or in_panic() ?



Yes, or  in_panic_state()



Thank you,
I'll resend the patch later on according to your suggestion.

Thanks
Xiaoming Ni
.


[PATCH 3/4] leds:trigger:ledtrig-activity Replace "panic_detected" with is_be_panic()

2020-12-18 Thread Xiaoming Ni
Replace the global variable "panic_detected" with is_be_panic()

Signed-off-by: Xiaoming Ni 
---
 drivers/leds/trigger/ledtrig-activity.c | 19 +--
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/drivers/leds/trigger/ledtrig-activity.c 
b/drivers/leds/trigger/ledtrig-activity.c
index 14ba7faaed9e..bbacb3dbe341 100644
--- a/drivers/leds/trigger/ledtrig-activity.c
+++ b/drivers/leds/trigger/ledtrig-activity.c
@@ -17,8 +17,6 @@
 #include 
 #include "../leds.h"
 
-static int panic_detected;
-
 struct activity_data {
struct timer_list timer;
struct led_classdev *led_cdev;
@@ -47,7 +45,7 @@ static void led_activity_function(struct timer_list *t)
if (test_and_clear_bit(LED_BLINK_BRIGHTNESS_CHANGE, 
_cdev->work_flags))
led_cdev->blink_brightness = led_cdev->new_blink_brightness;
 
-   if (unlikely(panic_detected)) {
+   if (unlikely(is_being_panic())) {
/* full brightness in case of panic */
led_set_brightness_nosleep(led_cdev, 
led_cdev->blink_brightness);
return;
@@ -226,28 +224,15 @@ static int activity_reboot_notifier(struct notifier_block 
*nb,
return NOTIFY_DONE;
 }
 
-static int activity_panic_notifier(struct notifier_block *nb,
-   unsigned long code, void *unused)
-{
-   panic_detected = 1;
-   return NOTIFY_DONE;
-}
-
 static struct notifier_block activity_reboot_nb = {
.notifier_call = activity_reboot_notifier,
 };
 
-static struct notifier_block activity_panic_nb = {
-   .notifier_call = activity_panic_notifier,
-};
-
 static int __init activity_init(void)
 {
int rc = led_trigger_register(_led_trigger);
 
if (!rc) {
-   atomic_notifier_chain_register(_notifier_list,
-  _panic_nb);
register_reboot_notifier(_reboot_nb);
}
return rc;
@@ -256,8 +241,6 @@ static int __init activity_init(void)
 static void __exit activity_exit(void)
 {
unregister_reboot_notifier(_reboot_nb);
-   atomic_notifier_chain_unregister(_notifier_list,
-_panic_nb);
led_trigger_unregister(_led_trigger);
 }
 
-- 
2.27.0



[PATCH 0/4] Add new API is_being_panic()

2020-12-18 Thread Xiaoming Ni
Add is_being_panic() to check whether the system is in panic state.
Used to replace the global variable used to determine the panic status
in other features: hung_task ledtrig-activity ledtrig-heartbeat


Xiaoming Ni (4):
  panic: Add new API is_being_panic()
  hung_task: Replace "did_panic" with is_be_panic()
  leds:trigger:ledtrig-activity Replace "panic_detected" with
is_be_panic()
  leds:trigger:ledtrig-heartbeat: Replace "panic_heartbeats" with
is_be_panic()

 drivers/leds/trigger/ledtrig-activity.c  | 19 +--
 drivers/leds/trigger/ledtrig-heartbeat.c | 19 +--
 include/linux/kernel.h   |  1 +
 kernel/hung_task.c   | 17 +
 kernel/panic.c   |  6 ++
 5 files changed, 10 insertions(+), 52 deletions(-)

-- 
2.27.0



[PATCH 2/4] hung_task: Replace "did_panic" with is_be_panic()

2020-12-18 Thread Xiaoming Ni
Replace the global variable "did_panic" with is_be_panic()

Signed-off-by: Xiaoming Ni 
---
 kernel/hung_task.c | 17 +
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index bb2e3e15c84c..3374b993da4c 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -50,7 +50,6 @@ unsigned long __read_mostly 
sysctl_hung_task_check_interval_secs;
 
 int __read_mostly sysctl_hung_task_warnings = 10;
 
-static int __read_mostly did_panic;
 static bool hung_task_show_lock;
 static bool hung_task_call_panic;
 static bool hung_task_show_all_bt;
@@ -72,18 +71,6 @@ unsigned int __read_mostly 
sysctl_hung_task_all_cpu_backtrace;
 unsigned int __read_mostly sysctl_hung_task_panic =
CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
 
-static int
-hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
-{
-   did_panic = 1;
-
-   return NOTIFY_DONE;
-}
-
-static struct notifier_block panic_block = {
-   .notifier_call = hung_task_panic,
-};
-
 static void check_hung_task(struct task_struct *t, unsigned long timeout)
 {
unsigned long switch_count = t->nvcsw + t->nivcsw;
@@ -223,7 +210,7 @@ static void check_hung_uninterruptible_tasks(unsigned long 
timeout)
 * If the system crashed already then all bets are off,
 * do not report extra hung tasks:
 */
-   if (test_taint(TAINT_DIE) || did_panic)
+   if (test_taint(TAINT_DIE) || unlikely(is_being_panic()))
return;
 
hung_task_show_lock = false;
@@ -347,8 +334,6 @@ static int watchdog(void *dummy)
 
 static int __init hung_task_init(void)
 {
-   atomic_notifier_chain_register(_notifier_list, _block);
-
/* Disable hung task detector on suspend */
pm_notifier(hungtask_pm_notify, 0);
 
-- 
2.27.0



[PATCH 4/4] leds:trigger:ledtrig-heartbeat: Replace "panic_heartbeats" with is_be_panic()

2020-12-18 Thread Xiaoming Ni
Replace the global variable "panic_heartbeats" with is_be_panic()

Signed-off-by: Xiaoming Ni 
---
 drivers/leds/trigger/ledtrig-heartbeat.c | 19 +--
 1 file changed, 1 insertion(+), 18 deletions(-)

diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c 
b/drivers/leds/trigger/ledtrig-heartbeat.c
index 36b6709afe9f..3c34d49f0ed8 100644
--- a/drivers/leds/trigger/ledtrig-heartbeat.c
+++ b/drivers/leds/trigger/ledtrig-heartbeat.c
@@ -19,8 +19,6 @@
 #include 
 #include "../leds.h"
 
-static int panic_heartbeats;
-
 struct heartbeat_trig_data {
struct led_classdev *led_cdev;
unsigned int phase;
@@ -39,7 +37,7 @@ static void led_heartbeat_function(struct timer_list *t)
 
led_cdev = heartbeat_data->led_cdev;
 
-   if (unlikely(panic_heartbeats)) {
+   if (unlikely(is_being_panic())) {
led_set_brightness_nosleep(led_cdev, LED_OFF);
return;
}
@@ -169,28 +167,15 @@ static int heartbeat_reboot_notifier(struct 
notifier_block *nb,
return NOTIFY_DONE;
 }
 
-static int heartbeat_panic_notifier(struct notifier_block *nb,
-unsigned long code, void *unused)
-{
-   panic_heartbeats = 1;
-   return NOTIFY_DONE;
-}
-
 static struct notifier_block heartbeat_reboot_nb = {
.notifier_call = heartbeat_reboot_notifier,
 };
 
-static struct notifier_block heartbeat_panic_nb = {
-   .notifier_call = heartbeat_panic_notifier,
-};
-
 static int __init heartbeat_trig_init(void)
 {
int rc = led_trigger_register(_led_trigger);
 
if (!rc) {
-   atomic_notifier_chain_register(_notifier_list,
-  _panic_nb);
register_reboot_notifier(_reboot_nb);
}
return rc;
@@ -199,8 +184,6 @@ static int __init heartbeat_trig_init(void)
 static void __exit heartbeat_trig_exit(void)
 {
unregister_reboot_notifier(_reboot_nb);
-   atomic_notifier_chain_unregister(_notifier_list,
-_panic_nb);
led_trigger_unregister(_led_trigger);
 }
 
-- 
2.27.0



[PATCH 1/4] panic: Add new API is_being_panic()

2020-12-18 Thread Xiaoming Ni
Add is_being_panic() to check whether the system is in panic state.
Used to replace the global variable used to determine the panic status
in other features: hung_task ledtrig-activity ledtrig-heartbeat

Signed-off-by: Xiaoming Ni 
---
 include/linux/kernel.h | 1 +
 kernel/panic.c | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f7902d8c1048..3d6f344771c1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -167,6 +167,7 @@ void __might_fault(const char *file, int line);
 static inline void might_fault(void) { }
 #endif
 
+extern bool is_being_panic(void);
 extern struct atomic_notifier_head panic_notifier_list;
 extern long (*panic_blink)(int state);
 __printf(1, 2)
diff --git a/kernel/panic.c b/kernel/panic.c
index 332736a72a58..3b6a487702b0 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -125,6 +125,12 @@ void __weak crash_smp_send_stop(void)
 
 atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);
 
+bool is_being_panic(void)
+{
+   return (atomic_read(_cpu) != PANIC_CPU_INVALID);
+}
+EXPORT_SYMBOL(is_being_panic);
+
 /*
  * A variant of panic() called from NMI context. We return if we've already
  * panicked on this CPU. If another CPU already panicked, loop in
-- 
2.27.0



[PATCH] dma/qcom/gpi: Fixes a format mismatch

2020-12-18 Thread Xiaoming Ni
drivers/dma/qcom/gpi.c:1419:3: warning: format '%lu' expects argument of
 type 'long unsigned int', but argument 8 has type 'size_t {aka unsigned
 int}' [-Wformat=]
drivers/dma/qcom/gpi.c:1427:31: warning: format '%lu' expects argument of
 type 'long unsigned int', but argument 3 has type 'size_t {aka unsigned
 int}' [-Wformat=]
drivers/dma/qcom/gpi.c:1447:3: warning: format '%llx' expects argument of
 type 'long long unsigned int', but argument 4 has type 'dma_addr_t {aka
 unsigned int}' [-Wformat=]
drivers/dma/qcom/gpi.c:1447:3: warning: format '%llx' expects argument of
 type 'long long unsigned int', but argument 5 has type 'phys_addr_t {aka
 unsigned int}' [-Wformat=]

Signed-off-by: Xiaoming Ni 
---
 drivers/dma/qcom/gpi.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
index d2334f535de2..556c070a514c 100644
--- a/drivers/dma/qcom/gpi.c
+++ b/drivers/dma/qcom/gpi.c
@@ -1416,7 +1416,7 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 
elements,
len = 1 << bit;
ring->alloc_size = (len + (len - 1));
dev_dbg(gpii->gpi_dev->dev,
-   "#el:%u el_size:%u len:%u actual_len:%llu alloc_size:%lu\n",
+   "#el:%u el_size:%u len:%u actual_len:%llu alloc_size:%zu\n",
  elements, el_size, (elements * el_size), len,
  ring->alloc_size);
 
@@ -1424,7 +1424,7 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 
elements,
   ring->alloc_size,
   >dma_handle, GFP_KERNEL);
if (!ring->pre_aligned) {
-   dev_err(gpii->gpi_dev->dev, "could not alloc size:%lu mem for 
ring\n",
+   dev_err(gpii->gpi_dev->dev, "could not alloc size:%zu mem for 
ring\n",
ring->alloc_size);
return -ENOMEM;
}
@@ -1444,8 +1444,8 @@ static int gpi_alloc_ring(struct gpi_ring *ring, u32 
elements,
smp_wmb();
 
dev_dbg(gpii->gpi_dev->dev,
-   "phy_pre:0x%0llx phy_alig:0x%0llx len:%u el_size:%u 
elements:%u\n",
-   ring->dma_handle, ring->phys_addr, ring->len,
+   "phy_pre:%pad phy_alig:%pa len:%u el_size:%u elements:%u\n",
+   >dma_handle, >phys_addr, ring->len,
ring->el_size, ring->elements);
 
return 0;
-- 
2.27.0



Re: ping // [PATCH] mtd:cfi_cmdset_0002: fix atomic sleep bug when CONFIG_MTD_XIP=y

2020-12-07 Thread Xiaoming Ni

On 2020/12/8 2:59, Vignesh Raghavendra wrote:

Hi Xiaoming,

On 12/7/20 4:23 PM, Miquel Raynal wrote:

Hi Xiaoming,

Xiaoming Ni  wrote on Mon, 7 Dec 2020 18:48:33
+0800:


ping

On 2020/11/27 21:07, Xiaoming Ni wrote:

When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable().
To avoid sleep in interrupt context, we need to call local_irq_enable()
before schedule().

The problem call stack is as follows:
bug1:
do_write_oneword_retry()
xip_disable()
local_irq_disable()
do_write_oneword_once()
schedule()
bug2:
do_write_buffer()
xip_disable()
local_irq_disable()
do_write_buffer_wait()
schedule()
bug3:
do_erase_chip()
xip_disable()
local_irq_disable()
schedule()
bug4:
do_erase_oneblock()
xip_disable()
local_irq_disable()
schedule()

Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.")
Cc: sta...@vger.kernel.org # v2.6.13
Signed-off-by: Xiaoming Ni 
---
   drivers/mtd/chips/cfi_cmdset_0002.c | 16 
   1 file changed, 16 insertions(+)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c 
b/drivers/mtd/chips/cfi_cmdset_0002.c
index a1f3e1031c3d..12c3776f093a 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct 
map_info *map,
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(>wq, );
mutex_unlock(>mutex);
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_enable();
schedule();
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_disable();


The fix really seems strange to me. I will let Vignesh decide but I
think we should consider updating/fixing xip_disable instead.


Agree with Miquel. Have you done any testing
or is this purely based on code inspection?


I don't have the corresponding device test environment.
I found the problem through code review.



What about comment before xip_disable() function:

/*
  * No interrupt what so ever can be serviced while the flash isn't in array
  * mode.  This is ensured by the xip_disable() and xip_enable() functions
  * enclosing any code path where the flash is known not to be in array mode.
  * And within a XIP disabled code path, only functions marked with __xipram
  * may be called and nothing else (it's a good thing to inspect generated
  * assembly to make sure inline functions were actually inlined and that gcc
  * didn't emit calls to its own support functions). Also configuring MTD CFI
  * support to a single buswidth and a single interleave is also recommended.
  */

So, I don't think the fix is as simple as this patch.


+xip_enable();
 schedule();
+xip_disable();

Do I need to change it to this?




Regards
Vignesh
.



Thanks
Xiaoming Ni



ping // [PATCH] mtd:cfi_cmdset_0002: fix atomic sleep bug when CONFIG_MTD_XIP=y

2020-12-07 Thread Xiaoming Ni

ping

On 2020/11/27 21:07, Xiaoming Ni wrote:

When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable().
To avoid sleep in interrupt context, we need to call local_irq_enable()
before schedule().

The problem call stack is as follows:
bug1:
do_write_oneword_retry()
xip_disable()
local_irq_disable()
do_write_oneword_once()
schedule()
bug2:
do_write_buffer()
xip_disable()
local_irq_disable()
do_write_buffer_wait()
schedule()
bug3:
do_erase_chip()
xip_disable()
local_irq_disable()
schedule()
bug4:
do_erase_oneblock()
xip_disable()
local_irq_disable()
schedule()

Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.")
Cc: sta...@vger.kernel.org # v2.6.13
Signed-off-by: Xiaoming Ni 
---
  drivers/mtd/chips/cfi_cmdset_0002.c | 16 
  1 file changed, 16 insertions(+)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c 
b/drivers/mtd/chips/cfi_cmdset_0002.c
index a1f3e1031c3d..12c3776f093a 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct 
map_info *map,
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(>wq, );
mutex_unlock(>mutex);
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_enable();
schedule();
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_disable();
remove_wait_queue(>wq, );
timeo = jiffies + (HZ / 2); /* FIXME */
mutex_lock(>mutex);
@@ -1962,7 +1966,11 @@ static int __xipram do_write_buffer_wait(struct map_info 
*map,
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(>wq, );
mutex_unlock(>mutex);
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_enable();
schedule();
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_disable();
remove_wait_queue(>wq, );
timeo = jiffies + (HZ / 2); /* FIXME */
mutex_lock(>mutex);
@@ -2461,7 +2469,11 @@ static int __xipram do_erase_chip(struct map_info *map, 
struct flchip *chip)
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(>wq, );
mutex_unlock(>mutex);
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_enable();
schedule();
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_disable();
remove_wait_queue(>wq, );
mutex_lock(>mutex);
continue;
@@ -2560,7 +2572,11 @@ static int __xipram do_erase_oneblock(struct map_info 
*map, struct flchip *chip,
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(>wq, );
mutex_unlock(>mutex);
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_enable();
schedule();
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_disable();
remove_wait_queue(>wq, );
mutex_lock(>mutex);
continue;





[PATCH] mtd:cfi_cmdset_0002: fix atomic sleep bug when CONFIG_MTD_XIP=y

2020-11-27 Thread Xiaoming Ni
When CONFIG_MTD_XIP=y, local_irq_disable() is called in xip_disable().
To avoid sleep in interrupt context, we need to call local_irq_enable()
before schedule().

The problem call stack is as follows:
bug1:
do_write_oneword_retry()
xip_disable()
local_irq_disable()
do_write_oneword_once()
schedule()
bug2:
do_write_buffer()
xip_disable()
local_irq_disable()
do_write_buffer_wait()
schedule()
bug3:
do_erase_chip()
xip_disable()
local_irq_disable()
schedule()
bug4:
do_erase_oneblock()
xip_disable()
local_irq_disable()
schedule()

Fixes: 02b15e343aee ("[MTD] XIP for AMD CFI flash.")
Cc: sta...@vger.kernel.org # v2.6.13
Signed-off-by: Xiaoming Ni 
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c 
b/drivers/mtd/chips/cfi_cmdset_0002.c
index a1f3e1031c3d..12c3776f093a 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -1682,7 +1682,11 @@ static int __xipram do_write_oneword_once(struct 
map_info *map,
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(>wq, );
mutex_unlock(>mutex);
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_enable();
schedule();
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_disable();
remove_wait_queue(>wq, );
timeo = jiffies + (HZ / 2); /* FIXME */
mutex_lock(>mutex);
@@ -1962,7 +1966,11 @@ static int __xipram do_write_buffer_wait(struct map_info 
*map,
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(>wq, );
mutex_unlock(>mutex);
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_enable();
schedule();
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_disable();
remove_wait_queue(>wq, );
timeo = jiffies + (HZ / 2); /* FIXME */
mutex_lock(>mutex);
@@ -2461,7 +2469,11 @@ static int __xipram do_erase_chip(struct map_info *map, 
struct flchip *chip)
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(>wq, );
mutex_unlock(>mutex);
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_enable();
schedule();
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_disable();
remove_wait_queue(>wq, );
mutex_lock(>mutex);
continue;
@@ -2560,7 +2572,11 @@ static int __xipram do_erase_oneblock(struct map_info 
*map, struct flchip *chip,
set_current_state(TASK_UNINTERRUPTIBLE);
add_wait_queue(>wq, );
mutex_unlock(>mutex);
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_enable();
schedule();
+   if (IS_ENABLED(CONFIG_MTD_XIP))
+   local_irq_disable();
remove_wait_queue(>wq, );
mutex_lock(>mutex);
continue;
-- 
2.27.0



ping //Re: [PATCH v2] arm:traps: Don't print stack or raw PC/LR hex values in backtraces

2020-10-26 Thread Xiaoming Ni

ping

On 2020/10/16 10:31, Xiaoming Ni wrote:

Printing raw pointer values in backtraces has potential security
implications and are of questionable value anyway.

This patch follows x86 and arm64's lead and removes the "Exception stack:"
dump from kernel backtraces:
commit a25ffd3a6302a6 ("arm64: traps: Don't print stack or raw
 PC/LR values in backtraces")
commit bb5e5ce545f203 ("x86/dumpstack: Remove kernel text
 addresses from stack dump")

Signed-off-by: Xiaoming Ni 

---
v2:
  Delete [] from the stack according to the email discussion
  in patch V1, Other information processing will be discussed in subsequent
  patches.

v1: https://lore.kernel.org/lkml/20201009075957.110017-1-nixiaom...@huawei.com/
   1. Don't print stack or raw PC/LR hex values in backtraces
   2. Don't print stack mem in backtraces
   3. if (!panic_on_oops), Don't print stack mem in __die()
---
  arch/arm/kernel/process.c | 3 +--
  arch/arm/kernel/traps.c   | 4 ++--
  2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 8e6ace03e960..71c9e5597d39 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -121,8 +121,7 @@ void __show_regs(struct pt_regs *regs)
  
  	printk("PC is at %pS\n", (void *)instruction_pointer(regs));

printk("LR is at %pS\n", (void *)regs->ARM_lr);
-   printk("pc : [<%08lx>]lr : [<%08lx>]psr: %08lx\n",
-  regs->ARM_pc, regs->ARM_lr, regs->ARM_cpsr);
+   printk("psr: %08lx\n", regs->ARM_cpsr);
printk("sp : %08lx  ip : %08lx  fp : %08lx\n",
   regs->ARM_sp, regs->ARM_ip, regs->ARM_fp);
printk("r10: %08lx  r9 : %08lx  r8 : %08lx\n",
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 17d5a785df28..911bbf164875 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -68,8 +68,8 @@ void dump_backtrace_entry(unsigned long where, unsigned long 
from,
unsigned long end = frame + 4 + sizeof(struct pt_regs);
  
  #ifdef CONFIG_KALLSYMS

-   printk("%s[<%08lx>] (%ps) from [<%08lx>] (%pS)\n",
-   loglvl, where, (void *)where, from, (void *)from);
+   printk("%s (%ps) from (%pS)\n",
+   loglvl, (void *)where, (void *)from);
  #else
printk("%sFunction entered at [<%08lx>] from [<%08lx>]\n",
loglvl, where, from);





[PATCH] arm:traps:Don't dump the memory in non-system reset scenarios

2020-10-26 Thread Xiaoming Ni
Do not dump the memory in non-system reset scenarios to prevent virtual
 address information leakage.

This patch follows x86 and arm64's lead and removes the "Exception stack:"
dump from kernel backtraces:
commit a25ffd3a6302a6 ("arm64: traps: Don't print stack or raw
 PC/LR values in backtraces")
commit 0ee1dd9f5e7eae ("x86/dumpstack: Remove raw stack dump")

Signed-off-by: Xiaoming Ni 
---
 arch/arm/kernel/traps.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 911bbf164875..34e268378972 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -60,13 +60,9 @@ static int __init user_debug_setup(char *str)
 __setup("user_debug=", user_debug_setup);
 #endif
 
-static void dump_mem(const char *, const char *, unsigned long, unsigned long);
-
 void dump_backtrace_entry(unsigned long where, unsigned long from,
  unsigned long frame, const char *loglvl)
 {
-   unsigned long end = frame + 4 + sizeof(struct pt_regs);
-
 #ifdef CONFIG_KALLSYMS
printk("%s (%ps) from (%pS)\n",
loglvl, (void *)where, (void *)from);
@@ -74,9 +70,6 @@ void dump_backtrace_entry(unsigned long where, unsigned long 
from,
printk("%sFunction entered at [<%08lx>] from [<%08lx>]\n",
loglvl, where, from);
 #endif
-
-   if (in_entry_text(from) && end <= ALIGN(frame, THREAD_SIZE))
-   dump_mem(loglvl, "Exception stack", frame + 4, end);
 }
 
 void dump_backtrace_stm(u32 *stack, u32 instruction, const char *loglvl)
@@ -125,6 +118,12 @@ static void dump_mem(const char *lvl, const char *str, 
unsigned long bottom,
mm_segment_t fs;
int i;
 
+   /*
+* To prevent virtual address information leakage, memory
+* information cannot be printed in non-reset scenarios.
+*/
+   if (panic_on_oops == 0)
+   return;
/*
 * We need to switch to kernel mode so that we can use __get_user
 * to safely read from kernel space.  Note that we now dump the
-- 
2.27.0



[PATCH v2] arm:traps: Don't print stack or raw PC/LR hex values in backtraces

2020-10-15 Thread Xiaoming Ni
Printing raw pointer values in backtraces has potential security
implications and are of questionable value anyway.

This patch follows x86 and arm64's lead and removes the "Exception stack:"
dump from kernel backtraces:
commit a25ffd3a6302a6 ("arm64: traps: Don't print stack or raw
 PC/LR values in backtraces")
commit bb5e5ce545f203 ("x86/dumpstack: Remove kernel text
 addresses from stack dump")

Signed-off-by: Xiaoming Ni 

---
v2:
 Delete [] from the stack according to the email discussion
 in patch V1, Other information processing will be discussed in subsequent
 patches.

v1: https://lore.kernel.org/lkml/20201009075957.110017-1-nixiaom...@huawei.com/
  1. Don't print stack or raw PC/LR hex values in backtraces
  2. Don't print stack mem in backtraces
  3. if (!panic_on_oops), Don't print stack mem in __die()
---
 arch/arm/kernel/process.c | 3 +--
 arch/arm/kernel/traps.c   | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 8e6ace03e960..71c9e5597d39 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -121,8 +121,7 @@ void __show_regs(struct pt_regs *regs)
 
printk("PC is at %pS\n", (void *)instruction_pointer(regs));
printk("LR is at %pS\n", (void *)regs->ARM_lr);
-   printk("pc : [<%08lx>]lr : [<%08lx>]psr: %08lx\n",
-  regs->ARM_pc, regs->ARM_lr, regs->ARM_cpsr);
+   printk("psr: %08lx\n", regs->ARM_cpsr);
printk("sp : %08lx  ip : %08lx  fp : %08lx\n",
   regs->ARM_sp, regs->ARM_ip, regs->ARM_fp);
printk("r10: %08lx  r9 : %08lx  r8 : %08lx\n",
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 17d5a785df28..911bbf164875 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -68,8 +68,8 @@ void dump_backtrace_entry(unsigned long where, unsigned long 
from,
unsigned long end = frame + 4 + sizeof(struct pt_regs);
 
 #ifdef CONFIG_KALLSYMS
-   printk("%s[<%08lx>] (%ps) from [<%08lx>] (%pS)\n",
-   loglvl, where, (void *)where, from, (void *)from);
+   printk("%s (%ps) from (%pS)\n",
+   loglvl, (void *)where, (void *)from);
 #else
printk("%sFunction entered at [<%08lx>] from [<%08lx>]\n",
loglvl, where, from);
-- 
2.27.0



Re: [PATCH] arm:traps: Don't print stack or raw PC/LR values in backtraces

2020-10-11 Thread Xiaoming Ni

On 2020/10/12 5:32, Russell King - ARM Linux admin wrote:

On Fri, Oct 09, 2020 at 10:18:20AM +0200, Sebastian Andrzej Siewior wrote:

On 2020-10-09 09:08:50 [+0100], Russell King - ARM Linux admin wrote:

I am really not happy about this - it hurts at least my ability to
debug the kernel when people post oopses to the mailing list. If
people wish to make the kernel harder to debug, and are prepared
to be told "your kernel is undebuggable" then this patch is fine.


I haven't look at the patch but don't they keep/add the representation:
   PC: symbol+offset/size
   LR: symbol+offset/size

? This is needed at very least as a replacement for the missing address.


I don't have a problem getting rid of the hex numbers in [< >]
although then I will need to convert the symbol back to an address
using the vmlinux to then calculate its address to then find the
appropriate place in the objdump output - because objdump does
_not_ use the symbol+offset annotation.  Yes, I really do look up
the numeric addresses in the objdump output to then read the
disassembly.

$ objdump -d vmlinux | less

and then search for the address is the fastest and most convenient
way for me rather than having to deal with some random script.

Maybe I'm just antequated about how I do my debugging, but this
seems to me to be the most efficient and fastest way.

The loading address of the kernel module is not fixed, so symbol+offset 
is more useful than a hexadecimal address when the call stack contains 
kernel module symbols.
Delete the PC/LR address and retain the sysbol+offset. The kernel can 
still be debugged.


Thanks
Xiaoming Ni


Re: [PATCH] arm:traps: Don't print stack or raw PC/LR values in backtraces

2020-10-09 Thread Xiaoming Ni

On 2020/10/9 16:18, Sebastian Andrzej Siewior wrote:

On 2020-10-09 09:08:50 [+0100], Russell King - ARM Linux admin wrote:

I am really not happy about this - it hurts at least my ability to
debug the kernel when people post oopses to the mailing list. If


In the reset scenario, dump_mem is retained:

@@ -125,6 +118,9 @@ static void dump_mem(const char *lvl, const char 
*str, unsigned long bottom,

mm_segment_t fs;
int i;

+ /* Do not print virtual addresses in non-reset scenarios */
+ if (!panic_on_oops)
+ return;



people wish to make the kernel harder to debug, and are prepared
to be told "your kernel is undebuggable" then this patch is fine.


I haven't look at the patch but don't they keep/add the representation:
   PC: symbol+offset/size
   LR: symbol+offset/size

? This is needed at very least as a replacement for the missing address.


Yes, only %08lx was deleted, but %ps is still retained.

-   printk("%s[<%08lx>] (%ps) from [<%08lx>] (%pS)\n",
-   loglvl, where, (void *)where, from, (void *)from);
+   printk("%s (%ps) from (%pS)\n",
+   loglvl, (void *)where, (void *)from);

Thanks
Xiaoming Ni



[PATCH] efi:mokvar-table: fix build error

2020-10-09 Thread Xiaoming Ni
drivers/firmware/efi/mokvar-table.c:139:5: error: implicit declaration of
 function 'early_memunmap'; did you mean 'devm_memunmap'?
 [-Werror=implicit-function-declaration]
drivers/firmware/efi/mokvar-table.c:148:9: error: implicit declaration of
 function 'early_memremap'; did you mean 'devm_memremap'?
 [-Werror=implicit-function-declaration]

add #include  to fix this build error

Signed-off-by: Xiaoming Ni 
---
 drivers/firmware/efi/mokvar-table.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/firmware/efi/mokvar-table.c 
b/drivers/firmware/efi/mokvar-table.c
index 72a9e1736fef..c23d3686f1c7 100644
--- a/drivers/firmware/efi/mokvar-table.c
+++ b/drivers/firmware/efi/mokvar-table.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * The LINUX_EFI_MOK_VARIABLE_TABLE_GUID config table is a packed
-- 
2.27.0



[PATCH] arm:traps: Don't print stack or raw PC/LR values in backtraces

2020-10-09 Thread Xiaoming Ni
Printing raw pointer values in backtraces has potential security
implications and are of questionable value anyway.

This patch follows x86 and arm64's lead and removes the "Exception stack:"
dump from kernel backtraces:
commit a25ffd3a6302a6 ("arm64: traps: Don't print stack or raw
 PC/LR values in backtraces")
commit 0ee1dd9f5e7eae ("x86/dumpstack: Remove raw stack dump")
commit bb5e5ce545f203 ("x86/dumpstack: Remove kernel text
 addresses from stack dump")

Signed-off-by: Xiaoming Ni 
---
 arch/arm/kernel/process.c |  3 +--
 arch/arm/kernel/traps.c   | 12 +---
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 8e6ace03e960..71c9e5597d39 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -121,8 +121,7 @@ void __show_regs(struct pt_regs *regs)
 
printk("PC is at %pS\n", (void *)instruction_pointer(regs));
printk("LR is at %pS\n", (void *)regs->ARM_lr);
-   printk("pc : [<%08lx>]lr : [<%08lx>]psr: %08lx\n",
-  regs->ARM_pc, regs->ARM_lr, regs->ARM_cpsr);
+   printk("psr: %08lx\n", regs->ARM_cpsr);
printk("sp : %08lx  ip : %08lx  fp : %08lx\n",
   regs->ARM_sp, regs->ARM_ip, regs->ARM_fp);
printk("r10: %08lx  r9 : %08lx  r8 : %08lx\n",
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 17d5a785df28..b0b188e01070 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -60,23 +60,18 @@ static int __init user_debug_setup(char *str)
 __setup("user_debug=", user_debug_setup);
 #endif
 
-static void dump_mem(const char *, const char *, unsigned long, unsigned long);
-
 void dump_backtrace_entry(unsigned long where, unsigned long from,
  unsigned long frame, const char *loglvl)
 {
unsigned long end = frame + 4 + sizeof(struct pt_regs);
 
 #ifdef CONFIG_KALLSYMS
-   printk("%s[<%08lx>] (%ps) from [<%08lx>] (%pS)\n",
-   loglvl, where, (void *)where, from, (void *)from);
+   printk("%s (%ps) from (%pS)\n",
+   loglvl, (void *)where, (void *)from);
 #else
printk("%sFunction entered at [<%08lx>] from [<%08lx>]\n",
loglvl, where, from);
 #endif
-
-   if (in_entry_text(from) && end <= ALIGN(frame, THREAD_SIZE))
-   dump_mem(loglvl, "Exception stack", frame + 4, end);
 }
 
 void dump_backtrace_stm(u32 *stack, u32 instruction, const char *loglvl)
@@ -125,6 +120,9 @@ static void dump_mem(const char *lvl, const char *str, 
unsigned long bottom,
mm_segment_t fs;
int i;
 
+   /* Do not print virtual addresses in non-reset scenarios */
+   if (!panic_on_oops)
+   return;
/*
 * We need to switch to kernel mode so that we can use __get_user
 * to safely read from kernel space.  Note that we now dump the
-- 
2.27.0



Re: Question: Why is there no notification when a file is opened using filp_open()?

2020-09-09 Thread Xiaoming Ni

On 2020/9/9 11:44, Amir Goldstein wrote:

On Tue, Sep 8, 2020 at 8:19 PM Matthew Wilcox  wrote:


On Tue, Sep 08, 2020 at 04:18:29PM +0300, Amir Goldstein wrote:

On Tue, Sep 8, 2020 at 3:53 PM Xiaoming Ni  wrote:

For example, in fs/coredump.c, do_coredump() calls filp_open() to
generate core files.
In this scenario, the fsnotify_open() notification is missing.


I am not convinced that we should generate an event.
You will have to explain in what is the real world use case that requires this
event to be generated.


Take the typical usage for fsnotify of a graphical file manager.
It would be nice if the file manager showed a corefile as soon as it
appeared in a directory rather than waiting until some other operation
in that directory caused those directory contents to be refreshed.


fsnotify_open() is not the correct notification for file managers IMO.
fsnotify_create() is and it will be called in this case.

If the reason you are interested in open events is because you want
to monitor the entire filesystem then welcome to the future -
FAN_CREATE is supported since kernel v5.1.

Is there another real life case you have in mind where you think users
should be able to get an open fd for a file that the kernel has opened?
Because that is what FAN_OPEN will do.



There are also cases where file is opened in read-only mode using 
filp_open().


case1: nfsd4_init_recdir() call filp_open()
filp_open()
nfsd4_init_recdir() fs/nfsd/nfs4recover.c#L543

L70: static char user_recovery_dirname[PATH_MAX] = 
"/var/lib/nfs/v4recovery";
L543: nn->rec_file = filp_open(user_recovery_dirname, O_RDONLY | 
O_DIRECTORY, 0);



case2: ima_read_policy()
filp_open()
kernel_read_file_from_path()  fs/exec.c#L1004
ima_read_policy()  security/integrity/ima/ima_fs.c#L286
ima_write_policy() security/integrity/ima/ima_fs.c#L335
ima_measure_policy_ops   security/integrity/ima/ima_fs.c#L443
sys_write()

case3: use do_file_open_root() to open file
do_file_open_root()
file_open_root()   fs/open.c#L1159
kernel_read_file_from_path_initns()  fs/exec.c#L1029
fw_get_filesystem_firmware()  drivers/base/firmware_loader/main.c#L498

Do we need to add fsnotify_open() in these scenarios?

Thanks
Xiaoming Ni


Re: Question: Why is there no notification when a file is opened using filp_open()?

2020-09-08 Thread Xiaoming Ni

On 2020/9/8 18:06, Amir Goldstein wrote:

On Tue, Sep 8, 2020 at 11:02 AM Xiaoming Ni  wrote:


The file opening action on the system may be from user-mode sys_open()
or kernel-mode filp_open().
Currently, fsnotify_open() is invoked in do_sys_openat2().
But filp_open() is not notified. Why? Is this an omission?

Do we need to call fsnotify_open() in filp_open() or  do_filp_open() to
ensure that both user-mode and kernel-mode file opening operations can
be notified?



Do you have a specific use case of kernel filp_open() in mind?



For example, in fs/coredump.c, do_coredump() calls filp_open() to 
generate core files.

In this scenario, the fsnotify_open() notification is missing.

Thanks
Xiaoming Ni



Question: Why is there no notification when a file is opened using filp_open()?

2020-09-08 Thread Xiaoming Ni
The file opening action on the system may be from user-mode sys_open() 
or kernel-mode filp_open().

Currently, fsnotify_open() is invoked in do_sys_openat2().
But filp_open() is not notified. Why? Is this an omission?

Do we need to call fsnotify_open() in filp_open() or  do_filp_open() to 
ensure that both user-mode and kernel-mode file opening operations can 
be notified?


Thanks
Xiaoming Ni


ping//Re: [PATCH] security: fix some spelling mistakes in the comments by codespell

2020-08-30 Thread Xiaoming Ni

ping

On 2020/8/22 11:05, Xiaoming Ni wrote:

ecurity/commoncap.c: capabilties ==> capabilities
security/lsm_audit.c: programers ==> programmers
security/tomoyo/audit.c: stuct ==> struct
security/tomoyo/common.c: Poiter ==> Pointer
security/smack/smack_lsm.c: overriden ==> overridden overridden
security/smack/smackfs.c: overriden ==> overridden
security/integrity/ima/ima_template_lib.c: algoritm ==> algorithm

Signed-off-by: Xiaoming Ni 
---
  security/commoncap.c  | 2 +-
  security/integrity/ima/ima_template_lib.c | 2 +-
  security/lsm_audit.c  | 2 +-
  security/smack/smack_lsm.c| 2 +-
  security/smack/smackfs.c  | 2 +-
  security/tomoyo/audit.c   | 2 +-
  security/tomoyo/common.c  | 2 +-
  7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/security/commoncap.c b/security/commoncap.c
index 59bf3c1674c8..2c3a0f1b6876 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -1046,7 +1046,7 @@ int cap_task_fix_setuid(struct cred *new, const struct 
cred *old, int flags)
break;
  
  	case LSM_SETID_FS:

-   /* juggle the capabilties to follow FSUID changes, unless
+   /* juggle the capabilities to follow FSUID changes, unless
 * otherwise suppressed
 *
 * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
diff --git a/security/integrity/ima/ima_template_lib.c 
b/security/integrity/ima/ima_template_lib.c
index c022ee9e2a4e..9513564ee0b2 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -231,7 +231,7 @@ static int ima_eventdigest_init_common(const u8 *digest, 
u32 digestsize,
 * digest formats:
 *  - DATA_FMT_DIGEST: digest
 *  - DATA_FMT_DIGEST_WITH_ALGO: [] + ':' + '\0' + digest,
-*where  is provided if the hash algoritm is not
+*where  is provided if the hash algorithm is not
 *SHA1 or MD5
 */
u8 buffer[CRYPTO_MAX_ALG_NAME + 2 + IMA_MAX_DIGEST_SIZE] = { 0 };
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 53d0d183db8f..a0ff2e441069 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -212,7 +212,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
char comm[sizeof(current->comm)];
  
  	/*

-* To keep stack sizes in check force programers to notice if they
+* To keep stack sizes in check force programmers to notice if they
 * start making this union too large!  See struct lsm_network_audit
 * as an example of how to deal with large data.
 */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 8c0893eb5aa8..960d4641239c 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1784,7 +1784,7 @@ static int smack_file_send_sigiotask(struct task_struct 
*tsk,
 */
file = container_of(fown, struct file, f_owner);
  
-	/* we don't log here as rc can be overriden */

+   /* we don't log here as rc can be overridden */
blob = smack_file(file);
skp = *blob;
rc = smk_access(skp, tkp, MAY_DELIVER, NULL);
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 9c4308077574..5c581ec814ac 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -111,7 +111,7 @@ struct smack_known *smack_syslog_label;
  /*
   * Ptrace current rule
   * SMACK_PTRACE_DEFAULTregular smack ptrace rules (/proc based)
- * SMACK_PTRACE_EXACT  labels must match, but can be overriden with
+ * SMACK_PTRACE_EXACT  labels must match, but can be overridden with
   *   CAP_SYS_PTRACE
   * SMACK_PTRACE_DRACONIAN  lables must match, CAP_SYS_PTRACE has no effect
   */
diff --git a/security/tomoyo/audit.c b/security/tomoyo/audit.c
index 3c96e8402e94..b51bad121c11 100644
--- a/security/tomoyo/audit.c
+++ b/security/tomoyo/audit.c
@@ -311,7 +311,7 @@ static LIST_HEAD(tomoyo_log);
  /* Lock for "struct list_head tomoyo_log". */
  static DEFINE_SPINLOCK(tomoyo_log_lock);
  
-/* Length of "stuct list_head tomoyo_log". */

+/* Length of "struct list_head tomoyo_log". */
  static unsigned int tomoyo_log_count;
  
  /**

diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index 4bee32bfe16d..38bdc0ffc312 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -2608,7 +2608,7 @@ ssize_t tomoyo_read_control(struct tomoyo_io_buffer 
*head, char __user *buffer,
  /**
   * tomoyo_parse_policy - Parse a policy line.
   *
- * @head: Poiter to "struct tomoyo_io_buffer".
+ * @head: Pointer to "struct tomoyo_io_buffer".
   * @line: Line to parse.
   *
   * Returns 0 on success, negative value otherwise.






[PATCH] arm64: fix some spelling mistakes in the comments by codespell

2020-08-27 Thread Xiaoming Ni
arch/arm64/include/asm/cpu_ops.h:24: necesary ==> necessary
arch/arm64/include/asm/kvm_arm.h:69: maintainance ==> maintenance
arch/arm64/include/asm/cpufeature.h:361: capabilties ==> capabilities
arch/arm64/kernel/perf_regs.c:19: compatability ==> compatibility
arch/arm64/kernel/smp_spin_table.c:86: endianess ==> endianness
arch/arm64/kernel/smp_spin_table.c:88: endianess ==> endianness
arch/arm64/kvm/vgic/vgic-mmio-v3.c:1004: targetting ==> targeting
arch/arm64/kvm/vgic/vgic-mmio-v3.c:1005: targetting ==> targeting

Signed-off-by: Xiaoming Ni 
---
 arch/arm64/include/asm/cpu_ops.h| 2 +-
 arch/arm64/include/asm/cpufeature.h | 2 +-
 arch/arm64/include/asm/kvm_arm.h| 2 +-
 arch/arm64/kernel/perf_regs.c   | 2 +-
 arch/arm64/kernel/smp_spin_table.c  | 4 ++--
 arch/arm64/kvm/vgic/vgic-mmio-v3.c  | 4 ++--
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/cpu_ops.h b/arch/arm64/include/asm/cpu_ops.h
index d28e8f37d3b4..e95c4df83911 100644
--- a/arch/arm64/include/asm/cpu_ops.h
+++ b/arch/arm64/include/asm/cpu_ops.h
@@ -21,7 +21,7 @@
  * mechanism for doing so, tests whether it is possible to boot
  * the given CPU.
  * @cpu_boot:  Boots a cpu into the kernel.
- * @cpu_postboot: Optionally, perform any post-boot cleanup or necesary
+ * @cpu_postboot: Optionally, perform any post-boot cleanup or necessary
  * synchronisation. Called from the cpu being booted.
  * @cpu_can_disable: Determines whether a CPU can be disabled based on
  * mechanism-specific information.
diff --git a/arch/arm64/include/asm/cpufeature.h 
b/arch/arm64/include/asm/cpufeature.h
index 89b4f0142c28..3a42dc8e697c 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -358,7 +358,7 @@ static inline int cpucap_default_scope(const struct 
arm64_cpu_capabilities *cap)
 }
 
 /*
- * Generic helper for handling capabilties with multiple (match,enable) pairs
+ * Generic helper for handling capabilities with multiple (match,enable) pairs
  * of call backs, sharing the same capability bit.
  * Iterate over each entry to see if at least one matches.
  */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 51c1d9918999..21f91aebc052 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -66,7 +66,7 @@
  * TWI:Trap WFI
  * TIDCP:  Trap L2CTLR/L2ECTLR
  * BSU_IS: Upgrade barriers to the inner shareable domain
- * FB: Force broadcast of all maintainance operations
+ * FB: Force broadcast of all maintenance operations
  * AMO:Override CPSR.A and enable signaling with VA
  * IMO:Override CPSR.I and enable signaling with VI
  * FMO:Override CPSR.F and enable signaling with VF
diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c
index 666b225aeb3a..94e8718e7229 100644
--- a/arch/arm64/kernel/perf_regs.c
+++ b/arch/arm64/kernel/perf_regs.c
@@ -16,7 +16,7 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
 
/*
 * Our handling of compat tasks (PERF_SAMPLE_REGS_ABI_32) is weird, but
-* we're stuck with it for ABI compatability reasons.
+* we're stuck with it for ABI compatibility reasons.
 *
 * For a 32-bit consumer inspecting a 32-bit task, then it will look at
 * the first 16 registers (see arch/arm/include/uapi/asm/perf_regs.h).
diff --git a/arch/arm64/kernel/smp_spin_table.c 
b/arch/arm64/kernel/smp_spin_table.c
index c8a3fee00c11..5892e79fa429 100644
--- a/arch/arm64/kernel/smp_spin_table.c
+++ b/arch/arm64/kernel/smp_spin_table.c
@@ -83,9 +83,9 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu)
 
/*
 * We write the release address as LE regardless of the native
-* endianess of the kernel. Therefore, any boot-loaders that
+* endianness of the kernel. Therefore, any boot-loaders that
 * read this address need to convert this address to the
-* boot-loader's endianess before jumping. This is mandated by
+* boot-loader's endianness before jumping. This is mandated by
 * the boot protocol.
 */
writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr);
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c 
b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 5c786b915cd3..52d6f24f65dc 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -1001,8 +1001,8 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, 
bool allow_group1)
raw_spin_lock_irqsave(>irq_lock, flags);
 
/*
-* An access targetting Group0 SGIs can only generate
-* those, while an access targetting Group1 SGIs can
+* An access targeting Group0 SGIs can only generate
+* those, while an acces

[PATCH] security: fix some spelling mistakes in the comments by codespell

2020-08-21 Thread Xiaoming Ni
ecurity/commoncap.c: capabilties ==> capabilities
security/lsm_audit.c: programers ==> programmers
security/tomoyo/audit.c: stuct ==> struct
security/tomoyo/common.c: Poiter ==> Pointer
security/smack/smack_lsm.c: overriden ==> overridden overridden
security/smack/smackfs.c: overriden ==> overridden
security/integrity/ima/ima_template_lib.c: algoritm ==> algorithm

Signed-off-by: Xiaoming Ni 
---
 security/commoncap.c  | 2 +-
 security/integrity/ima/ima_template_lib.c | 2 +-
 security/lsm_audit.c  | 2 +-
 security/smack/smack_lsm.c| 2 +-
 security/smack/smackfs.c  | 2 +-
 security/tomoyo/audit.c   | 2 +-
 security/tomoyo/common.c  | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/security/commoncap.c b/security/commoncap.c
index 59bf3c1674c8..2c3a0f1b6876 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -1046,7 +1046,7 @@ int cap_task_fix_setuid(struct cred *new, const struct 
cred *old, int flags)
break;
 
case LSM_SETID_FS:
-   /* juggle the capabilties to follow FSUID changes, unless
+   /* juggle the capabilities to follow FSUID changes, unless
 * otherwise suppressed
 *
 * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
diff --git a/security/integrity/ima/ima_template_lib.c 
b/security/integrity/ima/ima_template_lib.c
index c022ee9e2a4e..9513564ee0b2 100644
--- a/security/integrity/ima/ima_template_lib.c
+++ b/security/integrity/ima/ima_template_lib.c
@@ -231,7 +231,7 @@ static int ima_eventdigest_init_common(const u8 *digest, 
u32 digestsize,
 * digest formats:
 *  - DATA_FMT_DIGEST: digest
 *  - DATA_FMT_DIGEST_WITH_ALGO: [] + ':' + '\0' + digest,
-*where  is provided if the hash algoritm is not
+*where  is provided if the hash algorithm is not
 *SHA1 or MD5
 */
u8 buffer[CRYPTO_MAX_ALG_NAME + 2 + IMA_MAX_DIGEST_SIZE] = { 0 };
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 53d0d183db8f..a0ff2e441069 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -212,7 +212,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
char comm[sizeof(current->comm)];
 
/*
-* To keep stack sizes in check force programers to notice if they
+* To keep stack sizes in check force programmers to notice if they
 * start making this union too large!  See struct lsm_network_audit
 * as an example of how to deal with large data.
 */
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 8c0893eb5aa8..960d4641239c 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1784,7 +1784,7 @@ static int smack_file_send_sigiotask(struct task_struct 
*tsk,
 */
file = container_of(fown, struct file, f_owner);
 
-   /* we don't log here as rc can be overriden */
+   /* we don't log here as rc can be overridden */
blob = smack_file(file);
skp = *blob;
rc = smk_access(skp, tkp, MAY_DELIVER, NULL);
diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c
index 9c4308077574..5c581ec814ac 100644
--- a/security/smack/smackfs.c
+++ b/security/smack/smackfs.c
@@ -111,7 +111,7 @@ struct smack_known *smack_syslog_label;
 /*
  * Ptrace current rule
  * SMACK_PTRACE_DEFAULTregular smack ptrace rules (/proc based)
- * SMACK_PTRACE_EXACT  labels must match, but can be overriden with
+ * SMACK_PTRACE_EXACT  labels must match, but can be overridden with
  *CAP_SYS_PTRACE
  * SMACK_PTRACE_DRACONIAN  lables must match, CAP_SYS_PTRACE has no effect
  */
diff --git a/security/tomoyo/audit.c b/security/tomoyo/audit.c
index 3c96e8402e94..b51bad121c11 100644
--- a/security/tomoyo/audit.c
+++ b/security/tomoyo/audit.c
@@ -311,7 +311,7 @@ static LIST_HEAD(tomoyo_log);
 /* Lock for "struct list_head tomoyo_log". */
 static DEFINE_SPINLOCK(tomoyo_log_lock);
 
-/* Length of "stuct list_head tomoyo_log". */
+/* Length of "struct list_head tomoyo_log". */
 static unsigned int tomoyo_log_count;
 
 /**
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index 4bee32bfe16d..38bdc0ffc312 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -2608,7 +2608,7 @@ ssize_t tomoyo_read_control(struct tomoyo_io_buffer 
*head, char __user *buffer,
 /**
  * tomoyo_parse_policy - Parse a policy line.
  *
- * @head: Poiter to "struct tomoyo_io_buffer".
+ * @head: Pointer to "struct tomoyo_io_buffer".
  * @line: Line to parse.
  *
  * Returns 0 on success, negative value otherwise.
-- 
2.27.0



[PATCH v2] s390: fix build error for sys_call_table_emu

2020-06-18 Thread Xiaoming Ni
Build error on s390:
arch/s390/kernel/entry.o: in function `sys_call_table_emu':
>> (.rodata+0x1288): undefined reference to `__s390_'

In commit ("All arch: remove system call sys_sysctl")
 148  commonfdatasync   sys_fdatasync   
sys_fdatasync
-149  common_sysctl sys_sysctl  
compat_sys_sysctl
+149  common_sysctl sys_ni_syscall
 150  commonmlock   sys_mlock   
sys_mlock

After the patch is integrated, there is a format error in the generated
arch/s390/include/generated/asm/syscall_table.h:
SYSCALL(sys_fdatasync, sys_fdatasync)
SYSCALL(sys_ni_syscall,) /* cause build error */
SYSCALL(sys_mlock,sys_mlock)

According to the guidance of Heiko Carstens, use "-" to fill the empty system 
call
 Similarly, modify tools/perf/arch/s390/entry/syscalls/syscall.tbl.

Fixes: ("All arch: remove system call sys_sysctl")
Fixes: 
https://lore.kernel.org/linuxppc-dev/20200616030734.87257-1-nixiaom...@huawei.com/
Reported-by: kernel test robot 
Signed-off-by: Xiaoming Ni 

changes in v2:
use "-" to fill the empty system call

v1: https://lore.kernel.org/lkml/20200618110320.104013-1-nixiaom...@huawei.com/
---
 arch/s390/kernel/syscalls/syscall.tbl   | 2 +-
 tools/perf/arch/s390/entry/syscalls/syscall.tbl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kernel/syscalls/syscall.tbl 
b/arch/s390/kernel/syscalls/syscall.tbl
index f17aaf6fe5de..04c34c2ed916 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -138,7 +138,7 @@
 146  commonwritev  sys_writev  
compat_sys_writev
 147  commongetsid  sys_getsid  
sys_getsid
 148  commonfdatasync   sys_fdatasync   
sys_fdatasync
-149  common_sysctl sys_ni_syscall
+149  common_sysctl -   -
 150  commonmlock   sys_mlock   
sys_mlock
 151  commonmunlock sys_munlock 
sys_munlock
 152  commonmlockallsys_mlockall
sys_mlockall
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl 
b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 0193f9b98753..29144b79a49d 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -138,7 +138,7 @@
 146  commonwritev  sys_writev  
compat_sys_writev
 147  commongetsid  sys_getsid  
sys_getsid
 148  commonfdatasync   sys_fdatasync   
sys_fdatasync
-149  common_sysctl sys_ni_syscall
+149  common_sysctl -   -
 150  commonmlock   sys_mlock   
compat_sys_mlock
 151  commonmunlock sys_munlock 
compat_sys_munlock
 152  commonmlockallsys_mlockall
sys_mlockall
-- 
2.27.0



Re: [PATCH] s390: fix build error for sys_call_table_emu

2020-06-18 Thread Xiaoming Ni

On 2020/6/18 19:27, Heiko Carstens wrote:

On Thu, Jun 18, 2020 at 07:03:20PM +0800, Xiaoming Ni wrote:

Build error on s390:
arch/s390/kernel/entry.o: in function `sys_call_table_emu':
>> (.rodata+0x1288): undefined reference to `__s390_'

In commit ("All arch: remove system call sys_sysctl")
  148  common   fdatasync   sys_fdatasync   
sys_fdatasync
-149  common_sysctl sys_sysctl  
compat_sys_sysctl
+149  common_sysctl sys_ni_syscall
  150  common   mlock   sys_mlock   
sys_mlock

After the patch is integrated, there is a format error in the generated
arch/s390/include/generated/asm/syscall_table.h:
SYSCALL(sys_fdatasync, sys_fdatasync)
SYSCALL(sys_ni_syscall,) /* cause build error */
SYSCALL(sys_mlock,sys_mlock)

There are holes in the system call number in
  arch/s390/kernel/syscalls/syscall.tbl. When generating syscall_table.h,
these hole numbers will be automatically filled with "NI_SYSCALL".
Therefore, delete the number 149 to fix the current compilation failure.
  Similarly, modify tools/perf/arch/s390/entry/syscalls/syscall.tbl.

Fixes: ("All arch: remove system call sys_sysctl")
Fixes: 
https://lore.kernel.org/linuxppc-dev/20200616030734.87257-1-nixiaom...@huawei.com/
Reported-by: kernel test robot 
Signed-off-by: Xiaoming Ni 
---
  arch/s390/kernel/syscalls/syscall.tbl   | 1 -
  tools/perf/arch/s390/entry/syscalls/syscall.tbl | 1 -
  2 files changed, 2 deletions(-)

diff --git a/arch/s390/kernel/syscalls/syscall.tbl 
b/arch/s390/kernel/syscalls/syscall.tbl
index f17aaf6fe5de..bcaf93994e3c 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -138,7 +138,6 @@
  146  common   writev  sys_writev  
compat_sys_writev
  147  common   getsid  sys_getsid  
sys_getsid
  148  common   fdatasync   sys_fdatasync   
sys_fdatasync
-149  common_sysctl sys_ni_syscall


This is not correct. It should be changed to:

149  common _sysctl -   -


thanks for your guidance


Otherwise the generated __NR__sysctl define will be lost from
unistd.h, which should not happen. Looking at the link above it
_looks_ like a similar mistake was done for arm64.

Using holes will cause the definition of __NR__sysctl to be missing in 
include/asm/unistd_32.h and include/asm/unistd_64.h


For arm64, I observed that "sys_afs_syscall", "sys_get_kernel_syms" and 
other commented out syscalls have no corresponding definition _NR_XXX in 
unistd.h, is it not a problem on arm64?


/* 127 was sys_create_module */
__SYSCALL(127, sys_ni_syscall)

/* 130 was sys_get_kernel_syms */
__SYSCALL(130, sys_ni_syscall)

/* 137 was sys_afs_syscall */
__SYSCALL(137, sys_ni_syscall)


Thanks
Xiaoming Ni




[PATCH] s390: fix build error for sys_call_table_emu

2020-06-18 Thread Xiaoming Ni
Build error on s390:
arch/s390/kernel/entry.o: in function `sys_call_table_emu':
>> (.rodata+0x1288): undefined reference to `__s390_'

In commit ("All arch: remove system call sys_sysctl")
 148  commonfdatasync   sys_fdatasync   
sys_fdatasync
-149  common_sysctl sys_sysctl  
compat_sys_sysctl
+149  common_sysctl sys_ni_syscall
 150  commonmlock   sys_mlock   
sys_mlock

After the patch is integrated, there is a format error in the generated
arch/s390/include/generated/asm/syscall_table.h:
SYSCALL(sys_fdatasync, sys_fdatasync)
SYSCALL(sys_ni_syscall,) /* cause build error */
SYSCALL(sys_mlock,sys_mlock)

There are holes in the system call number in
 arch/s390/kernel/syscalls/syscall.tbl. When generating syscall_table.h,
these hole numbers will be automatically filled with "NI_SYSCALL".
Therefore, delete the number 149 to fix the current compilation failure.
 Similarly, modify tools/perf/arch/s390/entry/syscalls/syscall.tbl.

Fixes: ("All arch: remove system call sys_sysctl")
Fixes: 
https://lore.kernel.org/linuxppc-dev/20200616030734.87257-1-nixiaom...@huawei.com/
Reported-by: kernel test robot 
Signed-off-by: Xiaoming Ni 
---
 arch/s390/kernel/syscalls/syscall.tbl   | 1 -
 tools/perf/arch/s390/entry/syscalls/syscall.tbl | 1 -
 2 files changed, 2 deletions(-)

diff --git a/arch/s390/kernel/syscalls/syscall.tbl 
b/arch/s390/kernel/syscalls/syscall.tbl
index f17aaf6fe5de..bcaf93994e3c 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -138,7 +138,6 @@
 146  commonwritev  sys_writev  
compat_sys_writev
 147  commongetsid  sys_getsid  
sys_getsid
 148  commonfdatasync   sys_fdatasync   
sys_fdatasync
-149  common_sysctl sys_ni_syscall
 150  commonmlock   sys_mlock   
sys_mlock
 151  commonmunlock sys_munlock 
sys_munlock
 152  commonmlockallsys_mlockall
sys_mlockall
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl 
b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 0193f9b98753..eb77d0d01d8f 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -138,7 +138,6 @@
 146  commonwritev  sys_writev  
compat_sys_writev
 147  commongetsid  sys_getsid  
sys_getsid
 148  commonfdatasync   sys_fdatasync   
sys_fdatasync
-149  common_sysctl sys_ni_syscall
 150  commonmlock   sys_mlock   
compat_sys_mlock
 151  commonmunlock sys_munlock 
compat_sys_munlock
 152  commonmlockallsys_mlockall
sys_mlockall
-- 
2.27.0



Re: [PATCH 0/3] Convert nsproxy, groups, and creds to refcount_t

2020-06-14 Thread Xiaoming Ni

On 2020/6/13 2:34, Kees Cook wrote:

This series was never applied[1], and was recently pointed out as
missing[2]. If someone has a tree for this, please take it. Otherwise,
please Ack and I'll send it to Linus.

Thanks!

-Kees

[1] 
https://lore.kernel.org/lkml/20190306110549.7628-1-elena.reshet...@intel.com/
[2] 
https://lore.kernel.org/lkml/1591957695-118312-1-git-send-email-nixiaom...@huawei.com/

Elena Reshetova (3):
   nsproxy: convert nsproxy.count to refcount_t
   groups: convert group_info.usage to refcount_t
   creds: convert cred.usage to refcount_t

  include/linux/cred.h| 15 +++---
  include/linux/nsproxy.h |  7 +++
  kernel/cred.c   | 44 -
  kernel/groups.c |  2 +-
  kernel/nsproxy.c|  6 +++---
  net/sunrpc/auth.c   |  2 +-
  6 files changed, 38 insertions(+), 38 deletions(-)



Should mm->mm_users also be replaced by refcount_t?

In addition, is it better to change all variables that use 
atomic_dec_and_test to control the release process to refconut_t?


Thanks
Xiaoming Ni



Re: [PATCH 3/3] creds: convert cred.usage to refcount_t

2020-06-14 Thread Xiaoming Ni

On 2020/6/13 2:34, Kees Cook wrote:

From: Elena Reshetova 

atomic_t variables are currently used to implement reference
counters with the following properties:
  - counter is initialized to 1 using atomic_set()
  - a resource is freed upon counter reaching zero
  - once counter reaches zero, its further
increments aren't allowed
  - counter schema uses basic atomic operations
(set, inc, inc_not_zero, dec_and_test, etc.)

Such atomic variables should be converted to a newly provided
refcount_t type and API that prevents accidental counter overflows
and underflows. This is important since overflows and underflows
can lead to use-after-free situation and be exploitable.

The variable cred.usage is used as pure reference counter.
Convert it to refcount_t and fix up the operations.

**Important note for maintainers:

Some functions from refcount_t API defined in lib/refcount.c
have different memory ordering guarantees than their atomic
counterparts.Please check Documentation/core-api/refcount-vs-atomic.rst
for more information.

Normally the differences should not matter since refcount_t provides
enough guarantees to satisfy the refcounting use cases, but in
some rare cases it might matter.
Please double check that you don't have some undocumented
memory guarantees for this variable usage.

For the cred.usage it might make a difference
in following places:
  - get_task_cred(): increment in refcount_inc_not_zero() only
guarantees control dependency on success vs. fully ordered
atomic counterpart
  - put_cred(): decrement in refcount_dec_and_test() only
provides RELEASE ordering and ACQUIRE ordering on success
vs. fully ordered atomic counterpart

Suggested-by: Kees Cook 
Signed-off-by: Elena Reshetova 
Reviewed-by: David Windsor 
Reviewed-by: Hans Liljestrand 
Link: https://lore.kernel.org/r/20190306110549.7628-4-elena.reshet...@intel.com
Signed-off-by: Kees Cook 


Currently this patch is better than my RFC patch
Looks good to me.

Thanks
Xiaoming Ni




[PATCH RFC] cred: Add WARN to detect wrong use of get/put_cred

2020-06-12 Thread Xiaoming Ni
Cred release and usage check code flow:
1. put_cred()
if (atomic_dec_and_test(&(cred)->usage))
__put_cred(cred);

2. __put_cred()
BUG_ON(atomic_read(>usage) != 0);
call_rcu(>rcu, put_cred_rcu);

3. put_cred_rcu()
if (atomic_read(>usage) != 0)
panic("CRED: put_cred_rcu() sees %p with usage %d\n",
   cred, atomic_read(>usage));
kmem_cache_free(cred_jar, cred);

If panic is triggered on put_cred_rcu(), there are two possibilities
1. Call get_cred() after __put_cred(), usage > 0
2. Call put_cred() after __put_cred(), usage < 0
Since put_cred_rcu is an asynchronous behavior, it is no longer the first
scene when panic, there is no information about the murderer in the panic
call stack...

So, add WARN() in get_cred()/put_cred(), and pray to catch the murderer
at the first scene.

Signed-off-by: Xiaoming Ni 
---
 include/linux/cred.h | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 18639c0..c00d5a1 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -224,11 +224,16 @@ static inline bool cap_ambient_invariant_ok(const struct 
cred *cred)
  *
  * Get a reference on the specified set of new credentials.  The caller must
  * release the reference.
+ *
+ * Initialize usage to 1 during cred resource allocation,
+ * so when calling get_cred, usage cannot be 0.
  */
 static inline struct cred *get_new_cred(struct cred *cred)
 {
-   atomic_inc(>usage);
-   return cred;
+   if (atomic_inc_not_zero(>usage))
+   return cred;
+   WARN(1, "get_new_cred after __put_cred");
+   return NULL;
 }
 
 /**
@@ -280,11 +285,14 @@ static inline const struct cred *get_cred_rcu(const 
struct cred *cred)
 static inline void put_cred(const struct cred *_cred)
 {
struct cred *cred = (struct cred *) _cred;
+   int usage;
 
if (cred) {
validate_creds(cred);
-   if (atomic_dec_and_test(&(cred)->usage))
+   usage = atomic_dec_return(&(cred)->usage);
+   if (usage == 0)
__put_cred(cred);
+   WARN(usage < 0, "put_cred after __put_cred");
}
 }
 
-- 
1.8.5.6



Re: [PATCH] sysctl: Delete the code of sys_sysctl

2020-06-10 Thread Xiaoming Ni

On 2020/6/10 3:20, Eric W. Biederman wrote:

Xiaoming Ni  writes:


Since the commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"),
sys_sysctl has lost its actual role: any input can only return an error.


The remaining code does have a role.  It reports programs that attempt
to use the removed sysctl.

It would help if your change description had a reason why we don't want
to warn people that a program has used a removed system call.

Probably something like:

   We have been warning about people using the sysctl system call for years
   and believe there are no more users.  Even if there are users of this
   interface if they have not complained or fixed their code by now they
   probably are not going to, so there is no point in warning them any
   longer.

With a change like that made to the patch description.

Acked-by: "Eric W. Biederman" 


Thanks for your guidance
I will add it in the v2 version

Thanks
Xiaoming Ni




Re: [PATCH] sysctl: Delete the code of sys_sysctl

2020-06-10 Thread Xiaoming Ni

On 2020/6/9 23:40, Kees Cook wrote:

On Tue, Jun 09, 2020 at 02:20:05PM +0800, Xiaoming Ni wrote:

Since the commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"),
sys_sysctl has lost its actual role: any input can only return an error.

Delete the code and return -ENOSYS directly at the function entry

Signed-off-by: Xiaoming Ni 


Looks right to me.

Reviewed-by: Kees Cook 

Should this be taken a step further and just remove the syscall entirely
and update the per-arch tables with the ENOSYS hole?

-Kees


Searching for git log, I found a commit record that deleted syscall:
commit f5b94099739722 ("All Arch: remove linkage for sys_nfsservctl 
system call"). Could I use sys_ni_syscall to implement the hole as in 
the example here?

E.g:
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 7b3832d..f36fda6 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -162,7 +162,7 @@
 146common  writev  sys_writev
 147common  getsid  sys_getsid
 148common  fdatasync   sys_fdatasync
-149common  _sysctl sys_sysctl
+149  common  _sysctl sys_ni_syscall
 150common  mlock   sys_mlock
 151common  munlock sys_munlock
 152common  mlockallsys_mlockall
diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h

index f8dafe9..ca41bb7 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -308,8 +308,8 @@
 __SYSCALL(__NR_getsid, sys_getsid)
 #define __NR_fdatasync 148
 __SYSCALL(__NR_fdatasync, sys_fdatasync)
-#define __NR__sysctl 149
-__SYSCALL(__NR__sysctl, compat_sys_sysctl)
+ /* 149 was sys_sysctl */
+__SYSCALL(149, sys_ni_syscall)
 #define __NR_mlock 150
 __SYSCALL(__NR_mlock, sys_mlock)
 #define __NR_munlock 151


In this case, I need to modify a lot of code in v2. Can I add 
"Reviewed-by: Kees Cook " to the v2 patch?


Thanks
Xiaoming Ni



[PATCH] sysctl: Delete the code of sys_sysctl

2020-06-09 Thread Xiaoming Ni
Since the commit 61a47c1ad3a4dc ("sysctl: Remove the sysctl system call"),
sys_sysctl has lost its actual role: any input can only return an error.

Delete the code and return -ENOSYS directly at the function entry

Signed-off-by: Xiaoming Ni 
---
 kernel/sysctl_binary.c | 146 +
 1 file changed, 2 insertions(+), 144 deletions(-)

diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 7d550cc..41a88f8 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1,126 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
-#include 
 #include 
-#include "../fs/xfs/xfs_sysctl.h"
-#include 
-#include 
 #include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
 #include 
 
-static ssize_t binary_sysctl(const int *name, int nlen,
-   void __user *oldval, size_t oldlen, void __user *newval, size_t newlen)
-{
-   return -ENOSYS;
-}
-
-static void deprecated_sysctl_warning(const int *name, int nlen)
-{
-   int i;
-
-   /*
-* CTL_KERN/KERN_VERSION is used by older glibc and cannot
-* ever go away.
-*/
-   if (nlen >= 2 && name[0] == CTL_KERN && name[1] == KERN_VERSION)
-   return;
-
-   if (printk_ratelimit()) {
-   printk(KERN_INFO
-   "warning: process `%s' used the deprecated sysctl "
-   "system call with ", current->comm);
-   for (i = 0; i < nlen; i++)
-   printk(KERN_CONT "%d.", name[i]);
-   printk(KERN_CONT "\n");
-   }
-   return;
-}
-
-#define WARN_ONCE_HASH_BITS 8
-#define WARN_ONCE_HASH_SIZE (1<nlen. */
-   if (nlen < 0 || nlen > CTL_MAXNAME)
-   return -ENOTDIR;
-   /* Read in the sysctl name for simplicity */
-   for (i = 0; i < nlen; i++)
-   if (get_user(name[i], args_name + i))
-   return -EFAULT;
-
-   warn_on_bintable(name, nlen);
-
-   return binary_sysctl(name, nlen, oldval, oldlen, newval, newlen);
-}
-
 SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args)
 {
-   struct __sysctl_args tmp;
-   size_t oldlen = 0;
-   ssize_t result;
-
-   if (copy_from_user(, args, sizeof(tmp)))
-   return -EFAULT;
-
-   if (tmp.oldval && !tmp.oldlenp)
-   return -EFAULT;
-
-   if (tmp.oldlenp && get_user(oldlen, tmp.oldlenp))
-   return -EFAULT;
-
-   result = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, oldlen,
-  tmp.newval, tmp.newlen);
-
-   if (result >= 0) {
-   oldlen = result;
-   result = 0;
-   }
-
-   if (tmp.oldlenp && put_user(oldlen, tmp.oldlenp))
-   return -EFAULT;
-
-   return result;
+   return -ENOSYS;
 }
 
 
@@ -138,34 +23,7 @@ struct compat_sysctl_args {
 
 COMPAT_SYSCALL_DEFINE1(sysctl, struct compat_sysctl_args __user *, args)
 {
-   struct compat_sysctl_args tmp;
-   compat_size_t __user *compat_oldlenp;
-   size_t oldlen = 0;
-   ssize_t result;
-
-   if (copy_from_user(, args, sizeof(tmp)))
-   return -EFAULT;
-
-   if (tmp.oldval && !tmp.oldlenp)
-   return -EFAULT;
-
-   compat_oldlenp = compat_ptr(tmp.oldlenp);
-   if (compat_oldlenp && get_user(oldlen, compat_oldlenp))
-   return -EFAULT;
-
-   result = do_sysctl(compat_ptr(tmp.name), tmp.nlen,
-  compat_ptr(tmp.oldval), oldlen,
-  compat_ptr(tmp.newval), tmp.newlen);
-
-   if (result >= 0) {
-   oldlen = result;
-   result = 0;
-   }
-
-   if (compat_oldlenp && put_user(oldlen, compat_oldlenp))
-   return -EFAULT;
-
-   return result;
+   return -ENOSYS;
 }
 
 #endif /* CONFIG_COMPAT */
-- 
1.8.5.6



[PATCH] ASoC: max98390: fix build warning detected by -Wformat

2020-06-04 Thread Xiaoming Ni
Fix build warning:
sound/soc/codecs/max98390.c:781:3: warning: format '%ld' expects
argument of type 'long int', but argument 4 has type 'size_t {aka
const unsigned int}' [-Wformat=]

Signed-off-by: Xiaoming Ni 
---
 sound/soc/codecs/max98390.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/codecs/max98390.c b/sound/soc/codecs/max98390.c
index b9ce44d..884ee55 100644
--- a/sound/soc/codecs/max98390.c
+++ b/sound/soc/codecs/max98390.c
@@ -778,7 +778,7 @@ static int max98390_dsm_init(struct snd_soc_component 
*component)
}
 
dev_dbg(component->dev,
-   "max98390: param fw size %ld\n",
+   "max98390: param fw size %zu\n",
fw->size);
dsm_param = (char *)fw->data;
dsm_param += MAX98390_DSM_PAYLOAD_OFFSET;
-- 
1.8.5.6



Re: [PATCH 13/13] fs: move binfmt_misc sysctl to its own file

2020-06-04 Thread Xiaoming Ni

On 2020/5/29 15:41, Luis Chamberlain wrote:

This moves the binfmt_misc sysctl to its own file to help remove
clutter from kernel/sysctl.c.

Signed-off-by: Luis Chamberlain 
---
  fs/binfmt_misc.c | 1 +
  kernel/sysctl.c  | 7 ---
  2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index f69a043f562b..656b3f5f3bbf 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -821,6 +821,7 @@ static int __init init_misc_binfmt(void)
int err = register_filesystem(_fs_type);
if (!err)
insert_binfmt(_format);
+   register_sysctl_empty_subdir("fs", "binfmt_misc");
return err;
  }

build error when CONFIG_BINFMT_MISC=m

ERROR: modpost: "register_sysctl_empty_subdir" [fs/binfmt_misc.ko] 
undefined!


diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 27f0c9ea..4129dfb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2853,6 +2853,7 @@ void register_sysctl_empty_subdir(const char *base,
 {
register_sysctl_subdir(base, subdir, sysctl_mount_point);
 }
+EXPORT_SYMBOL_GPL(register_sysctl_empty_subdir);
 #endif /* CONFIG_SYSCTL */


Thanks
Xiaoming Ni




Re: [PATCH 11/13] random: simplify sysctl declaration with register_sysctl_subdir()

2020-05-29 Thread Xiaoming Ni

On 2020/5/29 18:26, Greg KH wrote:

On Fri, May 29, 2020 at 07:41:06AM +, Luis Chamberlain wrote:

From: Xiaoming Ni 

Move random_table sysctl from kernel/sysctl.c to drivers/char/random.c
and use register_sysctl_subdir() to help remove the clutter out of
kernel/sysctl.c.

Signed-off-by: Xiaoming Ni 
Signed-off-by: Luis Chamberlain 
---
  drivers/char/random.c  | 14 --
  include/linux/sysctl.h |  1 -
  kernel/sysctl.c|  5 -
  3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/char/random.c b/drivers/char/random.c
index a7cf6aa65908..73fd4b6e9c18 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2101,8 +2101,7 @@ static int proc_do_entropy(struct ctl_table *table, int 
write,
  }
  
  static int sysctl_poolsize = INPUT_POOL_WORDS * 32;

-extern struct ctl_table random_table[];
-struct ctl_table random_table[] = {
+static struct ctl_table random_table[] = {
{
.procname   = "poolsize",
.data   = _poolsize,
@@ -2164,6 +2163,17 @@ struct ctl_table random_table[] = {
  #endif
{ }
  };
+
+/*
+ * rand_initialize() is called before sysctl_init(),
+ * so we cannot call register_sysctl_init() in rand_initialize()
+ */
+static int __init random_sysctls_init(void)
+{
+   register_sysctl_subdir("kernel", "random", random_table);


No error checking?

:(

It was my mistake, I forgot to add a comment here.
Same as the comment of register_sysctl_init(),
There is almost no failure here during the system initialization phase,
and failure in time does not affect the main function.

Thanks
Xiaoming Ni





Re: [PATCH 09/13] firmware_loader: simplify sysctl declaration with register_sysctl_subdir()

2020-05-29 Thread Xiaoming Ni

On 2020/5/29 18:26, Greg KH wrote:

On Fri, May 29, 2020 at 07:41:04AM +, Luis Chamberlain wrote:

From: Xiaoming Ni 

Move the firmware config sysctl table to fallback_table.c and use the
new register_sysctl_subdir() helper. This removes the clutter from
kernel/sysctl.c.

Signed-off-by: Xiaoming Ni 
Signed-off-by: Luis Chamberlain 
---
  drivers/base/firmware_loader/fallback.c   |  4 
  drivers/base/firmware_loader/fallback.h   | 11 ++
  drivers/base/firmware_loader/fallback_table.c | 22 +--
  include/linux/sysctl.h|  1 -
  kernel/sysctl.c   |  7 --
  5 files changed, 35 insertions(+), 10 deletions(-)


So it now takes more lines than the old stuff?  :(


CONFIG_FW_LOADER = m
Before cleaning, no matter whether ko is loaded or not, the sysctl
interface will be created, but now we need to add register and
unregister interfaces, so the number of lines of code has increased



diff --git a/drivers/base/firmware_loader/fallback.c 
b/drivers/base/firmware_loader/fallback.c
index d9ac7296205e..8190653ae9a3 100644
--- a/drivers/base/firmware_loader/fallback.c
+++ b/drivers/base/firmware_loader/fallback.c
@@ -200,12 +200,16 @@ static struct class firmware_class = {
  
  int register_sysfs_loader(void)

  {
+   int ret = register_firmware_config_sysctl();
+   if (ret != 0)
+   return ret;


checkpatch :(

This is my fault,  thanks for your guidance




return class_register(_class);


And if that fails?

Yes, it is better to call register_firmware_config_sysctl() after 
class_register().

thanks for your guidance.



  }
  
  void unregister_sysfs_loader(void)

  {
class_unregister(_class);
+   unregister_firmware_config_sysctl();
  }
  
  static ssize_t firmware_loading_show(struct device *dev,

diff --git a/drivers/base/firmware_loader/fallback.h 
b/drivers/base/firmware_loader/fallback.h
index 06f4577733a8..7d2cb5f6ceb8 100644
--- a/drivers/base/firmware_loader/fallback.h
+++ b/drivers/base/firmware_loader/fallback.h
@@ -42,6 +42,17 @@ void fw_fallback_set_default_timeout(void);
  
  int register_sysfs_loader(void);

  void unregister_sysfs_loader(void);
+#ifdef CONFIG_SYSCTL
+extern int register_firmware_config_sysctl(void);
+extern void unregister_firmware_config_sysctl(void);
+#else
+static inline int register_firmware_config_sysctl(void)
+{
+   return 0;
+}
+static inline void unregister_firmware_config_sysctl(void) { }
+#endif /* CONFIG_SYSCTL */
+
  #else /* CONFIG_FW_LOADER_USER_HELPER */
  static inline int firmware_fallback_sysfs(struct firmware *fw, const char 
*name,
  struct device *device,
diff --git a/drivers/base/firmware_loader/fallback_table.c 
b/drivers/base/firmware_loader/fallback_table.c
index 46a731dede6f..4234aa5ee5df 100644
--- a/drivers/base/firmware_loader/fallback_table.c
+++ b/drivers/base/firmware_loader/fallback_table.c
@@ -24,7 +24,7 @@ struct firmware_fallback_config fw_fallback_config = {
  EXPORT_SYMBOL_NS_GPL(fw_fallback_config, FIRMWARE_LOADER_PRIVATE);
  
  #ifdef CONFIG_SYSCTL

-struct ctl_table firmware_config_table[] = {
+static struct ctl_table firmware_config_table[] = {
{
.procname   = "force_sysfs_fallback",
.data   = _fallback_config.force_sysfs_fallback,
@@ -45,4 +45,22 @@ struct ctl_table firmware_config_table[] = {
},
{ }
  };
-#endif
+
+static struct ctl_table_header *hdr;
+int register_firmware_config_sysctl(void)
+{
+   if (hdr)
+   return -EEXIST;


How can hdr be set?


It's my mistake, register_firmware_config_sysctl() is not exported,
there will be no repeated calls.
thanks for your guidance.


+   hdr = register_sysctl_subdir("kernel", "firmware_config",
+firmware_config_table);
+   if (!hdr)
+   return -ENOMEM;
+   return 0;
+}
+
+void unregister_firmware_config_sysctl(void)
+{
+   if (hdr)
+   unregister_sysctl_table(hdr);


Why can't unregister_sysctl_table() take a null pointer value?

Sorry, I didn't notice that the unregister_sysctl_table() already checks
the input parameters.
thanks for your guidance.



And what sets 'hdr' (worst name for a static variable) to NULL so that
it knows not to be unregistered again as it looks like
register_firmware_config_sysctl() could be called multiple times.


How about renaming hdr to firmware_config_sysct_table_header?

+ if (hdr)
+   return -EEXIST;
After deleting this code in register_firmware_config_sysctl(), and 
considering register_firmware_config_sysctl() and 
unregister_firmware_config_sysctl() are not exported, whether there is

no need to add  "hdr = NULL;" ?

Thanks
Xiaoming Ni






Re: [PATCH v4 1/4] sysctl: Add register_sysctl_init() interface

2020-05-29 Thread Xiaoming Ni

On 2020/5/29 15:36, Luis Chamberlain wrote:

On Fri, May 29, 2020 at 03:27:22PM +0800, Xiaoming Ni wrote:

On 2020/5/29 15:09, Luis Chamberlain wrote:

On Tue, May 19, 2020 at 11:31:08AM +0800, Xiaoming Ni wrote:

--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -3358,6 +3358,25 @@ int __init sysctl_init(void)
kmemleak_not_leak(hdr);
return 0;
   }
+
+/*
+ * The sysctl interface is used to modify the interface value,
+ * but the feature interface has default values. Even if register_sysctl fails,
+ * the feature body function can also run. At the same time, malloc small
+ * fragment of memory during the system initialization phase, almost does
+ * not fail. Therefore, the function return is designed as void
+ */


Let's use kdoc while at it. Can you convert this to proper kdoc?


Sorry, I do n’t know the format requirements of Kdoc, can you give me some
tips for writing?


Sure, include/net/mac80211.h is a good example.


+void __init register_sysctl_init(const char *path, struct ctl_table *table,
+const char *table_name)
+{
+   struct ctl_table_header *hdr = register_sysctl(path, table);
+
+   if (unlikely(!hdr)) {
+   pr_err("failed when register_sysctl %s to %s\n", table_name, 
path);
+   return;


table_name is only used for this, however we can easily just make
another _register_sysctl_init() helper first, and then use a macro
which will concatenate this to something useful if you want to print
a string. I see no point in the description for this, specially since
the way it was used was not to be descriptive, but instead just a name
followed by some underscore and something else.


Good idea, I will fix and send the patch to you as soon as possible


No rush :)


+   }
+   kmemleak_not_leak(hdr);


Is it *wrong* to run kmemleak_not_leak() when hdr was not allocated?
If so, can you fix the sysctl __init call itself?

I don't understand here, do you mean that register_sysctl_init () does not
need to call kmemleak_not_leak (hdr), or does it mean to add check hdr
before calling kmemleak_not_leak (hdr) in sysctl_init ()?


I'm asking that the way you are adding it, you don't run
kmemleak_not_leak(hdr) if the hdr allocation filed. If that is
right then it seems that sysctl_init() might not be doing it
right.

Can that code be shared somehow?

   Luis


void __ref kmemleak_not_leak(const void *ptr)
{
pr_debug("%s(0x%p)\n", __func__, ptr);

if (kmemleak_enabled && ptr && !IS_ERR(ptr))
make_gray_object((unsigned long)ptr);
}
EXPORT_SYMBOL(kmemleak_not_leak);

In the code of kmemleak_not_leak(), it is verified that the pointer is 
valid, so kmemleak_not_leak (NULL) will not be a problem.
At the same time, there is no need to call kmemleak_not_leak() in the 
failed branch of register_sysctl_init().


Thanks
Xiaoming Ni



Re: [PATCH v4 1/4] sysctl: Add register_sysctl_init() interface

2020-05-29 Thread Xiaoming Ni

On 2020/5/29 15:09, Luis Chamberlain wrote:

On Tue, May 19, 2020 at 11:31:08AM +0800, Xiaoming Ni wrote:

--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -3358,6 +3358,25 @@ int __init sysctl_init(void)
kmemleak_not_leak(hdr);
return 0;
  }
+
+/*
+ * The sysctl interface is used to modify the interface value,
+ * but the feature interface has default values. Even if register_sysctl fails,
+ * the feature body function can also run. At the same time, malloc small
+ * fragment of memory during the system initialization phase, almost does
+ * not fail. Therefore, the function return is designed as void
+ */


Let's use kdoc while at it. Can you convert this to proper kdoc?

Sorry, I do n’t know the format requirements of Kdoc, can you give me 
some tips for writing?




+void __init register_sysctl_init(const char *path, struct ctl_table *table,
+const char *table_name)
+{
+   struct ctl_table_header *hdr = register_sysctl(path, table);
+
+   if (unlikely(!hdr)) {
+   pr_err("failed when register_sysctl %s to %s\n", table_name, 
path);
+   return;


table_name is only used for this, however we can easily just make
another _register_sysctl_init() helper first, and then use a macro
which will concatenate this to something useful if you want to print
a string. I see no point in the description for this, specially since
the way it was used was not to be descriptive, but instead just a name
followed by some underscore and something else.


Good idea, I will fix and send the patch to you as soon as possible


+   }
+   kmemleak_not_leak(hdr);


Is it *wrong* to run kmemleak_not_leak() when hdr was not allocated?
If so, can you fix the sysctl __init call itself?
I don't understand here, do you mean that register_sysctl_init () does 
not need to call kmemleak_not_leak (hdr), or does it mean to add check 
hdr before calling kmemleak_not_leak (hdr) in sysctl_init ()?




PS. Since you have given me your series, feel free to send me a patch
as a follow up to this in privae and I can integrate it into my tree.

   Luis



Thanks
Xiaoming Ni




Re: [PATCH v2 1/1] userfaultfd/sysctl: add vm.unprivileged_userfaultfd

2020-05-28 Thread Xiaoming Ni

On 2020/5/27 22:21, Peter Xu wrote:

On Wed, May 27, 2020 at 02:54:13PM +0800, Xiaoming Ni wrote:


On Tue, Mar 19, 2019 at 11:07:22AM +0800, Peter Xu wrote:

Add a global sysctl knob "vm.unprivileged_userfaultfd" to control
whether userfaultfd is allowed by unprivileged users.  When this is
set to zero, only privileged users (root user, or users with the
CAP_SYS_PTRACE capability) will be able to use the userfaultfd
syscalls.


Hello


Hi, Xiaoming,


I am a bit confused about this patch, can you help to answer it.

Why the sysctl interface of fs/userfaultfd.c belongs to vm_table instead of
fs_table ?

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cefdca0a86be517bc390fc4541e3674b8e7803b0


Because I think it makes more sense to put the new key into where it suites
better, irrelevant to which directory the variable is declared.  To me,
unprivileged_userfaultfd is definitely more suitable for vm rather than fs,
because userfaultfd is really about memory management rather than file system.

Thanks,



Thank you for your answer
Since userfaultfd and vm are more closely related, will there be 
consideration to move fs/userfaultfd.c to the mm directory in the future?


Thanks
Xiaoming Ni



Re: [PATCH v2 1/1] userfaultfd/sysctl: add vm.unprivileged_userfaultfd

2020-05-27 Thread Xiaoming Ni



On Tue, Mar 19, 2019 at 11:07:22AM +0800, Peter Xu wrote:
> Add a global sysctl knob "vm.unprivileged_userfaultfd" to control
> whether userfaultfd is allowed by unprivileged users.  When this is
> set to zero, only privileged users (root user, or users with the
> CAP_SYS_PTRACE capability) will be able to use the userfaultfd
> syscalls.

Hello
I am a bit confused about this patch, can you help to answer it.

Why the sysctl interface of fs/userfaultfd.c belongs to vm_table instead 
of fs_table ?


https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cefdca0a86be517bc390fc4541e3674b8e7803b0

Thanks
Xiaoming Ni





Re: [PATCH v4 0/4] cleaning up the sysctls table (hung_task watchdog)

2020-05-19 Thread Xiaoming Ni

On 2020/5/20 11:31, Andrew Morton wrote:

On Tue, 19 May 2020 11:31:07 +0800 Xiaoming Ni  wrote:


Kernel/sysctl.c


eek!



  fs/proc/proc_sysctl.c|   2 +-
  include/linux/sched/sysctl.h |  14 +--
  include/linux/sysctl.h   |  13 ++-
  kernel/hung_task.c   |  77 +++-
  kernel/sysctl.c  | 214 +++
  kernel/watchdog.c| 101 
  6 files changed, 224 insertions(+), 197 deletions(-)


Here's what we presently have happening in linux-next's kernel/sysctl.c:

  sysctl.c | 3109 
++-
  1 file changed, 1521 insertions(+), 1588 deletions(-)


So this is not a good time for your patch!

Can I suggest that you set the idea aside and take a look after 5.8-rc1
is released?



ok, I will make v5 patch based on 5.8-rc1 after 5.8-rc1 is released,
And add more sysctl table cleanup.

Thanks
Xiaoming Ni




Re: [PATCH v4 2/4] sysctl: Move some boundary constants form sysctl.c to sysctl_vals

2020-05-19 Thread Xiaoming Ni

On 2020/5/19 12:44, Tetsuo Handa wrote:

On 2020/05/19 12:31, Xiaoming Ni wrote:

Some boundary (.extra1 .extra2) constants (E.g: neg_one two) in
sysctl.c are used in multiple features. Move these variables to
sysctl_vals to avoid adding duplicate variables when cleaning up
sysctls table.

Signed-off-by: Xiaoming Ni 
Reviewed-by: Kees Cook 


I feel that it is use of

void *extra1;
void *extra2;

in "struct ctl_table" that requires constant values indirection.
Can't we get rid of sysctl_vals using some "union" like below?

struct ctl_table {
const char *procname;   /* Text ID for /proc/sys, or zero */
void *data;
int maxlen;
umode_t mode;
struct ctl_table *child;/* Deprecated */
proc_handler *proc_handler; /* Callback for text formatting */
struct ctl_table_poll *poll;
union {
void *min_max_ptr[2];
int min_max_int[2];
long min_max_long[2];
};
} __randomize_layout;

.



net/decnet/dn_dev.c:
static void dn_dev_sysctl_register(struct net_device *dev, struct 
dn_dev_parms *parms)

{
struct dn_dev_sysctl_table *t;
int i;

char path[sizeof("net/decnet/conf/") + IFNAMSIZ];

t = kmemdup(_dev_sysctl, sizeof(*t), GFP_KERNEL);
if (t == NULL)
return;

for(i = 0; i < ARRAY_SIZE(t->dn_dev_vars) - 1; i++) {
long offset = (long)t->dn_dev_vars[i].data;
t->dn_dev_vars[i].data = ((char *)parms) + offset;
}

snprintf(path, sizeof(path), "net/decnet/conf/%s",
dev? dev->name : parms->name);

t->dn_dev_vars[0].extra1 = (void *)dev;

t->sysctl_header = register_net_sysctl(_net, path, t->dn_dev_vars);
if (t->sysctl_header == NULL)
kfree(t);
else
parms->sysctl = t;
}

A small amount of code is not used as a boundary value when using 
extra1. This scenario may not be suitable for renaming to min_max_ptr.


Should we add const to extra1 extra2 ?

--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -124,8 +124,8 @@ struct ctl_table {
struct ctl_table *child;/* Deprecated */
proc_handler *proc_handler; /* Callback for text formatting */
struct ctl_table_poll *poll;
-   void *extra1;
-   void *extra2;
+   const void *extra1;
+   const void *extra2;
 } __randomize_layout;


Thanks
Xiaoming Ni









[PATCH v4 0/4] cleaning up the sysctls table (hung_task watchdog)

2020-05-18 Thread Xiaoming Ni
Kernel/sysctl.c contains more than 190 interface files, and there are a 
large number of config macro controls. When modifying the sysctl 
interface directly in kernel/sysctl.c, conflicts are very easy to occur.
E.g: 
https://lore.kernel.org/lkml/99095805-8cbe-d140-e2f1-0c5a3e84d...@huawei.com/

Use register_sysctl() to register the sysctl interface to avoid
merge conflicts when different features modify sysctl.c at the same time.

So consider cleaning up the sysctls table, details are in:
   https://kernelnewbies.org/KernelProjects/proc
   https://lore.kernel.org/lkml/20200513141421.gp11...@42.do-not-panic.com/#t

The current patch set extracts register_sysctl_init and some sysctl_vals
variables, and clears the interface of hung_task and watchdog in sysctl.c.

The current patch set is based on linux-next, commit 72bc15d0018ebfbc9
("Add linux-next specific files for 20200518").

changes in v4:
  Handle the conflict with the commit d4ee116819ed714f ("kernel/hung_task.c:
  introduce sysctl to print all traces when a hung task is detected"),
  move the sysctl interface hung_task_all_cpu_backtrace to hung_task.c.

V3: 
https://lore.kernel.org/lkml/1589774397-42485-1-git-send-email-nixiaom...@huawei.com/
  base on commit b9bbe6ed63b2b9 ("Linux 5.7-rc6")
changes in v3:
  1. make hung_task_timeout_max to be const
  2. fix build warning:
 kernel/watchdog.c:779:14: warning: initialization discards 'const'
 qualifier from pointer target type [-Wdiscarded-qualifiers]
 .extra2  = ,
^

V2: 
https://lore.kernel.org/lkml/1589619315-65827-1-git-send-email-nixiaom...@huawei.com/
changes in v2:
  1. Adjusted the order of patches, first do public function
 extraction, then do feature code movement
  2. Move hung_task sysctl to hung_task.c instead of adding new file
  3. Extract multiple common variables instead of only neg_one, and keep
 the order of member values in sysctl_vals
  4. Add const modification to the variable sixty in watchdog sysctl

V1: 
https://lore.kernel.org/lkml/1589517224-123928-1-git-send-email-nixiaom...@huawei.com/

Xiaoming Ni (4):
  sysctl: Add register_sysctl_init() interface
  sysctl: Move some boundary constants form sysctl.c to sysctl_vals
  hung_task: Move hung_task sysctl interface to hung_task.c
  watchdog: move watchdog sysctl interface to watchdog.c

 fs/proc/proc_sysctl.c|   2 +-
 include/linux/sched/sysctl.h |  14 +--
 include/linux/sysctl.h   |  13 ++-
 kernel/hung_task.c   |  77 +++-
 kernel/sysctl.c  | 214 +++
 kernel/watchdog.c| 101 
 6 files changed, 224 insertions(+), 197 deletions(-)

-- 
1.8.5.6


[PATCH v4 1/4] sysctl: Add register_sysctl_init() interface

2020-05-18 Thread Xiaoming Ni
In order to eliminate the duplicate code for registering the sysctl
interface during the initialization of each feature, add the
register_sysctl_init() interface

Signed-off-by: Xiaoming Ni 
Reviewed-by: Kees Cook 
---
 include/linux/sysctl.h |  2 ++
 kernel/sysctl.c| 19 +++
 2 files changed, 21 insertions(+)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 50bb7f3..857ba93 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -197,6 +197,8 @@ struct ctl_table_header *register_sysctl_paths(const struct 
ctl_path *path,
 void unregister_sysctl_table(struct ctl_table_header * table);
 
 extern int sysctl_init(void);
+extern void register_sysctl_init(const char *path, struct ctl_table *table,
+const char *table_name);
 void do_sysctl_args(void);
 
 extern int pwrsw_enabled;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index cc1fcba..8afd713 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -3358,6 +3358,25 @@ int __init sysctl_init(void)
kmemleak_not_leak(hdr);
return 0;
 }
+
+/*
+ * The sysctl interface is used to modify the interface value,
+ * but the feature interface has default values. Even if register_sysctl fails,
+ * the feature body function can also run. At the same time, malloc small
+ * fragment of memory during the system initialization phase, almost does
+ * not fail. Therefore, the function return is designed as void
+ */
+void __init register_sysctl_init(const char *path, struct ctl_table *table,
+const char *table_name)
+{
+   struct ctl_table_header *hdr = register_sysctl(path, table);
+
+   if (unlikely(!hdr)) {
+   pr_err("failed when register_sysctl %s to %s\n", table_name, 
path);
+   return;
+   }
+   kmemleak_not_leak(hdr);
+}
 #endif /* CONFIG_SYSCTL */
 /*
  * No sense putting this after each symbol definition, twice,
-- 
1.8.5.6



[PATCH v4 2/4] sysctl: Move some boundary constants form sysctl.c to sysctl_vals

2020-05-18 Thread Xiaoming Ni
Some boundary (.extra1 .extra2) constants (E.g: neg_one two) in
sysctl.c are used in multiple features. Move these variables to
sysctl_vals to avoid adding duplicate variables when cleaning up
sysctls table.

Signed-off-by: Xiaoming Ni 
Reviewed-by: Kees Cook 
---
 fs/proc/proc_sysctl.c  |  2 +-
 include/linux/sysctl.h | 11 ---
 kernel/sysctl.c| 39 +--
 3 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5b405f3..3d65e7d 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -24,7 +24,7 @@
 static const struct inode_operations proc_sys_dir_operations;
 
 /* shared constants to be used in various sysctls */
-const int sysctl_vals[] = { 0, 1, INT_MAX };
+const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 1000, INT_MAX };
 EXPORT_SYMBOL(sysctl_vals);
 
 /* Support for permanently empty directories */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 857ba93..97586ee 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -38,9 +38,14 @@
 struct ctl_dir;
 
 /* Keep the same order as in fs/proc/proc_sysctl.c */
-#define SYSCTL_ZERO((void *)_vals[0])
-#define SYSCTL_ONE ((void *)_vals[1])
-#define SYSCTL_INT_MAX ((void *)_vals[2])
+#define SYSCTL_NEG_ONE ((void *)_vals[0])
+#define SYSCTL_ZERO((void *)_vals[1])
+#define SYSCTL_ONE ((void *)_vals[2])
+#define SYSCTL_TWO ((void *)_vals[3])
+#define SYSCTL_FOUR((void *)_vals[4])
+#define SYSCTL_ONE_HUNDRED ((void *)_vals[5])
+#define SYSCTL_ONE_THOUSAND((void *)_vals[6])
+#define SYSCTL_INT_MAX ((void *)_vals[7])
 
 extern const int sysctl_vals[];
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8afd713..38469bf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -111,14 +111,9 @@
 static int sixty = 60;
 #endif
 
-static int __maybe_unused neg_one = -1;
-static int __maybe_unused two = 2;
-static int __maybe_unused four = 4;
 static unsigned long zero_ul;
 static unsigned long one_ul = 1;
 static unsigned long long_max = LONG_MAX;
-static int one_hundred = 100;
-static int one_thousand = 1000;
 #ifdef CONFIG_PRINTK
 static int ten_thousand = 1;
 #endif
@@ -1885,7 +1880,7 @@ int proc_do_static_key(struct ctl_table *table, int write,
.maxlen = sizeof(int),
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax,
-   .extra1 = _one,
+   .extra1 = SYSCTL_NEG_ONE,
.extra2 = SYSCTL_ONE,
},
 #endif
@@ -2227,7 +,7 @@ int proc_do_static_key(struct ctl_table *table, int write,
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax_sysadmin,
.extra1 = SYSCTL_ZERO,
-   .extra2 = ,
+   .extra2 = SYSCTL_TWO,
},
 #endif
{
@@ -2487,7 +2482,7 @@ int proc_do_static_key(struct ctl_table *table, int write,
.maxlen = sizeof(int),
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax,
-   .extra1 = _one,
+   .extra1 = SYSCTL_NEG_ONE,
},
 #endif
 #ifdef CONFIG_RT_MUTEXES
@@ -2549,7 +2544,7 @@ int proc_do_static_key(struct ctl_table *table, int write,
.mode   = 0644,
.proc_handler   = perf_cpu_time_max_percent_handler,
.extra1 = SYSCTL_ZERO,
-   .extra2 = _hundred,
+   .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname   = "perf_event_max_stack",
@@ -2567,7 +2562,7 @@ int proc_do_static_key(struct ctl_table *table, int write,
.mode   = 0644,
.proc_handler   = perf_event_max_stack_handler,
.extra1 = SYSCTL_ZERO,
-   .extra2 = _thousand,
+   .extra2 = SYSCTL_ONE_THOUSAND,
},
 #endif
{
@@ -2642,7 +2637,7 @@ int proc_do_static_key(struct ctl_table *table, int write,
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
-   .extra2 = ,
+   .extra2 = SYSCTL_TWO,
},
{
.procname   = "panic_on_oom",
@@ -2651,7 +2646,7 @@ int proc_do_static_key(struct ctl_table *table, int write,
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
-   .extra2 = ,
+   .extra2 = SYSCTL_TWO,
},
{
.procname   = "oom_kill_allocating_task",
@@ -2696,7 +2691,7 @@ int proc_do_static_key(struct ctl_table *table, int write,

[PATCH v4 4/4] watchdog: move watchdog sysctl interface to watchdog.c

2020-05-18 Thread Xiaoming Ni
Move watchdog syscl interface to watchdog.c.
Use register_sysctl() to register the sysctl interface to avoid
merge conflicts when different features modify sysctl.c at the same time.

Signed-off-by: Xiaoming Ni 
Reviewed-by: Kees Cook 
---
 kernel/sysctl.c   |  96 ---
 kernel/watchdog.c | 101 ++
 2 files changed, 101 insertions(+), 96 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b7fd4e6..88235fa 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -100,16 +100,10 @@
 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
 #include 
 #endif
-#ifdef CONFIG_LOCKUP_DETECTOR
-#include 
-#endif
 
 #if defined(CONFIG_SYSCTL)
 
 /* Constants used for minimum and  maximum */
-#ifdef CONFIG_LOCKUP_DETECTOR
-static int sixty = 60;
-#endif
 
 static unsigned long zero_ul;
 static unsigned long one_ul = 1;
@@ -2231,96 +2225,6 @@ int proc_do_static_key(struct ctl_table *table, int 
write,
.mode   = 0444,
.proc_handler   = proc_dointvec,
},
-#if defined(CONFIG_LOCKUP_DETECTOR)
-   {
-   .procname   = "watchdog",
-   .data   = _user_enabled,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_watchdog,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-   {
-   .procname   = "watchdog_thresh",
-   .data   = _thresh,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_watchdog_thresh,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = ,
-   },
-   {
-   .procname   = "nmi_watchdog",
-   .data   = _watchdog_user_enabled,
-   .maxlen = sizeof(int),
-   .mode   = NMI_WATCHDOG_SYSCTL_PERM,
-   .proc_handler   = proc_nmi_watchdog,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-   {
-   .procname   = "watchdog_cpumask",
-   .data   = _cpumask_bits,
-   .maxlen = NR_CPUS,
-   .mode   = 0644,
-   .proc_handler   = proc_watchdog_cpumask,
-   },
-#ifdef CONFIG_SOFTLOCKUP_DETECTOR
-   {
-   .procname   = "soft_watchdog",
-   .data   = _watchdog_user_enabled,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_soft_watchdog,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-   {
-   .procname   = "softlockup_panic",
-   .data   = _panic,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#ifdef CONFIG_SMP
-   {
-   .procname   = "softlockup_all_cpu_backtrace",
-   .data   = _softlockup_all_cpu_backtrace,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#endif /* CONFIG_SMP */
-#endif
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-   {
-   .procname   = "hardlockup_panic",
-   .data   = _panic,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#ifdef CONFIG_SMP
-   {
-   .procname   = "hardlockup_all_cpu_backtrace",
-   .data   = _hardlockup_all_cpu_backtrace,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#endif /* CONFIG_SMP */
-#endif
-#endif
-
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
{
.procname   = "unknown_nmi_panic",
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 1b93953..b000326 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -758,6 +758,106 @@ int proc_watchdog_cpumask(struct ctl_table *table, int 
write,
mutex_unlock

[PATCH v4 3/4] hung_task: Move hung_task sysctl interface to hung_task.c

2020-05-18 Thread Xiaoming Ni
Move hung_task sysctl interface to hung_task.c.
Use register_sysctl() to register the sysctl interface to avoid
merge conflicts when different features modify sysctl.c at the same time.

Signed-off-by: Xiaoming Ni 
Reviewed-by: Kees Cook 
---
 include/linux/sched/sysctl.h | 14 +---
 kernel/hung_task.c   | 77 +++-
 kernel/sysctl.c  | 62 ---
 3 files changed, 77 insertions(+), 76 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 660ac49..fcd397a8 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -7,20 +7,8 @@
 struct ctl_table;
 
 #ifdef CONFIG_DETECT_HUNG_TASK
-
-#ifdef CONFIG_SMP
-extern unsigned int sysctl_hung_task_all_cpu_backtrace;
-#else
-#define sysctl_hung_task_all_cpu_backtrace 0
-#endif /* CONFIG_SMP */
-
-extern int  sysctl_hung_task_check_count;
-extern unsigned int  sysctl_hung_task_panic;
+/* used for hung_task and block/ */
 extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_check_interval_secs;
-extern int sysctl_hung_task_warnings;
-int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-   void *buffer, size_t *lenp, loff_t *ppos);
 #else
 /* Avoid need for ifdefs elsewhere in the code */
 enum { sysctl_hung_task_timeout_secs = 0 };
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index a672db8..d67df599 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -63,6 +63,9 @@
  * Defaults to 0, can be changed via sysctl.
  */
 unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
+#else
+/* Avoid need for ifdefs elsewhere in the code */
+enum { sysctl_hung_task_timeout_secs = 0 };
 #endif /* CONFIG_SMP */
 
 /*
@@ -265,10 +268,11 @@ static long hung_timeout_jiffies(unsigned long 
last_checked,
MAX_SCHEDULE_TIMEOUT;
 }
 
+#ifdef CONFIG_SYSCTL
 /*
  * Process updating of timeout sysctl
  */
-int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+static int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
  void __user *buffer,
  size_t *lenp, loff_t *ppos)
 {
@@ -285,6 +289,76 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, 
int write,
return ret;
 }
 
+/*
+ * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
+ * and hung_task_check_interval_secs
+ */
+static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
+static struct ctl_table hung_task_sysctls[] = {
+#ifdef CONFIG_SMP
+   {
+   .procname   = "hung_task_all_cpu_backtrace",
+   .data   = _hung_task_all_cpu_backtrace,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = SYSCTL_ZERO,
+   .extra2 = SYSCTL_ONE,
+   },
+#endif /* CONFIG_SMP */
+   {
+   .procname   = "hung_task_panic",
+   .data   = _hung_task_panic,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = SYSCTL_ZERO,
+   .extra2 = SYSCTL_ONE,
+   },
+   {
+   .procname   = "hung_task_check_count",
+   .data   = _hung_task_check_count,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = SYSCTL_ZERO,
+   },
+   {
+   .procname   = "hung_task_timeout_secs",
+   .data   = _hung_task_timeout_secs,
+   .maxlen = sizeof(unsigned long),
+   .mode   = 0644,
+   .proc_handler   = proc_dohung_task_timeout_secs,
+   .extra2 = (void *)_task_timeout_max,
+   },
+   {
+   .procname   = "hung_task_check_interval_secs",
+   .data   = _hung_task_check_interval_secs,
+   .maxlen = sizeof(unsigned long),
+   .mode   = 0644,
+   .proc_handler   = proc_dohung_task_timeout_secs,
+   .extra2 = (void *)_task_timeout_max,
+   },
+   {
+   .procname   = "hung_task_warnings",
+   .data   = _hung_task_warnings,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = SYSCTL_NEG_ONE,
+   },
+   {}
+};
+
+static void __init hung_task_sysctl_init(void)
+{
+   register_sysctl_init("ker

Re: [PATCH v3 0/4] cleaning up the sysctls table (hung_task watchdog)

2020-05-18 Thread Xiaoming Ni

On 2020/5/19 1:16, Luis Chamberlain wrote:

On Mon, May 18, 2020 at 11:59:53AM +0800, Xiaoming Ni wrote:

Kernel/sysctl.c contains more than 190 interface files, and there are a
large number of config macro controls. When modifying the sysctl
interface directly in kernel/sysctl.c, conflicts are very easy to occur.
E.g: https://lkml.org/lkml/2020/5/10/413.


FWIW un the future please avoid using lkmk.org and instead use
https://lkml.kernel.org/r/ for references.


Use register_sysctl() to register the sysctl interface to avoid
merge conflicts when different features modify sysctl.c at the same time.

So consider cleaning up the sysctls table, details are in:
https://kernelnewbies.org/KernelProjects/proc
https://lkml.org/lkml/2020/5/13/990

The current patch set extracts register_sysctl_init and some sysctl_vals
variables, and clears the interface of hung_task and watchdog in sysctl.c.

The current patch set is based on commit b9bbe6ed63b2b9 ("Linux 5.7-rc6"),
which conflicts with the latest branch of linux-next:
9b4caf6941fc41d ("kernel / hung_task.c: introduce sysctl to print
all traces when a hung task is detected")

Should I modify to make patch based on the "linux-next" branch to avoid
conflicts, or other branches?


If you can do that, that would be appreciated. I have a sysctl fs cleanup
stuff, so I can take your patches, and put my work ont op of yours and
then send this to Andrew once done.

   Luis

Ok, I will redo the v4 version based on the linux-next branch as soon as 
possible


I want to continue to participate in the subsequent sysctl cleanup, how 
to push the subsequent cleanup patch to your series, and minimize conflict


Thanks
Xiaoming Ni




[PATCH v3 4/4] watchdog: move watchdog sysctl interface to watchdog.c

2020-05-17 Thread Xiaoming Ni
Move watchdog syscl interface to watchdog.c.
Use register_sysctl() to register the sysctl interface to avoid
merge conflicts when different features modify sysctl.c at the same time.

Signed-off-by: Xiaoming Ni 
Reviewed-by: Kees Cook 
---
 kernel/sysctl.c   |  96 ---
 kernel/watchdog.c | 101 ++
 2 files changed, 101 insertions(+), 96 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 90e3ea8..cd85a1c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -97,9 +97,6 @@
 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
 #include 
 #endif
-#ifdef CONFIG_LOCKUP_DETECTOR
-#include 
-#endif
 
 #if defined(CONFIG_SYSCTL)
 
@@ -120,9 +117,6 @@
 #endif
 
 /* Constants used for minimum and  maximum */
-#ifdef CONFIG_LOCKUP_DETECTOR
-static int sixty = 60;
-#endif
 
 static unsigned long zero_ul;
 static unsigned long one_ul = 1;
@@ -883,96 +877,6 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0444,
.proc_handler   = proc_dointvec,
},
-#if defined(CONFIG_LOCKUP_DETECTOR)
-   {
-   .procname   = "watchdog",
-   .data   = _user_enabled,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_watchdog,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-   {
-   .procname   = "watchdog_thresh",
-   .data   = _thresh,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_watchdog_thresh,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = ,
-   },
-   {
-   .procname   = "nmi_watchdog",
-   .data   = _watchdog_user_enabled,
-   .maxlen = sizeof(int),
-   .mode   = NMI_WATCHDOG_SYSCTL_PERM,
-   .proc_handler   = proc_nmi_watchdog,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-   {
-   .procname   = "watchdog_cpumask",
-   .data   = _cpumask_bits,
-   .maxlen = NR_CPUS,
-   .mode   = 0644,
-   .proc_handler   = proc_watchdog_cpumask,
-   },
-#ifdef CONFIG_SOFTLOCKUP_DETECTOR
-   {
-   .procname   = "soft_watchdog",
-   .data   = _watchdog_user_enabled,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_soft_watchdog,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-   {
-   .procname   = "softlockup_panic",
-   .data   = _panic,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#ifdef CONFIG_SMP
-   {
-   .procname   = "softlockup_all_cpu_backtrace",
-   .data   = _softlockup_all_cpu_backtrace,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#endif /* CONFIG_SMP */
-#endif
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-   {
-   .procname   = "hardlockup_panic",
-   .data   = _panic,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#ifdef CONFIG_SMP
-   {
-   .procname   = "hardlockup_all_cpu_backtrace",
-   .data   = _hardlockup_all_cpu_backtrace,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#endif /* CONFIG_SMP */
-#endif
-#endif
-
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
{
.procname   = "unknown_nmi_panic",
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b6b1f54..8c833b6 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -756,6 +756,106 @@ int proc_watchdog_cpumask(struct ctl_table *table, int 
write,
mutex_unlock

[PATCH v3 0/4] cleaning up the sysctls table (hung_task watchdog)

2020-05-17 Thread Xiaoming Ni
Kernel/sysctl.c contains more than 190 interface files, and there are a 
large number of config macro controls. When modifying the sysctl 
interface directly in kernel/sysctl.c, conflicts are very easy to occur.
E.g: https://lkml.org/lkml/2020/5/10/413.

Use register_sysctl() to register the sysctl interface to avoid
merge conflicts when different features modify sysctl.c at the same time.

So consider cleaning up the sysctls table, details are in:
https://kernelnewbies.org/KernelProjects/proc
https://lkml.org/lkml/2020/5/13/990

The current patch set extracts register_sysctl_init and some sysctl_vals
variables, and clears the interface of hung_task and watchdog in sysctl.c.

The current patch set is based on commit b9bbe6ed63b2b9 ("Linux 5.7-rc6"),
which conflicts with the latest branch of linux-next:
9b4caf6941fc41d ("kernel / hung_task.c: introduce sysctl to print
all traces when a hung task is detected")

Should I modify to make patch based on the "linux-next" branch to avoid
conflicts, or other branches?

changes in v3:
  1. make hung_task_timeout_max to be const
  2. fix build warning:
 kernel/watchdog.c:779:14: warning: initialization discards 'const'
 qualifier from pointer target type [-Wdiscarded-qualifiers]
 .extra2  = ,
^

V2: https://lkml.org/lkml/2020/5/16/81
changes in v2:
  1. Adjusted the order of patches, first do public function
 extraction, then do feature code movement
  2. Move hung_task sysctl to hung_task.c instead of adding new file
  3. Extract multiple common variables instead of only neg_one, and keep
 the order of member values in sysctl_vals
  4. Add const modification to the variable sixty in watchdog sysctl

V1: https://lkml.org/lkml/2020/5/15/17

Xiaoming Ni (4):
  sysctl: Add register_sysctl_init() interface
  sysctl: Move some boundary constants form sysctl.c to sysctl_vals
  hung_task: Move hung_task sysctl interface to hung_task.c
  watchdog: move watchdog sysctl interface to watchdog.c

 fs/proc/proc_sysctl.c|   2 +-
 include/linux/sched/sysctl.h |   8 +-
 include/linux/sysctl.h   |  13 ++-
 kernel/hung_task.c   |  63 +-
 kernel/sysctl.c  | 202 ---
 kernel/watchdog.c| 101 ++
 6 files changed, 210 insertions(+), 179 deletions(-)

-- 
1.8.5.6


[PATCH v3 3/4] hung_task: Move hung_task sysctl interface to hung_task.c

2020-05-17 Thread Xiaoming Ni
Move hung_task sysctl interface to hung_task.c.
Use register_sysctl() to register the sysctl interface to avoid
merge conflicts when different features modify sysctl.c at the same time.

Signed-off-by: Xiaoming Ni 
Reviewed-by: Kees Cook 
---
 include/linux/sched/sysctl.h |  8 +-
 kernel/hung_task.c   | 63 +++-
 kernel/sysctl.c  | 50 ---
 3 files changed, 63 insertions(+), 58 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index d4f6215..c075e09 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -7,14 +7,8 @@
 struct ctl_table;
 
 #ifdef CONFIG_DETECT_HUNG_TASK
-extern int  sysctl_hung_task_check_count;
-extern unsigned int  sysctl_hung_task_panic;
+/* used for hung_task and block/ */
 extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_check_interval_secs;
-extern int sysctl_hung_task_warnings;
-extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-void __user *buffer,
-size_t *lenp, loff_t *ppos);
 #else
 /* Avoid need for ifdefs elsewhere in the code */
 enum { sysctl_hung_task_timeout_secs = 0 };
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 14a625c..d010c96 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -215,10 +215,11 @@ static long hung_timeout_jiffies(unsigned long 
last_checked,
MAX_SCHEDULE_TIMEOUT;
 }
 
+#ifdef CONFIG_SYSCTL
 /*
  * Process updating of timeout sysctl
  */
-int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+static int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
  void __user *buffer,
  size_t *lenp, loff_t *ppos)
 {
@@ -235,6 +236,65 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, 
int write,
return ret;
 }
 
+/*
+ * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
+ * and hung_task_check_interval_secs
+ */
+static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
+static struct ctl_table hung_task_sysctls[] = {
+   {
+   .procname   = "hung_task_panic",
+   .data   = _hung_task_panic,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = SYSCTL_ZERO,
+   .extra2 = SYSCTL_ONE,
+   },
+   {
+   .procname   = "hung_task_check_count",
+   .data   = _hung_task_check_count,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = SYSCTL_ZERO,
+   },
+   {
+   .procname   = "hung_task_timeout_secs",
+   .data   = _hung_task_timeout_secs,
+   .maxlen = sizeof(unsigned long),
+   .mode   = 0644,
+   .proc_handler   = proc_dohung_task_timeout_secs,
+   .extra2 = (void *)_task_timeout_max,
+   },
+   {
+   .procname   = "hung_task_check_interval_secs",
+   .data   = _hung_task_check_interval_secs,
+   .maxlen = sizeof(unsigned long),
+   .mode   = 0644,
+   .proc_handler   = proc_dohung_task_timeout_secs,
+   .extra2 = (void *)_task_timeout_max,
+   },
+   {
+   .procname   = "hung_task_warnings",
+   .data   = _hung_task_warnings,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = SYSCTL_NEG_ONE,
+   },
+   {}
+};
+
+static void __init hung_task_sysctl_init(void)
+{
+   register_sysctl_init("kernel", hung_task_sysctls, "hung_task_sysctls");
+}
+#else
+#define hung_task_sysctl_init() do { } while (0)
+#endif
+
+
 static atomic_t reset_hung_task = ATOMIC_INIT(0);
 
 void reset_hung_task_detector(void)
@@ -304,6 +364,7 @@ static int __init hung_task_init(void)
pm_notifier(hungtask_pm_notify, 0);
 
watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
+   hung_task_sysctl_init();
 
return 0;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 76c3237..90e3ea8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -144,13 +144,6 @@
 static int ngroups_max = NGROUPS_MAX;
 static const int cap_last_cap = CAP_LAST_CAP;
 
-/*
- * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
- * and hung_tas

[PATCH v3 1/4] sysctl: Add register_sysctl_init() interface

2020-05-17 Thread Xiaoming Ni
In order to eliminate the duplicate code for registering the sysctl
interface during the initialization of each feature, add the
register_sysctl_init() interface

Signed-off-by: Xiaoming Ni 
Reviewed-by: Kees Cook 
---
 include/linux/sysctl.h |  2 ++
 kernel/sysctl.c| 19 +++
 2 files changed, 21 insertions(+)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 02fa844..43f8ef9 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -206,6 +206,8 @@ struct ctl_table_header *register_sysctl_paths(const struct 
ctl_path *path,
 void unregister_sysctl_table(struct ctl_table_header * table);
 
 extern int sysctl_init(void);
+extern void register_sysctl_init(const char *path, struct ctl_table *table,
+const char *table_name);
 
 extern struct ctl_table sysctl_mount_point[];
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a176d8..c96122f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1968,6 +1968,25 @@ int __init sysctl_init(void)
return 0;
 }
 
+/*
+ * The sysctl interface is used to modify the interface value,
+ * but the feature interface has default values. Even if register_sysctl fails,
+ * the feature body function can also run. At the same time, malloc small
+ * fragment of memory during the system initialization phase, almost does
+ * not fail. Therefore, the function return is designed as void
+ */
+void __init register_sysctl_init(const char *path, struct ctl_table *table,
+const char *table_name)
+{
+   struct ctl_table_header *hdr = register_sysctl(path, table);
+
+   if (unlikely(!hdr)) {
+   pr_err("failed when register_sysctl %s to %s\n", table_name, 
path);
+   return;
+   }
+   kmemleak_not_leak(hdr);
+}
+
 #endif /* CONFIG_SYSCTL */
 
 /*
-- 
1.8.5.6



[PATCH v3 2/4] sysctl: Move some boundary constants form sysctl.c to sysctl_vals

2020-05-17 Thread Xiaoming Ni
Some boundary (.extra1 .extra2) constants (E.g: neg_one two) in
sysctl.c are used in multiple features. Move these variables to
sysctl_vals to avoid adding duplicate variables when cleaning up
sysctls table.

Signed-off-by: Xiaoming Ni 
Reviewed-by: Kees Cook 
---
 fs/proc/proc_sysctl.c  |  2 +-
 include/linux/sysctl.h | 11 ---
 kernel/sysctl.c| 37 -
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b6f5d45..3f77e64 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -23,7 +23,7 @@
 static const struct inode_operations proc_sys_dir_operations;
 
 /* shared constants to be used in various sysctls */
-const int sysctl_vals[] = { 0, 1, INT_MAX };
+const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 1000, INT_MAX };
 EXPORT_SYMBOL(sysctl_vals);
 
 /* Support for permanently empty directories */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 43f8ef9..bf97c30 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -38,9 +38,14 @@
 struct ctl_dir;
 
 /* Keep the same order as in fs/proc/proc_sysctl.c */
-#define SYSCTL_ZERO((void *)_vals[0])
-#define SYSCTL_ONE ((void *)_vals[1])
-#define SYSCTL_INT_MAX ((void *)_vals[2])
+#define SYSCTL_NEG_ONE ((void *)_vals[0])
+#define SYSCTL_ZERO((void *)_vals[1])
+#define SYSCTL_ONE ((void *)_vals[2])
+#define SYSCTL_TWO ((void *)_vals[3])
+#define SYSCTL_FOUR((void *)_vals[4])
+#define SYSCTL_ONE_HUNDRED ((void *)_vals[5])
+#define SYSCTL_ONE_THOUSAND((void *)_vals[6])
+#define SYSCTL_INT_MAX ((void *)_vals[7])
 
 extern const int sysctl_vals[];
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c96122f..76c3237 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -124,14 +124,9 @@
 static int sixty = 60;
 #endif
 
-static int __maybe_unused neg_one = -1;
-static int __maybe_unused two = 2;
-static int __maybe_unused four = 4;
 static unsigned long zero_ul;
 static unsigned long one_ul = 1;
 static unsigned long long_max = LONG_MAX;
-static int one_hundred = 100;
-static int one_thousand = 1000;
 #ifdef CONFIG_PRINTK
 static int ten_thousand = 1;
 #endif
@@ -547,7 +542,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.maxlen = sizeof(int),
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax,
-   .extra1 = _one,
+   .extra1 = SYSCTL_NEG_ONE,
.extra2 = SYSCTL_ONE,
},
 #endif
@@ -878,7 +873,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax_sysadmin,
.extra1 = SYSCTL_ZERO,
-   .extra2 = ,
+   .extra2 = SYSCTL_TWO,
},
 #endif
{
@@ -1187,7 +1182,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0644,
.proc_handler   = perf_cpu_time_max_percent_handler,
.extra1 = SYSCTL_ZERO,
-   .extra2 = _hundred,
+   .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname   = "perf_event_max_stack",
@@ -1205,7 +1200,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0644,
.proc_handler   = perf_event_max_stack_handler,
.extra1 = SYSCTL_ZERO,
-   .extra2 = _thousand,
+   .extra2 = SYSCTL_ONE_THOUSAND,
},
 #endif
{
@@ -1280,7 +1275,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
-   .extra2 = ,
+   .extra2 = SYSCTL_TWO,
},
{
.procname   = "panic_on_oom",
@@ -1289,7 +1284,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
-   .extra2 = ,
+   .extra2 = SYSCTL_TWO,
},
{
.procname   = "oom_kill_allocating_task",
@@ -1334,7 +1329,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0644,
.proc_handler   = dirty_background_ratio_handler,
.extra1 = SYSCTL_ZERO,
-   .extra2 = _hundred,
+   .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname   = "dirty_background_b

Re: [PATCH v2 3/4] hung_task: Move hung_task sysctl interface to hung_task.c

2020-05-16 Thread Xiaoming Ni

On 2020/5/17 10:43, Kees Cook wrote:

On Sat, May 16, 2020 at 04:55:14PM +0800, Xiaoming Ni wrote:

+/*
+ * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
+ * and hung_task_check_interval_secs
+ */
+static unsigned long hung_task_timeout_max = (LONG_MAX / HZ);


Please make this const. With that done, yes, looks great!

Reviewed-by: Kees Cook 



Thank you for your guidance, I will fix it in v3

In addition, I am a bit confused about the patch submission, and hope to 
get everyone's answer.
  I made this patch based on the master branch. But as in conflict at 
https://lkml.org/lkml/2020/5/10/413, my patch will inevitably conflict. 
Should I modify to make patch based on "linux-next" branch to avoid 
conflict, or other branches?


Thanks
Xiaoming Ni



[PATCH v2 1/4] sysctl: Add register_sysctl_init() interface

2020-05-16 Thread Xiaoming Ni
In order to eliminate the duplicate code for registering the sysctl
interface during the initialization of each feature, add the
register_sysctl_init() interface

Signed-off-by: Xiaoming Ni 
---
 include/linux/sysctl.h |  2 ++
 kernel/sysctl.c| 19 +++
 2 files changed, 21 insertions(+)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 02fa844..43f8ef9 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -206,6 +206,8 @@ struct ctl_table_header *register_sysctl_paths(const struct 
ctl_path *path,
 void unregister_sysctl_table(struct ctl_table_header * table);
 
 extern int sysctl_init(void);
+extern void register_sysctl_init(const char *path, struct ctl_table *table,
+const char *table_name);
 
 extern struct ctl_table sysctl_mount_point[];
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b9ff323..20c31f5 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1970,6 +1970,25 @@ int __init sysctl_init(void)
return 0;
 }
 
+/*
+ * The sysctl interface is used to modify the interface value,
+ * but the feature interface has default values. Even if register_sysctl fails,
+ * the feature body function can also run. At the same time, malloc small
+ * fragment of memory during the system initialization phase, almost does
+ * not fail. Therefore, the function return is designed as void
+ */
+void __init register_sysctl_init(const char *path, struct ctl_table *table,
+const char *table_name)
+{
+   struct ctl_table_header *hdr = register_sysctl(path, table);
+
+   if (unlikely(!hdr)) {
+   pr_err("failed when register_sysctl %s to %s\n", table_name, 
path);
+   return;
+   }
+   kmemleak_not_leak(hdr);
+}
+
 #endif /* CONFIG_SYSCTL */
 
 /*
-- 
1.8.5.6



[PATCH v2 0/4] cleaning up the sysctls table (hung_task watchdog)

2020-05-16 Thread Xiaoming Ni
Kernel/sysctl.c contains more than 190 interface files, and there are a 
large number of config macro controls. When modifying the sysctl 
interface directly in kernel/sysctl.c, conflicts are very easy to occur.
E.g: https://lkml.org/lkml/2020/5/10/413.

Use register_sysctl() to register the sysctl interface to avoid
merge conflicts when different features modify sysctl.c at the same time.

So consider cleaning up the sysctls table, details are in:
https://kernelnewbies.org/KernelProjects/proc
https://lkml.org/lkml/2020/5/13/990

The current patch set extracts register_sysctl_init and some sysctl_vals
variables, and clears the interface of hung_task and watchdog in sysctl.c.

changes in v2:
  1. Adjusted the order of patches, first do public function
 extraction, then do feature code movement
  2. Move hung_task sysctl to hung_task.c instead of adding new file
  3. Extract multiple common variables instead of only neg_one, and keep
 the order of member values in sysctl_vals
  4. Add const modification to the variable sixty in watchdog sysctl

V1: https://lkml.org/lkml/2020/5/15/17

Xiaoming Ni (4):
  sysctl: Add register_sysctl_init() interface
  sysctl: Move some boundary constants form sysctl.c to sysctl_vals
  hung_task: Move hung_task sysctl interface to hung_task.c
  watchdog: move watchdog sysctl interface to watchdog.c

 fs/proc/proc_sysctl.c|   2 +-
 include/linux/sched/sysctl.h |   8 +-
 include/linux/sysctl.h   |  13 ++-
 kernel/hung_task.c   |  63 +-
 kernel/sysctl.c  | 202 ---
 kernel/watchdog.c| 101 ++
 6 files changed, 210 insertions(+), 179 deletions(-)

-- 
1.8.5.6



[PATCH v2 3/4] hung_task: Move hung_task sysctl interface to hung_task.c

2020-05-16 Thread Xiaoming Ni
Move hung_task sysctl interface to hung_task.c.
Use register_sysctl() to register the sysctl interface to avoid
merge conflicts when different features modify sysctl.c at the same time.

Signed-off-by: Xiaoming Ni 
---
 include/linux/sched/sysctl.h |  8 +-
 kernel/hung_task.c   | 63 +++-
 kernel/sysctl.c  | 50 ---
 3 files changed, 63 insertions(+), 58 deletions(-)

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index d4f6215..c075e09 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -7,14 +7,8 @@
 struct ctl_table;
 
 #ifdef CONFIG_DETECT_HUNG_TASK
-extern int  sysctl_hung_task_check_count;
-extern unsigned int  sysctl_hung_task_panic;
+/* used for hung_task and block/ */
 extern unsigned long sysctl_hung_task_timeout_secs;
-extern unsigned long sysctl_hung_task_check_interval_secs;
-extern int sysctl_hung_task_warnings;
-extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-void __user *buffer,
-size_t *lenp, loff_t *ppos);
 #else
 /* Avoid need for ifdefs elsewhere in the code */
 enum { sysctl_hung_task_timeout_secs = 0 };
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index 14a625c..76ea496 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -215,10 +215,11 @@ static long hung_timeout_jiffies(unsigned long 
last_checked,
MAX_SCHEDULE_TIMEOUT;
 }
 
+#ifdef CONFIG_SYSCTL
 /*
  * Process updating of timeout sysctl
  */
-int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+static int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
  void __user *buffer,
  size_t *lenp, loff_t *ppos)
 {
@@ -235,6 +236,65 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, 
int write,
return ret;
 }
 
+/*
+ * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
+ * and hung_task_check_interval_secs
+ */
+static unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
+static struct ctl_table hung_task_sysctls[] = {
+   {
+   .procname   = "hung_task_panic",
+   .data   = _hung_task_panic,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = SYSCTL_ZERO,
+   .extra2 = SYSCTL_ONE,
+   },
+   {
+   .procname   = "hung_task_check_count",
+   .data   = _hung_task_check_count,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = SYSCTL_ZERO,
+   },
+   {
+   .procname   = "hung_task_timeout_secs",
+   .data   = _hung_task_timeout_secs,
+   .maxlen = sizeof(unsigned long),
+   .mode   = 0644,
+   .proc_handler   = proc_dohung_task_timeout_secs,
+   .extra2 = _task_timeout_max,
+   },
+   {
+   .procname   = "hung_task_check_interval_secs",
+   .data   = _hung_task_check_interval_secs,
+   .maxlen = sizeof(unsigned long),
+   .mode   = 0644,
+   .proc_handler   = proc_dohung_task_timeout_secs,
+   .extra2 = _task_timeout_max,
+   },
+   {
+   .procname   = "hung_task_warnings",
+   .data   = _hung_task_warnings,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = SYSCTL_NEG_ONE,
+   },
+   {}
+};
+
+static void __init hung_task_sysctl_init(void)
+{
+   register_sysctl_init("kernel", hung_task_sysctls, "hung_task_sysctls");
+}
+#else
+#define hung_task_sysctl_init() do { } while (0)
+#endif
+
+
 static atomic_t reset_hung_task = ATOMIC_INIT(0);
 
 void reset_hung_task_detector(void)
@@ -304,6 +364,7 @@ static int __init hung_task_init(void)
pm_notifier(hungtask_pm_notify, 0);
 
watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
+   hung_task_sysctl_init();
 
return 0;
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5f08cc2..49e9541 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -144,13 +144,6 @@
 static int ngroups_max = NGROUPS_MAX;
 static const int cap_last_cap = CAP_LAST_CAP;
 
-/*
- * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
- * and hung_task_check_interval_secs
- */
-#ifdef CONFIG_DETEC

[PATCH v2 4/4] watchdog: move watchdog sysctl interface to watchdog.c

2020-05-16 Thread Xiaoming Ni
Move watchdog syscl interface to watchdog.c.
Use register_sysctl() to register the sysctl interface to avoid
merge conflicts when different features modify sysctl.c at the same time.

Signed-off-by: Xiaoming Ni 
---
 kernel/sysctl.c   |  96 ---
 kernel/watchdog.c | 101 ++
 2 files changed, 101 insertions(+), 96 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 49e9541..efe6172 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -97,9 +97,6 @@
 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
 #include 
 #endif
-#ifdef CONFIG_LOCKUP_DETECTOR
-#include 
-#endif
 
 #if defined(CONFIG_SYSCTL)
 
@@ -120,9 +117,6 @@
 #endif
 
 /* Constants used for minimum and  maximum */
-#ifdef CONFIG_LOCKUP_DETECTOR
-static int sixty = 60;
-#endif
 
 static unsigned long zero_ul;
 static unsigned long one_ul = 1;
@@ -883,96 +877,6 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0444,
.proc_handler   = proc_dointvec,
},
-#if defined(CONFIG_LOCKUP_DETECTOR)
-   {
-   .procname   = "watchdog",
-   .data   = _user_enabled,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_watchdog,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-   {
-   .procname   = "watchdog_thresh",
-   .data   = _thresh,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_watchdog_thresh,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = ,
-   },
-   {
-   .procname   = "nmi_watchdog",
-   .data   = _watchdog_user_enabled,
-   .maxlen = sizeof(int),
-   .mode   = NMI_WATCHDOG_SYSCTL_PERM,
-   .proc_handler   = proc_nmi_watchdog,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-   {
-   .procname   = "watchdog_cpumask",
-   .data   = _cpumask_bits,
-   .maxlen = NR_CPUS,
-   .mode   = 0644,
-   .proc_handler   = proc_watchdog_cpumask,
-   },
-#ifdef CONFIG_SOFTLOCKUP_DETECTOR
-   {
-   .procname   = "soft_watchdog",
-   .data   = _watchdog_user_enabled,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_soft_watchdog,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-   {
-   .procname   = "softlockup_panic",
-   .data   = _panic,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#ifdef CONFIG_SMP
-   {
-   .procname   = "softlockup_all_cpu_backtrace",
-   .data   = _softlockup_all_cpu_backtrace,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#endif /* CONFIG_SMP */
-#endif
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-   {
-   .procname   = "hardlockup_panic",
-   .data   = _panic,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#ifdef CONFIG_SMP
-   {
-   .procname   = "hardlockup_all_cpu_backtrace",
-   .data   = _hardlockup_all_cpu_backtrace,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec_minmax,
-   .extra1 = SYSCTL_ZERO,
-   .extra2 = SYSCTL_ONE,
-   },
-#endif /* CONFIG_SMP */
-#endif
-#endif
-
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
{
.procname   = "unknown_nmi_panic",
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b6b1f54..f5d19be 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -756,6 +756,106 @@ int proc_watchdog_cpumask(struct ctl_table *table, int 
write,
mutex_unlock

[PATCH v2 2/4] sysctl: Move some boundary constants form sysctl.c to sysctl_vals

2020-05-16 Thread Xiaoming Ni
Some boundary (.extra1 .extra2) constants (E.g: neg_one two) in
sysctl.c are used in multiple features. Move these variables to
sysctl_vals to avoid adding duplicate variables when cleaning up
sysctls table.

Signed-off-by: Xiaoming Ni 
---
 fs/proc/proc_sysctl.c  |  2 +-
 include/linux/sysctl.h | 11 ---
 kernel/sysctl.c| 37 -
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b6f5d45..3f77e64 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -23,7 +23,7 @@
 static const struct inode_operations proc_sys_dir_operations;
 
 /* shared constants to be used in various sysctls */
-const int sysctl_vals[] = { 0, 1, INT_MAX };
+const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 1000, INT_MAX };
 EXPORT_SYMBOL(sysctl_vals);
 
 /* Support for permanently empty directories */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 43f8ef9..bf97c30 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -38,9 +38,14 @@
 struct ctl_dir;
 
 /* Keep the same order as in fs/proc/proc_sysctl.c */
-#define SYSCTL_ZERO((void *)_vals[0])
-#define SYSCTL_ONE ((void *)_vals[1])
-#define SYSCTL_INT_MAX ((void *)_vals[2])
+#define SYSCTL_NEG_ONE ((void *)_vals[0])
+#define SYSCTL_ZERO((void *)_vals[1])
+#define SYSCTL_ONE ((void *)_vals[2])
+#define SYSCTL_TWO ((void *)_vals[3])
+#define SYSCTL_FOUR((void *)_vals[4])
+#define SYSCTL_ONE_HUNDRED ((void *)_vals[5])
+#define SYSCTL_ONE_THOUSAND((void *)_vals[6])
+#define SYSCTL_INT_MAX ((void *)_vals[7])
 
 extern const int sysctl_vals[];
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 20c31f5..5f08cc2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -124,14 +124,9 @@
 static int sixty = 60;
 #endif
 
-static int __maybe_unused neg_one = -1;
-static int __maybe_unused two = 2;
-static int __maybe_unused four = 4;
 static unsigned long zero_ul;
 static unsigned long one_ul = 1;
 static unsigned long long_max = LONG_MAX;
-static int one_hundred = 100;
-static int one_thousand = 1000;
 #ifdef CONFIG_PRINTK
 static int ten_thousand = 1;
 #endif
@@ -547,7 +542,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.maxlen = sizeof(int),
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax,
-   .extra1 = _one,
+   .extra1 = SYSCTL_NEG_ONE,
.extra2 = SYSCTL_ONE,
},
 #endif
@@ -878,7 +873,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax_sysadmin,
.extra1 = SYSCTL_ZERO,
-   .extra2 = ,
+   .extra2 = SYSCTL_TWO,
},
 #endif
{
@@ -1189,7 +1184,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0644,
.proc_handler   = perf_cpu_time_max_percent_handler,
.extra1 = SYSCTL_ZERO,
-   .extra2 = _hundred,
+   .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname   = "perf_event_max_stack",
@@ -1207,7 +1202,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0644,
.proc_handler   = perf_event_max_stack_handler,
.extra1 = SYSCTL_ZERO,
-   .extra2 = _thousand,
+   .extra2 = SYSCTL_ONE_THOUSAND,
},
 #endif
{
@@ -1282,7 +1277,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
-   .extra2 = ,
+   .extra2 = SYSCTL_TWO,
},
{
.procname   = "panic_on_oom",
@@ -1291,7 +1286,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
-   .extra2 = ,
+   .extra2 = SYSCTL_TWO,
},
{
.procname   = "oom_kill_allocating_task",
@@ -1336,7 +1331,7 @@ static int sysrq_sysctl_handler(struct ctl_table *table, 
int write,
.mode   = 0644,
.proc_handler   = dirty_background_ratio_handler,
.extra1 = SYSCTL_ZERO,
-   .extra2 = _hundred,
+   .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname   = "dirty_background_bytes

Re: [PATCH 2/4] proc/sysctl: add shared variables -1

2020-05-15 Thread Xiaoming Ni

On 2020/5/16 10:47, Kees Cook wrote:

On Sat, May 16, 2020 at 10:32:19AM +0800, Xiaoming Ni wrote:

On 2020/5/16 0:05, Kees Cook wrote:

On Fri, May 15, 2020 at 05:06:28PM +0800, Xiaoming Ni wrote:

On 2020/5/15 16:06, Kees Cook wrote:

On Fri, May 15, 2020 at 12:33:42PM +0800, Xiaoming Ni wrote:

Add the shared variable SYSCTL_NEG_ONE to replace the variable neg_one
used in both sysctl_writes_strict and hung_task_warnings.

Signed-off-by: Xiaoming Ni 
---
fs/proc/proc_sysctl.c | 2 +-
include/linux/sysctl.h| 1 +
kernel/hung_task_sysctl.c | 3 +--
kernel/sysctl.c   | 3 +--


How about doing this refactoring in advance of the extraction patch?

Before  advance of the extraction patch, neg_one is only used in one file,
does it seem to have no value for refactoring?


I guess it doesn't matter much, but I think it's easier to review in the
sense that neg_one is first extracted and then later everything else is
moved.


Later, when more features sysctl interface is moved to the code file, there
will be more variables that need to be extracted.
So should I only extract the neg_one variable here, or should I extract all
the variables used by multiple features?


Hmm -- if you're going to do a consolidation pass, then nevermind, I
don't think order will matter then.

Thank you for the cleanup! Sorry we're giving you back-and-forth advice!

-Kees



Sorry, I don't fully understand.
Does this mean that there is no need to adjust the patch order or the 
order of variables in sysctl_vals?

Should I extract only SYSCTL_NEG_ONE or should I extract all variables?

Thanks
Xiaoming Ni



Re: [PATCH 2/4] proc/sysctl: add shared variables -1

2020-05-15 Thread Xiaoming Ni

On 2020/5/16 0:05, Kees Cook wrote:

On Fri, May 15, 2020 at 05:06:28PM +0800, Xiaoming Ni wrote:

On 2020/5/15 16:06, Kees Cook wrote:

On Fri, May 15, 2020 at 12:33:42PM +0800, Xiaoming Ni wrote:

Add the shared variable SYSCTL_NEG_ONE to replace the variable neg_one
used in both sysctl_writes_strict and hung_task_warnings.

Signed-off-by: Xiaoming Ni 
---
   fs/proc/proc_sysctl.c | 2 +-
   include/linux/sysctl.h| 1 +
   kernel/hung_task_sysctl.c | 3 +--
   kernel/sysctl.c   | 3 +--


How about doing this refactoring in advance of the extraction patch?

Before  advance of the extraction patch, neg_one is only used in one file,
does it seem to have no value for refactoring?


I guess it doesn't matter much, but I think it's easier to review in the
sense that neg_one is first extracted and then later everything else is
moved.

Later, when more features sysctl interface is moved to the code file, 
there will be more variables that need to be extracted.
So should I only extract the neg_one variable here, or should I extract 
all the variables used by multiple features?


 fs/proc/proc_sysctl.c  |  2 +-
 include/linux/sysctl.h | 11 ---
 kernel/sysctl.c| 37 -
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b6f5d45..3f77e64 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -23,7 +23,7 @@
 static const struct inode_operations proc_sys_dir_operations;

 /* shared constants to be used in various sysctls */
-const int sysctl_vals[] = { 0, 1, INT_MAX };
+const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 1000, INT_MAX };
 EXPORT_SYMBOL(sysctl_vals);

 /* Support for permanently empty directories */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 43f8ef9..bf97c30 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -38,9 +38,14 @@
 struct ctl_dir;

 /* Keep the same order as in fs/proc/proc_sysctl.c */
-#define SYSCTL_ZERO((void *)_vals[0])
-#define SYSCTL_ONE ((void *)_vals[1])
-#define SYSCTL_INT_MAX ((void *)_vals[2])
+#define SYSCTL_NEG_ONE ((void *)_vals[0])
+#define SYSCTL_ZERO((void *)_vals[1])
+#define SYSCTL_ONE ((void *)_vals[2])
+#define SYSCTL_TWO ((void *)_vals[3])
+#define SYSCTL_FOUR((void *)_vals[4])
+#define SYSCTL_ONE_HUNDRED ((void *)_vals[5])
+#define SYSCTL_ONE_THOUSAND((void *)_vals[6])
+#define SYSCTL_INT_MAX ((void *)_vals[7])

 extern const int sysctl_vals[];

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5dd6d01..efe6172 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -118,14 +118,9 @@

 /* Constants used for minimum and  maximum */

-static int __maybe_unused neg_one = -1;
-static int __maybe_unused two = 2;
-static int __maybe_unused four = 4;
 static unsigned long zero_ul;
 static unsigned long one_ul = 1;
 static unsigned long long_max = LONG_MAX;
-static int one_hundred = 100;
-static int one_thousand = 1000;
 #ifdef CONFIG_PRINTK
 static int ten_thousand = 1;
 #endif
@@ -534,7 +529,7 @@ static int sysrq_sysctl_handler(struct ctl_table 
*table, int write,

.maxlen = sizeof(int),
.mode   = 0644,
.proc_handler   = proc_dointvec_minmax,
-   .extra1 = _one,
+   .extra1 = SYSCTL_NEG_ONE,
.extra2 = SYSCTL_ONE,
},
 #endif
@@ -865,7 +860,7 @@ static int sysrq_sysctl_handler(struct ctl_table 
*table, int write,

.mode   = 0644,
.proc_handler   = proc_dointvec_minmax_sysadmin,
.extra1 = SYSCTL_ZERO,
-   .extra2 = ,
+   .extra2 = SYSCTL_TWO,
},
 #endif
{
@@ -1043,7 +1038,7 @@ static int sysrq_sysctl_handler(struct ctl_table 
*table, int write,

.mode   = 0644,
.proc_handler   = perf_cpu_time_max_percent_handler,
.extra1 = SYSCTL_ZERO,
-   .extra2 = _hundred,
+   .extra2 = SYSCTL_ONE_HUNDRED,
},
{
.procname   = "perf_event_max_stack",
@@ -1061,7 +1056,7 @@ static int sysrq_sysctl_handler(struct ctl_table 
*table, int write,

.mode   = 0644,
.proc_handler   = perf_event_max_stack_handler,
.extra1 = SYSCTL_ZERO,
-   .extra2 = _thousand,
+   .extra2 = SYSCTL_ONE_THOUSAND,
},
 #endif
{
@@ -1136,7 +1131,7 @@ static int sysrq_sysctl_handler(struct ctl_table 
*table, int write,

.mode   = 0644,
.proc_handler   = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
-   .extra2 = ,
+   .extra2 = SYSCTL_TWO,
},
{
.procname   = &qu

  1   2   >