date:20210323

[PATCH v2 1/9] staging: rtl8723bs: removed function prototypes in core/rtw_efuse.c

2021-03-23 Thread Fabio Aiuto

fix the following checkpatch issues:

WARNING: externs should be avoided in .c files
35: FILE: drivers/staging/rtl8723bs/core/rtw_efuse.c:35:
+bool

Remove the two function prototypes in core/rtw_efuse.c and
make the corresponding definitions static.

Signed-off-by: Fabio Aiuto 
---
 drivers/staging/rtl8723bs/core/rtw_efuse.c | 14 ++
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_efuse.c 
b/drivers/staging/rtl8723bs/core/rtw_efuse.c
index 32ca10f01413..3701336e7ff6 100644
--- a/drivers/staging/rtl8723bs/core/rtw_efuse.c
+++ b/drivers/staging/rtl8723bs/core/rtw_efuse.c
@@ -32,12 +32,7 @@ u8 fakeBTEfuseModifiedMap[EFUSE_BT_MAX_MAP_LEN] = {0};
 #define REG_EFUSE_CTRL 0x0030
 #define EFUSE_CTRL REG_EFUSE_CTRL  /*  E-Fuse 
Control. */
 
-bool
-Efuse_Read1ByteFromFakeContent(
-   struct adapter *padapter,
-   u16 Offset,
-   u8 *Value);
-bool
+static bool
 Efuse_Read1ByteFromFakeContent(
struct adapter *padapter,
u16 Offset,
@@ -53,12 +48,7 @@ Efuse_Read1ByteFromFakeContent(
return true;
 }
 
-bool
-Efuse_Write1ByteToFakeContent(
-   struct adapter *padapter,
-   u16 Offset,
-   u8 Value);
-bool
+static bool
 Efuse_Write1ByteToFakeContent(
struct adapter *padapter,
u16 Offset,
-- 
2.20.1

[PATCH] drm/imx: fix out of bounds array access warning

2021-03-23 Thread Arnd Bergmann

From: Arnd Bergmann 

When CONFIG_OF is disabled, building with 'make W=1' produces warnings
about out of bounds array access:

drivers/gpu/drm/imx/imx-ldb.c: In function 'imx_ldb_set_clock.constprop':
drivers/gpu/drm/imx/imx-ldb.c:186:8: error: array subscript -22 is below array 
bounds of 'struct clk *[4]' [-Werror=array-bounds]

Add an error check before the index is used, which helps with the
warning, as well as any possible other error condition that may be
triggered at runtime.

Signed-off-by: Arnd Bergmann 
---
 drivers/gpu/drm/imx/imx-ldb.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index dbfe39e2f7f6..1210360cec8a 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -197,6 +197,12 @@ static void imx_ldb_encoder_enable(struct drm_encoder 
*encoder)
int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN;
int mux = drm_of_encoder_active_port_id(imx_ldb_ch->child, encoder);
 
+   if (mux < 0) {
+   dev_warn(ldb->dev,
+"%s: invalid mux\n", __func__);
+   return;
+   }
+
drm_panel_prepare(imx_ldb_ch->panel);
 
if (dual) {
@@ -255,6 +261,12 @@ imx_ldb_encoder_atomic_mode_set(struct drm_encoder 
*encoder,
int mux = drm_of_encoder_active_port_id(imx_ldb_ch->child, encoder);
u32 bus_format = imx_ldb_ch->bus_format;
 
+   if (mux < 0) {
+   dev_warn(ldb->dev,
+"%s: invalid mux\n", __func__);
+   return;
+   }
+
if (mode->clock > 17) {
dev_warn(ldb->dev,
 "%s: mode exceeds 170 MHz pixel clock\n", __func__);
-- 
2.29.2

[RFC PATCH v7 13/22] virtio/vsock: add SEQPACKET receive logic

2021-03-23 Thread Arseny Krasnov

This modifies current receive logic for SEQPACKET support:
1) Inserts 'SEQ_BEGIN' packet to socket's rx queue.
2) Inserts 'RW' packet to socket's rx queue, but without merging with
   buffer of last packet in queue.
3) Checks the packet and socket types on receive (on mismatch, the
   connection is reset).

Signed-off-by: Arseny Krasnov 
---
 v6 -> v7:
 In 'virtio_transport_recv_pkt()', 'sock_put()' is added for the case when
 the type of the received packet does not match the type of the socket.

 net/vmw_vsock/virtio_transport_common.c | 64 +
 1 file changed, 45 insertions(+), 19 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c 
b/net/vmw_vsock/virtio_transport_common.c
index 41f05034593e..bfe0d7026bf8 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -165,6 +165,14 @@ void virtio_transport_deliver_tap_pkt(struct 
virtio_vsock_pkt *pkt)
 }
 EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
 
+static u16 virtio_transport_get_type(struct sock *sk)
+{
+   if (sk->sk_type == SOCK_STREAM)
+   return VIRTIO_VSOCK_TYPE_STREAM;
+   else
+   return VIRTIO_VSOCK_TYPE_SEQPACKET;
+}
+
 /* This function can only be used on connecting/connected sockets,
  * since a socket assigned to a transport is required.
  *
@@ -1075,25 +1083,27 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
goto out;
}
 
-   /* Try to copy small packets into the buffer of last packet queued,
-* to avoid wasting memory queueing the entire buffer with a small
-* payload.
-*/
-   if (pkt->len <= GOOD_COPY_LEN && !list_empty(>rx_queue)) {
-   struct virtio_vsock_pkt *last_pkt;
+   if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM) {
+   /* Try to copy small packets into the buffer of last packet 
queued,
+* to avoid wasting memory queueing the entire buffer with a 
small
+* payload.
+*/
+   if (pkt->len <= GOOD_COPY_LEN && !list_empty(>rx_queue)) {
+   struct virtio_vsock_pkt *last_pkt;
 
-   last_pkt = list_last_entry(>rx_queue,
-  struct virtio_vsock_pkt, list);
+   last_pkt = list_last_entry(>rx_queue,
+  struct virtio_vsock_pkt, 
list);
 
-   /* If there is space in the last packet queued, we copy the
-* new packet in its buffer.
-*/
-   if (pkt->len <= last_pkt->buf_len - last_pkt->len) {
-   memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
-  pkt->len);
-   last_pkt->len += pkt->len;
-   free_pkt = true;
-   goto out;
+   /* If there is space in the last packet queued, we copy 
the
+* new packet in its buffer.
+*/
+   if (pkt->len <= last_pkt->buf_len - last_pkt->len) {
+   memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
+  pkt->len);
+   last_pkt->len += pkt->len;
+   free_pkt = true;
+   goto out;
+   }
}
}
 
@@ -1113,9 +1123,13 @@ virtio_transport_recv_connected(struct sock *sk,
int err = 0;
 
switch (le16_to_cpu(pkt->hdr.op)) {
+   case VIRTIO_VSOCK_OP_SEQ_BEGIN:
+   case VIRTIO_VSOCK_OP_SEQ_END:
case VIRTIO_VSOCK_OP_RW:
virtio_transport_recv_enqueue(vsk, pkt);
-   sk->sk_data_ready(sk);
+
+   if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_SEQ_BEGIN)
+   sk->sk_data_ready(sk);
return err;
case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
sk->sk_write_space(sk);
@@ -1258,6 +1272,12 @@ virtio_transport_recv_listen(struct sock *sk, struct 
virtio_vsock_pkt *pkt,
return 0;
 }
 
+static bool virtio_transport_valid_type(u16 type)
+{
+   return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
+  (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
+}
+
 /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
  * lock.
  */
@@ -1283,7 +1303,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
le32_to_cpu(pkt->hdr.buf_alloc),
le32_to_cpu(pkt->hdr.fwd_cnt));
 
-   if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) {
+   if (!virtio_transport_valid_type(le16_to_cpu(pkt->hdr.type))) {
(void)virtio_transport_reset_no_sock(t, pkt);
goto free_pkt;
}
@@ -1300,6 +1320,12 @@ void virtio_transport_recv_pkt(struct virtio_transport 
*t,

[PATCH] block: avoid -Wunused-but-set-parameter warning

2021-03-23 Thread Arnd Bergmann

From: Arnd Bergmann 

Building with 'make W=1' shows a warning for one function parameter
that is only set but not used in some configurations:

kernel/trace/blktrace.c: In function '__trace_note_message':
kernel/trace/blktrace.c:148:63: error: parameter 'blkcg' set but not used 
[-Werror=unused-but-set-parameter]
  148 | void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg,

Move the assignment into the #ifdef block that contains the only
use to clarify how it's used and avoid the warning.

Apparently this is the only -Wunused-but-set-parameter warning in the
kernel as of v5.12, after similar cleanups done by others. It may be time
to turn it on globally.

Fixes: 35fe6d763229 ("block: use standard blktrace API to output cgroup info 
for debug notes")
Signed-off-by: Arnd Bergmann 
---
 kernel/trace/blktrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c221e4c3f625..f9314351a7e2 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -170,9 +170,9 @@ void __trace_note_message(struct blk_trace *bt, struct 
blkcg *blkcg,
n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
va_end(args);
 
+#ifdef CONFIG_BLK_CGROUP
if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
blkcg = NULL;
-#ifdef CONFIG_BLK_CGROUP
trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n,
   blkcg ? cgroup_id(blkcg->css.cgroup) : 1);
 #else
-- 
2.29.2

[RFC PATCH v7 16/22] virtio/vsock: setup SEQPACKET ops for transport

2021-03-23 Thread Arseny Krasnov

This adds SEQPACKET ops for virtio transport and 'seqpacket_allow()'
callback.

Signed-off-by: Arseny Krasnov 
---
 net/vmw_vsock/virtio_transport.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 2700a63ab095..83ae2078c847 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -443,6 +443,8 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
queue_work(virtio_vsock_workqueue, >rx_work);
 }
 
+static bool virtio_transport_seqpacket_allow(void);
+
 static struct virtio_transport virtio_transport = {
.transport = {
.module   = THIS_MODULE,
@@ -469,6 +471,10 @@ static struct virtio_transport virtio_transport = {
.stream_is_active = virtio_transport_stream_is_active,
.stream_allow = virtio_transport_stream_allow,
 
+   .seqpacket_dequeue= virtio_transport_seqpacket_dequeue,
+   .seqpacket_enqueue= virtio_transport_seqpacket_enqueue,
+   .seqpacket_allow  = virtio_transport_seqpacket_allow,
+
.notify_poll_in   = virtio_transport_notify_poll_in,
.notify_poll_out  = virtio_transport_notify_poll_out,
.notify_recv_init = virtio_transport_notify_recv_init,
@@ -483,8 +489,14 @@ static struct virtio_transport virtio_transport = {
},
 
.send_pkt = virtio_transport_send_pkt,
+   .seqpacket_allow = false
 };
 
+static bool virtio_transport_seqpacket_allow(void)
+{
+   return virtio_transport.seqpacket_allow;
+}
+
 static void virtio_transport_rx_work(struct work_struct *work)
 {
struct virtio_vsock *vsock =
-- 
2.25.1

[RFC PATCH v7 14/22] virtio/vsock: rest of SOCK_SEQPACKET support

2021-03-23 Thread Arseny Krasnov

This adds rest of logic for SEQPACKET:
1) SEQPACKET specific functions which send SEQ_BEGIN/SEQ_END.
   Note that both functions may sleep to wait enough space for
   SEQPACKET header.
2) SEQ_BEGIN/SEQ_END in TAP packet capture.
3) Send SHUTDOWN on socket close for SEQPACKET type.
4) Set SEQPACKET packet type during send.
5) Set MSG_EOR in flags for SEQPACKET during send.
6) 'seqpacket_allow' flag to virtio transport.

Signed-off-by: Arseny Krasnov 
---
 v6 -> v7:
 In 'virtio_transport_seqpacket_enqueue()', 'next_tx_msg_id' is updated
 in both cases: when the message is sent successfully and when an error occurs.

 include/linux/virtio_vsock.h|  7 ++
 net/vmw_vsock/virtio_transport_common.c | 88 -
 2 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 0e3aa395c07c..ab5f56fd7251 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -22,6 +22,7 @@ struct virtio_vsock_seq_state {
u32 user_read_seq_len;
u32 user_read_copied;
u32 curr_rx_msg_id;
+   u32 next_tx_msg_id;
 };
 
 /* Per-socket state (accessed via vsk->trans) */
@@ -76,6 +77,8 @@ struct virtio_transport {
 
/* Takes ownership of the packet */
int (*send_pkt)(struct virtio_vsock_pkt *pkt);
+
+   bool seqpacket_allow;
 };
 
 ssize_t
@@ -89,6 +92,10 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
   size_t len, int flags);
 
 int
+virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
+  struct msghdr *msg,
+  size_t len);
+int
 virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
   struct msghdr *msg,
   int flags,
diff --git a/net/vmw_vsock/virtio_transport_common.c 
b/net/vmw_vsock/virtio_transport_common.c
index bfe0d7026bf8..01a56c7da8bd 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -139,6 +139,8 @@ static struct sk_buff *virtio_transport_build_skb(void 
*opaque)
break;
case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
+   case VIRTIO_VSOCK_OP_SEQ_BEGIN:
+   case VIRTIO_VSOCK_OP_SEQ_END:
hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
break;
default:
@@ -187,7 +189,12 @@ static int virtio_transport_send_pkt_info(struct 
vsock_sock *vsk,
struct virtio_vsock_pkt *pkt;
u32 pkt_len = info->pkt_len;
 
-   info->type = VIRTIO_VSOCK_TYPE_STREAM;
+   info->type = virtio_transport_get_type(sk_vsock(vsk));
+
+   if (info->type == VIRTIO_VSOCK_TYPE_SEQPACKET &&
+   info->msg &&
+   info->msg->msg_flags & MSG_EOR)
+   info->flags |= VIRTIO_VSOCK_RW_EOR;
 
t_ops = virtio_transport_get_ops(vsk);
if (unlikely(!t_ops))
@@ -401,6 +408,43 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
return err;
 }
 
+static int virtio_transport_seqpacket_send_ctrl(struct vsock_sock *vsk,
+   int type,
+   size_t len,
+   int flags)
+{
+   struct virtio_vsock_sock *vvs = vsk->trans;
+   struct virtio_vsock_pkt_info info = {
+   .op = type,
+   .vsk = vsk,
+   .pkt_len = sizeof(struct virtio_vsock_seq_hdr)
+   };
+
+   struct virtio_vsock_seq_hdr seq_hdr = {
+   .msg_id = cpu_to_le32(vvs->seq_state.next_tx_msg_id),
+   .msg_len = cpu_to_le32(len)
+   };
+
+   struct kvec seq_hdr_kiov = {
+   .iov_base = (void *)_hdr,
+   .iov_len = sizeof(struct virtio_vsock_seq_hdr)
+   };
+
+   struct msghdr msg = {0};
+
+   //XXX: do we need 'vsock_transport_send_notify_data' pointer?
+   if (vsock_wait_space(sk_vsock(vsk),
+sizeof(struct virtio_vsock_seq_hdr),
+flags, NULL))
+   return -1;
+
+   iov_iter_kvec(_iter, WRITE, _hdr_kiov, 1, sizeof(seq_hdr));
+
+   info.msg = 
+
+   return virtio_transport_send_pkt_info(vsk, );
+}
+
 static inline void virtio_transport_remove_pkt(struct virtio_vsock_pkt *pkt)
 {
list_del(>list);
@@ -595,6 +639,46 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
 }
 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
 
+int
+virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
+  struct msghdr *msg,
+  size_t len)
+{
+   int written = -1;
+
+   if (msg->msg_iter.iov_offset == 0) {
+   /* Send SEQBEGIN. */
+   if (virtio_transport_seqpacket_send_ctrl(vsk,
+

[RFC PATCH v7 12/22] virtio/vsock: fetch length for SEQPACKET record

2021-03-23 Thread Arseny Krasnov

This adds transport callback which tries to fetch record begin marker
from socket's rx queue. It is called from af_vsock.c before reading data
packets of record.

Signed-off-by: Arseny Krasnov 
---
 v6 -> v7:
 1) Now 'virtio_transport_seqpacket_seq_get_len()' returns 0, if rx
queue of socket is empty. Else it returns length of current message
to handle.
 2) If dequeue callback is called, but there is no detected length of
message to dequeue, EAGAIN is returned, and outer loop restarts
receiving.

 net/vmw_vsock/virtio_transport_common.c | 61 +
 1 file changed, 61 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport_common.c 
b/net/vmw_vsock/virtio_transport_common.c
index a8f4326e45e8..41f05034593e 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -399,6 +399,62 @@ static inline void virtio_transport_remove_pkt(struct 
virtio_vsock_pkt *pkt)
virtio_transport_free_pkt(pkt);
 }
 
+static size_t virtio_transport_drop_until_seq_begin(struct virtio_vsock_sock 
*vvs)
+{
+   struct virtio_vsock_pkt *pkt, *n;
+   size_t bytes_dropped = 0;
+
+   list_for_each_entry_safe(pkt, n, >rx_queue, list) {
+   if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_SEQ_BEGIN)
+   break;
+
+   bytes_dropped += le32_to_cpu(pkt->hdr.len);
+   virtio_transport_dec_rx_pkt(vvs, pkt);
+   virtio_transport_remove_pkt(pkt);
+   }
+
+   return bytes_dropped;
+}
+
+static size_t virtio_transport_seqpacket_seq_get_len(struct vsock_sock *vsk)
+{
+   struct virtio_vsock_seq_hdr *seq_hdr;
+   struct virtio_vsock_sock *vvs;
+   struct virtio_vsock_pkt *pkt;
+   size_t bytes_dropped = 0;
+
+   vvs = vsk->trans;
+
+   spin_lock_bh(>rx_lock);
+
+   /* Have some record to process, return it's length. */
+   if (vvs->seq_state.user_read_seq_len)
+   goto out;
+
+   /* Fetch all orphaned 'RW' packets and send credit update. */
+   bytes_dropped = virtio_transport_drop_until_seq_begin(vvs);
+
+   if (list_empty(>rx_queue))
+   goto out;
+
+   pkt = list_first_entry(>rx_queue, struct virtio_vsock_pkt, list);
+
+   vvs->seq_state.user_read_copied = 0;
+
+   seq_hdr = (struct virtio_vsock_seq_hdr *)pkt->buf;
+   vvs->seq_state.user_read_seq_len = le32_to_cpu(seq_hdr->msg_len);
+   vvs->seq_state.curr_rx_msg_id = le32_to_cpu(seq_hdr->msg_id);
+   virtio_transport_dec_rx_pkt(vvs, pkt);
+   virtio_transport_remove_pkt(pkt);
+out:
+   spin_unlock_bh(>rx_lock);
+
+   if (bytes_dropped)
+   virtio_transport_send_credit_update(vsk);
+
+   return vvs->seq_state.user_read_seq_len;
+}
+
 static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
 struct msghdr *msg,
 bool *msg_ready)
@@ -522,6 +578,11 @@ virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
if (flags & MSG_PEEK)
return -EOPNOTSUPP;
 
+   *msg_len = virtio_transport_seqpacket_seq_get_len(vsk);
+
+   if (*msg_len == 0)
+   return -EAGAIN;
+
return virtio_transport_seqpacket_do_dequeue(vsk, msg, msg_ready);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
-- 
2.25.1

[RFC PATCH v7 11/22] virtio/vsock: dequeue callback for SOCK_SEQPACKET

2021-03-23 Thread Arseny Krasnov

This adds the transport callback and its logic for SEQPACKET dequeue.
The callback fetches RW packets from the socket's rx queue until the whole
record is copied (if the user's buffer is full, the user is not woken up).
This is done to avoid stalling the sender: if we wake up the user and it
leaves the syscall, nobody will send a credit update for the rest of the
record, and the sender will wait until the receiver next enters the read
syscall. So if the user's buffer is full, we just send a credit update and
drop the data. If SEQ_BEGIN is found during the copy (before all data has
been copied), copying is restarted by resetting the user's iov iterator
(the previous unfinished data is dropped).

Signed-off-by: Arseny Krasnov 
---
 v6 -> v7:
 1) 'struct virtio_vsock_seqpacket_state' now renamed to
'struct virtio_vsock_seq_state'.
 2) Field 'seqpacket_state' of 'struct virtio_vsock_sock' now
renamed to 'seq_state'.
 3) Current message length to process ('user_read_seq_len') is
set to 0 on error or when message dequeue has completed successfully.

 include/linux/virtio_vsock.h|  14 +++
 include/uapi/linux/virtio_vsock.h   |  16 
 net/vmw_vsock/virtio_transport_common.c | 121 
 3 files changed, 151 insertions(+)

diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index dc636b727179..0e3aa395c07c 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -18,6 +18,12 @@ enum {
VSOCK_VQ_MAX= 3,
 };
 
+struct virtio_vsock_seq_state {
+   u32 user_read_seq_len;
+   u32 user_read_copied;
+   u32 curr_rx_msg_id;
+};
+
 /* Per-socket state (accessed via vsk->trans) */
 struct virtio_vsock_sock {
struct vsock_sock *vsk;
@@ -36,6 +42,8 @@ struct virtio_vsock_sock {
u32 rx_bytes;
u32 buf_alloc;
struct list_head rx_queue;
+
+   struct virtio_vsock_seq_state seq_state;
 };
 
 struct virtio_vsock_pkt {
@@ -80,6 +88,12 @@ virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
   struct msghdr *msg,
   size_t len, int flags);
 
+int
+virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
+  struct msghdr *msg,
+  int flags,
+  bool *msg_ready,
+  size_t *msg_len);
 s64 virtio_transport_stream_has_data(struct vsock_sock *vsk);
 s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
 
diff --git a/include/uapi/linux/virtio_vsock.h 
b/include/uapi/linux/virtio_vsock.h
index 1d57ed3d84d2..692f8078cced 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -63,8 +63,14 @@ struct virtio_vsock_hdr {
__le32  fwd_cnt;
 } __attribute__((packed));
 
+struct virtio_vsock_seq_hdr {
+   __le32  msg_id;
+   __le32  msg_len;
+} __attribute__((packed));
+
 enum virtio_vsock_type {
VIRTIO_VSOCK_TYPE_STREAM = 1,
+   VIRTIO_VSOCK_TYPE_SEQPACKET = 2,
 };
 
 enum virtio_vsock_op {
@@ -83,6 +89,11 @@ enum virtio_vsock_op {
VIRTIO_VSOCK_OP_CREDIT_UPDATE = 6,
/* Request the peer to send the credit info to us */
VIRTIO_VSOCK_OP_CREDIT_REQUEST = 7,
+
+   /* Record begin for SOCK_SEQPACKET */
+   VIRTIO_VSOCK_OP_SEQ_BEGIN = 8,
+   /* Record end for SOCK_SEQPACKET */
+   VIRTIO_VSOCK_OP_SEQ_END = 9,
 };
 
 /* VIRTIO_VSOCK_OP_SHUTDOWN flags values */
@@ -91,4 +102,9 @@ enum virtio_vsock_shutdown {
VIRTIO_VSOCK_SHUTDOWN_SEND = 2,
 };
 
+/* VIRTIO_VSOCK_OP_RW flags values */
+enum virtio_vsock_rw {
+   VIRTIO_VSOCK_RW_EOR = 1,
+};
+
 #endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */
diff --git a/net/vmw_vsock/virtio_transport_common.c 
b/net/vmw_vsock/virtio_transport_common.c
index 833104b71a1c..a8f4326e45e8 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -393,6 +393,114 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
return err;
 }
 
+static inline void virtio_transport_remove_pkt(struct virtio_vsock_pkt *pkt)
+{
+   list_del(>list);
+   virtio_transport_free_pkt(pkt);
+}
+
+static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
+struct msghdr *msg,
+bool *msg_ready)
+{
+   struct virtio_vsock_sock *vvs = vsk->trans;
+   struct virtio_vsock_pkt *pkt;
+   int err = 0;
+   size_t user_buf_len = msg->msg_iter.count;
+
+   *msg_ready = false;
+   spin_lock_bh(>rx_lock);
+
+   while (!*msg_ready && !list_empty(>rx_queue) && !err) {
+   pkt = list_first_entry(>rx_queue, struct virtio_vsock_pkt, 
list);
+
+   switch (le16_to_cpu(pkt->hdr.op)) {
+   case VIRTIO_VSOCK_OP_SEQ_BEGIN: {
+   /* Unexpected 'SEQ_BEGIN' during record copy:
+* Leave receive loop, 'EAGAIN' will restart it from
+

[RFC PATCH v7 15/22] virtio/vsock: SEQPACKET support feature bit

2021-03-23 Thread Arseny Krasnov

This adds a new virtio-vsock-specific feature bit that indicates
SOCK_SEQPACKET support. The guest negotiates this bit with vhost,
thereby checking that the vhost side supports SEQPACKET.

Signed-off-by: Arseny Krasnov 
---
 include/uapi/linux/virtio_vsock.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/uapi/linux/virtio_vsock.h 
b/include/uapi/linux/virtio_vsock.h
index 692f8078cced..619aaebb355a 100644
--- a/include/uapi/linux/virtio_vsock.h
+++ b/include/uapi/linux/virtio_vsock.h
@@ -38,6 +38,9 @@
 #include 
 #include 
 
+/* The feature bitmap for virtio vsock */
+#define VIRTIO_VSOCK_F_SEQPACKET   0   /* SOCK_SEQPACKET supported */
+
 struct virtio_vsock_config {
__le64 guest_cid;
 } __attribute__((packed));
-- 
2.25.1

Re: [PATCH RFC 0/3] drivers/char: remove /dev/kmem for good

2021-03-23 Thread Greg Kroah-Hartman

On Fri, Mar 19, 2021 at 03:34:49PM +0100, David Hildenbrand wrote:
> Let's start a discussion if /dev/kmem is worth keeping around and
> fixing/maintaining or if we should just remove it now for good.
> 
> More details / findings in patch #1. Patch #2 and #3 perform minor cleanups
> based on removed /dev/kmem support.
> 
> Only compile-tested on x86-64 -- good enough for discussing the general
> topic (RFC).

I'll gladly take this in my char/misc tree for now, to show up in
linux-next to get some testing to see if anyone complains.

Let me know if that will help out.

thanks,

greg k-h

[PATCH net-next] airo: work around stack usage warning

2021-03-23 Thread Arnd Bergmann

From: Arnd Bergmann 

gcc-11 with KASAN on 32-bit arm produces a warning about a function
that needs a lot of stack space:

drivers/net/wireless/cisco/airo.c: In function 'setup_card.constprop':
drivers/net/wireless/cisco/airo.c:3960:1: error: the frame size of 1512 bytes 
is larger than 1400 bytes [-Werror=frame-larger-than=]

Most of this is from a single large structure that could be dynamically
allocated or moved into the per-device structure.  However, as the callers
all seem to have a fairly well bounded call chain, the easiest change
is to pull out the part of the function that needs the large variables
into a separate function and mark that as noinline_for_stack. This does
not reduce the total stack usage, but it gets rid of the warning and
requires minimal changes otherwise.

Signed-off-by: Arnd Bergmann 
---
 drivers/net/wireless/cisco/airo.c | 117 +-
 1 file changed, 65 insertions(+), 52 deletions(-)

diff --git a/drivers/net/wireless/cisco/airo.c 
b/drivers/net/wireless/cisco/airo.c
index e35e1380ae43..540ba694899c 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -3818,6 +3818,68 @@ static inline void set_auth_type(struct airo_info 
*local, int auth_type)
local->last_auth = auth_type;
 }
 
+static int noinline_for_stack airo_readconfig(struct airo_info *ai, u8 *mac, 
int lock)
+{
+   int i, status;
+   /* large variables, so don't inline this function,
+* maybe change to kmalloc
+*/
+   tdsRssiRid rssi_rid;
+   CapabilityRid cap_rid;
+
+   kfree(ai->SSID);
+   ai->SSID = NULL;
+   // general configuration (read/modify/write)
+   status = readConfigRid(ai, lock);
+   if (status != SUCCESS) return ERROR;
+
+   status = readCapabilityRid(ai, _rid, lock);
+   if (status != SUCCESS) return ERROR;
+
+   status = PC4500_readrid(ai, RID_RSSI, _rid, sizeof(rssi_rid), 
lock);
+   if (status == SUCCESS) {
+   if (ai->rssi || (ai->rssi = kmalloc(512, GFP_KERNEL)) != NULL)
+   memcpy(ai->rssi, (u8*)_rid + 2, 512); /* Skip RID 
length member */
+   }
+   else {
+   kfree(ai->rssi);
+   ai->rssi = NULL;
+   if (cap_rid.softCap & cpu_to_le16(8))
+   ai->config.rmode |= RXMODE_NORMALIZED_RSSI;
+   else
+   airo_print_warn(ai->dev->name, "unknown received signal 
"
+   "level scale");
+   }
+   ai->config.opmode = adhoc ? MODE_STA_IBSS : MODE_STA_ESS;
+   set_auth_type(ai, AUTH_OPEN);
+   ai->config.modulation = MOD_CCK;
+
+   if (le16_to_cpu(cap_rid.len) >= sizeof(cap_rid) &&
+   (cap_rid.extSoftCap & cpu_to_le16(1)) &&
+   micsetup(ai) == SUCCESS) {
+   ai->config.opmode |= MODE_MIC;
+   set_bit(FLAG_MIC_CAPABLE, >flags);
+   }
+
+   /* Save off the MAC */
+   for (i = 0; i < ETH_ALEN; i++) {
+   mac[i] = ai->config.macAddr[i];
+   }
+
+   /* Check to see if there are any insmod configured
+  rates to add */
+   if (rates[0]) {
+   memset(ai->config.rates, 0, sizeof(ai->config.rates));
+   for (i = 0; i < 8 && rates[i]; i++) {
+   ai->config.rates[i] = rates[i];
+   }
+   }
+   set_bit (FLAG_COMMIT, >flags);
+
+   return SUCCESS;
+}
+
+
 static u16 setup_card(struct airo_info *ai, u8 *mac, int lock)
 {
Cmd cmd;
@@ -3864,58 +3926,9 @@ static u16 setup_card(struct airo_info *ai, u8 *mac, int 
lock)
if (lock)
up(>sem);
if (ai->config.len == 0) {
-   int i;
-   tdsRssiRid rssi_rid;
-   CapabilityRid cap_rid;
-
-   kfree(ai->SSID);
-   ai->SSID = NULL;
-   // general configuration (read/modify/write)
-   status = readConfigRid(ai, lock);
-   if (status != SUCCESS) return ERROR;
-
-   status = readCapabilityRid(ai, _rid, lock);
-   if (status != SUCCESS) return ERROR;
-
-   status = PC4500_readrid(ai, RID_RSSI,_rid, 
sizeof(rssi_rid), lock);
-   if (status == SUCCESS) {
-   if (ai->rssi || (ai->rssi = kmalloc(512, GFP_KERNEL)) 
!= NULL)
-   memcpy(ai->rssi, (u8*)_rid + 2, 512); /* 
Skip RID length member */
-   }
-   else {
-   kfree(ai->rssi);
-   ai->rssi = NULL;
-   if (cap_rid.softCap & cpu_to_le16(8))
-   ai->config.rmode |= RXMODE_NORMALIZED_RSSI;
-   else
-   airo_print_warn(ai->dev->name, "unknown 
received signal "
-   "level scale");
-   }
-   ai->config.opmode = adhoc ?

[RFC PATCH v7 22/22] virtio/vsock: update trace event for SEQPACKET

2021-03-23 Thread Arseny Krasnov

This updates current implementation for trace event of virtio vsock:
SEQPACKET socket's type, SEQPACKET specific ops and SEQPACKET 'msg_len'
and 'msg_id' fields are added.

Signed-off-by: Arseny Krasnov 
---
 .../events/vsock_virtio_transport_common.h| 48 ++-
 net/vmw_vsock/virtio_transport_common.c   | 25 +-
 2 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/include/trace/events/vsock_virtio_transport_common.h 
b/include/trace/events/vsock_virtio_transport_common.h
index 6782213778be..3254607eac04 100644
--- a/include/trace/events/vsock_virtio_transport_common.h
+++ b/include/trace/events/vsock_virtio_transport_common.h
@@ -9,9 +9,12 @@
 #include 
 
 TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_STREAM);
+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_TYPE_SEQPACKET);
 
 #define show_type(val) \
-   __print_symbolic(val, { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" })
+   __print_symbolic(val, \
+   { VIRTIO_VSOCK_TYPE_STREAM, "STREAM" }, \
+   { VIRTIO_VSOCK_TYPE_SEQPACKET, "SEQPACKET" })
 
 TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_INVALID);
 TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_REQUEST);
@@ -21,6 +24,8 @@ TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_SHUTDOWN);
 TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_RW);
 TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_UPDATE);
 TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_REQUEST);
+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_SEQ_BEGIN);
+TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_SEQ_END);
 
 #define show_op(val) \
__print_symbolic(val, \
@@ -31,7 +36,9 @@ TRACE_DEFINE_ENUM(VIRTIO_VSOCK_OP_CREDIT_REQUEST);
 { VIRTIO_VSOCK_OP_SHUTDOWN, "SHUTDOWN" }, \
 { VIRTIO_VSOCK_OP_RW, "RW" }, \
 { VIRTIO_VSOCK_OP_CREDIT_UPDATE, "CREDIT_UPDATE" }, \
-{ VIRTIO_VSOCK_OP_CREDIT_REQUEST, "CREDIT_REQUEST" })
+{ VIRTIO_VSOCK_OP_CREDIT_REQUEST, "CREDIT_REQUEST" }, \
+{ VIRTIO_VSOCK_OP_SEQ_BEGIN, "SEQ_BEGIN" }, \
+{ VIRTIO_VSOCK_OP_SEQ_END, "SEQ_END" })
 
 TRACE_EVENT(virtio_transport_alloc_pkt,
TP_PROTO(
@@ -40,7 +47,9 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
 __u32 len,
 __u16 type,
 __u16 op,
-__u32 flags
+__u32 flags,
+__u32 msg_len,
+__u32 msg_id
),
TP_ARGS(
src_cid, src_port,
@@ -48,7 +57,9 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
len,
type,
op,
-   flags
+   flags,
+   msg_len,
+   msg_id
),
TP_STRUCT__entry(
__field(__u32, src_cid)
@@ -59,6 +70,8 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
__field(__u16, type)
__field(__u16, op)
__field(__u32, flags)
+   __field(__u32, msg_len)
+   __field(__u32, msg_id)
),
TP_fast_assign(
__entry->src_cid = src_cid;
@@ -69,14 +82,18 @@ TRACE_EVENT(virtio_transport_alloc_pkt,
__entry->type = type;
__entry->op = op;
__entry->flags = flags;
+   __entry->msg_len = msg_len;
+   __entry->msg_id = msg_id;
),
-   TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x",
+   TP_printk("%u:%u -> %u:%u len=%u type=%s op=%s flags=%#x msg_len=%u 
msg_id=%u",
  __entry->src_cid, __entry->src_port,
  __entry->dst_cid, __entry->dst_port,
  __entry->len,
  show_type(__entry->type),
  show_op(__entry->op),
- __entry->flags)
+ __entry->flags,
+ __entry->msg_len,
+ __entry->msg_id)
 );
 
 TRACE_EVENT(virtio_transport_recv_pkt,
@@ -88,7 +105,9 @@ TRACE_EVENT(virtio_transport_recv_pkt,
 __u16 op,
 __u32 flags,
 __u32 buf_alloc,
-__u32 fwd_cnt
+__u32 fwd_cnt,
+__u32 msg_len,
+__u32 msg_id
),
TP_ARGS(
src_cid, src_port,
@@ -98,7 +117,9 @@ TRACE_EVENT(virtio_transport_recv_pkt,
op,
flags,
buf_alloc,
-   fwd_cnt
+   fwd_cnt,
+   msg_len,
+   msg_id
),
TP_STRUCT__entry(
__field(__u32, src_cid)
@@ -111,6 +132,8 @@ TRACE_EVENT(virtio_transport_recv_pkt,
__field(__u32, flags)
__field(__u32, buf_alloc)
__field(__u32, fwd_cnt)
+   __field(__u32, msg_len)
+   __field(__u32, msg_id)
),
TP_fast_assign(
__entry->src_cid = src_cid;
@@ -123,9 +146,10 @@ TRACE_EVENT(virtio_transport_recv_pkt,

Re: [RFC RESEND PATCH v2 0/8] Add managed version of delayed work init

2021-03-23 Thread Vaittinen, Matti

Hi Greg,

On Tue, 2021-03-23 at 13:43 +0100, Greg KH wrote:
> On Mon, Mar 22, 2021 at 09:41:13AM +0200, Matti Vaittinen wrote:
> > It's not rare that device drivers need delayed work.
> > It's not rare that this work needs driver's data.
> 
> I don't normally comment on "RFC" patch series as I can't take them
> and
> the submitter doesn't feel right with them being merged at this point
> in
> time.
> 
> So if you think this is all correct now, please resubmit without that
> so
> we can review it properly :)

Thanks for the guidance :)
I'll drop the RFC and resubmit.

Others - sorry for the noise.

Best Regards
Matti Vaittinen

fs/io_uring.c:6897:12: warning: stack frame size of 1072 bytes in function 'io_submit_sqes'

2021-03-23 Thread kernel test robot

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   84196390620ac0e5070ae36af84c137c6216a7dc
commit: a1ab7b35db8f262cd74edff62b47b4d90f84f997 io_uring: move req link into 
submit_state
date:   5 weeks ago
config: mips-randconfig-r023-20210322 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 
14696baaf4c43fe53f738bc292bbe169eed93d5d)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install mips cross compiling tool for clang build
# apt-get install binutils-mips-linux-gnu
# 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a1ab7b35db8f262cd74edff62b47b4d90f84f997
git remote add linus 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
git fetch --no-tags linus master
git checkout a1ab7b35db8f262cd74edff62b47b4d90f84f997
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=mips 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

>> fs/io_uring.c:6897:12: warning: stack frame size of 1072 bytes in function 
>> 'io_submit_sqes' [-Wframe-larger-than=]
   static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
  ^
   1 warning generated.


vim +/io_submit_sqes +6897 fs/io_uring.c

709b302faddfac Pavel Begunkov 2020-04-08  6896  
0f2122045b9462 Jens Axboe 2020-09-13 @6897  static int 
io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
6c271ce2f1d572 Jens Axboe 2019-01-10  6898  {
46c4e16a8625f7 Pavel Begunkov 2021-02-18  6899  int submitted = 0;
6c271ce2f1d572 Jens Axboe 2019-01-10  6900  
c4a2ed72c9a615 Jens Axboe 2019-11-21  6901  /* if we have a backlog 
and couldn't flush it all, return BUSY */
ad3eb2c89fb24d Jens Axboe 2019-12-18  6902  if (test_bit(0, 
>sq_check_overflow)) {
6c503150ae33ee Pavel Begunkov 2021-01-04  6903  if 
(!__io_cqring_overflow_flush(ctx, false, NULL, NULL))
1d7bb1d50fb4dc Jens Axboe 2019-11-06  6904  return 
-EBUSY;
ad3eb2c89fb24d Jens Axboe 2019-12-18  6905  }
6c271ce2f1d572 Jens Axboe 2019-01-10  6906  
ee7d46d9db19de Pavel Begunkov 2019-12-30  6907  /* make sure SQ entry 
isn't read before tail */
ee7d46d9db19de Pavel Begunkov 2019-12-30  6908  nr = min3(nr, 
ctx->sq_entries, io_sqring_entries(ctx));
9ef4f124894b7b Pavel Begunkov 2019-12-30  6909  
2b85edfc0c90ef Pavel Begunkov 2019-12-28  6910  if 
(!percpu_ref_tryget_many(>refs, nr))
2b85edfc0c90ef Pavel Begunkov 2019-12-28  6911  return -EAGAIN;
6c271ce2f1d572 Jens Axboe 2019-01-10  6912  
d8a6df10aac9f2 Jens Axboe 2020-10-15  6913  
percpu_counter_add(>io_uring->inflight, nr);
faf7b51c06973f Jens Axboe 2020-10-07  6914  refcount_add(nr, 
>usage);
ba88ff112bdfde Pavel Begunkov 2021-02-10  6915  
io_submit_state_start(>submit_state, nr);
b14cca0c84c760 Pavel Begunkov 2020-01-17  6916  
46c4e16a8625f7 Pavel Begunkov 2021-02-18  6917  while (submitted < nr) {
3529d8c2b353e6 Jens Axboe 2019-12-19  6918  const struct 
io_uring_sqe *sqe;
196be95cd55720 Pavel Begunkov 2019-11-07  6919  struct io_kiocb 
*req;
fb5ccc98782f65 Pavel Begunkov 2019-10-25  6920  
258b29a93bfe74 Pavel Begunkov 2021-02-10  6921  req = 
io_alloc_req(ctx);
196be95cd55720 Pavel Begunkov 2019-11-07  6922  if 
(unlikely(!req)) {
196be95cd55720 Pavel Begunkov 2019-11-07  6923  if 
(!submitted)
196be95cd55720 Pavel Begunkov 2019-11-07  6924  
submitted = -EAGAIN;
fb5ccc98782f65 Pavel Begunkov 2019-10-25  6925  break;
196be95cd55720 Pavel Begunkov 2019-11-07  6926  }
4fccfcbb733794 Pavel Begunkov 2021-02-12  6927  sqe = 
io_get_sqe(ctx);
4fccfcbb733794 Pavel Begunkov 2021-02-12  6928  if 
(unlikely(!sqe)) {
4fccfcbb733794 Pavel Begunkov 2021-02-12  6929  
kmem_cache_free(req_cachep, req);
4fccfcbb733794 Pavel Begunkov 2021-02-12  6930  break;
4fccfcbb733794 Pavel Begunkov 2021-02-12  6931  }
d3656344fea033 Jens Axboe 2019-12-18  6932  /* will 
complete beyond this point, count as submitted */
d3656344fea033 Jens Axboe 2019-12-18  6933  submitted++;
a1ab7b35db8f26 Pavel Begunkov 2021-02-18  6934  if 
(io_submit_sqe(ctx, req, sqe))
fb5ccc98782f65 Pavel Begunkov 2019-10-25  6935  break;
196be95cd55720 Pavel Begunkov 2019-11-07  6936  }
fb5ccc98782f65 Pavel

Re: [PATCH v2 12/12] media: atomisp: Fix LOGICAL_CONTINUATIONS

2021-03-23 Thread Mauro Carvalho Chehab

Em Mon, 14 Dec 2020 12:01:56 +0100
Philipp Gerlesberger  escreveu:

> Logical continuations should be on the previous line
> 
> Co-developed-by: Andrey Khlopkov 
> Signed-off-by: Andrey Khlopkov 
> Signed-off-by: Philipp Gerlesberger 
> ---
>  drivers/staging/media/atomisp/pci/runtime/queue/src/queue.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/staging/media/atomisp/pci/runtime/queue/src/queue.c 
> b/drivers/staging/media/atomisp/pci/runtime/queue/src/queue.c
> index 2f1c2df59f71..7d44070c7114 100644
> --- a/drivers/staging/media/atomisp/pci/runtime/queue/src/queue.c
> +++ b/drivers/staging/media/atomisp/pci/runtime/queue/src/queue.c
> @@ -24,8 +24,8 @@
>   
> */
>  int ia_css_queue_local_init(ia_css_queue_t *qhandle, ia_css_queue_local_t 
> *desc)
>  {
> - if (NULL == qhandle || NULL == desc
> - || NULL == desc->cb_elems || NULL == desc->cb_desc) {
> + if (NULL == qhandle || NULL == desc ||
> + NULL == desc->cb_elems || NULL == desc->cb_desc) {

Nah, there are coding style issues here... we usually do:

if (foo == CONSTANT)

instead of:

if (CONSTANT == foo)

Also, we usually simplify checks for null. So, the above should
be, instead, just:

if (!qhandle || !desc || !desc->cb_elems || !desc->cb_desc)


>   /* Invalid parameters, return error*/
>   return -EINVAL;
>   }



Thanks,
Mauro

[RFC PATCH v7 20/22] virtio/vsock: SEQPACKET feature bit support

2021-03-23 Thread Arseny Krasnov

This adds handling of SEQPACKET bit: guest tries to negotiate it
with vhost.

Signed-off-by: Arseny Krasnov 
---
 net/vmw_vsock/virtio_transport.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 83ae2078c847..ee99bd919a12 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -624,6 +624,10 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
rcu_assign_pointer(the_virtio_vsock, vsock);
 
mutex_unlock(&the_virtio_vsock_mutex);
+
+   if (vdev->features & (1ULL << VIRTIO_VSOCK_F_SEQPACKET))
+   virtio_transport.seqpacket_allow = true;
+
return 0;
 
 out:
@@ -707,6 +711,7 @@ static struct virtio_device_id id_table[] = {
 };
 
 static unsigned int features[] = {
+   VIRTIO_VSOCK_F_SEQPACKET
 };
 
 static struct virtio_driver virtio_vsock_driver = {
-- 
2.25.1

[RFC PATCH v7 21/22] vsock_test: add SOCK_SEQPACKET tests

2021-03-23 Thread Arseny Krasnov

This adds two tests of SOCK_SEQPACKET socket: both transfer data and
then test MSG_EOR and MSG_TRUNC flags. Cases for connect(), bind(),
etc. are not tested, because it is same as for stream socket.

Signed-off-by: Arseny Krasnov 
---
 tools/testing/vsock/util.c   |  32 ++--
 tools/testing/vsock/util.h   |   3 +
 tools/testing/vsock/vsock_test.c | 126 +++
 3 files changed, 156 insertions(+), 5 deletions(-)

diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 93cbd6f603f9..2acbb7703c6a 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -84,7 +84,7 @@ void vsock_wait_remote_close(int fd)
 }
 
 /* Connect to <cid, port> and return the file descriptor. */
-int vsock_stream_connect(unsigned int cid, unsigned int port)
+static int vsock_connect(unsigned int cid, unsigned int port, int type)
 {
union {
struct sockaddr sa;
@@ -101,7 +101,7 @@ int vsock_stream_connect(unsigned int cid, unsigned int 
port)
 
control_expectln("LISTENING");
 
-   fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+   fd = socket(AF_VSOCK, type, 0);
 
timeout_begin(TIMEOUT);
do {
@@ -120,11 +120,21 @@ int vsock_stream_connect(unsigned int cid, unsigned int 
port)
return fd;
 }
 
+int vsock_stream_connect(unsigned int cid, unsigned int port)
+{
+   return vsock_connect(cid, port, SOCK_STREAM);
+}
+
+int vsock_seqpacket_connect(unsigned int cid, unsigned int port)
+{
+   return vsock_connect(cid, port, SOCK_SEQPACKET);
+}
+
 /* Listen on <cid, port> and return the first incoming connection.  The remote
  * address is stored to clientaddrp.  clientaddrp may be NULL.
  */
-int vsock_stream_accept(unsigned int cid, unsigned int port,
-   struct sockaddr_vm *clientaddrp)
+static int vsock_accept(unsigned int cid, unsigned int port,
+   struct sockaddr_vm *clientaddrp, int type)
 {
union {
struct sockaddr sa;
@@ -145,7 +155,7 @@ int vsock_stream_accept(unsigned int cid, unsigned int port,
int client_fd;
int old_errno;
 
-   fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+   fd = socket(AF_VSOCK, type, 0);
 
if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
perror("bind");
@@ -189,6 +199,18 @@ int vsock_stream_accept(unsigned int cid, unsigned int 
port,
return client_fd;
 }
 
+int vsock_stream_accept(unsigned int cid, unsigned int port,
+   struct sockaddr_vm *clientaddrp)
+{
+   return vsock_accept(cid, port, clientaddrp, SOCK_STREAM);
+}
+
+int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
+  struct sockaddr_vm *clientaddrp)
+{
+   return vsock_accept(cid, port, clientaddrp, SOCK_SEQPACKET);
+}
+
 /* Transmit one byte and check the return value.
  *
  * expected_ret:
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index e53dd09d26d9..a3375ad2fb7f 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -36,8 +36,11 @@ struct test_case {
 void init_signals(void);
 unsigned int parse_cid(const char *str);
 int vsock_stream_connect(unsigned int cid, unsigned int port);
+int vsock_seqpacket_connect(unsigned int cid, unsigned int port);
 int vsock_stream_accept(unsigned int cid, unsigned int port,
struct sockaddr_vm *clientaddrp);
+int vsock_seqpacket_accept(unsigned int cid, unsigned int port,
+  struct sockaddr_vm *clientaddrp);
 void vsock_wait_remote_close(int fd);
 void send_byte(int fd, int expected_ret, int flags);
 void recv_byte(int fd, int expected_ret, int flags);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 5a4fb80fa832..5fca9be5b1dd 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -14,6 +14,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "timeout.h"
 #include "control.h"
@@ -279,6 +281,120 @@ static void test_stream_msg_peek_server(const struct 
test_opts *opts)
close(fd);
 }
 
+#define MESSAGES_CNT 7
+#define MESSAGE_EOR_IDX (MESSAGES_CNT / 2)
+static void test_seqpacket_msg_eor_client(const struct test_opts *opts)
+{
+   int fd;
+
+   fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+   if (fd < 0) {
+   perror("connect");
+   exit(EXIT_FAILURE);
+   }
+
+   /* Send several messages, one with MSG_EOR flag */
+   for (int i = 0; i < MESSAGES_CNT; i++)
+   send_byte(fd, 1, (i != MESSAGE_EOR_IDX) ? 0 : MSG_EOR);
+
+   control_writeln("SENDDONE");
+   close(fd);
+}
+
+static void test_seqpacket_msg_eor_server(const struct test_opts *opts)
+{
+   int fd;
+   char buf[16];
+   struct msghdr msg = {0};
+   struct iovec iov = {0};
+
+   fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+   if (fd < 0) {
+   perror("accept");
+

Re: [PATCH] ARM: dma-mapping: fix out of bounds access in CMA

2021-03-23 Thread Russell King - ARM Linux admin

On Tue, Mar 23, 2021 at 02:14:13PM +0100, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> Dereferencing a zero-length array is always a bug, and we get a warning
> with 'make W=1' here:
> 
> arch/arm/mm/dma-mapping.c: In function 'dma_contiguous_early_fixup':
> arch/arm/mm/dma-mapping.c:395:15: error: array subscript <unknown> is outside 
> array bounds of 'struct dma_contig_early_reserve[0]' [-Werror=array-bounds]
>   395 |  dma_mmu_remap[dma_mmu_remap_num].base = base;
>   |  ~^~~
> arch/arm/mm/dma-mapping.c:389:40: note: while referencing 'dma_mmu_remap'
>   389 | static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] 
> __initdata;
>   |^
> arch/arm/mm/dma-mapping.c:396:15: error: array subscript <unknown> is outside 
> array bounds of 'struct dma_contig_early_reserve[0]' [-Werror=array-bounds]
> 
> Add a runtime check to prevent this from happening, while also
> avoiding the compile-time warning.
> 
> Fixes: c79095092834 ("ARM: integrate CMA with DMA-mapping subsystem")
> Signed-off-by: Arnd Bergmann 
> ---
>  arch/arm/mm/dma-mapping.c | 9 +
>  1 file changed, 9 insertions(+)
> 
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index c4b8df2ad328..af29344fb150 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -392,6 +392,11 @@ static int dma_mmu_remap_num __initdata;
>  
>  void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
>  {
> + if (!MAX_CMA_AREAS || dma_mmu_remap_num >= MAX_CMA_AREAS) {
> + WARN_ONCE(1, "number of CMA areas\n");
> + return;
> + }
> +

What if dma_mmu_remap_num were negative - that condition is not checked
and will also result in an overflow of the array. If we're being fussy
enough to bounds check, we ought to do it properly.

So, I think a better solution would be to make dma_mmu_remap_num an
unsigned int, and then to use:

if (dma_mmu_remap_num >= ARRAY_SIZE(dma_mmu_remap)) {
...
}

which is really the condition we're after here.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!

Re: [PATCH] Input: analog - fix invalid snprintf() call

2021-03-23 Thread Rasmus Villemoes

On 23/03/2021 14.14, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> overlapping input and output arguments to snprintf() are
> undefined behavior in C99:
> 

Good luck:
https://lore.kernel.org/lkml/1457469654-17059-1-git-send-email-li...@rasmusvillemoes.dk/

At least 5 years ago the consensus from old-timers was that "the
kernel's snprintf supports this use case, just keep it working that way".

> diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c
> index f798922a4598..8c9fed3f13e2 100644
> --- a/drivers/input/joystick/analog.c
> +++ b/drivers/input/joystick/analog.c
> @@ -419,14 +419,16 @@ static void analog_calibrate_timer(struct analog_port 
> *port)
>  
>  static void analog_name(struct analog *analog)
>  {
> - snprintf(analog->name, sizeof(analog->name), "Analog %d-axis %d-button",
> + int len;
> +
> + len = snprintf(analog->name, sizeof(analog->name), "Analog %d-axis 
> %d-button",
>hweight8(analog->mask & ANALOG_AXES_STD),
>hweight8(analog->mask & ANALOG_BTNS_STD) + !!(analog->mask & 
> ANALOG_BTNS_CHF) * 2 +
>hweight16(analog->mask & ANALOG_BTNS_GAMEPAD) + 
> !!(analog->mask & ANALOG_HBTN_CHF) * 4);
>  
>   if (analog->mask & ANALOG_HATS_ALL)
> - snprintf(analog->name, sizeof(analog->name), "%s %d-hat",
> -  analog->name, hweight16(analog->mask & 
> ANALOG_HATS_ALL));
> + len += snprintf(analog->name + len, sizeof(analog->name) - len, 
> "%d-hat",
> +  hweight16(analog->mask & ANALOG_HATS_ALL));

Use scnprintf, this is too fragile and hard to verify. If the first
snprintf overflows, the second passes a huge size_t to snprintf which
will WARN.

Rasmus

Re: [PATCH 2/2] mm/vmalloc: Use kvmalloc to allocate the table of pages

2021-03-23 Thread Uladzislau Rezki

On Tue, Mar 23, 2021 at 12:39:13PM +, Matthew Wilcox wrote:
> On Tue, Mar 23, 2021 at 01:04:36PM +0100, Uladzislau Rezki wrote:
> > On Mon, Mar 22, 2021 at 11:03:11PM +, Matthew Wilcox wrote:
> > > I suspect the vast majority of the time is spent calling 
> > > alloc_pages_node()
> > > 1024 times.  Have you looked at Mel's patch to do ... well, exactly what
> > > vmalloc() wants?
> > > 
> > 
> >  - __vmalloc_node_range
> > - 45.25% __alloc_pages_nodemask
> >- 37.59% get_page_from_freelist
> [...]
> >   - 44.61% 0xc047348d
> >  - __vunmap
> > - 35.56% free_unref_page
> 
> Hmm!  I hadn't been thinking about the free side of things.
> Does this make a difference?
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 4f5f8c907897..61d5b769fea0 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -2277,16 +2277,8 @@ static void __vunmap(const void *addr, int 
> deallocate_pages)
>   vm_remove_mappings(area, deallocate_pages);
>  
>   if (deallocate_pages) {
> - int i;
> -
> - for (i = 0; i < area->nr_pages; i++) {
> - struct page *page = area->pages[i];
> -
> - BUG_ON(!page);
> - __free_pages(page, 0);
> - }
> + release_pages(area->pages, area->nr_pages);
>   atomic_long_sub(area->nr_pages, _vmalloc_pages);
> -
>   kvfree(area->pages);
>   }
> 
Will check it today!

> release_pages does a bunch of checks that are unnecessary ... we could
> probably just do:
> 
>   LIST_HEAD(pages_to_free);
> 
>   for (i = 0; i < area->nr_pages; i++) {
>   struct page *page = area->pages[i];
>   if (put_page_testzero(page))
>   list_add(&page->lru, &pages_to_free);
>   }
>   free_unref_page_list(&pages_to_free);
> 
> but let's see if the provided interface gets us the performance we want.
>  
> > Reviewed-by: Uladzislau Rezki (Sony) 
> > 
> > Thanks!
> 
> Thank you!
You are welcome. A small nit:

  CC  mm/vmalloc.o
mm/vmalloc.c: In function ‘__vmalloc_area_node’:
mm/vmalloc.c:2492:14: warning: passing argument 4 of ‘kvmalloc_node_caller’ 
makes integer from pointer without a cast [-Wint-conversion]
  area->caller);
  ^~~~
In file included from mm/vmalloc.c:12:
./include/linux/mm.h:782:7: note: expected ‘long unsigned int’ but argument is 
of type ‘const void *’
 void *kvmalloc_node_caller(size_t size, gfp_t flags, int node,


diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 8a202ba263f6..ee6fa44983bc 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2489,7 +2489,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, 
gfp_t gfp_mask,
 
/* Please note that the recursion is strictly bounded. */
pages = kvmalloc_node_caller(array_size, nested_gfp, node,
-area->caller);
+   (unsigned long) area->caller);
if (!pages) {
free_vm_area(area);
return NULL;


As for the bulk-array interface. I have checked the:

git://git.kernel.org/pub/scm/linux/kernel/git/mel/linux.git mm-bulk-rebase-v6r2

applied the patch that is in question + below one:


@@ -2503,25 +2498,13 @@ static void *__vmalloc_area_node(struct vm_struct 
*area, gfp_t gfp_mask,
area->pages = pages;
area->nr_pages = nr_pages;
 
-   for (i = 0; i < area->nr_pages; i++) {
-   struct page *page;
-
-   if (node == NUMA_NO_NODE)
-   page = alloc_page(gfp_mask);
-   else
-   page = alloc_pages_node(node, gfp_mask, 0);
-
-   if (unlikely(!page)) {
-   /* Successfully allocated i pages, free them in 
__vfree() */
-   area->nr_pages = i;
-   atomic_long_add(area->nr_pages, _vmalloc_pages);
-   goto fail;
-   }
-   area->pages[i] = page;
-   if (gfpflags_allow_blocking(gfp_mask))
-   cond_resched();
+   ret = alloc_pages_bulk_array(gfp_mask, area->nr_pages, area->pages);
+   if (ret == nr_pages)
+   atomic_long_add(area->nr_pages, _vmalloc_pages);
+   else {
+   area->nr_pages = ret;
+   goto fail;
}
-   atomic_long_add(area->nr_pages, _vmalloc_pages);


single CPU, 4MB allocation, 100 avg: 70639437 usec
single CPU, 4MB allocation, 100 avg: 89218654 usec

and now we get ~21% delta. That is very good :)

--
Vlad Rezki

Re: [RFC PATCH v2 5/8] arm64: Detect an FTRACE frame and mark a stack trace unreliable

2021-03-23 Thread Madhavan T. Venkataraman




On 3/23/21 8:36 AM, Mark Rutland wrote:
> On Tue, Mar 23, 2021 at 07:56:40AM -0500, Madhavan T. Venkataraman wrote:
>>
>>
>> On 3/23/21 5:51 AM, Mark Rutland wrote:
>>> On Mon, Mar 15, 2021 at 11:57:57AM -0500, madve...@linux.microsoft.com 
>>> wrote:
 From: "Madhavan T. Venkataraman" 

 When CONFIG_DYNAMIC_FTRACE_WITH_REGS is enabled and tracing is activated
 for a function, the ftrace infrastructure is called for the function at
 the very beginning. Ftrace creates two frames:

- One for the traced function

- One for the caller of the traced function

 That gives a reliable stack trace while executing in the ftrace
 infrastructure code. When ftrace returns to the traced function, the frames
 are popped and everything is back to normal.

 However, in cases like live patch, execution is redirected to a different
 function when ftrace returns. A stack trace taken while still in the ftrace
 infrastructure code will not show the target function. The target function
 is the real function that we want to track.

 So, if an FTRACE frame is detected on the stack, just mark the stack trace
 as unreliable.
>>>
>>> To identify this case, please identify the ftrace trampolines instead,
>>> e.g. ftrace_regs_caller, return_to_handler.
>>>
>>
>> Yes. As part of the return address checking, I will check this. IIUC, I 
>> think that
>> I need to check for the inner labels that are defined at the point where the
>> instructions are patched for ftrace. E.g., ftrace_call and ftrace_graph_call.
>>
>> SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
>> bl  ftrace_stub  <
>>
>> #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>> SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) // ftrace_graph_caller();
>> nop  <===// If enabled, this will be replaced
>> // "b ftrace_graph_caller"
>> #endif
>>
>> For instance, the stack trace I got while tracing do_mmap() with the stack 
>> trace
>> tracer looks like this:
>>
>>   ...
>> [  338.911793]   trace_function+0xc4/0x160
>> [  338.911801]   function_stack_trace_call+0xac/0x130
>> [  338.911807]   ftrace_graph_call+0x0/0x4
>> [  338.911813]   do_mmap+0x8/0x598
>> [  338.911820]   vm_mmap_pgoff+0xf4/0x188
>> [  338.911826]   ksys_mmap_pgoff+0x1d8/0x220
>> [  338.911832]   __arm64_sys_mmap+0x38/0x50
>> [  338.911839]   el0_svc_common.constprop.0+0x70/0x1a8
>> [  338.911846]   do_el0_svc+0x2c/0x98
>> [  338.911851]   el0_svc+0x2c/0x70
>> [  338.911859]   el0_sync_handler+0xb0/0xb8
>> [  338.911864]   el0_sync+0x180/0x1c0
>>
>>> It'd be good to check *exactly* when we need to reject, since IIUC when
>>> we have a graph stack entry the unwind will be correct from livepatch's
>>> PoV.
>>>
>>
>> The current unwinder already handles this like this:
>>
>> #ifdef CONFIG_FUNCTION_GRAPH_TRACER
>> if (tsk->ret_stack &&
>> (ptrauth_strip_insn_pac(frame->pc) == (unsigned 
>> long)return_to_handler)) {
>> struct ftrace_ret_stack *ret_stack;
>> /*
>>  * This is a case where function graph tracer has
>>  * modified a return address (LR) in a stack frame
>>  * to hook a function return.
>>  * So replace it to an original value.
>>  */
>> ret_stack = ftrace_graph_get_ret_stack(tsk, frame->graph++);
>> if (WARN_ON_ONCE(!ret_stack))
>> return -EINVAL;
>> frame->pc = ret_stack->ret;
>> }
>> #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> 
> Beware that this handles the case where a function will return to
> return_to_handler, but doesn't handle unwinding from *within*
> return_to_handler, which we can't do reliably today, so that might need
> special handling.
> 

OK. I will take a look at this.

>> Is there anything else that needs handling here?
> 
> I wrote up a few cases to consider in:
> 
> https://www.kernel.org/doc/html/latest/livepatch/reliable-stacktrace.html
> 
> ... e.g. the "Obscuring of return addresses" case.
> 
> It might be that we're fine so long as we refuse to unwind across
> exception boundaries, but it needs some thought. We probably need to go
> over each of the trampolines instruction-by-instruction to consider
> that.
> 
> As mentioned above, within return_to_handler when we call
> ftrace_return_to_handler, there's a period where the real return address
> has been removed from the ftrace return stack, but hasn't yet been
> placed in x30, and wouldn't show up in a trace (e.g. if we could somehow
> hook the return from ftrace_return_to_handler).
> 
> We might be saved by the fact we'll mark traces across exception
> boundaries as unreliable, but I haven't thought very hard about it. We
> might want to explicitly reject unwinds within return_to_handler in case
> it's possible to

[PATCH] i915_vma: Rename vma_lookup to i915_vma_lookup

2021-03-23 Thread Liam Howlett

Use i915 prefix to avoid name collision with future vma_lookup() in mm.

Signed-off-by: Liam R. Howlett 
Reviewed-by: Matthew Wilcox (Oracle) 
---
 drivers/gpu/drm/i915/i915_vma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index caa9b041616b..ee0028c697f6 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -230,7 +230,7 @@ vma_create(struct drm_i915_gem_object *obj,
 }
 
 static struct i915_vma *
-vma_lookup(struct drm_i915_gem_object *obj,
+i915_vma_lookup(struct drm_i915_gem_object *obj,
   struct i915_address_space *vm,
   const struct i915_ggtt_view *view)
 {
@@ -278,7 +278,7 @@ i915_vma_instance(struct drm_i915_gem_object *obj,
GEM_BUG_ON(!atomic_read(&vm->open));
 
spin_lock(&obj->vma.lock);
-   vma = vma_lookup(obj, vm, view);
+   vma = i915_vma_lookup(obj, vm, view);
spin_unlock(&obj->vma.lock);
 
/* vma_create() will resolve the race if another creates the vma */
-- 
2.30.0

Re: [PATCH 0/4] nfc: fix Resource leakage and endless loop

2021-03-23 Thread Greg KH

On Wed, Mar 03, 2021 at 02:16:50PM +0800, Xiaoming Ni wrote:
> fix Resource leakage and endless loop in net/nfc/llcp_sock.c,
>  reported by "kiyin(尹亮)".
> 
> Link: https://www.openwall.com/lists/oss-security/2020/11/01/1

What happened to this series?

Does it need to be resent against the latest networking tree?

thanks,

greg k-h

Re: [s390x vDSO Bug?] clock_gettime(CLOCK_MONOTONIC_RAW, ...) gets abnormal ts value

2021-03-23 Thread Heiko Carstens

On Tue, Mar 23, 2021 at 08:11:41AM +0100, Heiko Carstens wrote:
> On Tue, Mar 23, 2021 at 02:21:52PM +0800, Li Wang wrote:
> > Hi linux-s390 experts,
> > 
> > We observed that LTP/clock_gettime04 always FAIL on s390x with
> > kernel-v5.12-rc3.
> > To simply show the problem, I rewrite the LTP reproducer as a simple C
> > below.
> > Maybe it's a new bug introduced from the kernel-5.12 series branch?
> > 
> > PASS:
> > 
> > # uname -r
> > 5.11.0-*.s390x
> > 
> > # grep TIME_NS /boot/config-5.11.0-*.s390x
> > no TIME_NS enabled
> > 
> > ## ./test-timer
> > vdso_ts_nsec = 898169901815, vdso_ts.tv_sec = 898, vdso_ts.tv_nsec =
> > 169901815
> > sys_ts_nsec  = 898169904269, sys_ts.tv_sec  = 898, sys_ts.tv_nsec  =
> > 169904269
> > ===> PASS
> > 
> > FAIL:
> > --
> > # uname -r
> > 5.12.0-0.rc3.*.s390x
> > 
> > # grep TIME_NS /boot/config-5.12.0-0.rc3.s390x
> > CONFIG_TIME_NS=y
> > CONFIG_GENERIC_VDSO_TIME_NS=y
> > 
> > # ./test-timer
> > vdso_ts_nsec = 4484351380985507, vdso_ts.tv_sec = 4484351, vdso_ts.tv_nsec
> > = 380985507
> > sys_ts_nsec  = 1446923235377, sys_ts.tv_sec  = 1446, sys_ts.tv_nsec  =
> > 923235377
> > ===> FAIL
> 
> Thanks for reporting!
> 
> I'll look later today into this. I would nearly bet that I broke it
> with commit f8d8977a3d97 ("s390/time: convert tod_clock_base to
> union")

So, I broke it with commit 1ba2d6c0fd4e ("s390/vdso: simplify
__arch_get_hw_counter()"). Reverting that patch will fix it for non
time namespace processes only.

The problem is that the vdso data page contains an array of struct
vdso_data's for each clock source. However only the first member of
that array contains a/the valid struct arch_vdso_data, which is
required for __arch_get_hw_counter(). Which alone is a bit odd...

However for a process which is within a time namespace there is no
(easy) way to access that page (the time namespace specific vdso data
page does not contain valid arch_vdso_data). I guess the real fix is
to simply map yet another page into the vvar mapping and put the
arch_data there. What a mess... :/

[RFC PATCH v5 08/10] KVM: selftests: List all hugetlb src types specified with page sizes

2021-03-23 Thread Yanan Wang

With VM_MEM_SRC_ANONYMOUS_HUGETLB, we currently can only use system
default hugetlb pages to back the testing guest memory. In order to
add flexibility, now list all the known hugetlb backing src types with
different page sizes, so that we can specify use of hugetlb pages of the
exact granularity that we want. And as all the known hugetlb page sizes
are listed, it's appropriate for all architectures.

Besides, the helper get_backing_src_pagesz() is added to get the
granularity of different backing src types (anonymous, thp, hugetlb).

Suggested-by: Ben Gardon 
Signed-off-by: Yanan Wang 
---
 .../testing/selftests/kvm/include/test_util.h |  18 ++-
 tools/testing/selftests/kvm/lib/kvm_util.c|   2 +-
 tools/testing/selftests/kvm/lib/test_util.c   | 109 --
 3 files changed, 116 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/test_util.h 
b/tools/testing/selftests/kvm/include/test_util.h
index e087174eefe5..fade3130eb01 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -71,16 +71,32 @@ enum vm_mem_backing_src_type {
VM_MEM_SRC_ANONYMOUS,
VM_MEM_SRC_ANONYMOUS_THP,
VM_MEM_SRC_ANONYMOUS_HUGETLB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB,
+   VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB,
+   NUM_SRC_TYPES,
 };
 
 struct vm_mem_backing_src_alias {
const char *name;
-   enum vm_mem_backing_src_type type;
+   uint32_t flag;
 };
 
 bool thp_configured(void);
 size_t get_trans_hugepagesz(void);
 size_t get_def_hugetlb_pagesz(void);
+const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
+size_t get_backing_src_pagesz(uint32_t i);
 void backing_src_help(void);
 enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
 
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c 
b/tools/testing/selftests/kvm/lib/kvm_util.c
index 2ea837fe03af..3506174c2053 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -759,7 +759,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
region->mmap_start = mmap(NULL, region->mmap_size,
  PROT_READ | PROT_WRITE,
  MAP_PRIVATE | MAP_ANONYMOUS
- | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? 
MAP_HUGETLB : 0),
+ | vm_mem_backing_src_alias(src_type)->flag,
  -1, 0);
TEST_ASSERT(region->mmap_start != MAP_FAILED,
"test_malloc failed, mmap_start: %p errno: %i",
diff --git a/tools/testing/selftests/kvm/lib/test_util.c 
b/tools/testing/selftests/kvm/lib/test_util.c
index 665724ccab97..205408bffa38 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "linux/kernel.h"
 
 #include "test_util.h"
@@ -112,12 +113,6 @@ void print_skip(const char *fmt, ...)
puts(", skipping test");
 }
 
-const struct vm_mem_backing_src_alias backing_src_aliases[] = {
-   {"anonymous", VM_MEM_SRC_ANONYMOUS,},
-   {"anonymous_thp", VM_MEM_SRC_ANONYMOUS_THP,},
-   {"anonymous_hugetlb", VM_MEM_SRC_ANONYMOUS_HUGETLB,},
-};
-
 bool thp_configured(void)
 {
int ret;
@@ -171,22 +166,114 @@ size_t get_def_hugetlb_pagesz(void)
return 0;
 }
 
+const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
+{
+   static const struct vm_mem_backing_src_alias aliases[] = {
+   [VM_MEM_SRC_ANONYMOUS] = {
+   .name = "anonymous",
+   .flag = 0,
+   },
+   [VM_MEM_SRC_ANONYMOUS_THP] = {
+   .name = "anonymous_thp",
+   .flag = 0,
+   },
+   [VM_MEM_SRC_ANONYMOUS_HUGETLB] = {
+   .name = "anonymous_hugetlb",
+   .flag = MAP_HUGETLB,
+   },
+   [VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = {
+   .name = "anonymous_hugetlb_16kb",
+   .flag = MAP_HUGETLB | MAP_HUGE_16KB,
+   },
+   [VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = {
+   .name = "anonymous_hugetlb_64kb",
+   .flag = MAP_HUGETLB | MAP_HUGE_64KB,
+   },
+   [VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB]

[RFC PATCH v5 07/10] KVM: selftests: Add a helper to get system default hugetlb page size

2021-03-23 Thread Yanan Wang

If HUGETLB is configured in the host kernel, then we can know the system
default hugetlb page size through *cat /proc/meminfo*. Otherwise, we will
not see the information of hugetlb pages in file /proc/meminfo if it's not
configured. So add a helper to determine whether HUGETLB is configured and
then get the default page size by reading /proc/meminfo.

This helper can be useful when a program wants to use the default hugetlb
pages of the system and doesn't know the default page size.

Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
---
 .../testing/selftests/kvm/include/test_util.h |  1 +
 tools/testing/selftests/kvm/lib/test_util.c   | 25 +++
 2 files changed, 26 insertions(+)

diff --git a/tools/testing/selftests/kvm/include/test_util.h 
b/tools/testing/selftests/kvm/include/test_util.h
index ef24c76ba89a..e087174eefe5 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -80,6 +80,7 @@ struct vm_mem_backing_src_alias {
 
 bool thp_configured(void);
 size_t get_trans_hugepagesz(void);
+size_t get_def_hugetlb_pagesz(void);
 void backing_src_help(void);
 enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
 
diff --git a/tools/testing/selftests/kvm/lib/test_util.c 
b/tools/testing/selftests/kvm/lib/test_util.c
index efc1a7782de0..665724ccab97 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -146,6 +146,31 @@ size_t get_trans_hugepagesz(void)
return size;
 }
 
+size_t get_def_hugetlb_pagesz(void)
+{
+   char buf[64];
+   const char *tag = "Hugepagesize:";
+   FILE *f;
+
+   f = fopen("/proc/meminfo", "r");
+   TEST_ASSERT(f != NULL, "Error in opening /proc/meminfo");
+
+   while (fgets(buf, sizeof(buf), f) != NULL) {
+   if (strstr(buf, tag) == buf) {
+   fclose(f);
+   return strtoull(buf + strlen(tag), NULL, 10) << 10;
+   }
+   }
+
+   if (feof(f))
+   TEST_FAIL("HUGETLB is not configured in host kernel");
+   else
+   TEST_FAIL("Error in reading /proc/meminfo");
+
+   fclose(f);
+   return 0;
+}
+
 void backing_src_help(void)
 {
int i;
-- 
2.19.1

[RFC PATCH v5 09/10] KVM: selftests: Adapt vm_userspace_mem_region_add to new helpers

2021-03-23 Thread Yanan Wang

With VM_MEM_SRC_ANONYMOUS_THP specified in vm_userspace_mem_region_add(),
we have to get the transparent hugepage size for HVA alignment. With the
new helpers, we can use get_backing_src_pagesz() to check whether THP is
configured and then get the exact configured hugepage size.

As different architectures may have different THP page sizes configured,
this can get the accurate THP page sizes on any platform.

Signed-off-by: Yanan Wang 
Reviewed-by: Ben Gardon 
Reviewed-by: Andrew Jones 
---
 tools/testing/selftests/kvm/lib/kvm_util.c | 28 +++---
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c 
b/tools/testing/selftests/kvm/lib/kvm_util.c
index 3506174c2053..c7a2228deaf3 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -18,7 +18,6 @@
 #include 
 #include 
 
-#define KVM_UTIL_PGS_PER_HUGEPG 512
 #define KVM_UTIL_MIN_PFN   2
 
 static int vcpu_mmap_sz(void);
@@ -688,7 +687,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 {
int ret;
struct userspace_mem_region *region;
-   size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+   size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
size_t alignment;
 
TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
@@ -750,7 +749,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 #endif
 
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
-   alignment = max(huge_page_size, alignment);
+   alignment = max(backing_src_pagesz, alignment);
 
/* Add enough memory to align up if necessary */
if (alignment > 1)
@@ -769,22 +768,13 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
region->host_mem = align(region->mmap_start, alignment);
 
/* As needed perform madvise */
-   if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == 
VM_MEM_SRC_ANONYMOUS_THP) {
-   struct stat statbuf;
-
-   ret = stat("/sys/kernel/mm/transparent_hugepage", );
-   TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
-   "stat /sys/kernel/mm/transparent_hugepage");
-
-   TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP,
-   "VM_MEM_SRC_ANONYMOUS_THP requires THP to be 
configured in the host kernel");
-
-   if (ret == 0) {
-   ret = madvise(region->host_mem, npages * vm->page_size,
- src_type == VM_MEM_SRC_ANONYMOUS ? 
MADV_NOHUGEPAGE : MADV_HUGEPAGE);
-   TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 
0x%lx src_type: %x",
-   region->host_mem, npages * vm->page_size, 
src_type);
-   }
+   if ((src_type == VM_MEM_SRC_ANONYMOUS ||
+src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
+   ret = madvise(region->host_mem, npages * vm->page_size,
+ src_type == VM_MEM_SRC_ANONYMOUS ? 
MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+   TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx 
src_type: %s",
+   region->host_mem, npages * vm->page_size,
+   vm_mem_backing_src_alias(src_type)->name);
}
 
region->unused_phy_pages = sparsebit_alloc();
-- 
2.19.1

[RFC PATCH v5 10/10] KVM: selftests: Add a test for kvm page table code

2021-03-23 Thread Yanan Wang

This test serves as a performance tester and a bug reproducer for
kvm page table code (GPA->HPA mappings), so it gives guidance for
people trying to make some improvement for kvm.

The function guest_code() can cover the conditions where a single vcpu or
multiple vcpus access guest pages within the same memory region, in three
VM stages(before dirty logging, during dirty logging, after dirty logging).
Besides, the backing src memory type(ANONYMOUS/THP/HUGETLB) of the tested
memory region can be specified by users, which means normal page mappings
or block mappings can be chosen by users to be created in the test.

If ANONYMOUS memory is specified, kvm will create normal page mappings
for the tested memory region before dirty logging, and update attributes
of the page mappings from RO to RW during dirty logging. If THP/HUGETLB
memory is specified, kvm will create block mappings for the tested memory
region before dirty logging, and split the block mappings into normal page
mappings during dirty logging, and coalesce the page mappings back into
block mappings after dirty logging is stopped.

So in summary, as a performance tester, this test can present the
performance of kvm creating/updating normal page mappings, or the
performance of kvm creating/splitting/recovering block mappings,
through execution time.

When we need to coalesce the page mappings back to block mappings after
dirty logging is stopped, we have to firstly invalidate *all* the TLB
entries for the page mappings right before installation of the block entry,
because a TLB conflict abort error could occur if we can't invalidate the
TLB entries fully. We have hit this TLB conflict twice on aarch64 software
implementation and fixed it. As this test can simulate the process from dirty
logging enabled to dirty logging stopped for a VM with block mappings,
it can also reproduce this TLB conflict abort caused by inadequate TLB
invalidation when coalescing tables.

Signed-off-by: Yanan Wang 
Reviewed-by: Ben Gardon 
---
 tools/testing/selftests/kvm/.gitignore|   1 +
 tools/testing/selftests/kvm/Makefile  |   3 +
 .../selftests/kvm/kvm_page_table_test.c   | 512 ++
 3 files changed, 516 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/kvm_page_table_test.c

diff --git a/tools/testing/selftests/kvm/.gitignore 
b/tools/testing/selftests/kvm/.gitignore
index 32b87cc77c8e..137ab7273be6 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -35,6 +35,7 @@
 /dirty_log_perf_test
 /hardware_disable_test
 /kvm_create_max_vcpus
+/kvm_page_table_test
 /memslot_modification_stress_test
 /set_memory_region_test
 /steal_time
diff --git a/tools/testing/selftests/kvm/Makefile 
b/tools/testing/selftests/kvm/Makefile
index a6d61f451f88..75dc57db36b4 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -69,6 +69,7 @@ TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
 TEST_GEN_PROGS_x86_64 += hardware_disable_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86_64 += kvm_page_table_test
 TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
@@ -79,6 +80,7 @@ TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_aarch64 += kvm_page_table_test
 TEST_GEN_PROGS_aarch64 += set_memory_region_test
 TEST_GEN_PROGS_aarch64 += steal_time
 
@@ -88,6 +90,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test
 TEST_GEN_PROGS_s390x += demand_paging_test
 TEST_GEN_PROGS_s390x += dirty_log_test
 TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390x += kvm_page_table_test
 TEST_GEN_PROGS_s390x += set_memory_region_test
 
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c 
b/tools/testing/selftests/kvm/kvm_page_table_test.c
new file mode 100644
index ..bbd5c489d61f
--- /dev/null
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -0,0 +1,512 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM page table test
+ *
+ * Copyright (C) 2021, Huawei, Inc.
+ *
+ * Make sure that THP has been enabled or enough HUGETLB pages with specific
+ * page size have been pre-allocated on your system, if you are planning to
+ * use hugepages to back the guest memory for testing.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "guest_modes.h"
+
+#define TEST_MEM_SLOT_INDEX 1
+
+/* Default size(1GB) of the memory for testing */
+#define DEFAULT_TEST_MEM_SIZE  (1 << 30)
+
+/* Default guest test virtual memory offset */
+#define

[RFC PATCH v5 06/10] KVM: selftests: Add a helper to get system configured THP page size

2021-03-23 Thread Yanan Wang

If we want to have some tests about transparent hugepages, the system
configured THP hugepage size should be known by the tests, so that it can
be used for various kinds of alignment or guest memory accesses by vcpus.
So it makes sense to add a helper to get the transparent hugepage size.

With VM_MEM_SRC_ANONYMOUS_THP specified in vm_userspace_mem_region_add(),
we now stat /sys/kernel/mm/transparent_hugepage to check whether THP is
configured in the host kernel before madvise(). Based on this, we can also
read file /sys/kernel/mm/transparent_hugepage/hpage_pmd_size to get THP
hugepage size.

Signed-off-by: Yanan Wang 
Reviewed-by: Ben Gardon 
---
 .../testing/selftests/kvm/include/test_util.h |  2 ++
 tools/testing/selftests/kvm/lib/test_util.c   | 29 +++
 2 files changed, 31 insertions(+)

diff --git a/tools/testing/selftests/kvm/include/test_util.h 
b/tools/testing/selftests/kvm/include/test_util.h
index b7f41399f22c..ef24c76ba89a 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -78,6 +78,8 @@ struct vm_mem_backing_src_alias {
enum vm_mem_backing_src_type type;
 };
 
+bool thp_configured(void);
+size_t get_trans_hugepagesz(void);
 void backing_src_help(void);
 enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
 
diff --git a/tools/testing/selftests/kvm/lib/test_util.c 
b/tools/testing/selftests/kvm/lib/test_util.c
index c7c0627c6842..efc1a7782de0 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "linux/kernel.h"
 
 #include "test_util.h"
@@ -117,6 +118,34 @@ const struct vm_mem_backing_src_alias 
backing_src_aliases[] = {
{"anonymous_hugetlb", VM_MEM_SRC_ANONYMOUS_HUGETLB,},
 };
 
+bool thp_configured(void)
+{
+   int ret;
+   struct stat statbuf;
+
+   ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
+   TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
+   "Error in stating /sys/kernel/mm/transparent_hugepage");
+
+   return ret == 0;
+}
+
+size_t get_trans_hugepagesz(void)
+{
+   size_t size;
+   FILE *f;
+
+   TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
+
+   f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
+   TEST_ASSERT(f != NULL, "Error in opening 
transparent_hugepage/hpage_pmd_size");
+
+   fscanf(f, "%ld", &size);
+   fclose(f);
+
+   return size;
+}
+
 void backing_src_help(void)
 {
int i;
-- 
2.19.1

[RFC PATCH v5 04/10] KVM: selftests: Print the errno besides error-string in TEST_ASSERT

2021-03-23 Thread Yanan Wang

Printing the errno alongside the error string in TEST_ASSERT, in the format
"errno=%d - %s", explicitly indicates that the string is error
information. Besides, the errno is easier to use for debugging
than the error string.

Suggested-by: Andrew Jones 
Signed-off-by: Yanan Wang 
---
 tools/testing/selftests/kvm/lib/assert.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/assert.c 
b/tools/testing/selftests/kvm/lib/assert.c
index 5ebbd0d6b472..71ade6100fd3 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -71,9 +71,9 @@ test_assert(bool exp, const char *exp_str,
 
fprintf(stderr, " Test Assertion Failure \n"
"  %s:%u: %s\n"
-   "  pid=%d tid=%d - %s\n",
+   "  pid=%d tid=%d errno=%d - %s\n",
file, line, exp_str, getpid(), _gettid(),
-   strerror(errno));
+   errno, strerror(errno));
test_dump_stack();
if (fmt) {
fputs("  ", stderr);
-- 
2.19.1

[RFC PATCH v5 03/10] KVM: selftests: Use flag CLOCK_MONOTONIC_RAW for timing

2021-03-23 Thread Yanan Wang

In addition to the functionality of CLOCK_MONOTONIC, the CLOCK_MONOTONIC_RAW
flag also shields against the possible impact of NTP, which provides more
robustness.

Suggested-by: Vitaly Kuznetsov 
Signed-off-by: Yanan Wang 
Reviewed-by: Ben Gardon 
Reviewed-by: Andrew Jones 
---
 tools/testing/selftests/kvm/demand_paging_test.c  |  8 
 tools/testing/selftests/kvm/dirty_log_perf_test.c | 14 +++---
 tools/testing/selftests/kvm/lib/test_util.c   |  2 +-
 tools/testing/selftests/kvm/steal_time.c  |  4 ++--
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/kvm/demand_paging_test.c 
b/tools/testing/selftests/kvm/demand_paging_test.c
index 5f7a229c3af1..efbf0c1e9130 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -53,7 +53,7 @@ static void *vcpu_worker(void *data)
vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
run = vcpu_state(vm, vcpu_id);
 
-   clock_gettime(CLOCK_MONOTONIC, );
+   clock_gettime(CLOCK_MONOTONIC_RAW, );
 
/* Let the guest access its memory */
ret = _vcpu_run(vm, vcpu_id);
@@ -86,7 +86,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
copy.len = perf_test_args.host_page_size;
copy.mode = 0;
 
-   clock_gettime(CLOCK_MONOTONIC, );
+   clock_gettime(CLOCK_MONOTONIC_RAW, );
 
r = ioctl(uffd, UFFDIO_COPY, );
if (r == -1) {
@@ -123,7 +123,7 @@ static void *uffd_handler_thread_fn(void *arg)
struct timespec start;
struct timespec ts_diff;
 
-   clock_gettime(CLOCK_MONOTONIC, );
+   clock_gettime(CLOCK_MONOTONIC_RAW, );
while (!quit_uffd_thread) {
struct uffd_msg msg;
struct pollfd pollfd[2];
@@ -336,7 +336,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
pr_info("Finished creating vCPUs and starting uffd threads\n");
 
-   clock_gettime(CLOCK_MONOTONIC, );
+   clock_gettime(CLOCK_MONOTONIC_RAW, );
 
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
pthread_create(_threads[vcpu_id], NULL, vcpu_worker,
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c 
b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 04a2641261be..6cff4ccf9525 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -50,7 +50,7 @@ static void *vcpu_worker(void *data)
while (!READ_ONCE(host_quit)) {
int current_iteration = READ_ONCE(iteration);
 
-   clock_gettime(CLOCK_MONOTONIC, );
+   clock_gettime(CLOCK_MONOTONIC_RAW, );
ret = _vcpu_run(vm, vcpu_id);
ts_diff = timespec_elapsed(start);
 
@@ -141,7 +141,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
iteration = 0;
host_quit = false;
 
-   clock_gettime(CLOCK_MONOTONIC, );
+   clock_gettime(CLOCK_MONOTONIC_RAW, );
for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
vcpu_last_completed_iteration[vcpu_id] = -1;
 
@@ -162,7 +162,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
ts_diff.tv_sec, ts_diff.tv_nsec);
 
/* Enable dirty logging */
-   clock_gettime(CLOCK_MONOTONIC, );
+   clock_gettime(CLOCK_MONOTONIC_RAW, );
vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX,
KVM_MEM_LOG_DIRTY_PAGES);
ts_diff = timespec_elapsed(start);
@@ -174,7 +174,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 * Incrementing the iteration number will start the vCPUs
 * dirtying memory again.
 */
-   clock_gettime(CLOCK_MONOTONIC, );
+   clock_gettime(CLOCK_MONOTONIC_RAW, );
iteration++;
 
pr_debug("Starting iteration %d\n", iteration);
@@ -189,7 +189,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pr_info("Iteration %d dirty memory time: %ld.%.9lds\n",
iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
-   clock_gettime(CLOCK_MONOTONIC, );
+   clock_gettime(CLOCK_MONOTONIC_RAW, );
kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap);
 
ts_diff = timespec_elapsed(start);
@@ -199,7 +199,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
if (dirty_log_manual_caps) {
-   clock_gettime(CLOCK_MONOTONIC, );
+   clock_gettime(CLOCK_MONOTONIC_RAW, );
kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, 
bmap, 0,
   host_num_pages);
 
@@ -212,7 +212,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
 
/* Disable dirty

[RFC PATCH v5 00/10] KVM: selftests: some improvement and a new test for kvm page table

2021-03-23 Thread Yanan Wang

Hi,
This v5 series mainly consists of two parts.
Based on kvm queue branch: 
https://git.kernel.org/pub/scm/virt/kvm/kvm.git/log/?h=queue

In the first part, all the known hugetlb backing src types specified
with different hugepage sizes are listed, so that we can specify use
of hugetlb source of the exact granularity that we want, instead of
the system default ones. And as all the known hugetlb page sizes are
listed, it's appropriate for all architectures. Besides, a helper that
can get granularity of different backing src types(anonymous/thp/hugetlb)
is added, so that we can use the accurate backing src granularity for
kinds of alignment or guest memory accessing of vcpus.

In the second part, a new test is added:
This test is added to serve as a performance tester and a bug reproducer
for kvm page table code (GPA->HPA mappings), it gives guidance for the
people trying to make some improvement for kvm. And the following explains
what we can exactly do through this test.

The function guest_code() can cover the conditions where a single vcpu or
multiple vcpus access guest pages within the same memory region, in three
VM stages(before dirty logging, during dirty logging, after dirty logging).
Besides, the backing src memory type(ANONYMOUS/THP/HUGETLB) of the tested
memory region can be specified by users, which means normal page mappings
or block mappings can be chosen by users to be created in the test.

If ANONYMOUS memory is specified, kvm will create normal page mappings
for the tested memory region before dirty logging, and update attributes
of the page mappings from RO to RW during dirty logging. If THP/HUGETLB
memory is specified, kvm will create block mappings for the tested memory
region before dirty logging, and split the block mappings into normal page
mappings during dirty logging, and coalesce the page mappings back into
block mappings after dirty logging is stopped.

So in summary, as a performance tester, this test can present the
performance of kvm creating/updating normal page mappings, or the
performance of kvm creating/splitting/recovering block mappings,
through execution time.

When we need to coalesce the page mappings back to block mappings after
dirty logging is stopped, we have to firstly invalidate *all* the TLB
entries for the page mappings right before installation of the block entry,
because a TLB conflict abort error could occur if we can't invalidate the
TLB entries fully. We have hit this TLB conflict twice on aarch64 software
implementation and fixed it. As this test can simulate the process from dirty
logging enabled to dirty logging stopped for a VM with block mappings,
it can also reproduce this TLB conflict abort caused by inadequate TLB
invalidation when coalescing tables.

Links about the TLB conflict abort:
https://lore.kernel.org/lkml/20201201201034.116760-3-wangyana...@huawei.com/

---

Change logs:

v4->v5:
- Use synchronization(sem_wait) for time measurement
- Add a new patch about TEST_ASSERT(patch 4)
- Address Andrew Jones's comments for v4 series
- Add Andrew Jones's R-b tags in some patches
- v4: 
https://lore.kernel.org/lkml/20210302125751.19080-1-wangyana...@huawei.com/

v3->v4:
- Add a helper to get system default hugetlb page size
- Add tags of Reviewed-by of Ben in the patches
- v3: 
https://lore.kernel.org/lkml/20210301065916.11484-1-wangyana...@huawei.com/

v2->v3:
- Add tags of Suggested-by, Reviewed-by in the patches
- Add a generic macro to get hugetlb page sizes
- Some changes for suggestions about v2 series
- v2: 
https://lore.kernel.org/lkml/20210225055940.18748-1-wangyana...@huawei.com/

v1->v2:
- Add a patch to sync header files
- Add helpers to get granularity of different backing src types
- Some changes for suggestions about v1 series
- v1: 
https://lore.kernel.org/lkml/20210208090841.333724-1-wangyana...@huawei.com/

---

Yanan Wang (10):
  tools headers: sync headers of asm-generic/hugetlb_encode.h
  tools headers: Add a macro to get HUGETLB page sizes for mmap
  KVM: selftests: Use flag CLOCK_MONOTONIC_RAW for timing
  KVM: selftests: Print the errno besides error-string in TEST_ASSERT
  KVM: selftests: Make a generic helper to get vm guest mode strings
  KVM: selftests: Add a helper to get system configured THP page size
  KVM: selftests: Add a helper to get system default hugetlb page size
  KVM: selftests: List all hugetlb src types specified with page sizes
  KVM: selftests: Adapt vm_userspace_mem_region_add to new helpers
  KVM: selftests: Add a test for kvm page table code

 include/uapi/linux/mman.h |   2 +
 tools/include/asm-generic/hugetlb_encode.h|   3 +
 tools/include/uapi/linux/mman.h   |   2 +
 tools/testing/selftests/kvm/.gitignore|   1 +
 tools/testing/selftests/kvm/Makefile  |   3 +
 .../selftests/kvm/demand_paging_test.c|   8 +-
 .../selftests/kvm/dirty_log_perf_test.c   |  14 +-
 .../testing/selftests/kvm/include/kvm_util.h  |   4 +-

[RFC PATCH v5 01/10] tools headers: sync headers of asm-generic/hugetlb_encode.h

2021-03-23 Thread Yanan Wang

This patch syncs contents of tools/include/asm-generic/hugetlb_encode.h
and include/uapi/asm-generic/hugetlb_encode.h. Arch powerpc supports 16KB
hugepages and ARM64 supports 32MB/512MB hugepages. The corresponding mmap
flags have already been added in include/uapi/asm-generic/hugetlb_encode.h,
but not tools/include/asm-generic/hugetlb_encode.h.

Cc: Ingo Molnar 
Cc: Adrian Hunter 
Cc: Jiri Olsa 
Cc: Arnaldo Carvalho de Melo 
Signed-off-by: Yanan Wang 
Reviewed-by: Ben Gardon 
---
 tools/include/asm-generic/hugetlb_encode.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/include/asm-generic/hugetlb_encode.h 
b/tools/include/asm-generic/hugetlb_encode.h
index e4732d3c2998..4f3d5aaa11f5 100644
--- a/tools/include/asm-generic/hugetlb_encode.h
+++ b/tools/include/asm-generic/hugetlb_encode.h
@@ -20,13 +20,16 @@
 #define HUGETLB_FLAG_ENCODE_SHIFT  26
 #define HUGETLB_FLAG_ENCODE_MASK   0x3f
 
+#define HUGETLB_FLAG_ENCODE_16KB   (14 << HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_64KB   (16 << HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_512KB  (19 << HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_1MB(20 << 
HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_2MB(21 << 
HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_8MB(23 << 
HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_16MB   (24 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_32MB   (25 << HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_256MB  (28 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512MB  (29 << HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_1GB(30 << 
HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_2GB(31 << 
HUGETLB_FLAG_ENCODE_SHIFT)
 #define HUGETLB_FLAG_ENCODE_16GB   (34 << HUGETLB_FLAG_ENCODE_SHIFT)
-- 
2.19.1

[RFC PATCH v5 05/10] KVM: selftests: Make a generic helper to get vm guest mode strings

2021-03-23 Thread Yanan Wang

For generality and conciseness, make an API which can be used in all
kvm libs and selftests to get vm guest mode strings. And the index i
is checked in the API in case of possible faults.

Suggested-by: Sean Christopherson 
Signed-off-by: Yanan Wang 
Reviewed-by: Andrew Jones 
Reviewed-by: Ben Gardon 
---
 .../testing/selftests/kvm/include/kvm_util.h  |  4 +--
 tools/testing/selftests/kvm/lib/kvm_util.c| 29 ---
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util.h 
b/tools/testing/selftests/kvm/include/kvm_util.h
index 2d7eb6989e83..f52a7492f47f 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -68,9 +68,6 @@ enum vm_guest_mode {
 #define MIN_PAGE_SIZE  (1U << MIN_PAGE_SHIFT)
 #define PTES_PER_MIN_PAGE  ptes_per_page(MIN_PAGE_SIZE)
 
-#define vm_guest_mode_string(m) vm_guest_mode_string[m]
-extern const char * const vm_guest_mode_string[];
-
 struct vm_guest_mode_params {
unsigned int pa_bits;
unsigned int va_bits;
@@ -84,6 +81,7 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap 
*cap);
 int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
struct kvm_enable_cap *cap);
 void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
+const char *vm_guest_mode_string(uint32_t i);
 
 struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int 
perm);
 void kvm_vm_free(struct kvm_vm *vmp);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c 
b/tools/testing/selftests/kvm/lib/kvm_util.c
index e5fbf16f725b..2ea837fe03af 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -143,17 +143,24 @@ static void vm_open(struct kvm_vm *vm, int perm)
"rc: %i errno: %i", vm->fd, errno);
 }
 
-const char * const vm_guest_mode_string[] = {
-   "PA-bits:52,  VA-bits:48,  4K pages",
-   "PA-bits:52,  VA-bits:48, 64K pages",
-   "PA-bits:48,  VA-bits:48,  4K pages",
-   "PA-bits:48,  VA-bits:48, 64K pages",
-   "PA-bits:40,  VA-bits:48,  4K pages",
-   "PA-bits:40,  VA-bits:48, 64K pages",
-   "PA-bits:ANY, VA-bits:48,  4K pages",
-};
-_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
-  "Missing new mode strings?");
+const char *vm_guest_mode_string(uint32_t i)
+{
+   static const char * const strings[] = {
+   [VM_MODE_P52V48_4K] = "PA-bits:52,  VA-bits:48,  4K pages",
+   [VM_MODE_P52V48_64K]= "PA-bits:52,  VA-bits:48, 64K pages",
+   [VM_MODE_P48V48_4K] = "PA-bits:48,  VA-bits:48,  4K pages",
+   [VM_MODE_P48V48_64K]= "PA-bits:48,  VA-bits:48, 64K pages",
+   [VM_MODE_P40V48_4K] = "PA-bits:40,  VA-bits:48,  4K pages",
+   [VM_MODE_P40V48_64K]= "PA-bits:40,  VA-bits:48, 64K pages",
+   [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48,  4K pages",
+   };
+   _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
+  "Missing new mode strings?");
+
+   TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);
+
+   return strings[i];
+}
 
 const struct vm_guest_mode_params vm_guest_mode_params[] = {
{ 52, 48,  0x1000, 12 },
-- 
2.19.1

[RFC PATCH v5 02/10] tools headers: Add a macro to get HUGETLB page sizes for mmap

2021-03-23 Thread Yanan Wang

We know that if a system supports multiple hugetlb page sizes,
the desired hugetlb page size can be specified in bits [26:31]
of the flag arguments. The value in these 6 bits will be the
shift of each hugetlb page size.

So add a macro to get the page size shift and then calculate the
corresponding hugetlb page size, using flag x.

Cc: Ben Gardon 
Cc: Ingo Molnar 
Cc: Adrian Hunter 
Cc: Jiri Olsa 
Cc: Arnaldo Carvalho de Melo 
Cc: Arnd Bergmann 
Cc: Michael Kerrisk 
Cc: Thomas Gleixner 
Suggested-by: Ben Gardon 
Signed-off-by: Yanan Wang 
Reviewed-by: Ben Gardon 
---
 include/uapi/linux/mman.h   | 2 ++
 tools/include/uapi/linux/mman.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index f55bc680b5b0..d72df73b182d 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -41,4 +41,6 @@
 #define MAP_HUGE_2GB   HUGETLB_FLAG_ENCODE_2GB
 #define MAP_HUGE_16GB  HUGETLB_FLAG_ENCODE_16GB
 
+#define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK))
+
 #endif /* _UAPI_LINUX_MMAN_H */
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index f55bc680b5b0..d72df73b182d 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -41,4 +41,6 @@
 #define MAP_HUGE_2GB   HUGETLB_FLAG_ENCODE_2GB
 #define MAP_HUGE_16GB  HUGETLB_FLAG_ENCODE_16GB
 
+#define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK))
+
 #endif /* _UAPI_LINUX_MMAN_H */
-- 
2.19.1

Re: [syzbot] WARNING in io_wq_put

2021-03-23 Thread Jens Axboe

On 3/20/21 6:44 PM, syzbot wrote:
> syzbot has found a reproducer for the following issue on:
> 
> HEAD commit:1c273e10 Merge tag 'zonefs-5.12-rc4' of git://git.kernel.o..
> git tree:   upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=13853506d0
> kernel config:  https://syzkaller.appspot.com/x/.config?x=c51293a9ca630f6d
> dashboard link: https://syzkaller.appspot.com/bug?extid=77a738a6bc947bf639ca
> compiler:   Debian clang version 11.0.1-2
> syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=11ec259ed0
> C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=13acfa62d0
> 
> IMPORTANT: if you fix the issue, please add the following tag to the commit:
> Reported-by: syzbot+77a738a6bc947bf63...@syzkaller.appspotmail.com

#syz test: git://git.kernel.dk/linux-block wq-no-manager

-- 
Jens Axboe

Re: [RFC PATCH 2/2] integrity: double check iint_cache was initialized

2021-03-23 Thread Tetsuo Handa

On 2021/03/23 22:37, Tetsuo Handa wrote:
> On 2021/03/23 21:09, Mimi Zohar wrote:
>> Please take a look at the newer version of this patch.   Do you want to
>> add any tags?
> 
> Oh, I didn't know that you already posted the newer version.
> 
>> diff --git a/security/integrity/iint.c b/security/integrity/iint.c
>> index 1d20003243c3..0ba01847e836 100644
>> --- a/security/integrity/iint.c
>> +++ b/security/integrity/iint.c
>> @@ -98,6 +98,14 @@ struct integrity_iint_cache *integrity_inode_get(struct 
>> inode *inode)
>>  struct rb_node *node, *parent = NULL;
>>  struct integrity_iint_cache *iint, *test_iint;
>>  
>> +/*
>> + * The integrity's "iint_cache" is initialized at security_init(),
>> + * unless it is not included in the ordered list of LSMs enabled
>> + * on the boot command line.
>> + */
>> +if (!iint_cache)
>> +panic("%s: lsm=integrity required.\n", __func__);
>> +
> 
> This looks strange. If "lsm=" parameter must include "integrity",
> it implies that nobody is allowed to disable "integrity" at boot.
> Then, why not unconditionally call integrity_iintcache_init() by
> not counting on DEFINE_LSM(integrity) declaration?

Or, I think below one is also possible.

diff --git a/security/integrity/iint.c b/security/integrity/iint.c
index 1d20003243c3..37afc5168891 100644
--- a/security/integrity/iint.c
+++ b/security/integrity/iint.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "integrity.h"
 
 static struct rb_root integrity_iint_tree = RB_ROOT;
@@ -85,6 +86,20 @@ static void iint_free(struct integrity_iint_cache *iint)
kmem_cache_free(iint_cache, iint);
 }
 
+static void init_once(void *foo)
+{
+   struct integrity_iint_cache *iint = foo;
+
+   memset(iint, 0, sizeof(*iint));
+   iint->ima_file_status = INTEGRITY_UNKNOWN;
+   iint->ima_mmap_status = INTEGRITY_UNKNOWN;
+   iint->ima_bprm_status = INTEGRITY_UNKNOWN;
+   iint->ima_read_status = INTEGRITY_UNKNOWN;
+   iint->ima_creds_status = INTEGRITY_UNKNOWN;
+   iint->evm_status = INTEGRITY_UNKNOWN;
+   mutex_init(&iint->mutex);
+}
+
 /**
  * integrity_inode_get - find or allocate an iint associated with an inode
  * @inode: pointer to the inode
@@ -102,6 +117,18 @@ struct integrity_iint_cache *integrity_inode_get(struct 
inode *inode)
if (iint)
return iint;
 
+   if (!iint_cache) {
+   static DEFINE_MUTEX(lock);
+   unsigned int flags = memalloc_nofs_save();
+
+   mutex_lock(&lock);
+   if (!iint_cache)
+   iint_cache = kmem_cache_create("iint_cache",
+  sizeof(struct 
integrity_iint_cache),
+  0, SLAB_PANIC, 
init_once);
+   mutex_unlock(&lock);
+   memalloc_nofs_restore(flags);
+   }
iint = kmem_cache_alloc(iint_cache, GFP_NOFS);
if (!iint)
return NULL;
@@ -150,25 +177,8 @@ void integrity_inode_free(struct inode *inode)
iint_free(iint);
 }
 
-static void init_once(void *foo)
-{
-   struct integrity_iint_cache *iint = foo;
-
-   memset(iint, 0, sizeof(*iint));
-   iint->ima_file_status = INTEGRITY_UNKNOWN;
-   iint->ima_mmap_status = INTEGRITY_UNKNOWN;
-   iint->ima_bprm_status = INTEGRITY_UNKNOWN;
-   iint->ima_read_status = INTEGRITY_UNKNOWN;
-   iint->ima_creds_status = INTEGRITY_UNKNOWN;
-   iint->evm_status = INTEGRITY_UNKNOWN;
-   mutex_init(&iint->mutex);
-}
-
 static int __init integrity_iintcache_init(void)
 {
-   iint_cache =
-   kmem_cache_create("iint_cache", sizeof(struct integrity_iint_cache),
- 0, SLAB_PANIC, init_once);
return 0;
 }
 DEFINE_LSM(integrity) = {

Re: [PATCH] mmc: core: Mark mmc_host device with pm_runtime_no_callbacks

2021-03-23 Thread Ulf Hansson

On Tue, 23 Mar 2021 at 11:49, hieagle  wrote:
>
> We encounter a resume issue in our device sometimes. The mmc device's
> parent list is
> mmc0:0001->mmc_host mmc0->fa63.mmc->soc in our soc. We found in the below
> case with mmc0->power.disable_depth=0 the mmc_runtime_resume will be skipped,
> which cause subsequent mmc command fail.
>
> mmc_get_card(mmc0:0001)->pm_runtime_get_sync->rpm_resume(mmc0:0001)->rpm_resume(mmc0)
> The rpm_resume(mmc0) return -ENOSYS due to no callback and
> mmc0->power.runtime_status
> keep RPM_SUSPENDED. This lead to rpm_resume(mmc0:0001) return -EBUSY and skip
> rpm_callback which call mmc_runtime_resume, the mmc is still in
> suspended and the
> subsequent mmc command fail.
>
> [  198.856157] Call trace:
> [  198.858917] [] dump_backtrace+0x0/0x1cc
> [  198.864966] [] show_stack+0x14/0x1c
> [  198.870627] [] dump_stack+0xa8/0xe0
> [  198.876288] [] rpm_resume+0x850/0x938
> [  198.882141] [] rpm_resume+0x250/0x938
> [  198.887994] [] __pm_runtime_resume+0x50/0x74
> [  198.894530] [] mmc_get_card+0x3c/0xb8
> [  198.900388] [] mmc_blk_issue_rq+0x2b0/0x4d8
> [  198.906824] [] mmc_queue_thread+0xdc/0x198
> [  198.913165] [] kthread+0xec/0x100
> [  198.918632] [] ret_from_fork+0x10/0x40
> [  198.924582] mmc0  callback   (null)
> [  198.935837] mmcblk mmc0:0001: __pm_runtime_resume ret -16
>
> Mark mmc_host device with pm_runtime_no_callbacks will solve the issue.
> Thanks.
> Huanlin Ke

Thanks for sharing more details! I have to admit, that this sounds
quite weird to me. I wonder if this is a problem that deserves to be
fixed in the runtime PM core

Let me have a closer look and get back to you again. Please be patient
though, I have a busy week in front of me.

Kind regards
Uffe

>
> Ulf Hansson  于2021年3月22日周一 下午6:26写道：
> >
> > On Sat, 20 Mar 2021 at 05:57, kehuanlin  wrote:
> > >
> > > The rpm_resume() will call parent's resume callback recursively.
> > > Since mmc_host has no pm_runtime callbacks of its own, the mmc devices
> > > may fail to resume (-ENOSYS in rpm_callback) sometimes. Mark mmc_host
> > > device with pm_runtime_no_callbacks can fix the issue.
> >
> > Can you please elaborate more on this? What do you mean by "sometimes"?
> >
> > More precisely, how do you trigger the rpm_callback() for mmc class
> > device to return -ENOSYS?
> >
> > Don't get me wrong, the patch is fine, but I want to understand if it
> > actually solves a problem for you - or that it's better considered as
> > an optimization?
> >
> > Kind regards
> > Uffe
> >
> > >
> > > Signed-off-by: kehuanlin 
> > > ---
> > >  drivers/mmc/core/host.c | 2 ++
> > >  1 file changed, 2 insertions(+)
> > >
> > > diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
> > > index 9b89a91b6b47..177bebd9a6c4 100644
> > > --- a/drivers/mmc/core/host.c
> > > +++ b/drivers/mmc/core/host.c
> > > @@ -15,6 +15,7 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include 
> > >  #include 
> > >  #include 
> > >  #include 
> > > @@ -480,6 +481,7 @@ struct mmc_host *mmc_alloc_host(int extra, struct 
> > > device *dev)
> > > host->class_dev.class = _host_class;
> > > device_initialize(>class_dev);
> > > device_enable_async_suspend(>class_dev);
> > > +   pm_runtime_no_callbacks(>class_dev);
> > >
> > > if (mmc_gpio_alloc(host)) {
> > > put_device(>class_dev);
> > > --
> > > 2.30.0
> > >

Re: [RFC PATCH 2/2] mm,drm/ttm: Use VM_PFNMAP for TTM vmas

2021-03-23 Thread Jason Gunthorpe

On Sun, Mar 21, 2021 at 07:45:29PM +0100, Thomas Hellström (Intel) wrote:
> To block fast gup we need to make sure TTM ptes are always special.
> With MIXEDMAP we, on architectures that don't support pte_special,
> insert normal ptes, but OTOH on those architectures, fast gup is not
> supported.
> At the same time, the function documentation to vm_normal_page() suggests
> that ptes pointing to system memory pages of MIXEDMAP vmas are always
> normal, but that doesn't seem consistent with what's implemented in
> vmf_insert_mixed(). I'm thus not entirely sure this patch is actually
> needed.
> 
> But to make sure and to avoid also normal (non-fast) gup, make all
> TTM vmas PFNMAP. With PFNMAP we can't allow COW mappings
> anymore so make is_cow_mapping() available and use it to reject
> COW mappings at mmap time.
> 
> There was previously a comment in the code that WC mappings together
> with x86 PAT + PFNMAP was bad for performance. However from looking at
> vmf_insert_mixed() it looks like in the current code PFNMAP and MIXEDMAP
> are handled the same for architectures that support pte_special. This
> means there should not be a performance difference anymore, but this
> needs to be verified.
> 
> Cc: Christian Koenig 
> Cc: David Airlie 
> Cc: Daniel Vetter 
> Cc: Andrew Morton 
> Cc: Jason Gunthorpe 
> Cc: linux...@kvack.org
> Cc: dri-de...@lists.freedesktop.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Thomas Hellström (Intel) 
>  drivers/gpu/drm/ttm/ttm_bo_vm.c | 22 --
>  include/linux/mm.h  |  5 +
>  mm/internal.h   |  5 -
>  3 files changed, 13 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> index 1c34983480e5..708c6fb9be81 100644
> +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> @@ -372,12 +372,7 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
>* at arbitrary times while the data is mmap'ed.
>* See vmf_insert_mixed_prot() for a discussion.
>*/
> - if (vma->vm_flags & VM_MIXEDMAP)
> - ret = vmf_insert_mixed_prot(vma, address,
> - __pfn_to_pfn_t(pfn, 
> PFN_DEV),
> - prot);
> - else
> - ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
> + ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
>  
>   /* Never error on prefaulted PTEs */
>   if (unlikely((ret & VM_FAULT_ERROR))) {
> @@ -555,18 +550,14 @@ static void ttm_bo_mmap_vma_setup(struct 
> ttm_buffer_object *bo, struct vm_area_s
>* Note: We're transferring the bo reference to
>* vma->vm_private_data here.
>*/
> -
>   vma->vm_private_data = bo;
>  
>   /*
> -  * We'd like to use VM_PFNMAP on shared mappings, where
> -  * (vma->vm_flags & VM_SHARED) != 0, for performance reasons,
> -  * but for some reason VM_PFNMAP + x86 PAT + write-combine is very
> -  * bad for performance. Until that has been sorted out, use
> -  * VM_MIXEDMAP on all mappings. See freedesktop.org bug #75719
> +  * PFNMAP forces us to block COW mappings in mmap(),
> +  * and with MIXEDMAP we would incorrectly allow fast gup
> +  * on TTM memory on architectures that don't have pte_special.
>*/
> - vma->vm_flags |= VM_MIXEDMAP;
> - vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
> + vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
>  }
>  
>  int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
> @@ -579,6 +570,9 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct 
> *vma,
>   if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET_START))
>   return -EINVAL;
>  
> + if (unlikely(is_cow_mapping(vma->vm_flags)))
> + return -EINVAL;
> +
>   bo = ttm_bo_vm_lookup(bdev, vma->vm_pgoff, vma_pages(vma));
>   if (unlikely(!bo))
>   return -EINVAL;
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 77e64e3eac80..c6ebf7f9ddbb 100644
> +++ b/include/linux/mm.h
> @@ -686,6 +686,11 @@ static inline bool vma_is_accessible(struct 
> vm_area_struct *vma)
>   return vma->vm_flags & VM_ACCESS_FLAGS;
>  }
>  
> +static inline bool is_cow_mapping(vm_flags_t flags)
> +{
> + return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
> +}

Most driver places are just banning VM_SHARED.

I see you copied this from remap_pfn_range(), but that logic is so
special I'm not sure..

Can the user mprotect the write back on with the above logic? Do we
need VM_DENYWRITE too?

Jason

Re: [kbuild-all] Re: include/linux/compiler_types.h:315:38: error: call to '__compiletime_assert_536' declared with attribute error: BUILD_BUG_ON failed: offsetof(struct can_frame, len) != offsetof(st

2021-03-23 Thread Rasmus Villemoes

On 23/03/2021 13.49, Oliver Hartkopp wrote:
> 
> 
> On 23.03.21 12:36, Rasmus Villemoes wrote:
>>
>> and more directly from the horse's mouth:
>>
>> https://developer.arm.com/documentation/dui0067/d/arm-compiler-reference/c-and-c---implementation-details/structures--unions--enumerations--and-bitfields
>>
>>
>> Field alignment
>>
>>  Structures are arranged with the first-named component at the lowest
>> address. Fields are aligned as follows:
>>
>>  A field with a char type is aligned to the next available byte.
>>
>>  A field with a short type is aligned to the next even-addressed
>> byte.
>>
>>  Bitfield alignment depends on how the bitfield is declared. See
>> Bitfields in packed structures for more information.
>>
>>  All other types are aligned on word boundaries.
>>
>> That anonymous union falls into the "All other types" bullet.
>>
>> __packed is the documented and standard way to overrule the
>> compiler's/ABI's layout decisions.
> 
> So why is there a difference between
> 
> gcc version 10.2.0
> 
> and
> 
> gcc version 10.2.1 20210110 (Debian 10.2.1-6)

I'm guessing there's no difference between those (in this respect), but
they are invoked differently.

> Would this mean that either STRUCTURE_SIZE_BOUNDARY or the command line
> option -mstructure_size_boundary=
> 
> are set differently?

Yes, though very likely -mstructure_size_boundary is not set explicitly
but via some other option.

gcc has a rather helpful but almost unknown feature that one can
actually query for lots of different parameters and their
default/current values. So on my Ubuntu system (20.04, gcc 9.3), for
example, if I do

$ arm-linux-gnueabihf-gcc -O2 -Q --help=target | grep struct
  -mstructure-size-boundary=8

So that would seem to say that the union should work as expected.
However, when I actually try to compile with the .config that kbuild
reports failing, I do see that BUILD_BUG_ON triggering.

So let us inspect the actual command line used to build some other
random .o file in net/can; look at net/can/.bcm.o.cmd

cmd_net/can/bcm.o := arm-linux-gnueabihf-gcc -Wp,-MMD,net/can/.bcm.o.d
-nostdinc -isystem /usr/lib/gcc-cross/arm-linux-gnueabihf/9/include
-I./arch/arm/include -I./arch/arm/include/generated  -I./include
-I./arch/arm/include/uapi -I./arch/arm/include/generated/uapi
-I./include/uapi -I./include/generated/uapi -include
./include/linux/compiler-version.h -include ./include/linux/kconfig.h
-include ./include/linux/compiler_types.h -D__KERNEL__ -mlittle-endian
-I./arch/arm/mach-footbridge/include -fmacro-prefix-map=./= -Wall
-Wundef -Werror=strict-prototypes -Wno-trigraphs -fno-strict-aliasing
-fno-common -fshort-wchar -fno-PIE -Werror=implicit-function-declaration
-Werror=implicit-int -Werror=return-type -Wno-format-security -std=gnu89
-fno-dwarf2-cfi-asm -mno-unaligned-access -fno-omit-frame-pointer -mapcs
-mno-sched-prolog -fno-ipa-sra -mabi=apcs-gnu -mno-thumb-interwork -marm
-Wa,-mno-warn-deprecated -D__LINUX_ARM_ARCH__=4 -march=armv4
-mtune=strongarm110 -msoft-float -Uarm -fno-delete-null-pointer-checks
-Wno-frame-address -Wno-format-truncation -Wno-format-overflow
-Wno-address-of-packed-member -O2 --param=allow-store-data-races=0
-Wframe-larger-than=1024 -fno-stack-protector
-Wno-unused-but-set-variable -Wimplicit-fallthrough
-Wno-unused-const-variable -fno-omit-frame-pointer
-fno-optimize-sibling-calls -fno-inline-functions-called-once
-Wdeclaration-after-statement -Wvla -Wno-pointer-sign
-Wno-stringop-truncation -Wno-array-bounds -Wno-stringop-overflow
-Wno-restrict -Wno-maybe-uninitialized -fno-strict-overflow
-fno-stack-check -fconserve-stack -Werror=date-time
-Werror=incompatible-pointer-types -Werror=designated-init
-Wno-packed-not-aligned-fsanitize-coverage=trace-pc
-DKBUILD_MODFILE='"net/can/can-bcm"' -DKBUILD_BASENAME='"bcm"'
-DKBUILD_MODNAME='"can_bcm"' -D__KBUILD_MODNAME=kmod_can_bcm -c -o
net/can/bcm.o net/can/bcm.c

Lots of gunk. But just to see if one of those options have affected the
-mstructure-size-boundary= value, just take that whole command line and
throw in -Q --help=target at the end, and we get

  -mstructure-size-boundary=32

So let us guess that it's the ABI choice -mabi=apcs-gnu

$ arm-linux-gnueabihf-gcc -O2 -msoft-float -mabi=apcs-gnu -Q
--help=target | grep struct
  -mstructure-size-boundary=32

Bingo. (-msoft-float is also included just as in the real command line
because gcc barfs otherwise).

Now what CONFIG_* knobs are responsible for putting -mabi=apcs-gnu in
CFLAGS is left as an exercise for the reader. Regardless, it is not a
bug in the compiler. The error is the assumption that this language

"Aggregates and Unions

Structures and unions assume the alignment of their most strictly
aligned component.
Each member is assigned to the lowest available offset with the appropriate
alignment. The size of any object's
alignment."

from the x86-64 ABI applies on all other

Re: [PATCH v2] dt-binding: leds: Document leds-multi-gpio bindings

2021-03-23 Thread Rob Herring

On Mon, Mar 22, 2021 at 10:28 PM Hermes Zhang  wrote:
>
> > -Original Message-
> > From: Rob Herring 
> > Sent: 2021年3月23日 1:38
> > My bot found errors running 'make dt_binding_check' on your patch:
> >
> > yamllint warnings/errors:
> >
> > dtschema/dtc warnings/errors:
> > /builds/robherring/linux-dt-
> > review/Documentation/devicetree/bindings/leds/leds-multi-
> > gpio.example.dt.yaml: gpios-led: led-states: 'oneOf' conditional failed, one
> > must be fixed:
> >   [[0, 1, 2, 3]] is too short
> >   [0, 1, 2, 3] is too long
> >   From schema: /builds/robherring/linux-dt-
> > review/Documentation/devicetree/bindings/leds/leds-multi-gpio.yaml
> >
>
> Hi Rob,
>
> Thanks. Yes, now I can see the warning, but I could not understand what was 
> wrong? Could you give some hint?

I think you need 'maxItems' in addition to minItems.

Rob

[PATCH][next] media: dvb-frontends: Remove redundant error check on variable ret

2021-03-23 Thread Colin King

From: Colin Ian King 

An earlier commit removed a call to lgdt3306a_spectral_inversion and
omitted to remove the error return check. The check on ret is now
redundant and can be removed.

Addresses-Coverity: ("Logically dead code")
Fixes: d4a3fa6652e3 ("media: dvb-frontends: lgdt3306a.c: remove dead code")
Signed-off-by: Colin Ian King 
---
 drivers/media/dvb-frontends/lgdt3306a.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/media/dvb-frontends/lgdt3306a.c 
b/drivers/media/dvb-frontends/lgdt3306a.c
index 22d484487695..136b76cb4807 100644
--- a/drivers/media/dvb-frontends/lgdt3306a.c
+++ b/drivers/media/dvb-frontends/lgdt3306a.c
@@ -1017,9 +1017,6 @@ static int lgdt3306a_set_parameters(struct dvb_frontend 
*fe)
 
/* spectral_inversion defaults already set for VSB and QAM */
 
-   if (lg_chkerr(ret))
-   goto fail;
-
ret = lgdt3306a_mpeg_mode(state, state->cfg->mpeg_mode);
if (lg_chkerr(ret))
goto fail;
-- 
2.30.2

[PATCH v2] arm64: dts: imx8mq-kontron-pitx-imx8m: pass phy reset delays

2021-03-23 Thread Heiko Thiery

The TI DP83867 PHY datasheet says:
T1: Post RESET stabilization time == 195us
T3: Hardware configuration pins transition to output drivers == 64us
T4: RESET pulse width == 1us

So with a little overhead set 'reset-assert-us' to 10us (T4) and
'reset-deassert-us' to 280us (T1+T3).

Without these reset delays the board will hang during startup when
bootargs has ip=dhcp set.

Fixes: 1dc7f3d79a1a ("arm64: dts: fsl: add support for Kontron pitx-imx8m 
board")
Signed-off-by: Heiko Thiery 
---
v2:
 - add description of what issue will be fixed
 - add Fixes tag

 arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts 
b/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts
index 91a9ceb10305..f593e4ff62e1 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mq-kontron-pitx-imx8m.dts
@@ -90,6 +90,8 @@ ethphy0: ethernet-phy@0 {
ti,tx-internal-delay = ;
ti,fifo-depth = ;
reset-gpios = < 11 GPIO_ACTIVE_LOW>;
+   reset-assert-us = <10>;
+   reset-deassert-us = <280>;
};
};
 };
-- 
2.30.0

Re: [PATCH v6] power: suspend: Move dpm_watchdog to suspend.c and enhance it

2021-03-23 Thread Greg Kroah-Hartman

On Thu, Jan 28, 2021 at 05:39:27PM +0800, Joseph Jang wrote:
> Since dpm_watchdog just covers two functions, __device_suspend() and
> device_resume(), we propose to move it to core power suspend.c to extend
> its coverage and monitor more device suspend hang issues.
> 
> We propose to use new name suspend watchdog and new timeout handler to
> cover more sleep hang issues. The new timeout handler will dump disk
> sleep task call trace at first round timeout and trigger kernel panic
> at second round timeout.
> The default timer for each round is defined in
> CONFIG_PM_SUSPEND_WATCHDOG_TIMEOUT.
> 
> Signed-off-by: Joseph Jang 

Seems sane, but I'm not the maintainer of this stuff:

Acked-by: Greg Kroah-Hartman

[PATCH][next] media: imx-jpeg: Pass the v4l2_jpeg_header header argument by reference

2021-03-23 Thread Colin King

From: Colin Ian King 

Currently the header argument is being passed by value, so a copy of 256
byte structure on the stack is potentially occurring. Fix this by passing
by reference to avoid any large stack copies.

Addresses-Coverity: ("Big parameter passed by value")
Fixes: 2db16c6ed72c ("media: imx-jpeg: Add V4L2 driver for i.MX8 JPEG 
Encoder/Decoder")
Signed-off-by: Colin Ian King 
---
 drivers/media/platform/imx-jpeg/mxc-jpeg.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/media/platform/imx-jpeg/mxc-jpeg.c 
b/drivers/media/platform/imx-jpeg/mxc-jpeg.c
index adb1715c75d7..f13a8efc35ad 100644
--- a/drivers/media/platform/imx-jpeg/mxc-jpeg.c
+++ b/drivers/media/platform/imx-jpeg/mxc-jpeg.c
@@ -1114,21 +1114,21 @@ static int mxc_jpeg_valid_comp_id(struct device *dev,
 }
 
 static u32 mxc_jpeg_get_image_format(struct device *dev,
-const struct v4l2_jpeg_header header)
+const struct v4l2_jpeg_header *header)
 {
int i;
u32 fourcc = 0;
 
for (i = 0; i < MXC_JPEG_NUM_FORMATS; i++)
-   if (mxc_formats[i].subsampling == header.frame.subsampling &&
-   mxc_formats[i].nc == header.frame.num_components) {
+   if (mxc_formats[i].subsampling == header->frame.subsampling &&
+   mxc_formats[i].nc == header->frame.num_components) {
fourcc = mxc_formats[i].fourcc;
break;
}
if (fourcc == 0) {
dev_err(dev, "Could not identify image format nc=%d, 
subsampling=%d\n",
-   header.frame.num_components,
-   header.frame.subsampling);
+   header->frame.num_components,
+   header->frame.subsampling);
return fourcc;
}
/*
@@ -1137,7 +1137,7 @@ static u32 mxc_jpeg_get_image_format(struct device *dev,
 * ITU-T T.872 chapter 6.5.3 APP14 marker segment for colour encoding
 */
if (fourcc == V4L2_PIX_FMT_YUV24 || fourcc == V4L2_PIX_FMT_RGB24) {
-   if (header.app14_tf == V4L2_JPEG_APP14_TF_CMYK_RGB)
+   if (header->app14_tf == V4L2_JPEG_APP14_TF_CMYK_RGB)
fourcc = V4L2_PIX_FMT_RGB24;
else
fourcc = V4L2_PIX_FMT_YUV24;
@@ -1258,7 +1258,7 @@ static int mxc_jpeg_parse(struct mxc_jpeg_ctx *ctx,
if (!mxc_jpeg_valid_comp_id(dev, psof, psos))
dev_warn(dev, "JPEG component ids should be 0-3 or 1-4");
 
-   fourcc = mxc_jpeg_get_image_format(dev, header);
+   fourcc = mxc_jpeg_get_image_format(dev, &header);
if (fourcc == 0)
return -EINVAL;
 
-- 
2.30.2

Re: [PATCH] usb: dwc3: fix build error when POWER_SUPPLY is not enabled

2021-03-23 Thread Ray Chi

Hi Greg,

I will upload fixes for power supply usage in dwc3 and dt-binding
documentation for the new device tree this week.

Thanks,
Ray

On Tue, Mar 23, 2021 at 9:47 PM Greg KH  wrote:
>
> On Fri, Mar 12, 2021 at 09:57:56PM +0800, Ray Chi wrote:
> > Hi Sebastian,
> >
> > Sorry for the late reply.
> >
> > On Wed, Mar 10, 2021 at 2:58 AM Sebastian Reichel  wrote:
> > >
> > > Hi,
> > >
> > > On Mon, Mar 08, 2021 at 09:31:46PM +0800, Ray Chi wrote:
> > > > Fix build error when CONFIG_POWER_SUPPLY is not enabled.
> > > >
> > > > The build error occurs in mips (cavium_octeon_defconfig).
> > > >
> > > > mips-linux-gnu-ld: drivers/usb/dwc3/core.o: in function `dwc3_remove':
> > > > drivers/usb/dwc3/core.c:1657: undefined reference to `power_supply_put'
> > > > mips-linux-gnu-ld: drivers/usb/dwc3/core.o: in function 
> > > > `dwc3_get_properties':
> > > > drivers/usb/dwc3/core.c:1270: undefined reference to 
> > > > `power_supply_get_by_name'
> > > > mips-linux-gnu-ld: drivers/usb/dwc3/core.o: in function `dwc3_probe':
> > > > drivers/usb/dwc3/core.c:1632: undefined reference to `power_supply_put'
> > > >
> > > > Fixes: 59fa3def35de ("usb: dwc3: add a power supply for current 
> > > > control")
> > > > Reported-by: Naresh Kamboju 
> > > > Signed-off-by: Ray Chi 
> > > > ---
> > >
> > > While I'm fine with merging this after fixing up the subject, the
> > > original patch for dwc3 [0] looks completely incorrect to me.
> > >
> > > First of all it uses wrong scale (power-supply uses uA, not mA),
> > > so you are charging 1000x slower than expected. Then the patchset
> > > introduces a new DT property to get the power-supply device, but
> > > does not update the DT binding documentation and does not Cc the
> > > DT binding maintainer.
> >
> > Yes, it should use uA and send this information, and I will update a
> > patch to fix it and add the DT binding documentation.
>
> So should I revert what we currently have in my usb-next tree, or do
> you have a fix for this?
>
> thanks,
>
> greg k-h

[PATCH v2 1/2] mtd: spi-nor: sfdp: save a copy of the SFDP data

2021-03-23 Thread Michael Walle

Due to possible mode switching to 8D-8D-8D, it might not be possible to
read the SFDP after the initial probe. To be able to dump the SFDP via
sysfs afterwards, make a complete copy of it.

Signed-off-by: Michael Walle 
---
 drivers/mtd/spi-nor/core.h  | 10 +++
 drivers/mtd/spi-nor/sfdp.c  | 58 +
 include/linux/mtd/spi-nor.h |  2 ++
 3 files changed, 70 insertions(+)

diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h
index db07832ee66c..08d2469837da 100644
--- a/drivers/mtd/spi-nor/core.h
+++ b/drivers/mtd/spi-nor/core.h
@@ -406,6 +406,16 @@ struct spi_nor_manufacturer {
const struct spi_nor_fixups *fixups;
 };
 
+/**
+ * struct sfdp - SFDP data
+ * @num_dwords: number of entries in the dwords array
+ * @dwords: array of double words of the SFDP data
+ */
+struct sfdp {
+   size_t  num_dwords;
+   u32 *dwords;
+};
+
 /* Manufacturer drivers. */
 extern const struct spi_nor_manufacturer spi_nor_atmel;
 extern const struct spi_nor_manufacturer spi_nor_catalyst;
diff --git a/drivers/mtd/spi-nor/sfdp.c b/drivers/mtd/spi-nor/sfdp.c
index 23c28e91f698..c500c2118a5d 100644
--- a/drivers/mtd/spi-nor/sfdp.c
+++ b/drivers/mtd/spi-nor/sfdp.c
@@ -16,6 +16,7 @@
(((p)->parameter_table_pointer[2] << 16) | \
 ((p)->parameter_table_pointer[1] <<  8) | \
 ((p)->parameter_table_pointer[0] <<  0))
+#define SFDP_PARAM_HEADER_PARAM_LEN(p) ((p)->length * 4)
 
 #define SFDP_BFPT_ID   0xff00  /* Basic Flash Parameter Table */
 #define SFDP_SECTOR_MAP_ID 0xff81  /* Sector Map Table */
@@ -1245,6 +1246,8 @@ int spi_nor_parse_sfdp(struct spi_nor *nor)
struct sfdp_parameter_header *param_headers = NULL;
struct sfdp_header header;
struct device *dev = nor->dev;
+   struct sfdp *sfdp;
+   size_t sfdp_size;
size_t psize;
int i, err;
 
@@ -1267,6 +1270,9 @@ int spi_nor_parse_sfdp(struct spi_nor *nor)
bfpt_header->major != SFDP_JESD216_MAJOR)
return -EINVAL;
 
+   sfdp_size = SFDP_PARAM_HEADER_PTP(bfpt_header) +
+   SFDP_PARAM_HEADER_PARAM_LEN(bfpt_header);
+
/*
 * Allocate memory then read all parameter headers with a single
 * Read SFDP command. These parameter headers will actually be parsed
@@ -1293,6 +1299,58 @@ int spi_nor_parse_sfdp(struct spi_nor *nor)
}
}
 
+   /*
+* Cache the complete SFDP data. It is not (easily) possible to fetch
+* SFDP after probe time and we need it for the sysfs access.
+*/
+   for (i = 0; i < header.nph; i++) {
+   param_header = _headers[i];
+   sfdp_size = max_t(size_t, sfdp_size,
+ SFDP_PARAM_HEADER_PTP(param_header) +
+ SFDP_PARAM_HEADER_PARAM_LEN(param_header));
+   }
+
+   /*
+* Limit the total size to a reasonable value to avoid allocating too
+* much memory just of because the flash returned some insane values.
+*/
+   if (sfdp_size > PAGE_SIZE) {
+   dev_dbg(dev, "SFDP data (%zu) too big, truncating\n",
+   sfdp_size);
+   sfdp_size = PAGE_SIZE;
+   }
+
+   sfdp = devm_kzalloc(dev, sizeof(*sfdp), GFP_KERNEL);
+   if (!sfdp) {
+   err = -ENOMEM;
+   goto exit;
+   }
+
+   /*
+* The SFDP is organized in chunks of DWORDs. Thus, in theory, the
+* sfdp_size should be a multiple of DWORDs. But in case a flash
+* is not spec compliant, make sure that we have enough space to store
+* the complete SFDP data.
+*/
+   sfdp->num_dwords = DIV_ROUND_UP(sfdp_size, sizeof(*sfdp->dwords));
+   sfdp->dwords = devm_kcalloc(dev, sfdp->num_dwords,
+   sizeof(*sfdp->dwords), GFP_KERNEL);
+   if (!sfdp->dwords) {
+   err = -ENOMEM;
+   devm_kfree(dev, sfdp);
+   goto exit;
+   }
+
+   err = spi_nor_read_sfdp(nor, 0, sfdp_size, sfdp->dwords);
+   if (err < 0) {
+   dev_dbg(dev, "failed to read SFDP data\n");
+   devm_kfree(dev, sfdp->dwords);
+   devm_kfree(dev, sfdp);
+   goto exit;
+   }
+
+   nor->sfdp = sfdp;
+
/*
 * Check other parameter headers to get the latest revision of
 * the basic flash parameter table.
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index a0d572855444..2215e3565422 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -375,6 +375,7 @@ struct spi_nor_flash_parameter;
  * @read_proto:the SPI protocol for read operations
  * @write_proto:   the SPI protocol for write operations
  * @reg_proto: the SPI protocol for read_reg/write_reg/erase operations
+ * @sfdp:  the SFDP data of the flash
  *

[PATCH v2 2/2] mtd: spi-nor: add initial sysfs support

2021-03-23 Thread Michael Walle

Add support to show the name and JEDEC identifier as well as to dump the
SFDP table. Not all flashes list their SFDP table contents in their
datasheet. So having that is useful. It might also be helpful in bug
reports from users.

Signed-off-by: Michael Walle 
---
 drivers/mtd/spi-nor/Makefile |  2 +-
 drivers/mtd/spi-nor/core.c   |  5 +++
 drivers/mtd/spi-nor/core.h   |  3 ++
 drivers/mtd/spi-nor/sysfs.c  | 86 
 4 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/spi-nor/sysfs.c

diff --git a/drivers/mtd/spi-nor/Makefile b/drivers/mtd/spi-nor/Makefile
index 653923896205..aff308f75987 100644
--- a/drivers/mtd/spi-nor/Makefile
+++ b/drivers/mtd/spi-nor/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-spi-nor-objs   := core.o sfdp.o
+spi-nor-objs   := core.o sfdp.o sysfs.o
 spi-nor-objs   += atmel.o
 spi-nor-objs   += catalyst.o
 spi-nor-objs   += eon.o
diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index fbc34158a883..02523ddac612 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -3708,6 +3708,10 @@ static int spi_nor_probe(struct spi_mem *spimem)
if (ret)
return ret;
 
+   ret = spi_nor_sysfs_create(nor);
+   if (ret)
+   return ret;
+
return mtd_device_register(>mtd, data ? data->parts : NULL,
   data ? data->nr_parts : 0);
 }
@@ -3717,6 +3721,7 @@ static int spi_nor_remove(struct spi_mem *spimem)
struct spi_nor *nor = spi_mem_get_drvdata(spimem);
 
spi_nor_restore(nor);
+   spi_nor_sysfs_remove(nor);
 
/* Clean up MTD stuff. */
return mtd_device_unregister(>mtd);
diff --git a/drivers/mtd/spi-nor/core.h b/drivers/mtd/spi-nor/core.h
index 08d2469837da..599035200a03 100644
--- a/drivers/mtd/spi-nor/core.h
+++ b/drivers/mtd/spi-nor/core.h
@@ -486,4 +486,7 @@ static struct spi_nor __maybe_unused *mtd_to_spi_nor(struct 
mtd_info *mtd)
return mtd->priv;
 }
 
+int spi_nor_sysfs_create(struct spi_nor *nor);
+void spi_nor_sysfs_remove(struct spi_nor *nor);
+
 #endif /* __LINUX_MTD_SPI_NOR_INTERNAL_H */
diff --git a/drivers/mtd/spi-nor/sysfs.c b/drivers/mtd/spi-nor/sysfs.c
new file mode 100644
index ..c62cc4d6bce6
--- /dev/null
+++ b/drivers/mtd/spi-nor/sysfs.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+#include 
+#include 
+#include 
+
+#include "core.h"
+
+static ssize_t name_show(struct device *dev,
+struct device_attribute *attr, char *buf)
+{
+   struct spi_device *spi = to_spi_device(dev);
+   struct spi_mem *spimem = spi_get_drvdata(spi);
+   struct spi_nor *nor = spi_mem_get_drvdata(spimem);
+
+   return sysfs_emit(buf, "%s\n", nor->info->name);
+}
+static DEVICE_ATTR_RO(name);
+
+static ssize_t jedec_id_show(struct device *dev,
+struct device_attribute *attr, char *buf)
+{
+   struct spi_device *spi = to_spi_device(dev);
+   struct spi_mem *spimem = spi_get_drvdata(spi);
+   struct spi_nor *nor = spi_mem_get_drvdata(spimem);
+
+   return sysfs_emit(buf, "%*phN\n", nor->info->id_len, nor->info->id);
+}
+static DEVICE_ATTR_RO(jedec_id);
+
+static struct attribute *spi_nor_sysfs_entries[] = {
+   &dev_attr_name.attr,
+   &dev_attr_jedec_id.attr,
+   NULL
+};
+
+static ssize_t sfdp_read(struct file *filp, struct kobject *kobj,
+struct bin_attribute *bin_attr, char *buf,
+loff_t off, size_t count)
+{
+   struct spi_device *spi = to_spi_device(kobj_to_dev(kobj));
+   struct spi_mem *spimem = spi_get_drvdata(spi);
+   struct spi_nor *nor = spi_mem_get_drvdata(spimem);
+   struct sfdp *sfdp = nor->sfdp;
+   size_t sfdp_size = sfdp->num_dwords * sizeof(*sfdp->dwords);
+
+   return memory_read_from_buffer(buf, count, &off, nor->sfdp->dwords,
+  sfdp_size);
+}
+static BIN_ATTR_RO(sfdp, 0);
+
+static struct bin_attribute *spi_nor_sysfs_bin_entries[] = {
+   &bin_attr_sfdp,
+   NULL
+};
+
+static umode_t spi_nor_sysfs_is_bin_visible(struct kobject *kobj,
+   struct bin_attribute *attr, int n)
+{
+   struct spi_device *spi = to_spi_device(kobj_to_dev(kobj));
+   struct spi_mem *spimem = spi_get_drvdata(spi);
+   struct spi_nor *nor = spi_mem_get_drvdata(spimem);
+
+   if (attr == &bin_attr_sfdp && !nor->sfdp)
+   return 0;
+
+   return 0444;
+}
+
+static struct attribute_group spi_nor_sysfs_attr_group = {
+   .name   = NULL,
+   .is_bin_visible = spi_nor_sysfs_is_bin_visible,
+   .attrs  = spi_nor_sysfs_entries,
+   .bin_attrs  = spi_nor_sysfs_bin_entries,
+};
+
+int spi_nor_sysfs_create(struct spi_nor *nor)
+{
+   return sysfs_create_group(&nor->dev->kobj,

[PATCH v2 0/2] mtd: spi-nor: support dumping sfdp tables

2021-03-23 Thread Michael Walle

Add the possibility to dump the SFDP data of a flash device.

More and more flash devices share the same flash ID and we need per device
fixups. Usually, these fixups differentiate flashes by looking at
differences in the SFDP data. Determining the difference is only possible
if we have the SFDP data for all the flashes which share a flash ID. This
will lay the foundation to dump the whole SFDP data of a flash device.

This is even more important, because some datasheets don't even contain
the SFDP data. Fixups for these kinds of flashes are nearly impossible to
do.

I envision having a database of all the SFDP data for the flashes we
support and make it a requirement to submit it when a new flash is added.
This might or might not have legal implications. Thus I'd start with having
that database private to the SPI NOR maintainers.

Changes since v1:
 - use sysfs_emit()
 - add comment about the allocation of the sfdp dwords
 - free SFDP memory in the error path
 - use BIN_ATTR_RO(sfdp, 0)
 - use spi_nor_read_sfdp()

Changes since RFC:
 - Don't read SFDP data after probe. The flash might already be switched to
   8D-8D-8D mode. Instead, cache the SFDP data
 - add two sysfs files: jedec-id and name
 - change the file mode of the sfdp file from 0400 to 0444. There is no
   hardware access anymore.

Michael Walle (2):
  mtd: spi-nor: sfdp: save a copy of the SFDP data
  mtd: spi-nor: add initial sysfs support

 drivers/mtd/spi-nor/Makefile |  2 +-
 drivers/mtd/spi-nor/core.c   |  5 +++
 drivers/mtd/spi-nor/core.h   | 13 ++
 drivers/mtd/spi-nor/sfdp.c   | 58 
 drivers/mtd/spi-nor/sysfs.c  | 86 
 include/linux/mtd/spi-nor.h  |  2 +
 6 files changed, 165 insertions(+), 1 deletion(-)
 create mode 100644 drivers/mtd/spi-nor/sysfs.c

-- 
2.20.1

[PATCH] [v2] btrfs: zoned: bail out in btrfs_alloc_chunk for bad input

2021-03-23 Thread Arnd Bergmann

From: Arnd Bergmann 

gcc complains that the ctl->max_chunk_size member might be used
uninitialized when none of the three conditions for initializing it in
init_alloc_chunk_ctl_policy_zoned() are true:

In function ‘init_alloc_chunk_ctl_policy_zoned’,
inlined from ‘init_alloc_chunk_ctl’ at fs/btrfs/volumes.c:5023:3,
inlined from ‘btrfs_alloc_chunk’ at fs/btrfs/volumes.c:5340:2:
include/linux/compiler-gcc.h:48:45: error: ‘ctl.max_chunk_size’ may be used 
uninitialized [-Werror=maybe-uninitialized]
 4998 | ctl->max_chunk_size = min(limit, ctl->max_chunk_size);
  |   ^~~
fs/btrfs/volumes.c: In function ‘btrfs_alloc_chunk’:
fs/btrfs/volumes.c:5316:32: note: ‘ctl’ declared here
 5316 | struct alloc_chunk_ctl ctl;
  |^~~

If we ever get into this condition, something is seriously
wrong, so the same logic as in init_alloc_chunk_ctl_policy_regular()
and a few other places should be applied. This avoids both further
data corruption, and the compile-time warning.

Fixes: 1cd6121f2a38 ("btrfs: zoned: implement zoned chunk allocator")
Link: https://lore.kernel.org/lkml/20210323132343.gf7...@twin.jikos.cz/
Suggested-by: David Sterba 
Signed-off-by: Arnd Bergmann 
---
Note that the -Wmaybe-uninitialized warning is globally disabled
by default. For some reason I got this warning anyway when building
this specific file with gcc-11.
---
 fs/btrfs/volumes.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index bc3b33efddc5..d2994305ed77 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4989,6 +4989,8 @@ static void init_alloc_chunk_ctl_policy_zoned(
ctl->max_chunk_size = 2 * ctl->max_stripe_size;
ctl->devs_max = min_t(int, ctl->devs_max,
  BTRFS_MAX_DEVS_SYS_CHUNK);
+   } else {
+   BUG();
}
 
/* We don't want a chunk larger than 10% of writable space */
-- 
2.29.2

Re: [PATCH] serial: imx: drop workaround for forced irq threading

2021-03-23 Thread Johan Hovold

On Mon, Mar 22, 2021 at 02:40:32PM +0100, Uwe Kleine-König wrote:
> Hello Johan,
> 
> On Mon, Mar 22, 2021 at 02:20:57PM +0100, Johan Hovold wrote:
> > On Mon, Mar 22, 2021 at 12:55:36PM +0100, Uwe Kleine-König wrote:
> > > On Mon, Mar 22, 2021 at 12:39:18PM +0100, Sebastian Andrzej Siewior wrote:
> > > > On 2021-03-22 12:34:02 [+0100], Uwe Kleine-König wrote:
> > > > > On Mon, Mar 22, 2021 at 12:10:36PM +0100, Johan Hovold wrote:
> > > > > > Force-threaded interrupt handlers used to run with interrupts 
> > > > > > enabled,
> > > > > > something which could lead to deadlocks in case a threaded handler
> > > > > > shared a lock with code running in hard interrupt context (e.g. 
> > > > > > timer
> > > > > > callbacks) and did not explicitly disable interrupts.
> > > > > > 
> > > > > > This was specifically the case for serial drivers that take the port
> > > > > > lock in their console write path as printk can be called from hard
> > > > > > interrupt context also with forced threading ("threadirqs").
> > > > > > 
> > > > > > Since commit 81e2073c175b ("genirq: Disable interrupts for force
> > > > > > threaded handlers") interrupt handlers always run with interrupts
> > > > > > disabled on non-RT so that drivers no longer need to handle this.
> > > > > 
> > > > > So we're breaking RT knowingly here? If this is the case I'm not happy
> > > > > with your change. (And if RT is not affected a different wording would
> > > > > be good.)
> > > > 
> > > > Which wording, could you be more specific? It looks good from here and
> > > > no, RT is not affected.
> > > 
> > > The commit log says essentially: "The change is fine on non-RT" which
> > > suggests there is a problem on RT.
> > 
> > I don't think you can read that into the commit message.
> 
> From a strictly logical point of view you indeed cannot. But if you go
> to the street and say to people there that they can park their car in
> this street free of charge between Monday and Friday, I expect that most
> of them will assume that they have to pay for parking on weekends.

That analogy would almost seem to suggest bad intent on my side.

To say that this workaround is no longer needed on !RT does not imply
that it is needed on RT. If anything it suggests I have considered RT,
I'd say.

> So when you said that on non-RT the reason why it used to need a
> workaround is gone, that made me wonder what that implies for RT.

Fair enough. I thought it was obvious from the commit message and the
commits referred to, and that RT wouldn't depend on patching random
mainline drivers like this without a clear marking such as using raw
spin locks.

Greg's already picked this one up and this is hopefully the last one
we'll see of these.

Johan


signature.asc
Description: PGP signature

Re: [RFC PATCH 2/2] mm,drm/ttm: Use VM_PFNMAP for TTM vmas

2021-03-23 Thread Christian König





Am 22.03.21 um 09:13 schrieb Thomas Hellström (Intel):

Hi!

On 3/22/21 8:47 AM, Christian König wrote:

Am 21.03.21 um 19:45 schrieb Thomas Hellström (Intel):

To block fast gup we need to make sure TTM ptes are always special.
With MIXEDMAP we, on architectures that don't support pte_special,
insert normal ptes, but OTOH on those architectures, fast is not
supported.
At the same time, the function documentation to vm_normal_page() 
suggests

that ptes pointing to system memory pages of MIXEDMAP vmas are always
normal, but that doesn't seem consistent with what's implemented in
vmf_insert_mixed(). I'm thus not entirely sure this patch is actually
needed.

But to make sure and to avoid also normal (non-fast) gup, make all
TTM vmas PFNMAP. With PFNMAP we can't allow COW mappings
anymore so make is_cow_mapping() available and use it to reject
COW mappings at mmap time.


I would separate the disallowing of COW mapping from the PFN change. 
I'm pretty sure that COW mappings never worked on TTM BOs in the 
first place.


COW doesn't work with PFNMAP together with non-linear maps, so as a 
consequence from moving from MIXEDMAP to PFNMAP we must disallow COW, 
so it seems logical to me to do it in one patch.


And working COW was one of the tests I used for huge PMDs/PUDs, so it 
has indeed been working, but I can't think of any relevant use-cases.


Ok, going to keep that in mind. I was assuming COW didn't work before 
on TTM pages.



Did you, BTW, have a chance to test this with WC mappings?


I'm going to give this a full piglit round, but currently I'm busy with 
internal testing.


Thanks,
Christian.



Thanks,
/Thomas





But either way this patch is Reviewed-by: Christian König 
.


Thanks,
Christian.



There was previously a comment in the code that WC mappings together
with x86 PAT + PFNMAP was bad for performance. However from looking at
vmf_insert_mixed() it looks like in the current code PFNMAP and 
MIXEDMAP

are handled the same for architectures that support pte_special. This
means there should not be a performance difference anymore, but this
needs to be verified.

Cc: Christian Koenig 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Andrew Morton 
Cc: Jason Gunthorpe 
Cc: linux...@kvack.org
Cc: dri-de...@lists.freedesktop.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Thomas Hellström (Intel) 
---
  drivers/gpu/drm/ttm/ttm_bo_vm.c | 22 --
  include/linux/mm.h  |  5 +
  mm/internal.h   |  5 -
  3 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c 
b/drivers/gpu/drm/ttm/ttm_bo_vm.c

index 1c34983480e5..708c6fb9be81 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -372,12 +372,7 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct 
vm_fault *vmf,

   * at arbitrary times while the data is mmap'ed.
   * See vmf_insert_mixed_prot() for a discussion.
   */
-    if (vma->vm_flags & VM_MIXEDMAP)
-    ret = vmf_insert_mixed_prot(vma, address,
-    __pfn_to_pfn_t(pfn, PFN_DEV),
-    prot);
-    else
-    ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
+    ret = vmf_insert_pfn_prot(vma, address, pfn, prot);
    /* Never error on prefaulted PTEs */
  if (unlikely((ret & VM_FAULT_ERROR))) {
@@ -555,18 +550,14 @@ static void ttm_bo_mmap_vma_setup(struct 
ttm_buffer_object *bo, struct vm_area_s

   * Note: We're transferring the bo reference to
   * vma->vm_private_data here.
   */
-
  vma->vm_private_data = bo;
    /*
- * We'd like to use VM_PFNMAP on shared mappings, where
- * (vma->vm_flags & VM_SHARED) != 0, for performance reasons,
- * but for some reason VM_PFNMAP + x86 PAT + write-combine is very
- * bad for performance. Until that has been sorted out, use
- * VM_MIXEDMAP on all mappings. See freedesktop.org bug #75719
+ * PFNMAP forces us to block COW mappings in mmap(),
+ * and with MIXEDMAP we would incorrectly allow fast gup
+ * on TTM memory on architectures that don't have pte_special.
   */
-    vma->vm_flags |= VM_MIXEDMAP;
-    vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+    vma->vm_flags |= VM_PFNMAP | VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
  }
    int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
@@ -579,6 +570,9 @@ int ttm_bo_mmap(struct file *filp, struct 
vm_area_struct *vma,

  if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET_START))
  return -EINVAL;
  +    if (unlikely(is_cow_mapping(vma->vm_flags)))
+    return -EINVAL;
+
  bo = ttm_bo_vm_lookup(bdev, vma->vm_pgoff, vma_pages(vma));
  if (unlikely(!bo))
  return -EINVAL;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 77e64e3eac80..c6ebf7f9ddbb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -686,6 +686,11 @@ static inline bool vma_is_accessible(struct

Re: [PATCH v1 1/6] usb: gadget: pch_udc: Drop unneeded cpu_to_le32() call

2021-03-23 Thread Greg Kroah-Hartman

On Mon, Mar 22, 2021 at 11:11:44PM +0200, Andy Shevchenko wrote:
> Either way ~0 will be in the correct byte order,
> hence drop unneeded cpu_to_le32() call. Moreover,
> it makes sparse happy, otherwise it complains:
> 
> pch_udc.c:1813:27: warning: incorrect type in assignment (different base 
> types)
> pch_udc.c:1813:27:expected unsigned int [usertype] dataptr
> pch_udc.c:1813:27:got restricted __le32 [usertype]
> 
> Fixes: f646cf94520e ("USB device driver of Topcliff PCH")
> Signed-off-by: Andy Shevchenko 
> ---
>  drivers/usb/gadget/udc/pch_udc.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/usb/gadget/udc/pch_udc.c 
> b/drivers/usb/gadget/udc/pch_udc.c
> index a3c1fc924268..2e2dca391007 100644
> --- a/drivers/usb/gadget/udc/pch_udc.c
> +++ b/drivers/usb/gadget/udc/pch_udc.c
> @@ -1756,7 +1756,7 @@ static struct usb_request *pch_udc_alloc_request(struct 
> usb_ep *usbep,
>   }
>   /* prevent from using desc. - set HOST BUSY */
>   dma_desc->status |= PCH_UDC_BS_HST_BSY;
> - dma_desc->dataptr = cpu_to_le32(DMA_ADDR_INVALID);
> + dma_desc->dataptr = DMA_ADDR_INVALID;
>   req->td_data = dma_desc;
>   req->td_data_last = dma_desc;
>   req->chain_len = 1;

With this series applied, I get the following build warning:

drivers/usb/gadget/udc/pch_udc.c: In function ‘pch_udc_alloc_request’:
drivers/usb/gadget/udc/pch_udc.c:208:26: warning: conversion from ‘long long 
unsigned int’ to ‘u32’ {aka ‘unsigned int’} changes value from 
‘18446744073709551615’ to ‘4294967295’ [-Woverflow]
  208 | #define DMA_ADDR_INVALID (~(dma_addr_t)0)
  |  ^
drivers/usb/gadget/udc/pch_udc.c:1813:22: note: in expansion of macro 
‘DMA_ADDR_INVALID’
 1813 |  dma_desc->dataptr = DMA_ADDR_INVALID;
  |  ^~~~


Please fix up and resend a new version of this series.

thanks,

greg k-h

[PATCH v5 16/19] coresight: sink: Add TRBE driver

2021-03-23 Thread Suzuki K Poulose

From: Anshuman Khandual 

Trace Buffer Extension (TRBE) implements a trace buffer per CPU which is
accessible via the system registers. The TRBE supports different addressing
modes including CPU virtual address and buffer modes including the circular
buffer mode. The TRBE buffer is addressed by a base pointer (TRBBASER_EL1),
a write pointer (TRBPTR_EL1) and a limit pointer (TRBLIMITR_EL1). But the
access to the trace buffer could be prohibited by a higher exception level
(EL3 or EL2), indicated by TRBIDR_EL1.P. The TRBE can also generate a CPU
private interrupt (PPI) on address translation errors and when the buffer
is full. Overall implementation here is inspired from the Arm SPE driver.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Suzuki K Poulose 
Signed-off-by: Anshuman Khandual 
[ Mark the buffer truncated on WRAP event, error code cleanup ]
Signed-off-by: Suzuki K Poulose 
---
Changes since v4:
 - Mark the buffer as truncated on WRAP event (Mike Leach)
 - Tidy up limit_pointer computation from TRBLIMITR_EL1
 - Fix error code for vmap failure. (Mathieu)
---
 drivers/hwtracing/coresight/Kconfig  |   14 +
 drivers/hwtracing/coresight/Makefile |1 +
 drivers/hwtracing/coresight/coresight-trbe.c | 1157 ++
 drivers/hwtracing/coresight/coresight-trbe.h |  152 +++
 4 files changed, 1324 insertions(+)
 create mode 100644 drivers/hwtracing/coresight/coresight-trbe.c
 create mode 100644 drivers/hwtracing/coresight/coresight-trbe.h

diff --git a/drivers/hwtracing/coresight/Kconfig 
b/drivers/hwtracing/coresight/Kconfig
index f154ae7e705d..84530fd80998 100644
--- a/drivers/hwtracing/coresight/Kconfig
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -173,4 +173,18 @@ config CORESIGHT_CTI_INTEGRATION_REGS
  CTI trigger connections between this and other devices.These
  registers are not used in normal operation and can leave devices in
  an inconsistent state.
+
+config CORESIGHT_TRBE
+   tristate "Trace Buffer Extension (TRBE) driver"
+   depends on ARM64 && CORESIGHT_SOURCE_ETM4X
+   help
+ This driver provides support for percpu Trace Buffer Extension (TRBE).
+ TRBE always needs to be used along with its corresponding percpu ETE
+ component. ETE generates trace data which is then captured with TRBE.
+ Unlike traditional sink devices, TRBE is a CPU feature accessible via
+ system registers. But its explicit dependency with trace unit (ETE)
+ requires it to be plugged in as a coresight sink device.
+
+ To compile this driver as a module, choose M here: the module will be
+ called coresight-trbe.
 endif
diff --git a/drivers/hwtracing/coresight/Makefile 
b/drivers/hwtracing/coresight/Makefile
index f20e357758d1..d60816509755 100644
--- a/drivers/hwtracing/coresight/Makefile
+++ b/drivers/hwtracing/coresight/Makefile
@@ -21,5 +21,6 @@ obj-$(CONFIG_CORESIGHT_STM) += coresight-stm.o
 obj-$(CONFIG_CORESIGHT_CPU_DEBUG) += coresight-cpu-debug.o
 obj-$(CONFIG_CORESIGHT_CATU) += coresight-catu.o
 obj-$(CONFIG_CORESIGHT_CTI) += coresight-cti.o
+obj-$(CONFIG_CORESIGHT_TRBE) += coresight-trbe.o
 coresight-cti-y := coresight-cti-core.ocoresight-cti-platform.o \
   coresight-cti-sysfs.o
diff --git a/drivers/hwtracing/coresight/coresight-trbe.c 
b/drivers/hwtracing/coresight/coresight-trbe.c
new file mode 100644
index ..edd70c37fffb
--- /dev/null
+++ b/drivers/hwtracing/coresight/coresight-trbe.c
@@ -0,0 +1,1157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This driver enables Trace Buffer Extension (TRBE) as a per-cpu coresight
+ * sink device which could then pair with an appropriate per-cpu coresight source
+ * device (ETE) thus generating required trace data. Trace can be enabled
+ * via the perf framework.
+ *
+ * The AUX buffer handling is inspired from Arm SPE PMU driver.
+ *
+ * Copyright (C) 2020 ARM Ltd.
+ *
+ * Author: Anshuman Khandual 
+ */
+#define DRVNAME "arm_trbe"
+
+#define pr_fmt(fmt) DRVNAME ": " fmt
+
+#include 
+#include "coresight-trbe.h"
+
+#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT))
+
+/*
+ * A padding packet that will help the user space tools
+ * in skipping relevant sections in the captured trace
+ * data which could not be decoded. TRBE doesn't support
+ * formatting the trace data, unlike the legacy CoreSight
+ * sinks and thus we use ETE trace packets to pad the
+ * sections of the buffer.
+ */
+#define ETE_IGNORE_PACKET  0x70
+
+/*
+ * Minimum amount of meaningful trace will contain:
+ * A-Sync, Trace Info, Trace On, Address, Atom.
+ * This is about 44bytes of ETE trace. To be on
+ * the safer side, we assume 64bytes is the minimum
+ * space required for a meaningful session, before
+ * we hit a "WRAP" event.
+ */
+#define TRBE_TRACE_MIN_BUF_SIZE64
+
+enum trbe_fault_action {
+   TRBE_FAULT_ACT_WRAP,
+   TRBE_FAULT_ACT_SPURIOUS,
+   TRBE_FAULT_ACT_FATAL,
+};
+
+struct

[PATCH v5 15/19] coresight: core: Add support for dedicated percpu sinks

2021-03-23 Thread Suzuki K Poulose

From: Anshuman Khandual 

Add support for dedicated sinks that are bound to individual CPUs. (e.g,
TRBE). To allow quicker access to the sink for a given CPU bound source,
keep a percpu array of the sink devices. Also, add support for building
a path to the CPU local sink from the ETM.

This adds a new percpu sink type CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM.
This new sink type is exclusively available and can only work with percpu
source type device CORESIGHT_DEV_SUBTYPE_SOURCE_PROC.

This defines a percpu structure that accommodates a single coresight_device
which can be used to store an initialized instance from a sink driver. As
these sinks are exclusively linked and dependent on corresponding percpu
source devices, they should also be the default sink device during a perf
session.

Outwards device connections are scanned while establishing paths between a
source and a sink device. But such connections are not present for certain
percpu source and sink devices which are exclusively linked and dependent.
Build the path directly and skip connection scanning for such devices.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Suzuki K Poulose 
Tested-by: Suzuki K Poulose 
Reviewed-by: Mathieu Poirier 
Reviewed-by: Mike Leach 
Signed-off-by: Anshuman Khandual 
[Moved the set/get percpu sink APIs from TRBE patch to here
 Fixed build break on arm32
]
Signed-off-by: Suzuki K Poulose 
---
Changes since v4:
 - Fix build on arm32 kernel
---
 drivers/hwtracing/coresight/coresight-core.c | 29 ++--
 drivers/hwtracing/coresight/coresight-priv.h |  3 ++
 include/linux/coresight.h| 13 +
 3 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-core.c 
b/drivers/hwtracing/coresight/coresight-core.c
index 0062c8935653..55c645616bf6 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -23,6 +23,7 @@
 #include "coresight-priv.h"
 
 static DEFINE_MUTEX(coresight_mutex);
+DEFINE_PER_CPU(struct coresight_device *, csdev_sink);
 
 /**
  * struct coresight_node - elements of a path, from source to sink
@@ -70,6 +71,18 @@ void coresight_remove_cti_ops(void)
 }
 EXPORT_SYMBOL_GPL(coresight_remove_cti_ops);
 
+void coresight_set_percpu_sink(int cpu, struct coresight_device *csdev)
+{
+   per_cpu(csdev_sink, cpu) = csdev;
+}
+EXPORT_SYMBOL_GPL(coresight_set_percpu_sink);
+
+struct coresight_device *coresight_get_percpu_sink(int cpu)
+{
+   return per_cpu(csdev_sink, cpu);
+}
+EXPORT_SYMBOL_GPL(coresight_get_percpu_sink);
+
 static int coresight_id_match(struct device *dev, void *data)
 {
int trace_id, i_trace_id;
@@ -784,6 +797,14 @@ static int _coresight_build_path(struct coresight_device 
*csdev,
if (csdev == sink)
goto out;
 
+   if (coresight_is_percpu_source(csdev) && coresight_is_percpu_sink(sink) 
&&
+   sink == per_cpu(csdev_sink, source_ops(csdev)->cpu_id(csdev))) {
+   if (_coresight_build_path(sink, sink, path) == 0) {
+   found = true;
+   goto out;
+   }
+   }
+
/* Not a sink - recursively explore each port found on this element */
for (i = 0; i < csdev->pdata->nr_outport; i++) {
struct coresight_device *child_dev;
@@ -999,8 +1020,12 @@ coresight_find_default_sink(struct coresight_device 
*csdev)
int depth = 0;
 
/* look for a default sink if we have not found for this device */
-   if (!csdev->def_sink)
-   csdev->def_sink = coresight_find_sink(csdev, &depth);
+   if (!csdev->def_sink) {
+   if (coresight_is_percpu_source(csdev))
+   csdev->def_sink = per_cpu(csdev_sink, 
source_ops(csdev)->cpu_id(csdev));
+   if (!csdev->def_sink)
+   csdev->def_sink = coresight_find_sink(csdev, &depth);
+   }
return csdev->def_sink;
 }
 
diff --git a/drivers/hwtracing/coresight/coresight-priv.h 
b/drivers/hwtracing/coresight/coresight-priv.h
index f5f654ea2994..ff1dd2092ac5 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -232,4 +232,7 @@ coresight_find_csdev_by_fwnode(struct fwnode_handle 
*r_fwnode);
 void coresight_set_assoc_ectdev_mutex(struct coresight_device *csdev,
  struct coresight_device *ect_csdev);
 
+void coresight_set_percpu_sink(int cpu, struct coresight_device *csdev);
+struct coresight_device *coresight_get_percpu_sink(int cpu);
+
 #endif
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 976ec2697610..85008a65e21f 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -50,6 +50,7 @@ enum coresight_dev_subtype_sink {
CORESIGHT_DEV_SUBTYPE_SINK_PORT,
CORESIGHT_DEV_SUBTYPE_SINK_BUFFER,
CORESIGHT_DEV_SUBTYPE_SINK_SYSMEM,
+   CORESIGHT_DEV_SUBTYPE_SINK_PERCPU_SYSMEM,

[PATCH v5 18/19] Documentation: trace: Add documentation for TRBE

2021-03-23 Thread Suzuki K Poulose

From: Anshuman Khandual 

Add documentation for the TRBE under trace/coresight.

Cc: Jonathan Corbet 
Cc: linux-...@vger.kernel.org
Reviewed-by: Mathieu Poirier 
Signed-off-by: Anshuman Khandual 
[ Split from the TRBE driver patch ]
Signed-off-by: Suzuki K Poulose 
---
 .../trace/coresight/coresight-trbe.rst| 38 +++
 1 file changed, 38 insertions(+)
 create mode 100644 Documentation/trace/coresight/coresight-trbe.rst

diff --git a/Documentation/trace/coresight/coresight-trbe.rst 
b/Documentation/trace/coresight/coresight-trbe.rst
new file mode 100644
index ..b9928ef148da
--- /dev/null
+++ b/Documentation/trace/coresight/coresight-trbe.rst
@@ -0,0 +1,38 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==
+Trace Buffer Extension (TRBE).
+==
+
+:Author:   Anshuman Khandual 
+:Date: November 2020
+
+Hardware Description
+
+
+Trace Buffer Extension (TRBE) is a percpu hardware which captures in system
+memory, CPU traces generated from a corresponding percpu tracing unit. This
+gets plugged in as a coresight sink device because the corresponding trace
+generators (ETE), are plugged in as source device.
+
+The TRBE is not compliant to CoreSight architecture specifications, but is
+driven via the CoreSight driver framework to support the ETE (which is
+CoreSight compliant) integration.
+
+Sysfs files and directories
+---
+
+The TRBE devices appear on the existing coresight bus alongside the other
+coresight devices::
+
+   >$ ls /sys/bus/coresight/devices
+   trbe0  trbe1  trbe2 trbe3
+
+The ``trbe`` named TRBEs are associated with a CPU.::
+
+   >$ ls /sys/bus/coresight/devices/trbe0/
+align flag
+
+*Key file items are:-*
+   * ``align``: TRBE write pointer alignment
+   * ``flag``: TRBE updates memory with access and dirty flags
-- 
2.24.1

[PATCH v5 19/19] dts: bindings: Document device tree bindings for Arm TRBE

2021-03-23 Thread Suzuki K Poulose

Document the device tree bindings for Trace Buffer Extension (TRBE).

Cc: Anshuman Khandual 
Cc: Mathieu Poirier 
Cc: Rob Herring 
Cc: devicet...@vger.kernel.org
Reviewed-by: Rob Herring 
Signed-off-by: Suzuki K Poulose 
---
 .../devicetree/bindings/arm/trbe.yaml | 49 +++
 MAINTAINERS   |  1 +
 2 files changed, 50 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/arm/trbe.yaml

diff --git a/Documentation/devicetree/bindings/arm/trbe.yaml 
b/Documentation/devicetree/bindings/arm/trbe.yaml
new file mode 100644
index ..4402d7bfd1fc
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/trbe.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
+# Copyright 2021, Arm Ltd
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/trbe.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: ARM Trace Buffer Extensions
+
+maintainers:
+  - Anshuman Khandual 
+
+description: |
+  Arm Trace Buffer Extension (TRBE) is a per CPU component
+  for storing trace generated on the CPU to memory. It is
+  accessed via CPU system registers. The software can verify
+  if it is permitted to use the component by checking the
+  TRBIDR register.
+
+properties:
+  $nodename:
+const: "trbe"
+  compatible:
+items:
+  - const: arm,trace-buffer-extension
+
+  interrupts:
+description: |
+   Exactly 1 PPI must be listed. For heterogeneous systems where
+   TRBE is only supported on a subset of the CPUs, please consult
+   the arm,gic-v3 binding for details on describing a PPI partition.
+maxItems: 1
+
+required:
+  - compatible
+  - interrupts
+
+additionalProperties: false
+
+examples:
+
+  - |
+   #include 
+
+   trbe {
+ compatible = "arm,trace-buffer-extension";
+ interrupts = ;
+   };
+...
diff --git a/MAINTAINERS b/MAINTAINERS
index 3454ed1011c8..fbe863456ed1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1762,6 +1762,7 @@ F:
Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt
 F: Documentation/devicetree/bindings/arm/coresight-cti.yaml
 F: Documentation/devicetree/bindings/arm/coresight.txt
 F: Documentation/devicetree/bindings/arm/ete.yaml
+F: Documentation/devicetree/bindings/arm/trbe.yaml
 F: Documentation/trace/coresight/*
 F: drivers/hwtracing/coresight/*
 F: include/dt-bindings/arm/coresight-cti-dt.h
-- 
2.24.1

[PATCH v5 17/19] Documentation: coresight: trbe: Sysfs ABI description

2021-03-23 Thread Suzuki K Poulose

From: Anshuman Khandual 

Add sysfs ABI documentation for the TRBE devices.

Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Jonathan Corbet 
Cc: linux-...@vger.kernel.org
Reviewed-by: Mathieu Poirier 
Signed-off-by: Anshuman Khandual 
[ Split from the TRBE driver patch ]
Signed-off-by: Suzuki K Poulose 
---
 .../ABI/testing/sysfs-bus-coresight-devices-trbe   | 14 ++
 1 file changed, 14 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe

diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe 
b/Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe
new file mode 100644
index ..ad3bbc6fa751
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-trbe
@@ -0,0 +1,14 @@
+What:  /sys/bus/coresight/devices/trbe/align
+Date:  March 2021
+KernelVersion: 5.13
+Contact:   Anshuman Khandual 
+Description:   (Read) Shows the TRBE write pointer alignment. This value
+   is fetched from the TRBIDR register.
+
+What:  /sys/bus/coresight/devices/trbe/flag
+Date:  March 2021
+KernelVersion: 5.13
+Contact:   Anshuman Khandual 
+Description:   (Read) Shows if TRBE updates in the memory are with access
+   and dirty flag updates as well. This value is fetched from
+   the TRBIDR register.
-- 
2.24.1

decompress.c:undefined reference to `ftrace_likely_update'

2021-03-23 Thread kernel test robot

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   84196390620ac0e5070ae36af84c137c6216a7dc
commit: baec970aa5ba11099ad7a91773350c91fb2113f0 mips: Add N64 machine type
date:   9 weeks ago
config: mips-randconfig-r014-20210323 (attached as .config)
compiler: mips-linux-gcc (GCC) 9.3.0
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=baec970aa5ba11099ad7a91773350c91fb2113f0
git remote add linus 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
git fetch --no-tags linus master
git checkout baec970aa5ba11099ad7a91773350c91fb2113f0
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross 
ARCH=mips 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   mips-linux-ld: arch/mips/boot/compressed/decompress.o: in function 
`LZ4_decompress_safe_withSmallPrefix':
>> decompress.c:(.text+0xa4): undefined reference to `ftrace_likely_update'
>> mips-linux-ld: decompress.c:(.text+0x114): undefined reference to 
>> `ftrace_likely_update'
   mips-linux-ld: decompress.c:(.text+0x184): undefined reference to 
`ftrace_likely_update'
   mips-linux-ld: decompress.c:(.text+0x23c): undefined reference to 
`ftrace_likely_update'
   mips-linux-ld: decompress.c:(.text+0x270): undefined reference to 
`ftrace_likely_update'
   mips-linux-ld: 
arch/mips/boot/compressed/decompress.o:decompress.c:(.text+0x294): more 
undefined references to `ftrace_likely_update' follow

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


.config.gz
Description: application/gzip

[PATCH v5 14/19] coresight: etm-perf: Handle stale output handles

2021-03-23 Thread Suzuki K Poulose

The context associated with an ETM for a given perf event
includes :
  - handle -> the perf output handle for the AUX buffer.
  - the path for the trace components
  - the buffer config for the sink.

The path and the buffer config are part of the "aux_priv" data
(etm_event_data) setup by the setup_aux() callback, and made available
via perf_get_aux(handle).

Now with a sink supporting IRQ, the sink could "end" an output
handle when the buffer reaches the programmed limit and would try
to restart a handle. This could fail if there is not enough
space left in the AUX buffer (e.g., the userspace has not consumed
the data). This leaves the "handle" disconnected from the "event"
and also the "perf_get_aux()" cleared. This all happens within
the sink driver, without the etm_perf driver being aware.
Now when the event is actually stopped, etm_event_stop()
will need to access the "event_data". But since the handle
is not valid anymore, we lose the information to stop the
"trace" path. So, we need a reliable way to access the etm_event_data
even when the handle may not be active.

This patch replaces the per_cpu handle array with a per_cpu context
for the ETM, which tracks the "handle" as well as the "etm_event_data".
The context notes the etm_event_data at etm_event_start() and clears
it at etm_event_stop(). This makes sure that we don't access a
stale "etm_event_data" as we are guaranteed that it is not
freed by free_aux() as long as the event is active and tracing,
also provides us with access to the critical information
needed to wind up a session even in the absence of an active
output_handle.

This is not an issue for the legacy sinks as none of them supports
an IRQ and is centrally handled by the etm-perf.

Cc: Mathieu Poirier 
Cc: Anshuman Khandual 
Cc: Leo Yan 
Cc: Mike Leach 
Reviewed-by: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
 .../hwtracing/coresight/coresight-etm-perf.c  | 59 +--
 1 file changed, 54 insertions(+), 5 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c 
b/drivers/hwtracing/coresight/coresight-etm-perf.c
index aa0974bd265b..f123c26b9f54 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -24,7 +24,26 @@
 static struct pmu etm_pmu;
 static bool etm_perf_up;
 
-static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle);
+/*
+ * An ETM context for a running event includes the perf aux handle
+ * and aux_data. For ETM, the aux_data (etm_event_data), consists of
+ * the trace path and the sink configuration. The event data is accessible
+ * via perf_get_aux(handle). However, a sink could "end" a perf output
+ * handle via the IRQ handler. And if the "sink" encounters a failure
+ * to "begin" another session (e.g due to lack of space in the buffer),
+ * the handle will be cleared. Thus, the event_data may not be accessible
+ * from the handle when we get to the etm_event_stop(), which is required
+ * for stopping the trace path. The event_data is guaranteed to stay alive
+ * until "free_aux()", which cannot happen as long as the event is active on
+ * the ETM. Thus the event_data for the session must be part of the ETM context
+ * to make sure we can disable the trace path.
+ */
+struct etm_ctxt {
+   struct perf_output_handle handle;
+   struct etm_event_data *event_data;
+};
+
+static DEFINE_PER_CPU(struct etm_ctxt, etm_ctxt);
 static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
 
 /*
@@ -376,13 +395,18 @@ static void etm_event_start(struct perf_event *event, int 
flags)
 {
int cpu = smp_processor_id();
struct etm_event_data *event_data;
-   struct perf_output_handle *handle = this_cpu_ptr(_handle);
+   struct etm_ctxt *ctxt = this_cpu_ptr(_ctxt);
+   struct perf_output_handle *handle = >handle;
struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
struct list_head *path;
 
if (!csdev)
goto fail;
 
+   /* Have we messed up our tracking ? */
+   if (WARN_ON(ctxt->event_data))
+   goto fail;
+
/*
 * Deal with the ring buffer API and get a handle on the
 * session's information.
@@ -418,6 +442,8 @@ static void etm_event_start(struct perf_event *event, int 
flags)
if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
goto fail_disable_path;
 
+   /* Save the event_data for this ETM */
+   ctxt->event_data = event_data;
 out:
return;
 
@@ -436,13 +462,30 @@ static void etm_event_stop(struct perf_event *event, int 
mode)
int cpu = smp_processor_id();
unsigned long size;
struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
-   struct perf_output_handle *handle = this_cpu_ptr(_handle);
-   struct etm_event_data *event_data = perf_get_aux(handle);
+   struct etm_ctxt *ctxt = this_cpu_ptr(_ctxt);
+   struct

[PATCH v5 11/19] coresight: etm4x: Add support for PE OS lock

2021-03-23 Thread Suzuki K Poulose

ETE may not implement the OS lock and instead could rely on
the PE OS Lock for the trace unit access. This is indicated
by the TRCOSLSR.OSLM == 0b100. Add support for handling the
PE OS lock.

Cc: Mike Leach 
Reviewed-by: mike.leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
 .../coresight/coresight-etm4x-core.c  | 50 +++
 drivers/hwtracing/coresight/coresight-etm4x.h | 15 ++
 2 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 00297906669c..35802caca32a 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -115,30 +115,59 @@ void etm4x_sysreg_write(u64 val, u32 offset, bool 
_relaxed, bool _64bit)
}
 }
 
-static void etm4_os_unlock_csa(struct etmv4_drvdata *drvdata, struct 
csdev_access *csa)
+static void etm_detect_os_lock(struct etmv4_drvdata *drvdata,
+  struct csdev_access *csa)
 {
-   /* Writing 0 to TRCOSLAR unlocks the trace registers */
-   etm4x_relaxed_write32(csa, 0x0, TRCOSLAR);
-   drvdata->os_unlock = true;
+   u32 oslsr = etm4x_relaxed_read32(csa, TRCOSLSR);
+
+   drvdata->os_lock_model = ETM_OSLSR_OSLM(oslsr);
+}
+
+static void etm_write_os_lock(struct etmv4_drvdata *drvdata,
+ struct csdev_access *csa, u32 val)
+{
+   val = !!val;
+
+   switch (drvdata->os_lock_model) {
+   case ETM_OSLOCK_PRESENT:
+   etm4x_relaxed_write32(csa, val, TRCOSLAR);
+   break;
+   case ETM_OSLOCK_PE:
+   write_sysreg_s(val, SYS_OSLAR_EL1);
+   break;
+   default:
+   pr_warn_once("CPU%d: Unsupported Trace OSLock model: %x\n",
+smp_processor_id(), drvdata->os_lock_model);
+   fallthrough;
+   case ETM_OSLOCK_NI:
+   return;
+   }
isb();
 }
 
+static inline void etm4_os_unlock_csa(struct etmv4_drvdata *drvdata,
+ struct csdev_access *csa)
+{
+   WARN_ON(drvdata->cpu != smp_processor_id());
+
+   /* Writing 0 to OS Lock unlocks the trace unit registers */
+   etm_write_os_lock(drvdata, csa, 0x0);
+   drvdata->os_unlock = true;
+}
+
 static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
 {
if (!WARN_ON(!drvdata->csdev))
etm4_os_unlock_csa(drvdata, >csdev->access);
-
 }
 
 static void etm4_os_lock(struct etmv4_drvdata *drvdata)
 {
if (WARN_ON(!drvdata->csdev))
return;
-
-   /* Writing 0x1 to TRCOSLAR locks the trace registers */
-   etm4x_relaxed_write32(>csdev->access, 0x1, TRCOSLAR);
+   /* Writing 0x1 to OS Lock locks the trace registers */
+   etm_write_os_lock(drvdata, >csdev->access, 0x1);
drvdata->os_unlock = false;
-   isb();
 }
 
 static void etm4_cs_lock(struct etmv4_drvdata *drvdata,
@@ -937,6 +966,9 @@ static void etm4_init_arch_data(void *info)
if (!etm4_init_csdev_access(drvdata, csa))
return;
 
+   /* Detect the support for OS Lock before we actually use it */
+   etm_detect_os_lock(drvdata, csa);
+
/* Make sure all registers are accessible */
etm4_os_unlock_csa(drvdata, csa);
etm4_cs_unlock(drvdata, csa);
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h 
b/drivers/hwtracing/coresight/coresight-etm4x.h
index f6478ef642bf..5b961c5b78d1 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -505,6 +505,20 @@
 ETM_MODE_EXCL_KERN | \
 ETM_MODE_EXCL_USER)
 
+/*
+ * TRCOSLSR.OSLM advertises the OS Lock model.
+ * OSLM[2:0] = TRCOSLSR[4:3,0]
+ *
+ * 0b000 - Trace OS Lock is not implemented.
+ * 0b010 - Trace OS Lock is implemented.
+ * 0b100 - Trace OS Lock is not implemented, unit is controlled by PE OS 
Lock.
+ */
+#define ETM_OSLOCK_NI  0b000
+#define ETM_OSLOCK_PRESENT 0b010
+#define ETM_OSLOCK_PE  0b100
+
+#define ETM_OSLSR_OSLM(oslsr)  oslsr) & GENMASK(4, 3)) >> 2) | (oslsr & 
0x1))
+
 /*
  * TRCDEVARCH Bit field definitions
  * Bits[31:21] - ARCHITECT = Always Arm Ltd.
@@ -898,6 +912,7 @@ struct etmv4_drvdata {
u8  s_ex_level;
u8  ns_ex_level;
u8  q_support;
+   u8  os_lock_model;
boolsticky_enable;
boolboot_enable;
boolos_unlock;
-- 
2.24.1

[PATCH v5 13/19] dts: bindings: Document device tree bindings for ETE

2021-03-23 Thread Suzuki K Poulose

Document the device tree bindings for Embedded Trace Extensions.
ETE can be connected to legacy coresight components and thus
could optionally contain a connection graph as described by
the CoreSight bindings.

Cc: devicet...@vger.kernel.org
Cc: Mathieu Poirier 
Cc: Mike Leach 
Cc: Rob Herring 
Signed-off-by: Suzuki K Poulose 
---
Changes since v4:
 - Fix the out-ports definition (Rob Herring)
---
 .../devicetree/bindings/arm/ete.yaml  | 75 +++
 MAINTAINERS   |  1 +
 2 files changed, 76 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/arm/ete.yaml

diff --git a/Documentation/devicetree/bindings/arm/ete.yaml 
b/Documentation/devicetree/bindings/arm/ete.yaml
new file mode 100644
index ..7f9b2d1e1147
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ete.yaml
@@ -0,0 +1,75 @@
+# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
+# Copyright 2021, Arm Ltd
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/ete.yaml#;
+$schema: "http://devicetree.org/meta-schemas/core.yaml#;
+
+title: ARM Embedded Trace Extensions
+
+maintainers:
+  - Suzuki K Poulose 
+  - Mathieu Poirier 
+
+description: |
+  Arm Embedded Trace Extension(ETE) is a per CPU trace component that
+  allows tracing the CPU execution. It overlaps with the CoreSight ETMv4
+  architecture and has extended support for future architecture changes.
+  The trace generated by the ETE could be stored via legacy CoreSight
+  components (e.g, TMC-ETR) or other means (e.g, using a per CPU buffer
+  Arm Trace Buffer Extension (TRBE)). Since the ETE can be connected to
+  legacy CoreSight components, a node must be listed per instance, along
+  with any optional connection graph as per the coresight bindings.
+  See bindings/arm/coresight.txt.
+
+properties:
+  $nodename:
+pattern: "^ete([0-9a-f]+)$"
+  compatible:
+items:
+  - const: arm,embedded-trace-extension
+
+  cpu:
+description: |
+  Handle to the cpu this ETE is bound to.
+$ref: /schemas/types.yaml#/definitions/phandle
+
+  out-ports:
+description: |
+  Output connections from the ETE to legacy CoreSight trace bus.
+$ref: /schemas/graph.yaml#/properties/ports
+properties:
+  port:
+description: Output connection from the ETE to legacy CoreSight Trace 
bus.
+$ref: /schemas/graph.yaml#/properties/port
+
+required:
+  - compatible
+  - cpu
+
+additionalProperties: false
+
+examples:
+
+# An ETE node without legacy CoreSight connections
+  - |
+ete0 {
+  compatible = "arm,embedded-trace-extension";
+  cpu = <_0>;
+};
+# An ETE node with legacy CoreSight connections
+  - |
+   ete1 {
+  compatible = "arm,embedded-trace-extension";
+  cpu = <_1>;
+
+  out-ports {/* legacy coresight connection */
+ port {
+ ete1_out_port: endpoint {
+remote-endpoint = <_in_port0>;
+ };
+ };
+  };
+   };
+
+...
diff --git a/MAINTAINERS b/MAINTAINERS
index 9e876927c60d..3454ed1011c8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1761,6 +1761,7 @@ F:
Documentation/ABI/testing/sysfs-bus-coresight-devices-*
 F: Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt
 F: Documentation/devicetree/bindings/arm/coresight-cti.yaml
 F: Documentation/devicetree/bindings/arm/coresight.txt
+F: Documentation/devicetree/bindings/arm/ete.yaml
 F: Documentation/trace/coresight/*
 F: drivers/hwtracing/coresight/*
 F: include/dt-bindings/arm/coresight-cti-dt.h
-- 
2.24.1

[PATCH v5 12/19] coresight: ete: Add support for ETE tracing

2021-03-23 Thread Suzuki K Poulose

Add ETE as one of the device types supported by the
ETM4x driver. The devices are named following the
existing convention as ete<N>.

ETE mandates that the trace resource status register is programmed
before the tracing is turned on. For the moment simply write to
it indicating TraceActive.

ETE shares most of the registers with ETMv4 except for some and
also adds some new registers. Re-arrange the ETMv4x list
to share the common definitions and add the ETE sysreg support.

Reviewed-by: Mike Leach 
Reviewed-by: Mathieu Poirier 
Signed-off-by: Suzuki K Poulose 
---
Changes since v4:
 - Make ete sysreg read/write static (kernel test robot)
 - Squashed ETE sysreg support patch to this one.
 - Write 0 to TRCSTATR
---
 drivers/hwtracing/coresight/Kconfig   | 10 +--
 .../coresight/coresight-etm4x-core.c  | 90 ---
 .../coresight/coresight-etm4x-sysfs.c | 19 +++-
 drivers/hwtracing/coresight/coresight-etm4x.h | 66 --
 4 files changed, 155 insertions(+), 30 deletions(-)

diff --git a/drivers/hwtracing/coresight/Kconfig 
b/drivers/hwtracing/coresight/Kconfig
index 7b44ba22cbe1..f154ae7e705d 100644
--- a/drivers/hwtracing/coresight/Kconfig
+++ b/drivers/hwtracing/coresight/Kconfig
@@ -97,15 +97,15 @@ config CORESIGHT_SOURCE_ETM3X
  module will be called coresight-etm3x.
 
 config CORESIGHT_SOURCE_ETM4X
-   tristate "CoreSight Embedded Trace Macrocell 4.x driver"
+   tristate "CoreSight ETMv4.x / ETE driver"
depends on ARM64
select CORESIGHT_LINKS_AND_SINKS
select PID_IN_CONTEXTIDR
help
- This driver provides support for the ETM4.x tracer module, tracing the
- instructions that a processor is executing. This is primarily useful
- for instruction level tracing. Depending on the implemented version
- data tracing may also be available.
+ This driver provides support for the CoreSight Embedded Trace 
Macrocell
+ version 4.x and the Embedded Trace Extensions (ETE). Both are CPU 
tracer
+ modules, tracing the instructions that a processor is executing. This 
is
+ primarily useful for instruction level tracing.
 
  To compile this driver as a module, choose M here: the
  module will be called coresight-etm4x.
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c 
b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 35802caca32a..efb84ced83dd 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -115,6 +115,38 @@ void etm4x_sysreg_write(u64 val, u32 offset, bool 
_relaxed, bool _64bit)
}
 }
 
+static u64 ete_sysreg_read(u32 offset, bool _relaxed, bool _64bit)
+{
+   u64 res = 0;
+
+   switch (offset) {
+   ETE_READ_CASES(res)
+   default :
+   pr_warn_ratelimited("ete: trying to read unsupported register 
@%x\n",
+   offset);
+   }
+
+   if (!_relaxed)
+   __iormb(res);   /* Imitate the !relaxed I/O helpers */
+
+   return res;
+}
+
+static void ete_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit)
+{
+   if (!_relaxed)
+   __iowmb();  /* Imitate the !relaxed I/O helpers */
+   if (!_64bit)
+   val &= GENMASK(31, 0);
+
+   switch (offset) {
+   ETE_WRITE_CASES(val)
+   default :
+   pr_warn_ratelimited("ete: trying to write to unsupported 
register @%x\n",
+   offset);
+   }
+}
+
 static void etm_detect_os_lock(struct etmv4_drvdata *drvdata,
   struct csdev_access *csa)
 {
@@ -401,6 +433,13 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
etm4x_relaxed_write32(csa, trcpdcr | TRCPDCR_PU, TRCPDCR);
}
 
+   /*
+* ETE mandates that the TRCRSR is written to before
+* enabling it.
+*/
+   if (etm4x_is_ete(drvdata))
+   etm4x_relaxed_write32(csa, TRCRSR_TA, TRCRSR);
+
/* Enable the trace unit */
etm4x_relaxed_write32(csa, 1, TRCPRGCTLR);
 
@@ -862,13 +901,24 @@ static bool etm4_init_sysreg_access(struct etmv4_drvdata 
*drvdata,
 * ETMs implementing sysreg access must implement TRCDEVARCH.
 */
devarch = read_etm4x_sysreg_const_offset(TRCDEVARCH);
-   if ((devarch & ETM_DEVARCH_ID_MASK) != ETM_DEVARCH_ETMv4x_ARCH)
+   switch (devarch & ETM_DEVARCH_ID_MASK) {
+   case ETM_DEVARCH_ETMv4x_ARCH:
+   *csa = (struct csdev_access) {
+   .io_mem = false,
+   .read   = etm4x_sysreg_read,
+   .write  = etm4x_sysreg_write,
+   };
+   break;
+   case ETM_DEVARCH_ETE_ARCH:
+   *csa = (struct csdev_access) {
+   .io_mem = false,
+   .read   = ete_sysreg_read,
+

Re: [PATCH 1/1] nvme-pci: add the DISABLE_WRITE_ZEROES quirk for a Samsung PM1725a

2021-03-23 Thread Christoph Hellwig

On Tue, Mar 23, 2021 at 09:37:49AM +0100, Javier González wrote:
> Quick question. It seems like the current quirk simply disables
> write-zeroes. Would you be open for a quirk that aligns with MDTS for
> models that implemented it this way before TP4040?

Aligning to MDTS is our current behavior, although all kernels up to
5.11 had a bug in the calculation.

[PATCH v2] usb: gadget: legacy: fix error return code of msg_bind()

2021-03-23 Thread Jia-Ju Bai

When usb_otg_descriptor_alloc() returns NULL to usb_desc, no error
return code of msg_bind() is assigned.
To fix this bug, status is assigned with -ENOMEM in this case.

Reported-by: TOTE Robot 
Signed-off-by: Jia-Ju Bai 
---
v2:
* Fix a mistake in the report.
  Thank Greg for good advice.

---
 drivers/usb/gadget/legacy/mass_storage.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/legacy/mass_storage.c 
b/drivers/usb/gadget/legacy/mass_storage.c
index 9ed22c5fb7fe..ac1741126619 100644
--- a/drivers/usb/gadget/legacy/mass_storage.c
+++ b/drivers/usb/gadget/legacy/mass_storage.c
@@ -175,8 +175,10 @@ static int msg_bind(struct usb_composite_dev *cdev)
struct usb_descriptor_header *usb_desc;
 
usb_desc = usb_otg_descriptor_alloc(cdev->gadget);
-   if (!usb_desc)
+   if (!usb_desc) {
+   status = -ENOMEM;
goto fail_string_ids;
+   }
usb_otg_descriptor_init(cdev->gadget, usb_desc);
otg_desc[0] = usb_desc;
otg_desc[1] = NULL;
-- 
2.17.1

[PATCH] kasan: fix hwasan build for gcc

2021-03-23 Thread Arnd Bergmann

From: Arnd Bergmann 

gcc-11 adds support for -fsanitize=kernel-hwaddress, so it becomes
possible to enable CONFIG_KASAN_SW_TAGS.

Unfortunately this fails to build at the moment, because the
corresponding command line arguments use llvm specific syntax.

Change it to use the cc-param macro instead, which works on both
clang and gcc.

Signed-off-by: Arnd Bergmann 
---
 scripts/Makefile.kasan | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan
index 1e000cc2e7b4..0a2789783d1b 100644
--- a/scripts/Makefile.kasan
+++ b/scripts/Makefile.kasan
@@ -36,14 +36,14 @@ endif # CONFIG_KASAN_GENERIC
 ifdef CONFIG_KASAN_SW_TAGS
 
 ifdef CONFIG_KASAN_INLINE
-instrumentation_flags := -mllvm 
-hwasan-mapping-offset=$(KASAN_SHADOW_OFFSET)
+instrumentation_flags := $(call 
cc-param,hwasan-mapping-offset=$(KASAN_SHADOW_OFFSET))
 else
-instrumentation_flags := -mllvm -hwasan-instrument-with-calls=1
+instrumentation_flags := $(call cc-param,hwasan-instrument-with-calls=1)
 endif
 
 CFLAGS_KASAN := -fsanitize=kernel-hwaddress \
-   -mllvm -hwasan-instrument-stack=$(CONFIG_KASAN_STACK) \
-   -mllvm -hwasan-use-short-granules=0 \
+   $(call cc-param,hwasan-instrument-stack=$(CONFIG_KASAN_STACK)) \
+   $(call cc-param,hwasan-use-short-granules=0) \
$(instrumentation_flags)
 
 endif # CONFIG_KASAN_SW_TAGS
-- 
2.29.2

[PATCH -next] mm, page_alloc: avoid page_to_pfn() in move_freepages()

2021-03-23 Thread Liu Shixin

From: Kefeng Wang 

The start_pfn and end_pfn are already available in move_freepages_block(),
there is no need to go back and forth between page and pfn in move_freepages
and move_freepages_block, and pfn_valid_within() should validate pfn first
before touching the page.

Signed-off-by: Kefeng Wang 
Signed-off-by: Liu Shixin 
---
 mm/page_alloc.c | 28 +---
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c53fe4fa10bf..ccfaa8158862 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2425,19 +2425,21 @@ static inline struct page 
*__rmqueue_cma_fallback(struct zone *zone,
  * boundary. If alignment is required, use move_freepages_block()
  */
 static int move_freepages(struct zone *zone,
- struct page *start_page, struct page *end_page,
+ unsigned long start_pfn, unsigned long end_pfn,
  int migratetype, int *num_movable)
 {
struct page *page;
+   unsigned long pfn;
unsigned int order;
int pages_moved = 0;
 
-   for (page = start_page; page <= end_page;) {
-   if (!pfn_valid_within(page_to_pfn(page))) {
-   page++;
+   for (pfn = start_pfn; pfn <= end_pfn;) {
+   if (!pfn_valid_within(pfn)) {
+   pfn++;
continue;
}
 
+   page = pfn_to_page(pfn);
if (!PageBuddy(page)) {
/*
 * We assume that pages that could be isolated for
@@ -2447,8 +2449,7 @@ static int move_freepages(struct zone *zone,
if (num_movable &&
(PageLRU(page) || __PageMovable(page)))
(*num_movable)++;
-
-   page++;
+   pfn++;
continue;
}
 
@@ -2458,7 +2459,7 @@ static int move_freepages(struct zone *zone,
 
order = buddy_order(page);
move_to_free_list(page, zone, order, migratetype);
-   page += 1 << order;
+   pfn += 1 << order;
pages_moved += 1 << order;
}
 
@@ -2468,25 +2469,22 @@ static int move_freepages(struct zone *zone,
 int move_freepages_block(struct zone *zone, struct page *page,
int migratetype, int *num_movable)
 {
-   unsigned long start_pfn, end_pfn;
-   struct page *start_page, *end_page;
+   unsigned long start_pfn, end_pfn, pfn;
 
if (num_movable)
*num_movable = 0;
 
-   start_pfn = page_to_pfn(page);
-   start_pfn = start_pfn & ~(pageblock_nr_pages-1);
-   start_page = pfn_to_page(start_pfn);
-   end_page = start_page + pageblock_nr_pages - 1;
+   pfn = page_to_pfn(page);
+   start_pfn = pfn & ~(pageblock_nr_pages - 1);
end_pfn = start_pfn + pageblock_nr_pages - 1;
 
/* Do not cross zone boundaries */
if (!zone_spans_pfn(zone, start_pfn))
-   start_page = page;
+   start_pfn = pfn;
if (!zone_spans_pfn(zone, end_pfn))
return 0;
 
-   return move_freepages(zone, start_page, end_page, migratetype,
+   return move_freepages(zone, start_pfn, end_pfn, migratetype,
num_movable);
 }
 
-- 
2.25.1

RE: [PATCH v31 2/4] scsi: ufs: L2P map management for HPB read

2021-03-23 Thread Avri Altman

> 
> On 2021-03-23 14:37, Daejun Park wrote:
> >> On 2021-03-23 14:19, Daejun Park wrote:
>  On 2021-03-23 13:37, Daejun Park wrote:
> >> On 2021-03-23 12:22, Can Guo wrote:
> >>> On 2021-03-22 17:11, Bean Huo wrote:
>  On Mon, 2021-03-22 at 15:54 +0900, Daejun Park wrote:
> > +   switch (rsp_field->hpb_op) {
> >
> > +   case HPB_RSP_REQ_REGION_UPDATE:
> >
> > +   if (data_seg_len != DEV_DATA_SEG_LEN)
> >
> > +   dev_warn(>sdev_ufs_lu->sdev_dev,
> >
> > +"%s: data seg length is not
> > same.\n",
> >
> > +__func__);
> >
> > +   ufshpb_rsp_req_region_update(hpb, rsp_field);
> >
> > +   break;
> >
> > +   case HPB_RSP_DEV_RESET:
> >
> > +   dev_warn(>sdev_ufs_lu->sdev_dev,
> >
> > +"UFS device lost HPB information
> > during
> > PM.\n");
> >
> > +   break;
> 
>  Hi Deajun,
>  This series looks good to me. Just here I have one question. You
>  didn't
>  handle HPB_RSP_DEV_RESET, just a warning.  Based on your SS UFS,
>  how
>  to
>  handle HPB_RSP_DEV_RESET from the host side? Do you think we
>  shoud
>  reset host side HPB entry as well or what else?
> 
> 
>  Bean
> >>>
> >>> Same question here - I am still collecting feedbacks from flash
> >>> vendors
> >>> about
> >>> what is recommanded host behavior on reception of HPB Op code
> >>> 0x2,
> >>> since it
> >>> is not cleared defined in HPB2.0 specs.
> >>>
> >>> Can Guo.
> >>
> >> I think the question should be asked in the HPB2.0 patch, since in
> >> HPB1.0 device
> >> control mode, a HPB reset in device side does not impact anything
> >> in
> >> host side -
> >> host is not writing back any HPB entries to device anyways and HPB
> >> Read
> >> cmd with
> >> invalid HPB entries shall be treated as normal Read(10) cmd
> >> without
> >> any
> >> problems.
> >
> > Yes, UFS device will process read command even the HPB entries are
> > valid or
> > not. So it is warning about read performance drop by dev reset.
> 
>  Yeah, but still I am 100% sure about what should host do in case of
>  HPB2.0
>  when it receives HPB Op code 0x2, I am waiting for feedbacks.
> >>>
> >>> I think the host has two choices when it receives 0x2.
> >>> One is nothing on host.
> >>> The other is discarding all HPB entries in the host.
> >>>
> >>> In the JEDEC HPB spec, it as follows:
> >>> When the device is powered off by the host, the device may restore
> >>> L2P
> >>> map
> >>> data upon power up or build from the host’s HPB READ command.
> >>>
> >>> If some UFS builds L2P map data from the host's HPB READ commands, we
> >>> don't
> >>> have to discard HPB entries in the host.
> >>>
> >>> So I thinks there is nothing to do when it receives 0x2.
> >>
> >> But in HPB2.0, if we do nothing to active regions in host side, host
> >> can
> >> write
> >> HPB entries (which host thinks valid, but actually invalid in device
> >> side since
> >> reset happened) back to device through HPB Write Buffer cmds (BUFFER
> >> ID
> >> = 0x2).
> >> My question is that are all UFSs OK with this?
> >
> > Yes, it must be OK.
> >
> > Please refer the following the HPB 2.0 spec:
> >
> > If the HPB Entries sent by HPB WRITE BUFFER are removed by the device,
> > for example, because they are not consumed for a long enough period of
> > time,
> > then the HPB READ command for the removed HPB entries shall be handled
> > as a
> > normal READ command.
> >
> 
> No, it is talking about the subsequent HPB READ cmd sent after a HPB
> WRITE BUFFER cmd,
> but not the HPB WRITE BUFFER cmd itself...
Looks like this discussion is going the same way as we had in host mode.
HPB-WRITE-BUFFER 0x2, if it exists, is always a companion to HPB-READ.
You shouldn't consider them separately.

The device is expected to handle an invalid PPN by itself and, specifically for
this case,
as Daejun explained, to handle each HPB-READ (and its companion HPB-WRITE-BUFFER)
like READ10.

For device mode, doing nothing in case of dev reset, seems to me like the right 
thing to do.

Thanks,
Avri

> 
> Thanks,
> Can Guo.
> 
> > Thanks,
> > Daejun
> >
> >> Thanks,
> >> Can Guo.
> >>
> >>>
> >>> Thanks,
> >>> Daejun
> >>>
>  Thanks,
>  Can Guo.
> 
> >
> > Thanks,
> > Daejun
> >
> >> Please correct me if I am wrong.
> >
> >
> >
> >> Thanks,
> >> Can Guo.
> >>
> >>
> >>
> 
> 
> 
> >>
> >>
> >>

[PATCH] x86: turn off -fcf-protection for realmode

2021-03-23 Thread Arnd Bergmann

From: Arnd Bergmann 

The new Ubuntu gcc packages turn on -fcf-protection globally,
which causes a build failure in the x86 realmode code:

cc1: error: ‘-fcf-protection’ is not compatible with this target

Turn it off explicitly on compilers that understand this option.

Signed-off-by: Arnd Bergmann 
---
 arch/x86/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 5689d799bd12..0a1278512cc9 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -27,7 +27,7 @@ endif
 REALMODE_CFLAGS:= -m16 -Os -DDISABLE_BRANCH_PROFILING \
   -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
   -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
-  -mno-mmx -mno-sse
+  -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none)
 
 REALMODE_CFLAGS += -ffreestanding
 REALMODE_CFLAGS += -fno-stack-protector
-- 
2.29.2

Re: fs/io_uring.c:6920:12: warning: stack frame size of 1040 bytes in function 'io_submit_sqes'

2021-03-23 Thread Pavel Begunkov

On 23/03/2021 11:31, kernel test robot wrote:
> tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
> master
> head:   84196390620ac0e5070ae36af84c137c6216a7dc
> commit: e5d1bc0a91f16959aa279aa3ee9fdc246d4bb382 io_uring: defer flushing 
> cached reqs
> date:   6 weeks ago
> config: powerpc64-randconfig-r023-20210323 (attached as .config)
> compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 
> 14696baaf4c43fe53f738bc292bbe169eed93d5d)
> reproduce (this is a W=1 build):
> wget 
> https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
> ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # install powerpc64 cross compiling tool for clang build
> # apt-get install binutils-powerpc64-linux-gnu
> # 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e5d1bc0a91f16959aa279aa3ee9fdc246d4bb382
> git remote add linus 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
> git fetch --no-tags linus master
> git checkout e5d1bc0a91f16959aa279aa3ee9fdc246d4bb382
> # save the attached .config to linux build tree
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross 
> ARCH=powerpc64 
> 
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot 
> 
> All warnings (new ones prefixed by >>):
> 
>>> fs/io_uring.c:6920:12: warning: stack frame size of 1040 bytes in function 
>>> 'io_submit_sqes' [-Wframe-larger-than=]
>static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
>   ^
>1 warning generated. b

I don't know, for up-to-date code all submission functions are under
128 bytes for me, including io_submit_sqes with everything heavily
inlined into it. I believe it's just a strange config keeping
everything on stack for some reason (too under optimised?).


> 
> 
> vim +/io_submit_sqes +6920 fs/io_uring.c
> 
> 0553b8bda8709c Pavel Begunkov 2020-04-08  6919  
> 0f2122045b9462 Jens Axboe 2020-09-13 @6920  static int 
> io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
> 6c271ce2f1d572 Jens Axboe 2019-01-10  6921  {
> 863e05604a6fb4 Pavel Begunkov 2020-10-27  6922struct io_submit_link 
> link;
> 9e645e1105ca60 Jens Axboe 2019-05-10  6923int i, submitted = 0;
> 6c271ce2f1d572 Jens Axboe 2019-01-10  6924  
> c4a2ed72c9a615 Jens Axboe 2019-11-21  6925/* if we have a backlog 
> and couldn't flush it all, return BUSY */
> ad3eb2c89fb24d Jens Axboe 2019-12-18  6926if (test_bit(0, 
> >sq_check_overflow)) {
> 6c503150ae33ee Pavel Begunkov 2021-01-04  6927if 
> (!__io_cqring_overflow_flush(ctx, false, NULL, NULL))
> 1d7bb1d50fb4dc Jens Axboe 2019-11-06  6928return 
> -EBUSY;
> ad3eb2c89fb24d Jens Axboe 2019-12-18  6929}
> 6c271ce2f1d572 Jens Axboe 2019-01-10  6930  
> ee7d46d9db19de Pavel Begunkov 2019-12-30  6931/* make sure SQ entry 
> isn't read before tail */
> ee7d46d9db19de Pavel Begunkov 2019-12-30  6932nr = min3(nr, 
> ctx->sq_entries, io_sqring_entries(ctx));
> 9ef4f124894b7b Pavel Begunkov 2019-12-30  6933  
> 2b85edfc0c90ef Pavel Begunkov 2019-12-28  6934if 
> (!percpu_ref_tryget_many(>refs, nr))
> 2b85edfc0c90ef Pavel Begunkov 2019-12-28  6935return -EAGAIN;
> 6c271ce2f1d572 Jens Axboe 2019-01-10  6936  
> d8a6df10aac9f2 Jens Axboe 2020-10-15  6937
> percpu_counter_add(>io_uring->inflight, nr);
> faf7b51c06973f Jens Axboe 2020-10-07  6938refcount_add(nr, 
> >usage);
> 6c271ce2f1d572 Jens Axboe 2019-01-10  6939  
> ba88ff112bdfde Pavel Begunkov 2021-02-10  6940
> io_submit_state_start(>submit_state, nr);
> 863e05604a6fb4 Pavel Begunkov 2020-10-27  6941link.head = NULL;
> b14cca0c84c760 Pavel Begunkov 2020-01-17  6942  
> 6c271ce2f1d572 Jens Axboe 2019-01-10  6943for (i = 0; i < nr; 
> i++) {
> 3529d8c2b353e6 Jens Axboe 2019-12-19  6944const struct 
> io_uring_sqe *sqe;
> 196be95cd55720 Pavel Begunkov 2019-11-07  6945struct io_kiocb 
> *req;
> 1cb1edb2f5ba8a Pavel Begunkov 2020-02-06  6946int err;
> fb5ccc98782f65 Pavel Begunkov 2019-10-25  6947  
> b1e50e549b1372 Pavel Begunkov 2020-04-08  6948sqe = 
> io_get_sqe(ctx);
> b1e50e549b1372 Pavel Begunkov 2020-04-08  6949if 
> (unlikely(!sqe)) {
> b1e50e549b1372 Pavel Begunkov 2020-04-08  6950
> io_consume_sqe(ctx);
> b1e50e549b1372 Pavel Begunkov 2020-04-08  6951

Re: [PATCH -next] mm, page_alloc: avoid page_to_pfn() in move_freepages()

2021-03-23 Thread Matthew Wilcox

On Tue, Mar 23, 2021 at 09:12:15PM +0800, Liu Shixin wrote:
> From: Kefeng Wang 
> 
> The start_pfn and end_pfn are already available in move_freepages_block(),
> there is no need to go back and forth between page and pfn in move_freepages
> and move_freepages_block, and pfn_valid_within() should validate pfn first
> before touching the page.

This looks good to me:

Reviewed-by: Matthew Wilcox (Oracle) 

>  static int move_freepages(struct zone *zone,
> -   struct page *start_page, struct page *end_page,
> +   unsigned long start_pfn, unsigned long end_pfn,
> int migratetype, int *num_movable)
>  {
>   struct page *page;
> + unsigned long pfn;
>   unsigned int order;
>   int pages_moved = 0;
>  
> - for (page = start_page; page <= end_page;) {
> - if (!pfn_valid_within(page_to_pfn(page))) {
> - page++;
> + for (pfn = start_pfn; pfn <= end_pfn;) {
> + if (!pfn_valid_within(pfn)) {
> + pfn++;
>   continue;
>   }
>  
> + page = pfn_to_page(pfn);

I wonder if this wouldn't be even better if we did:

struct page *start_page = pfn_to_page(start_pfn);

for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
struct page *page = start_page + pfn - start_pfn;

if (!pfn_valid_within(pfn))
continue;

> -
> - page++;
> + pfn++;
>   continue;

... then we can drop the increment of pfn here

>   }
>  
> @@ -2458,7 +2459,7 @@ static int move_freepages(struct zone *zone,
>  
>   order = buddy_order(page);
>   move_to_free_list(page, zone, order, migratetype);
> - page += 1 << order;
> + pfn += 1 << order;

... and change this to pfn += (1 << order) - 1;

Do you have any numbers to quantify the benefit of this change?

[PATCH] scsi: pm8001: avoid -Wrestrict warning

2021-03-23 Thread Arnd Bergmann

From: Arnd Bergmann 

On some configurations, gcc warns about overlapping source and
destination arguments to snprintf:

drivers/scsi/pm8001/pm8001_init.c: In function 'pm8001_request_msix':
drivers/scsi/pm8001/pm8001_init.c:977:3: error: 'snprintf' argument 4 may 
overlap destination object 'pm8001_ha' [-Werror=restrict]
  977 |   snprintf(drvname, len, "%s-%d", pm8001_ha->name, i);
  |   ^~~
drivers/scsi/pm8001/pm8001_init.c:962:56: note: destination object referenced 
by 'restrict'-qualified argument 1 was declared here
  962 | static u32 pm8001_request_msix(struct pm8001_hba_info *pm8001_ha)
  |^

I first assumed this was a gcc bug, as that should not happen, but
a reduced test case makes it clear that this happens when the loop
counter is not bounded by the array size.

Help the compiler out by adding an explicit limit here to make the
code slightly more robust and avoid the warning.

Link: https://godbolt.org/z/6T1qPM
Signed-off-by: Arnd Bergmann 
---
 drivers/scsi/pm8001/pm8001_init.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/pm8001/pm8001_init.c 
b/drivers/scsi/pm8001/pm8001_init.c
index bd626ef876da..a268c647b987 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -963,6 +963,7 @@ static u32 pm8001_request_msix(struct pm8001_hba_info 
*pm8001_ha)
 {
u32 i = 0, j = 0;
int flag = 0, rc = 0;
+   int nr_irqs = pm8001_ha->number_of_intr;
 
if (pm8001_ha->chip_id != chip_8001)
flag &= ~IRQF_SHARED;
@@ -971,7 +972,10 @@ static u32 pm8001_request_msix(struct pm8001_hba_info 
*pm8001_ha)
   "pci_enable_msix request number of intr %d\n",
   pm8001_ha->number_of_intr);
 
-   for (i = 0; i < pm8001_ha->number_of_intr; i++) {
+   if (nr_irqs > ARRAY_SIZE(pm8001_ha->intr_drvname))
+   nr_irqs = ARRAY_SIZE(pm8001_ha->intr_drvname);
+
+   for (i = 0; i < nr_irqs; i++) {
snprintf(pm8001_ha->intr_drvname[i],
sizeof(pm8001_ha->intr_drvname[0]),
"%s-%d", pm8001_ha->name, i);
-- 
2.29.2

[PATCH v2 5/9] staging: rtl8723bs: remove function prototypes in hal/odm.c

2021-03-23 Thread Fabio Aiuto

fix the following checkpatch issues:

WARNING: externs should be avoided in .c files
285: FILE: drivers/staging/rtl8723bs/hal/odm.c:285:
+void odm_CommonInfoSelfInit(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
287: FILE: drivers/staging/rtl8723bs/hal/odm.c:287:
+void odm_CommonInfoSelfUpdate(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
289: FILE: drivers/staging/rtl8723bs/hal/odm.c:289:
+void odm_CmnInfoInit_Debug(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
291: FILE: drivers/staging/rtl8723bs/hal/odm.c:291:
+void odm_BasicDbgMessage(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
305: FILE: drivers/staging/rtl8723bs/hal/odm.c:305:
+void odm_RefreshRateAdaptiveMaskCE(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
309: FILE: drivers/staging/rtl8723bs/hal/odm.c:309:
+void odm_RSSIMonitorInit(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
311: FILE: drivers/staging/rtl8723bs/hal/odm.c:311:
+void odm_RSSIMonitorCheckCE(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
313: FILE: drivers/staging/rtl8723bs/hal/odm.c:313:
+void odm_RSSIMonitorCheck(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
315: FILE: drivers/staging/rtl8723bs/hal/odm.c:315:
+void odm_SwAntDetectInit(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
317: FILE: drivers/staging/rtl8723bs/hal/odm.c:317:
+void odm_SwAntDivChkAntSwitchCallback(void *FunctionContext);
--
WARNING: externs should be avoided in .c files
321: FILE: drivers/staging/rtl8723bs/hal/odm.c:321:
+void odm_GlobalAdapterCheck(void);
--
WARNING: externs should be avoided in .c files
323: FILE: drivers/staging/rtl8723bs/hal/odm.c:323:
+void odm_RefreshRateAdaptiveMask(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
325: FILE: drivers/staging/rtl8723bs/hal/odm.c:325:
+void ODM_TXPowerTrackingCheck(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
327: FILE: drivers/staging/rtl8723bs/hal/odm.c:327:
+void odm_RateAdaptiveMaskInit(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
330: FILE: drivers/staging/rtl8723bs/hal/odm.c:330:
+void odm_TXPowerTrackingInit(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
338: FILE: drivers/staging/rtl8723bs/hal/odm.c:338:
+void odm_InitHybridAntDiv(struct dm_odm_t *pDM_Odm);
--
WARNING: externs should be avoided in .c files
340: FILE: drivers/staging/rtl8723bs/hal/odm.c:340:
+bool odm_StaDefAntSel(
--
WARNING: externs should be avoided in .c files
349: FILE: drivers/staging/rtl8723bs/hal/odm.c:349:
+void odm_SetRxIdleAnt(struct dm_odm_t *pDM_Odm, u8 Ant, bool bDualPath);
--
WARNING: externs should be avoided in .c files
353: FILE: drivers/staging/rtl8723bs/hal/odm.c:353:
+void odm_HwAntDiv(struct dm_odm_t *pDM_Odm);

removed function prototypes and made functions static where needed
in hal/odm.c

moved function definitions within the file so that it still compiles
without the prototypes

Signed-off-by: Fabio Aiuto 
---
 drivers/staging/rtl8723bs/hal/odm.c | 1717 +--
 1 file changed, 824 insertions(+), 893 deletions(-)

diff --git a/drivers/staging/rtl8723bs/hal/odm.c 
b/drivers/staging/rtl8723bs/hal/odm.c
index 49d552105a65..3d92eee3a840 100644
--- a/drivers/staging/rtl8723bs/hal/odm.c
+++ b/drivers/staging/rtl8723bs/hal/odm.c
@@ -279,1126 +279,1057 @@ u32 TxScalingTable_Jaguar[TXSCALE_TABLE_SIZE] = {
0x3FE  /*  36, +6.0dB */
 };
 
-/*  Local Function predefine. */
-
-/* STARTCOMMON INFO RELATED--- */
-void odm_CommonInfoSelfInit(struct dm_odm_t *pDM_Odm);
-
-void odm_CommonInfoSelfUpdate(struct dm_odm_t *pDM_Odm);
+/* Remove Edca by Yu Chen */
 
-void odm_CmnInfoInit_Debug(struct dm_odm_t *pDM_Odm);
 
-void odm_BasicDbgMessage(struct dm_odm_t *pDM_Odm);
+#define RxDefaultAnt1  0x65a9
+#define RxDefaultAnt2  0x569a
 
-/* ENDCOMMON INFO RELATED--- */
+static void odm_CommonInfoSelfInit(struct dm_odm_t *pDM_Odm)
+{
+   pDM_Odm->bCckHighPower = (bool) PHY_QueryBBReg(pDM_Odm->Adapter, 
ODM_REG(CCK_RPT_FORMAT, pDM_Odm), ODM_BIT(CCK_RPT_FORMAT, pDM_Odm));
+   pDM_Odm->RFPathRxEnable = (u8) PHY_QueryBBReg(pDM_Odm->Adapter, 
ODM_REG(BB_RX_PATH, pDM_Odm), ODM_BIT(BB_RX_PATH, pDM_Odm));
 
-/* START---DIG--- */
+   ODM_InitDebugSetting(pDM_Odm);
 
-/* Remove by Yuchen */
+   pDM_Odm->TxRate = 0xFF;
+}
 
-/* END---DIG--- */
+static void odm_CommonInfoSelfUpdate(struct dm_odm_t *pDM_Odm)
+{
+   u8 EntryCnt = 0;
+   u8 i;
+   PSTA_INFO_T pEntry;
 
-/* START---BB POWER SAVE--- */
-/* Remove BB power Saving by YuChen */
-/* END-BB POWER SAVE--- */
+   if

Re: [CRIU] [PATCH] mnt: allow to add a mount into an existing group

2021-03-23 Thread Pavel Tikhomirov


Hi! Can we restart the discussion on this topic?

In CRIU we need to be able to dump/restore all mount trees of a system 
container (CT). A CT can have anything inside - users who create their 
custom mount configurations, systemd with custom mount namespaces for 
its services, nested application containers inside the CT with their 
own mount namespaces - and all mounts in the CT mount trees can be grouped into 
sharing groups (e.g. same shared_id + master_id pair), and those groups 
can depend on one another, forming a tree structure of sharing groups.


1) Imagine that we have this sharing group tree (in format (shared_id, 
master_id), 0 means no sharing, we don't care about actual mounts for 
now only master-slave dependencies between sharing groups):


(1,0)
  |- (2,1)
  |- (3,1)
   |- (4,3)
|- (0,4)

The main problem of restoring mounts is the fact that sharing groups 
currently can only be inherited, e.g. if you have one mount (first) with 
shared_id = x, master_id = y, the only way to get another mount with 
(x,y) is to create a bindmount from the first mount. Also, to create 
mount (y,z) from mount (x,y) one should also first inherit (x,y) via 
bindmount and then change to (y,z).


This means that the tree mentioned above puts a restriction on the mount 
creation order: one needs to have at least one mount for each of the sharing 
groups (1,0), (3,1) and (4,3) before creating the first mount of the 
sharing group (0,4).


But what if we want to mount (restore) actual mounts in this mount tree 
"reverse" order:


mntid   parent  mountpoint  (shared_id, master_id)
101 0   /tmp(0,4)
102 101 /tmp(4,3)
103 102 /tmp(3,1)
104 103 /tmp(1,0)

Mount 104's sharing group should be created before mount 101, 102 and 
103 sharing groups, but mount 104 should be created after those mounts. 
One can actually prepare this setup (on mainstream kernel) by 
pre-creating sharing groups elsewhere and then binding to /tmp in proper 
order with careful unmounting of propagations (see test.sh attached):


[root@snorch propagation-tests]# bash ../test.sh

960 1120 0:56 / /tmp/propagation-tests/tmp rw,relatime master:452 - 
tmpfs propagation-tests-src rw,inode64
958 960 0:56 / /tmp/propagation-tests/tmp/sub rw,relatime shared:452 
master:451 - tmpfs propagation-tests-src rw,inode64
961 958 0:56 / /tmp/propagation-tests/tmp/sub/sub rw,relatime shared:451 
master:433 - tmpfs propagation-tests-src rw,inode64
963 961 0:56 / /tmp/propagation-tests/tmp/sub/sub/sub rw,relatime 
shared:433 - tmpfs propagation-tests-src rw,inode64



But this "pre-creating" from test.sh is not universal at all and only 
works for this simple case. CRIU does not know anything about the 
history of mount creation for system container, it also does not know 
anything about any temporary mounts which were used and then removed. So 
understanding the proper order is almost impossible like Andrew says.


I also prepared a presentation for Linux Plumbers last year about how 
many problems propagation brings to mount restore in CRIU; you can take 
a look here: https://www.linuxplumbersconf.org/event/7/contributions/640/


2) Propagation creates tons of mounts
3) Mount reparenting
4) "Mount trap"
5) "Non-uniform" propagation
6) “Cross-namespace” sharing groups

Allowing to create mounts private first and create sharing groups later 
and copy sharing groups later instead of inheriting them resolves all 
the problems with propagation at once.


One can take a look on the implementation of sharing group restore in 
CRIU if we have this (mnt: allow to add a mount into an existing group) 
patch applied: 
https://github.com/Snorch/criu/blob/bebbded98128ec787950fa8365a6c74ce6a3b2cb/criu/mount-v2.c#L898


Obviously this does not solve all the problems with mounts I know about 
but it's a big step forward in properly supporting them in CRIU. We 
already have this tested in Virtuozzo for almost a year and it works nice.


Notes:

- There is another idea, but I should say early that I don't like it, 
because with it restoring mounts with criu would be still super complex. 
We can add an extra flag to the mount/move_mount syscalls to disable propagation 
temporarily so that CRIU can restore the mount tree without problems 2-5, 
also we can now create cross-namespace bindmounts with 
(copy_tree+move_mount) to solve 6. But this solution does not help much 
with problem 1 - ordering and the need of temporary mounts. As you can 
see in test.sh you would still need to think hard to solve different 
similar configurations of reverse order between mounts and sharing groups.


- We can actually prohibit cross-namespace MS_SET_GROUP if you like. (If 
both namespaces are non abstract.) We can use open_tree to create a copy 
of the mount with the same sharing group and only then copy sharing from 
the copy while being in proper mountns.


- We still need it:

> this code might be

[PATCH] pinctrl: microchip: fix array overflow

2021-03-23 Thread Arnd Bergmann

From: Arnd Bergmann 

Building with 'make W=1' shows an array overflow:

drivers/pinctrl/pinctrl-microchip-sgpio.c: In function 
'microchip_sgpio_irq_settype':
drivers/pinctrl/pinctrl-microchip-sgpio.c:154:39: error: array subscript 10 is 
above array bounds of 'const u8[10]' {aka 'const unsigned char[10]'} 
[-Werror=array-bounds]
  154 |  u32 regoff = priv->properties->regoff[rno] + off;
  |   ^
drivers/pinctrl/pinctrl-microchip-sgpio.c:55:5: note: while referencing 'regoff'
   55 |  u8 regoff[MAXREG];
  | ^~

It's not clear to me what was meant here, my best guess is that the
offset should have been applied to the third argument instead of the
second.

Fixes: be2dc859abd4 ("pinctrl: pinctrl-microchip-sgpio: Add irq support (for 
sparx5)")
Signed-off-by: Arnd Bergmann 
---
 drivers/pinctrl/pinctrl-microchip-sgpio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c 
b/drivers/pinctrl/pinctrl-microchip-sgpio.c
index f35edb0eac40..4740613cdd03 100644
--- a/drivers/pinctrl/pinctrl-microchip-sgpio.c
+++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c
@@ -572,7 +572,7 @@ static void microchip_sgpio_irq_settype(struct irq_data 
*data,
/* Type value spread over 2 registers sets: low, high bit */
sgpio_clrsetbits(bank->priv, REG_INT_TRIGGER, addr.bit,
 BIT(addr.port), (!!(type & 0x1)) << addr.port);
-   sgpio_clrsetbits(bank->priv, REG_INT_TRIGGER + SGPIO_MAX_BITS, addr.bit,
+   sgpio_clrsetbits(bank->priv, REG_INT_TRIGGER, addr.bit + SGPIO_MAX_BITS,
 BIT(addr.port), (!!(type & 0x2)) << addr.port);
 
if (type == SGPIO_INT_TRG_LEVEL)
-- 
2.29.2

[RFC PATCH v7 03/22] af_vsock: separate receive data loop

2021-03-23 Thread Arseny Krasnov

Move STREAM specific data receive logic to '__vsock_stream_recvmsg()'
dedicated function, while checks, that will be same for both STREAM
and SEQPACKET sockets, stays in 'vsock_connectible_recvmsg()' shared
functions.

Signed-off-by: Arseny Krasnov 
---
 net/vmw_vsock/af_vsock.c | 116 ++-
 1 file changed, 67 insertions(+), 49 deletions(-)

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 421c0303b26f..0bc661e54262 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1895,65 +1895,22 @@ static int vsock_wait_data(struct sock *sk, struct 
wait_queue_entry *wait,
return data;
 }
 
-static int
-vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
- int flags)
+static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
+ size_t len, int flags)
 {
-   struct sock *sk;
-   struct vsock_sock *vsk;
+   struct vsock_transport_recv_notify_data recv_data;
const struct vsock_transport *transport;
-   int err;
-   size_t target;
+   struct vsock_sock *vsk;
ssize_t copied;
+   size_t target;
long timeout;
-   struct vsock_transport_recv_notify_data recv_data;
+   int err;
 
DEFINE_WAIT(wait);
 
-   sk = sock->sk;
vsk = vsock_sk(sk);
-   err = 0;
-
-   lock_sock(sk);
-
transport = vsk->transport;
 
-   if (!transport || sk->sk_state != TCP_ESTABLISHED) {
-   /* Recvmsg is supposed to return 0 if a peer performs an
-* orderly shutdown. Differentiate between that case and when a
-* peer has not connected or a local shutdown occured with the
-* SOCK_DONE flag.
-*/
-   if (sock_flag(sk, SOCK_DONE))
-   err = 0;
-   else
-   err = -ENOTCONN;
-
-   goto out;
-   }
-
-   if (flags & MSG_OOB) {
-   err = -EOPNOTSUPP;
-   goto out;
-   }
-
-   /* We don't check peer_shutdown flag here since peer may actually shut
-* down, but there can be data in the queue that a local socket can
-* receive.
-*/
-   if (sk->sk_shutdown & RCV_SHUTDOWN) {
-   err = 0;
-   goto out;
-   }
-
-   /* It is valid on Linux to pass in a zero-length receive buffer.  This
-* is not an error.  We may as well bail out now.
-*/
-   if (!len) {
-   err = 0;
-   goto out;
-   }
-
/* We must not copy less than target bytes into the user's buffer
 * before returning successfully, so we wait for the consume queue to
 * have that much data to consume before dequeueing.  Note that this
@@ -2012,6 +1969,67 @@ vsock_connectible_recvmsg(struct socket *sock, struct 
msghdr *msg, size_t len,
if (copied > 0)
err = copied;
 
+out:
+   return err;
+}
+
+static int
+vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
+{
+   struct sock *sk;
+   struct vsock_sock *vsk;
+   const struct vsock_transport *transport;
+   int err;
+
+   DEFINE_WAIT(wait);
+
+   sk = sock->sk;
+   vsk = vsock_sk(sk);
+   err = 0;
+
+   lock_sock(sk);
+
+   transport = vsk->transport;
+
+   if (!transport || sk->sk_state != TCP_ESTABLISHED) {
+   /* Recvmsg is supposed to return 0 if a peer performs an
+* orderly shutdown. Differentiate between that case and when a
+* peer has not connected or a local shutdown occurred with the
+* SOCK_DONE flag.
+*/
+   if (sock_flag(sk, SOCK_DONE))
+   err = 0;
+   else
+   err = -ENOTCONN;
+
+   goto out;
+   }
+
+   if (flags & MSG_OOB) {
+   err = -EOPNOTSUPP;
+   goto out;
+   }
+
+   /* We don't check peer_shutdown flag here since peer may actually shut
+* down, but there can be data in the queue that a local socket can
+* receive.
+*/
+   if (sk->sk_shutdown & RCV_SHUTDOWN) {
+   err = 0;
+   goto out;
+   }
+
+   /* It is valid on Linux to pass in a zero-length receive buffer.  This
+* is not an error.  We may as well bail out now.
+*/
+   if (!len) {
+   err = 0;
+   goto out;
+   }
+
+   err = __vsock_stream_recvmsg(sk, msg, len, flags);
+
 out:
release_sock(sk);
return err;
-- 
2.25.1

[RFC PATCH v7 04/22] af_vsock: implement SEQPACKET receive loop

2021-03-23 Thread Arseny Krasnov

This adds a receive loop for SEQPACKET. It looks like the receive loop for
STREAM, but there are a few differences:
1) It doesn't call notify callbacks.
2) It doesn't care about 'SO_SNDLOWAT' and 'SO_RCVLOWAT' values, because
   there is no sense for these values in SEQPACKET case.
3) It waits until whole record is received or error is found during
   receiving.
4) It processes and sets 'MSG_TRUNC' flag.

So to avoid extra conditions for two types of socket inside one loop, two
independent functions were created.

Signed-off-by: Arseny Krasnov 
---
 v6 -> v7:
 'seqpacket_get_len' callback now removed, length of message is returned
  by 'seqpacket_dequeue' callback.

 include/net/af_vsock.h   |  4 ++
 net/vmw_vsock/af_vsock.c | 88 +++-
 2 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index b1c717286993..74ac8a4c4168 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -135,6 +135,10 @@ struct vsock_transport {
bool (*stream_is_active)(struct vsock_sock *);
bool (*stream_allow)(u32 cid, u32 port);
 
+   /* SEQ_PACKET. */
+   int (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
+int flags, bool *msg_ready, size_t 
*record_len);
+
/* Notification. */
int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
int (*notify_poll_out)(struct vsock_sock *, size_t, bool *);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 0bc661e54262..fa0c37f97330 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1973,6 +1973,89 @@ static int __vsock_stream_recvmsg(struct sock *sk, 
struct msghdr *msg,
return err;
 }
 
+static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
+size_t len, int flags)
+{
+   const struct vsock_transport *transport;
+   const struct iovec *orig_iov;
+   unsigned long orig_nr_segs;
+   bool msg_ready;
+   struct vsock_sock *vsk;
+   size_t record_len;
+   long timeout;
+   int err = 0;
+   DEFINE_WAIT(wait);
+
+   vsk = vsock_sk(sk);
+   transport = vsk->transport;
+
+   timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+   orig_nr_segs = msg->msg_iter.nr_segs;
+   orig_iov = msg->msg_iter.iov;
+   msg_ready = false;
+   record_len = 0;
+
+   while (1) {
+   err = vsock_wait_data(sk, &wait, timeout, NULL, 0);
+
+   if (err <= 0) {
+   /* In case of any loop break(timeout, signal
+* interrupt or shutdown), we report user that
+* nothing was copied.
+*/
+   err = 0;
+   break;
+   }
+
+   err = transport->seqpacket_dequeue(vsk, msg, flags, &msg_ready, 
&record_len);
+
+   if (err < 0) {
+   if (err == -EAGAIN) {
+   iov_iter_init(&msg->msg_iter, READ,
+ orig_iov, orig_nr_segs,
+ len);
+   /* Clear 'MSG_EOR' here, because dequeue
+* callback above set it again if it was
+* set by sender. This 'MSG_EOR' is from
+* dropped record.
+*/
+   msg->msg_flags &= ~MSG_EOR;
+   record_len = 0;
+   continue;
+   }
+
+   err = -ENOMEM;
+   break;
+   }
+
+   if (msg_ready)
+   break;
+   }
+
+   if (sk->sk_err)
+   err = -sk->sk_err;
+   else if (sk->sk_shutdown & RCV_SHUTDOWN)
+   err = 0;
+
+   if (msg_ready) {
+   /* User sets MSG_TRUNC, so return real length of
+* packet.
+*/
+   if (flags & MSG_TRUNC)
+   err = record_len;
+   else
+   err = len - msg->msg_iter.count;
+
+   /* Always set MSG_TRUNC if real length of packet is
+* bigger than user's buffer.
+*/
+   if (record_len > len)
+   msg->msg_flags |= MSG_TRUNC;
+   }
+
+   return err;
+}
+
 static int
 vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
  int flags)
@@ -2028,7 +2111,10 @@ vsock_connectible_recvmsg(struct socket *sock, struct 
msghdr *msg, size_t len,
goto out;
}
 
-   err = __vsock_stream_recvmsg(sk, msg, len, flags);
+   if (sk->sk_type == SOCK_STREAM)
+   err = __vsock_stream_recvmsg(sk, msg, len,

[RFC PATCH v7 05/22] af_vsock: separate wait space loop

2021-03-23 Thread Arseny Krasnov

This moves the loop that waits for space on send to a separate function,
because it will be used for sending SEQ_BEGIN/SEQ_END before and
after data transmission. Waiting for SEQ_BEGIN/SEQ_END is needed
because such packets carry a SEQPACKET header that can't be
fragmented by the credit mechanism; to avoid that, the sender waits until
enough space is available.

Signed-off-by: Arseny Krasnov 
---
 include/net/af_vsock.h   |  2 +
 net/vmw_vsock/af_vsock.c | 99 +---
 2 files changed, 63 insertions(+), 38 deletions(-)

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 74ac8a4c4168..7232f6c42a36 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -204,6 +204,8 @@ void vsock_remove_sock(struct vsock_sock *vsk);
 void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
 int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
 bool vsock_find_cid(unsigned int cid);
+int vsock_wait_space(struct sock *sk, size_t space, int flags,
+struct vsock_transport_send_notify_data *send_data);
 
 / TAP /
 
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index fa0c37f97330..617ffe42693d 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1692,6 +1692,65 @@ static int vsock_connectible_getsockopt(struct socket 
*sock,
return 0;
 }
 
+int vsock_wait_space(struct sock *sk, size_t space, int flags,
+struct vsock_transport_send_notify_data *send_data)
+{
+   const struct vsock_transport *transport;
+   struct vsock_sock *vsk;
+   long timeout;
+   int err;
+
+   DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+   vsk = vsock_sk(sk);
+   transport = vsk->transport;
+   timeout = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+   err = 0;
+
+   add_wait_queue(sk_sleep(sk), &wait);
+
+   while (vsock_stream_has_space(vsk) < space &&
+  sk->sk_err == 0 &&
+  !(sk->sk_shutdown & SEND_SHUTDOWN) &&
+  !(vsk->peer_shutdown & RCV_SHUTDOWN)) {
+
+   /* Don't wait for non-blocking sockets. */
+   if (timeout == 0) {
+   err = -EAGAIN;
+   goto out_err;
+   }
+
+   if (send_data) {
+   err = transport->notify_send_pre_block(vsk, send_data);
+   if (err < 0)
+   goto out_err;
+   }
+
+   release_sock(sk);
+   timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
+   lock_sock(sk);
+   if (signal_pending(current)) {
+   err = sock_intr_errno(timeout);
+   goto out_err;
+   } else if (timeout == 0) {
+   err = -EAGAIN;
+   goto out_err;
+   }
+   }
+
+   if (sk->sk_err) {
+   err = -sk->sk_err;
+   } else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
+  (vsk->peer_shutdown & RCV_SHUTDOWN)) {
+   err = -EPIPE;
+   }
+
+out_err:
+   remove_wait_queue(sk_sleep(sk), &wait);
+   return err;
+}
+EXPORT_SYMBOL_GPL(vsock_wait_space);
+
 static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg,
 size_t len)
 {
@@ -1699,10 +1758,8 @@ static int vsock_connectible_sendmsg(struct socket 
*sock, struct msghdr *msg,
struct vsock_sock *vsk;
const struct vsock_transport *transport;
ssize_t total_written;
-   long timeout;
int err;
struct vsock_transport_send_notify_data send_data;
-   DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
sk = sock->sk;
vsk = vsock_sk(sk);
@@ -1740,9 +1797,6 @@ static int vsock_connectible_sendmsg(struct socket *sock, 
struct msghdr *msg,
goto out;
}
 
-   /* Wait for room in the produce queue to enqueue our user's data. */
-   timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
-
err = transport->notify_send_init(vsk, _data);
if (err < 0)
goto out;
@@ -1750,39 +1804,8 @@ static int vsock_connectible_sendmsg(struct socket 
*sock, struct msghdr *msg,
while (total_written < len) {
ssize_t written;
 
-   add_wait_queue(sk_sleep(sk), );
-   while (vsock_stream_has_space(vsk) == 0 &&
-  sk->sk_err == 0 &&
-  !(sk->sk_shutdown & SEND_SHUTDOWN) &&
-  !(vsk->peer_shutdown & RCV_SHUTDOWN)) {
-
-   /* Don't wait for non-blocking sockets. */
-   if (timeout == 0) {
-   err = -EAGAIN;
-   remove_wait_queue(sk_sleep(sk), );
-   goto out_err;
-   }
-
-   err =

[RFC PATCH v7 06/22] af_vsock: implement send logic for SEQPACKET

2021-03-23 Thread Arseny Krasnov

This adds some logic to current stream enqueue function for SEQPACKET
support:
1) Use transport's seqpacket enqueue callback.
2) Return value from enqueue function is whole record length or error
   for SOCK_SEQPACKET.

Signed-off-by: Arseny Krasnov 
---
 v6 -> v7:
 'seqpacket_enqueue' callback interface changed: the 'flags' argument was
 removed, because it duplicated the 'msg_flags' field of the 'msg' argument,
 which already exists.

 include/net/af_vsock.h   |  2 ++
 net/vmw_vsock/af_vsock.c | 21 +++--
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 7232f6c42a36..b7063820d0bc 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -138,6 +138,8 @@ struct vsock_transport {
/* SEQ_PACKET. */
int (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
 int flags, bool *msg_ready, size_t 
*record_len);
+   int (*seqpacket_enqueue)(struct vsock_sock *vsk, struct msghdr *msg,
+size_t len);
 
/* Notification. */
int (*notify_poll_in)(struct vsock_sock *, size_t, bool *);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 617ffe42693d..a98e5daa06b7 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1830,9 +1830,13 @@ static int vsock_connectible_sendmsg(struct socket 
*sock, struct msghdr *msg,
 * responsibility to check how many bytes we were able to send.
 */
 
-   written = transport->stream_enqueue(
-   vsk, msg,
-   len - total_written);
+   if (sk->sk_type == SOCK_SEQPACKET) {
+   written = transport->seqpacket_enqueue(vsk,
+   msg, len - total_written);
+   } else {
+   written = transport->stream_enqueue(vsk,
+   msg, len - total_written);
+   }
if (written < 0) {
err = -ENOMEM;
goto out_err;
@@ -1844,12 +1848,17 @@ static int vsock_connectible_sendmsg(struct socket 
*sock, struct msghdr *msg,
vsk, written, &send_data);
if (err < 0)
goto out_err;
-
}
 
 out_err:
-   if (total_written > 0)
-   err = total_written;
+   if (total_written > 0) {
+   /* Return number of written bytes only if:
+* 1) SOCK_STREAM socket.
+* 2) SOCK_SEQPACKET socket when whole buffer is sent.
+*/
+   if (sk->sk_type == SOCK_STREAM || total_written == len)
+   err = total_written;
+   }
 out:
release_sock(sk);
return err;
-- 
2.25.1

[PATCH] ARM: delay: avoid clang -Wtautological-constant warning

2021-03-23 Thread Arnd Bergmann

From: Arnd Bergmann 

Passing an 8-bit constant into delay() triggers a warning when building
with 'make W=1' using clang:

drivers/clk/actions/owl-pll.c:182:2: error: result of comparison of constant 
2000 with expression of type 'u8' (aka 'unsigned char') is always false 
[-Werror,-Wtautological-constant-out-of-range-compare]
udelay(pll_hw->delay);
^
arch/arm/include/asm/delay.h:84:9: note: expanded from macro 'udelay'
  ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() :  \
   ~~~ ^ ~~
arch/arm/mach-omap2/wd_timer.c:89:3: error: result of comparison of constant 
2000 with expression of type 'u8' (aka 'unsigned char') is always false 
[-Werror,-Wtautological-constant-out-of-range-compare]
udelay(oh->class->sysc->srst_udelay);
^~~~

Shut up the warning by adding a cast to a 64-bit number. A cast to 'int'
would usually be sufficient, but would fail to cause a link-time error
for large 64-bit constants.

Signed-off-by: Arnd Bergmann 
---
 arch/arm/include/asm/delay.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h
index 4f80b72372b4..1bb6417a3a83 100644
--- a/arch/arm/include/asm/delay.h
+++ b/arch/arm/include/asm/delay.h
@@ -81,7 +81,7 @@ extern void __bad_udelay(void);
 
 #define udelay(n)  \
(__builtin_constant_p(n) ?  \
- ((n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() :  \
+ ((u64)(n) > (MAX_UDELAY_MS * 1000) ? __bad_udelay() : \
__const_udelay((n) * UDELAY_MULT)) :\
  __udelay(n))
 
-- 
2.29.2

Re: [PATCH] btrfs: zoned: fix uninitialized max_chunk_size

2021-03-23 Thread David Sterba

On Tue, Mar 23, 2021 at 01:46:19PM +0100, Arnd Bergmann wrote:
> From: Arnd Bergmann 
> 
> The ctl->max_chunk_size member might be used uninitialized
> when none of the three conditions for initializing it in
> init_alloc_chunk_ctl_policy_zoned() are true:
> 
> In function ‘init_alloc_chunk_ctl_policy_zoned’,
> inlined from ‘init_alloc_chunk_ctl’ at fs/btrfs/volumes.c:5023:3,
> inlined from ‘btrfs_alloc_chunk’ at fs/btrfs/volumes.c:5340:2:
> include/linux/compiler-gcc.h:48:45: error: ‘ctl.max_chunk_size’ may be used 
> uninitialized [-Werror=maybe-uninitialized]
>  4998 | ctl->max_chunk_size = min(limit, ctl->max_chunk_size);
>   |   ^~~
> fs/btrfs/volumes.c: In function ‘btrfs_alloc_chunk’:
> fs/btrfs/volumes.c:5316:32: note: ‘ctl’ declared here
>  5316 | struct alloc_chunk_ctl ctl;
>   |^~~
> 
> Initialize it to UINT_MAX and rely on the min() expression to limit
> it.
> 
> Fixes: 1cd6121f2a38 ("btrfs: zoned: implement zoned chunk allocator")
> Signed-off-by: Arnd Bergmann 
> ---
> Note that the -Wmaybe-uninitialized warning is globally disabled
> by default. For some reason I got this warning anyway when building
> this specific file with gcc-11.

The warning catches a theoretical case but this would not happen in
practice.  There are three bits to check and that covers all valid
options, but there should be a final else {} like is in
init_alloc_chunk_ctl_policy_regular that does not let the function
continue as that would mean there are worse problems.

btrfs_alloc_chunk
  init_alloc_chunk_ctl
init_alloc_chunk_ctl_policy_zoned

and btrfs_alloc_chunk validates the ctl->flags against
BTRFS_BLOCK_GROUP_TYPE_MASK, which is exactly the three branches.

> ---
>  fs/btrfs/volumes.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> index bc3b33efddc5..b42b423b6a10 100644
> --- a/fs/btrfs/volumes.c
> +++ b/fs/btrfs/volumes.c
> @@ -4980,6 +4980,7 @@ static void init_alloc_chunk_ctl_policy_zoned(
>   u64 type = ctl->type;
>  
>   ctl->max_stripe_size = zone_size;
> + ctl->max_chunk_size = UINT_MAX;

This would allow the min() to work, but otherwise this is not expected to
happen at all.

>   if (type & BTRFS_BLOCK_GROUP_DATA) {
>   ctl->max_chunk_size = round_down(BTRFS_MAX_DATA_CHUNK_SIZE,
>zone_size);
> -- 
> 2.29.2

Re: [PATCH 02/11] staging: rtl8723bs: moved function prototypes out of core/rtw_efuse.c

2021-03-23 Thread Fabio Aiuto

On Mon, Mar 22, 2021 at 10:29:27PM +0300, Dan Carpenter wrote:
> On Mon, Mar 22, 2021 at 03:31:40PM +0100, Fabio Aiuto wrote:
> > fix the following checkpatch issues:
> > 
> > WARNING: externs should be avoided in .c files
> > 35: FILE: drivers/staging/rtl8723bs/core/rtw_efuse.c:35:
> > +bool
> > 
> > moved two function prototypes in include/rtw_efuse.h
> 
> Can't you just make these functions static instead?
> 
> regards,
> dan carpenter
> 

Hi Dan,

sorry for my recent spamming, I forgot to cc you in my
patchset v2 submit.

Thank you,

fabio

Re: [PATCH] sound/i2c: Simplify the return expression of snd_i2c_bit_sendbyte()

2021-03-23 Thread Johan Hovold

On Tue, Mar 23, 2021 at 08:14:43PM +0800, caizhichao wrote:
> From: Zhichao Cai 
> 
> Simplify the return expression.
> 
> Signed-off-by: Zhichao Cai 
> ---
>  sound/i2c/i2c.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
> 
> diff --git a/sound/i2c/i2c.c b/sound/i2c/i2c.c
> index 847e3b6..dfcc87e 100644
> --- a/sound/i2c/i2c.c
> +++ b/sound/i2c/i2c.c
> @@ -235,9 +235,7 @@ static int snd_i2c_bit_sendbyte(struct snd_i2c_bus *bus, 
> unsigned char data)
>   for (i = 7; i >= 0; i--)
>   snd_i2c_bit_send(bus, !!(data & (1 << i)));
>   err = snd_i2c_bit_ack(bus);
> - if (err < 0)
> - return err;
> - return 0;
> + return err < 0 ? err : 0;

This isn't a simplification, you're just making the code harder to read
for no good reason.

>  }
>  
>  static int snd_i2c_bit_readbyte(struct snd_i2c_bus *bus, int last)

Johan

Re: [RFC PATCH 2/2] integrity: double check iint_cache was initialized

2021-03-23 Thread Tetsuo Handa

On 2021/03/23 21:09, Mimi Zohar wrote:
> Please take a look at the newer version of this patch.   Do you want to
> add any tags?

Oh, I didn't know that you already posted the newer version.

> diff --git a/security/integrity/iint.c b/security/integrity/iint.c
> index 1d20003243c3..0ba01847e836 100644
> --- a/security/integrity/iint.c
> +++ b/security/integrity/iint.c
> @@ -98,6 +98,14 @@ struct integrity_iint_cache *integrity_inode_get(struct 
> inode *inode)
>   struct rb_node *node, *parent = NULL;
>   struct integrity_iint_cache *iint, *test_iint;
>  
> + /*
> +  * The integrity's "iint_cache" is initialized at security_init(),
> +  * unless it is not included in the ordered list of LSMs enabled
> +  * on the boot command line.
> +  */
> + if (!iint_cache)
> + panic("%s: lsm=integrity required.\n", __func__);
> +

This looks strange. If "lsm=" parameter must include "integrity",
it implies that nobody is allowed to disable "integrity" at boot.
Then, why not unconditionally call integrity_iintcache_init() by
not counting on DEFINE_LSM(integrity) declaration?

>   iint = integrity_iint_find(inode);
>   if (iint)
>   return iint;
>

Re: [PATCH RFC 0/3] drivers/char: remove /dev/kmem for good

2021-03-23 Thread David Hildenbrand


On 23.03.21 14:16, Greg Kroah-Hartman wrote:

On Fri, Mar 19, 2021 at 03:34:49PM +0100, David Hildenbrand wrote:

Let's start a discussion if /dev/kmem is worth keeping around and
fixing/maintaining or if we should just remove it now for good.

More details / findings in patch #1. Patch #2 and #3 perform minor cleanups
based on removed /dev/kmem support.

Only compile-tested on x86-64 -- good enough for discussing the general
topic (RFC).


I'll gladly take this in my char/misc tree for now, to show up in
linux-next to get some testing to see if anyone complains.

Let me know if that will help out.


Sure! I'll resend as v1 this week (also removing vwrite() from nommu 
code in patch #3).


Thanks

--
Thanks,

David / dhildenb

[PATCH v2 1/5] mm/migrate.c: remove unnecessary VM_BUG_ON_PAGE on putback_movable_page()

2021-03-23 Thread Miaohe Lin

The !PageLocked() check is implicitly done in PageMovable(). Remove this
explicit one.

Signed-off-by: Miaohe Lin 
---
 mm/migrate.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 47df0df8f21a..facec65c7374 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -145,7 +145,6 @@ void putback_movable_page(struct page *page)
 {
struct address_space *mapping;
 
-   VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageMovable(page), page);
VM_BUG_ON_PAGE(!PageIsolated(page), page);
 
-- 
2.19.1

[PATCH v2 2/5] mm/migrate.c: remove unnecessary rc != MIGRATEPAGE_SUCCESS check in 'else' case

2021-03-23 Thread Miaohe Lin

It's guaranteed that in the 'else' case of the rc == MIGRATEPAGE_SUCCESS
check, rc does not equal to MIGRATEPAGE_SUCCESS. Remove this unnecessary
check.

Reviewed-by: David Hildenbrand 
Signed-off-by: Miaohe Lin 
---
 mm/migrate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index facec65c7374..97da1fabdf72 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1374,7 +1374,7 @@ static int unmap_and_move_huge_page(new_page_t 
get_new_page,
 out:
if (rc == MIGRATEPAGE_SUCCESS)
putback_active_hugepage(hpage);
-   else if (rc != -EAGAIN && rc != MIGRATEPAGE_SUCCESS)
+   else if (rc != -EAGAIN)
list_move_tail(&hpage->lru, ret);
 
/*
-- 
2.19.1

[PATCH v2 3/5] mm/migrate.c: fix potential indeterminate pte entry in migrate_vma_insert_page()

2021-03-23 Thread Miaohe Lin

If the zone device page does not belong to un-addressable device memory,
the variable entry will be uninitialized and lead to indeterminate pte
entry ultimately. Fix this unexpected case and warn about it.

Fixes: df6ad69838fc ("mm/device-public-memory: device memory cache coherent 
with CPU")
Signed-off-by: Miaohe Lin 
---
 mm/migrate.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/mm/migrate.c b/mm/migrate.c
index 97da1fabdf72..d372be3da9b2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2972,6 +2972,13 @@ static void migrate_vma_insert_page(struct migrate_vma 
*migrate,
 
swp_entry = make_device_private_entry(page, 
vma->vm_flags & VM_WRITE);
entry = swp_entry_to_pte(swp_entry);
+   } else {
+   /*
+* For now we only support migrating to un-addressable
+* device memory.
+*/
+   pr_warn_once("Unsupported ZONE_DEVICE page type.\n");
+   goto abort;
}
} else {
entry = mk_pte(page, vma->vm_page_prot);
-- 
2.19.1

[PATCH v2 0/5] Cleanup and fixup for mm/migrate.c

2021-03-23 Thread Miaohe Lin

Hi all,
This series contains cleanups to remove unnecessary VM_BUG_ON_PAGE and
rc != MIGRATEPAGE_SUCCESS check. Also use helper function to remove some
duplicated codes. What's more, this fixes potential deadlock in NUMA
balancing shared exec THP case and so on. More details can be found in
the respective changelogs. Thanks!

v1->v2:
Fix removing the wrong assertion per Rafael.
Use pr_warn_once() instead per David.
Collect Reviewed-by tag.

Miaohe Lin (5):
  mm/migrate.c: remove unnecessary VM_BUG_ON_PAGE on
putback_movable_page()
  mm/migrate.c: remove unnecessary rc != MIGRATEPAGE_SUCCESS check in
'else' case
  mm/migrate.c: fix potential indeterminate pte entry in
migrate_vma_insert_page()
  mm/migrate.c: use helper migrate_vma_collect_skip() in
migrate_vma_collect_hole()
  mm/migrate.c: fix potential deadlock in NUMA balancing shared exec THP
case

 mm/migrate.c | 42 +++---
 1 file changed, 19 insertions(+), 23 deletions(-)

-- 
2.19.1

[PATCH v2 4/5] mm/migrate.c: use helper migrate_vma_collect_skip() in migrate_vma_collect_hole()

2021-03-23 Thread Miaohe Lin

It's more recommended to use helper function migrate_vma_collect_skip() to
skip the unexpected case and it also helps remove some duplicated codes.
Move migrate_vma_collect_skip() above migrate_vma_collect_hole() to avoid
compiler warning.

Reviewed-by: David Hildenbrand 
Signed-off-by: Miaohe Lin 
---
 mm/migrate.c | 28 +++-
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index d372be3da9b2..5357a8527ca2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2315,44 +2315,38 @@ int migrate_misplaced_transhuge_page(struct mm_struct 
*mm,
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_DEVICE_PRIVATE
-static int migrate_vma_collect_hole(unsigned long start,
+static int migrate_vma_collect_skip(unsigned long start,
unsigned long end,
-   __always_unused int depth,
struct mm_walk *walk)
 {
struct migrate_vma *migrate = walk->private;
unsigned long addr;
 
-   /* Only allow populating anonymous memory. */
-   if (!vma_is_anonymous(walk->vma)) {
-   for (addr = start; addr < end; addr += PAGE_SIZE) {
-   migrate->src[migrate->npages] = 0;
-   migrate->dst[migrate->npages] = 0;
-   migrate->npages++;
-   }
-   return 0;
-   }
-
for (addr = start; addr < end; addr += PAGE_SIZE) {
-   migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
migrate->dst[migrate->npages] = 0;
-   migrate->npages++;
-   migrate->cpages++;
+   migrate->src[migrate->npages++] = 0;
}
 
return 0;
 }
 
-static int migrate_vma_collect_skip(unsigned long start,
+static int migrate_vma_collect_hole(unsigned long start,
unsigned long end,
+   __always_unused int depth,
struct mm_walk *walk)
 {
struct migrate_vma *migrate = walk->private;
unsigned long addr;
 
+   /* Only allow populating anonymous memory. */
+   if (!vma_is_anonymous(walk->vma))
+   return migrate_vma_collect_skip(start, end, walk);
+
for (addr = start; addr < end; addr += PAGE_SIZE) {
+   migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
migrate->dst[migrate->npages] = 0;
-   migrate->src[migrate->npages++] = 0;
+   migrate->npages++;
+   migrate->cpages++;
}
 
return 0;
-- 
2.19.1

[PATCH v2 5/5] mm/migrate.c: fix potential deadlock in NUMA balancing shared exec THP case

2021-03-23 Thread Miaohe Lin

Since commit c77c5cbafe54 ("mm: migrate: skip shared exec THP for NUMA
balancing"), the NUMA balancing would skip shared exec transhuge page.
But this enhancement is not suitable for transhuge page. Because it's
required that page_mapcount() must be 1 due to no migration pte dance
is done here. On the other hand, the shared exec transhuge page will
leave the migrate_misplaced_page() with pte entry untouched and page
locked. Thus pagefault for NUMA will be triggered again and deadlock
occurs when we start waiting for the page lock held by ourselves.

Fixes: c77c5cbafe54 ("mm: migrate: skip shared exec THP for NUMA balancing")
Signed-off-by: Miaohe Lin 
---
 mm/migrate.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 5357a8527ca2..68bfa1625898 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2192,9 +2192,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
int page_lru = page_is_file_lru(page);
unsigned long start = address & HPAGE_PMD_MASK;
 
-   if (is_shared_exec_page(vma, page))
-   goto out;
-
new_page = alloc_pages_node(node,
(GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
HPAGE_PMD_ORDER);
@@ -2306,7 +2303,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 
 out_unlock:
unlock_page(page);
-out:
put_page(page);
return 0;
 }
-- 
2.19.1

Re: [syzbot] WARNING in io_wq_destroy

2021-03-23 Thread Jens Axboe

#syz test: git://git.kernel.dk/linux-block wq-no-manager

-- 
Jens Axboe

Re: [syzbot] WARNING in io_wq_destroy

2021-03-23 Thread syzbot

> #syz test: git://git.kernel.dk/linux-block wq-no-manager

This crash does not have a reproducer. I cannot test it.

>
> -- 
> Jens Axboe
>

Re: [PATCH v3 1/8] workqueue: Add resource managed version of delayed work init

2021-03-23 Thread Hans de Goede

Hi,

On 3/23/21 2:56 PM, Matti Vaittinen wrote:
> A few drivers which need a delayed work-queue must cancel work at driver
> detach. Some of those implement remove() solely for this purpose. Help
> drivers to avoid unnecessary remove and error-branch implementation by
> adding managed version of delayed work initialization. This will also
> help drivers to avoid mixing manual and devm based unwinding when other
> resources are handled by devm.
> 
> Signed-off-by: Matti Vaittinen 

Thanks, patch looks good to me:

Reviewed-by: Hans de Goede 

Regards,

Hans



> ---
> Changelog from RFCv2:
>  - RFC dropped. No functional changes.
> 
>  include/linux/devm-helpers.h | 53 
>  1 file changed, 53 insertions(+)
>  create mode 100644 include/linux/devm-helpers.h
> 
> diff --git a/include/linux/devm-helpers.h b/include/linux/devm-helpers.h
> new file mode 100644
> index ..f64e0c9f3763
> --- /dev/null
> +++ b/include/linux/devm-helpers.h
> @@ -0,0 +1,53 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#ifndef __LINUX_DEVM_HELPERS_H
> +#define __LINUX_DEVM_HELPERS_H
> +
> +/*
> + * Functions which do automatically cancel operations or release resources 
> upon
> + * driver detach.
> + *
> + * These should be helpful to avoid mixing the manual and devm-based resource
> + * management which can be source of annoying, rarely occurring,
> + * hard-to-reproduce bugs.
> + *
> + * Please take into account that devm based cancellation may be performed 
> some
> + * time after the remove() is ran.
> + *
> + * Thus mixing devm and manual resource management can easily cause problems
> + * when unwinding operations with dependencies. IRQ scheduling a work in a 
> queue
> + * is typical example where IRQs are often devm-managed and WQs are manually
> + * cleaned at remove(). If IRQs are not manually freed at remove() (and this 
> is
> + * often the case when we use devm for IRQs) we have a period of time after
> + * remove() - and before devm managed IRQs are freed - where new IRQ may fire
> + * and schedule a work item which won't be cancelled because remove() was
> + * already ran.
> + */
> +
> +#include <linux/device.h>
> +#include <linux/workqueue.h>
> +
> +static inline void devm_delayed_work_drop(void *res)
> +{
> + cancel_delayed_work_sync(res);
> +}
> +
> +/**
> + * devm_delayed_work_autocancel - Resource-managed work allocation
> + * @dev: Device which lifetime work is bound to
> + * @pdata: work to be cancelled when driver is detached
> + *
> + * Initialize work which is automatically cancelled when driver is detached.
> + * A few drivers need delayed work which must be cancelled before driver
> + * is detached to avoid accessing removed resources.
> + * devm_delayed_work_autocancel() can be used to omit the explicit
> + * cancellation when driver is detached.
> + */
> +static inline int devm_delayed_work_autocancel(struct device *dev,
> +struct delayed_work *w,
> +work_func_t worker)
> +{
> + INIT_DELAYED_WORK(w, worker);
> + return devm_add_action(dev, devm_delayed_work_drop, w);
> +}
> +
> +#endif
>

Re: [PATCH 3/3] iova: Correct comment for free_cpu_cached_iovas()

2021-03-23 Thread John Garry


On 23/03/2021 13:05, Robin Murphy wrote:

On 2021-03-01 12:12, John Garry wrote:

Function free_cpu_cached_iovas() is not only called when a CPU is
hotplugged, so remove that part of the code comment.


FWIW I read it as clarifying why this is broken out into a separate 
function vs. a monolithic "free all cached IOVAs" routine that handles 
both the per-cpu and global caches 



it never said "*only* used..."


It seems to be implying that.

It's only a code comment, so I don't care too much either way and can 
drop this change.




As such I'd hesitate to call it incorrect, but it's certainly arguable 
whether it needs to be stated or not, especially once the hotplug 
callsite is now obvious in the same file - on which note the function 
itself also shouldn't need to be public any more, no?




Right, I actually missed deleting iommu_dma_free_cpu_cached_iovas(), so 
can fix that now.


Cheers,
John


Robin.


Signed-off-by: John Garry 
---
  drivers/iommu/iova.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index c78312560425..465b3b0eeeb0 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -996,7 +996,7 @@ static void free_iova_rcaches(struct iova_domain 
*iovad)

  }
  /*
- * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
+ * free all the IOVA ranges cached by a cpu
   */
  void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
  {


.

Re: [PATCH bpf-next 2/5] bpf: Add a bpf_snprintf helper

2021-03-23 Thread Florent Revest

On Tue, Mar 23, 2021 at 4:21 AM Alexei Starovoitov
 wrote:
>
> On Wed, Mar 10, 2021 at 11:02:08PM +0100, Florent Revest wrote:
> >
> > +struct bpf_snprintf_buf {
> > + char buf[MAX_SNPRINTF_MEMCPY][MAX_SNPRINTF_STR_LEN];
> > +};
> > +static DEFINE_PER_CPU(struct bpf_snprintf_buf, bpf_snprintf_buf);
> > +static DEFINE_PER_CPU(int, bpf_snprintf_buf_used);
> > +
> > +BPF_CALL_5(bpf_snprintf, char *, out, u32, out_size, char *, fmt, u64 *, 
> > args,
> > +u32, args_len)
> > +{
> > + int err, i, buf_used, copy_size, fmt_cnt = 0, memcpy_cnt = 0;
> > + u64 params[MAX_SNPRINTF_VARARGS];
> > + struct bpf_snprintf_buf *bufs;
> > +
> > + buf_used = this_cpu_inc_return(bpf_snprintf_buf_used);
> > + if (WARN_ON_ONCE(buf_used > 1)) {
>
> this can trigger only if the helper itself gets preempted and
> another bpf prog will run on the same cpu and will call into this helper
> again, right?
> If so, how about adding preempt_disable here to avoid this case?

Ah, neat, that sounds like a good idea indeed. This was really just
cargo-culted from bpf_seq_printf but as part of my grand unification
attempt for the various printf-like helpers, I can try to make it use
preempt_disable as well yes.

> It won't prevent the case where kprobe is inside snprintf core,
> so the counter is still needed, but it wouldn't trigger by accident.

Good point, I will keep it around then.

> Also since bufs are not used always, how about grabbing the
> buffers only when %p or %s are seen in fmt?
> After snprintf() is done it would conditionally do:
> if (bufs_were_used) {
>this_cpu_dec(bpf_snprintf_buf_used);
>preempt_enable();
> }
> This way simple bpf_snprintf won't ever hit EBUSY.

Absolutely, it would be nice. :)

> > + err = -EBUSY;
> > + goto out;
> > + }
> > +
> > + bufs = this_cpu_ptr(&bpf_snprintf_buf);
> > +
> > + /*
> > +  * The verifier has already done most of the heavy-work for us in
> > +  * check_bpf_snprintf_call. We know that fmt is well formatted and 
> > that
> > +  * args_len is valid. The only task left is to convert some of the
> > +  * arguments. For the %s and %pi* specifiers, we need to read buffers
> > +  * from a kernel address during the helper call.
> > +  */
> > + for (i = 0; fmt[i] != '\0'; i++) {
> > + if (fmt[i] != '%')
> > + continue;
> > +
> > + if (fmt[i + 1] == '%') {
> > + i++;
> > + continue;
> > + }
> > +
> > + /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] 
> > */
> > + i++;
> > +
> > + /* skip optional "[0 +-][num]" width formatting field */
> > + while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
> > +fmt[i] == ' ')
> > + i++;
> > + if (fmt[i] >= '1' && fmt[i] <= '9') {
> > + i++;
> > + while (fmt[i] >= '0' && fmt[i] <= '9')
> > + i++;
> > + }
> > +
> > + if (fmt[i] == 's') {
> > + void *unsafe_ptr = (void *)(long)args[fmt_cnt];
> > +
> > + err = 
> > strncpy_from_kernel_nofault(bufs->buf[memcpy_cnt],
> > +   unsafe_ptr,
> > +   
> > MAX_SNPRINTF_STR_LEN);
> > + if (err < 0)
> > + bufs->buf[memcpy_cnt][0] = '\0';
> > + params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
>
> how about:
> char buf[512]; instead?
> instead of memcpy_cnt++ remember how many bytes of the buf were used and
> copy next arg after that.
> The scratch space would be used more efficiently.
> The helper would potentially return ENOSPC if the first string printed via %s
> consumed most of the 512 space and the second string doesn't fit.
> But the verifier-time if (memcpy_cnt >= MAX_SNPRINTF_MEMCPY) can be removed.
> Ten small %s will work fine.

Cool! That is also a good idea :)

> We can allocate a page per-cpu when this helper is used by prog and free
> that page when all progs with bpf_snprintf are unloaded.
> But extra complexity is probably not worth it. I would start with 512 per-cpu.
> It's going to be enough for most users.

Yes, let's maybe keep that for later. I think there is already enough
complexity going into the printf-like helpers unification patch.

> Overall looks great. Cannot wait for v2 :)

Ahah wait until you see that patch! :D

Re: [RFC PATCH 2/2] mm,drm/ttm: Use VM_PFNMAP for TTM vmas

2021-03-23 Thread Jason Gunthorpe

On Tue, Mar 23, 2021 at 12:47:24PM +0100, Daniel Vetter wrote:

> > +static inline bool is_cow_mapping(vm_flags_t flags)
> 
> Bit of a bikeshed, but I wonder whether the public interface shouldn't be
> vma_is_cow_mapping. Or whether this shouldn't be rejected somewhere else,
> since at least in drivers/gpu we have tons of cases that don't check for
> this and get it all kinds of wrong I think.
> 
> remap_pfn_range handles this for many cases, but by far not for all.
> 
> Anyway patch itself lgtm:
> 
> Reviewed-by: Daniel Vetter 

I would like it if io_remap_pfn_range() did not allow shared mappings
at all.

IIRC it doesn't work anyway, the kernel can't reliably copy from IO
pages eg the "_copy_from_user_inatomic()" under cow_user_page() will
not work on s390 that requires all IO memory be accessed with special
instructions.

Unfortunately I have no idea what the long ago special case of
allowing COW'd IO mappings is. :\

Jason

Re: [RFC PATCH v5 04/10] KVM: selftests: Print the errno besides error-string in TEST_ASSERT

2021-03-23 Thread Andrew Jones

On Tue, Mar 23, 2021 at 09:52:25PM +0800, Yanan Wang wrote:
> Print the errno besides error-string in TEST_ASSERT in the format of
> "errno=%d - %s" will explicitly indicate that the string is an error
> information. Besides, the errno is easier to be used for debugging
> than the error-string.
> 
> Suggested-by: Andrew Jones 
> Signed-off-by: Yanan Wang 
> ---
>  tools/testing/selftests/kvm/lib/assert.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/testing/selftests/kvm/lib/assert.c 
> b/tools/testing/selftests/kvm/lib/assert.c
> index 5ebbd0d6b472..71ade6100fd3 100644
> --- a/tools/testing/selftests/kvm/lib/assert.c
> +++ b/tools/testing/selftests/kvm/lib/assert.c
> @@ -71,9 +71,9 @@ test_assert(bool exp, const char *exp_str,
>  
>   fprintf(stderr, " Test Assertion Failure \n"
>   "  %s:%u: %s\n"
> - "  pid=%d tid=%d - %s\n",
> + "  pid=%d tid=%d errno=%d - %s\n",
>   file, line, exp_str, getpid(), _gettid(),
> - strerror(errno));
> + errno, strerror(errno));
>   test_dump_stack();
>   if (fmt) {
>   fputs("  ", stderr);
> -- 
> 2.19.1
>

Reviewed-by: Andrew Jones

Re: [RFC PATCH v5 02/10] tools headers: Add a macro to get HUGETLB page sizes for mmap

2021-03-23 Thread Andrew Jones



$SUBJECT says "tools headers", but this is actually changing
a UAPI header and then copying the change to tools.

Thanks,
drew

On Tue, Mar 23, 2021 at 09:52:23PM +0800, Yanan Wang wrote:
> We know that if a system supports multiple hugetlb page sizes,
> the desired hugetlb page size can be specified in bits [26:31]
> of the flag arguments. The value in these 6 bits will be the
> shift of each hugetlb page size.
> 
> So add a macro to get the page size shift and then calculate the
> corresponding hugetlb page size, using flag x.
> 
> Cc: Ben Gardon 
> Cc: Ingo Molnar 
> Cc: Adrian Hunter 
> Cc: Jiri Olsa 
> Cc: Arnaldo Carvalho de Melo 
> Cc: Arnd Bergmann 
> Cc: Michael Kerrisk 
> Cc: Thomas Gleixner 
> Suggested-by: Ben Gardon 
> Signed-off-by: Yanan Wang 
> Reviewed-by: Ben Gardon 
> ---
>  include/uapi/linux/mman.h   | 2 ++
>  tools/include/uapi/linux/mman.h | 2 ++
>  2 files changed, 4 insertions(+)
> 
> diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
> index f55bc680b5b0..d72df73b182d 100644
> --- a/include/uapi/linux/mman.h
> +++ b/include/uapi/linux/mman.h
> @@ -41,4 +41,6 @@
>  #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
>  #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
>  
> +#define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & 
> MAP_HUGE_MASK))
> +
>  #endif /* _UAPI_LINUX_MMAN_H */
> diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
> index f55bc680b5b0..d72df73b182d 100644
> --- a/tools/include/uapi/linux/mman.h
> +++ b/tools/include/uapi/linux/mman.h
> @@ -41,4 +41,6 @@
>  #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
>  #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
>  
> +#define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & 
> MAP_HUGE_MASK))
> +
>  #endif /* _UAPI_LINUX_MMAN_H */
> -- 
> 2.19.1
>

Re: [RFC PATCH v5 08/10] KVM: selftests: List all hugetlb src types specified with page sizes

2021-03-23 Thread Andrew Jones

On Tue, Mar 23, 2021 at 09:52:29PM +0800, Yanan Wang wrote:
> With VM_MEM_SRC_ANONYMOUS_HUGETLB, we currently can only use system
> default hugetlb pages to back the testing guest memory. In order to
> add flexibility, now list all the known hugetlb backing src types with
> different page sizes, so that we can specify use of hugetlb pages of the
> exact granularity that we want. And as all the known hugetlb page sizes
> are listed, it's appropriate for all architectures.
> 
> Besides, the helper get_backing_src_pagesz() is added to get the
> granularity of different backing src types(anonymous, thp, hugetlb).
> 
> Suggested-by: Ben Gardon 
> Signed-off-by: Yanan Wang 
> ---
>  .../testing/selftests/kvm/include/test_util.h |  18 ++-
>  tools/testing/selftests/kvm/lib/kvm_util.c|   2 +-
>  tools/testing/selftests/kvm/lib/test_util.c   | 109 --
>  3 files changed, 116 insertions(+), 13 deletions(-)
>

Reviewed-by: Andrew Jones

< 3 4 5 6 7 8 9 10 11 12 >

701 - 800 of 1668 matches

Mail list logo