[PATCH 4/9] macintosh/via-macii: Remove read_done state

2020-06-27 Thread Finn Thain
The driver state machine may enter the 'read_done' state when leaving the
'idle' or 'reading' state. This transition is pointless, as is the extra
interrupt it requires. The interrupt is produced by the transceiver
(even when it has no data to send) because an extra EVEN/ODD toggle
was signalled by the driver. Drop the extra state to simplify the code.

Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") # v5.0+
Tested-by: Stan Johnson 
Signed-off-by: Finn Thain 
---
 drivers/macintosh/via-macii.c | 70 ++-
 1 file changed, 28 insertions(+), 42 deletions(-)

diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index 6a5cd7de05baf..d29c87943ca46 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -110,7 +110,6 @@ static enum macii_state {
idle,
sending,
reading,
-   read_done,
 } macii_state;
 
 static struct adb_request *current_req; /* first request struct in the queue */
@@ -411,8 +410,8 @@ static irqreturn_t macii_interrupt(int irq, void *arg)
reply_len = 1;
} else {
/* bus timeout */
-   macii_state = read_done;
reply_len = 0;
+   break;
}
 
/* set ADB state = even for first data byte */
@@ -471,20 +470,6 @@ static irqreturn_t macii_interrupt(int irq, void *arg)
current_req = req->next;
if (req->done)
(*req->done)(req);
-
-   if (!current_req)
-   macii_queue_poll();
-
-   if (current_req && macii_state == idle)
-   macii_start();
-
-   if (macii_state == idle) {
-   /* reset to shift in */
-   via[ACR] &= ~SR_OUT;
-   x = via[SR];
-   /* set ADB state idle - might get SRQ */
-   via[B] = (via[B] & ~ST_MASK) | ST_IDLE;
-   }
break;
}
} else {
@@ -511,12 +496,28 @@ static irqreturn_t macii_interrupt(int irq, void *arg)
} else if (status == ST_ODD && reply_len == 2) {
srq_asserted = true;
} else {
-   macii_state = read_done;
+   macii_state = idle;
+
+   if (bus_timeout)
+   reply_len = 0;
+
+   if (reading_reply) {
+   struct adb_request *req = current_req;
+
+   req->reply_len = reply_len;
+
+   req->complete = 1;
+   current_req = req->next;
+   if (req->done)
+   (*req->done)(req);
+   } else if (reply_len && autopoll_devs) {
+   adb_input(reply_buf, reply_len, 0);
+   }
+   break;
}
}
 
-   if (macii_state == reading &&
-   reply_len < ARRAY_SIZE(reply_buf)) {
+   if (reply_len < ARRAY_SIZE(reply_buf)) {
reply_ptr++;
*reply_ptr = x;
reply_len++;
@@ -526,37 +527,22 @@ static irqreturn_t macii_interrupt(int irq, void *arg)
via[B] ^= ST_MASK;
break;
 
-   case read_done:
-   x = via[SR];
-
-   if (bus_timeout)
-   reply_len = 0;
-
-   if (reading_reply) {
-   reading_reply = 0;
-   req = current_req;
-   req->reply_len = reply_len;
-   req->complete = 1;
-   current_req = req->next;
-   if (req->done)
-   (*req->done)(req);
-   } else if (reply_len && autopoll_devs)
-   adb_input(reply_buf, reply_len, 0);
-
-   macii_state = idle;
+   default:
+   break;
+   }
 
+   if (macii_state == idle) {
if (!current_req)
macii_queue_poll();
 
if (current_req)
macii_start();
 
-   if (macii_state == idle)
+   if (macii_state == idle) {
+   via[ACR] &= ~SR_OUT;
+   x = 

[PATCH 9/9] macintosh/via-macii: Clarify definition of macii_init()

2020-06-27 Thread Finn Thain
The function prototype correctly specifies the 'static' storage class.
Let the function definition match the declaration for better readability.

Signed-off-by: Finn Thain 
---
 drivers/macintosh/via-macii.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index 2f9be4ec7d345..060e03f2264bc 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -140,7 +140,7 @@ static int macii_probe(void)
 }
 
 /* Initialize the driver */
-int macii_init(void)
+static int macii_init(void)
 {
unsigned long flags;
int err;
-- 
2.26.2



[PATCH 0/9] Macintosh II ADB driver fixes

2020-06-27 Thread Finn Thain
Various issues with the via-macii driver have become apparent over the
years. Some examples:

 - A Talk command response can be lost. This can result in phantom devices
being probed or an incorrect device handler ID being retrieved.

 - A reply packet containing a null byte can get truncated. Such packets
are sometimes generated by ADB keyboards.

 - A Talk Register 3 reply from device 15 (that is, command byte 0xFF)
can be mistaken for a bus timeout (empty packet).

This patch series contains fixes for all known bugs in the via-macii
driver, plus a few code style improvements. It has been successfully
tested on an Apple Centris 650 and qemu-system-m68k.

The patched kernel does regress on past QEMU releases, due to ADB
transceiver emulation bugs. Those bugs have been fixed in mainline QEMU.
My thanks go to Mark Cave-Ayland for that effort and for figuring out
the improvements to the signalling between the VIA and the transceiver.

Note to -stable maintainers: these fixes can be cherry-picked without
difficulty, if you have the 5 commits that appeared in v5.0:

b52dce8738938 macintosh/via-macii: Synchronous bus reset
5f93d7081a47e macintosh/via-macii: Remove BUG_ON assertions
5ce6185c2ef4e macintosh/via-macii: Simplify locking
351e5ad327d07 macintosh/via-macii, macintosh/adb-iop: Modernize printk calls
47fd2060660e6 macintosh/via-macii, macintosh/adb-iop: Clean up whitespace

Just for the sake of simplicity, the 'fixes' tags in this series limit
backporting to 'v5.0+'.


Finn Thain (9):
  macintosh/via-macii: Access autopoll_devs when inside lock
  macintosh/via-macii: Poll the device most likely to respond
  macintosh/via-macii: Handle /CTLR_IRQ signal correctly
  macintosh/via-macii: Remove read_done state
  macintosh/via-macii: Handle poll replies correctly
  macintosh/via-macii: Use bool type for reading_reply variable
  macintosh/via-macii: Use unsigned type for autopoll_devs variable
  macintosh/via-macii: Use the stack for reset request storage
  macintosh/via-macii: Clarify definition of macii_init()

 drivers/macintosh/via-macii.c | 324 +++---
 1 file changed, 179 insertions(+), 145 deletions(-)

-- 
2.26.2



[PATCH 8/9] macintosh/via-macii: Use the stack for reset request storage

2020-06-27 Thread Finn Thain
The adb_request struct can be stored on the stack because the request
is synchronous and is completed before the function returns.

Tested-by: Stan Johnson 
Signed-off-by: Finn Thain 
---
 drivers/macintosh/via-macii.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index 447273967e1e8..2f9be4ec7d345 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -313,7 +313,7 @@ static void macii_poll(void)
 /* Reset the bus */
 static int macii_reset_bus(void)
 {
-   static struct adb_request req;
+   struct adb_request req;
 
/* Command = 0, Address = ignored */
adb_request(, NULL, ADBREQ_NOSEND, 1, ADB_BUSRESET);
-- 
2.26.2



[PATCH 7/9] macintosh/via-macii: Use unsigned type for autopoll_devs variable

2020-06-27 Thread Finn Thain
Tested-by: Stan Johnson 
Signed-off-by: Finn Thain 
---
 drivers/macintosh/via-macii.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index e143ddb81de34..447273967e1e8 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -125,7 +125,7 @@ static bool srq_asserted;/* have to poll for the device 
that asserted it */
 static u8 last_cmd;  /* the most recent command byte transmitted */
 static u8 last_talk_cmd;/* the most recent Talk command byte transmitted */
 static u8 last_poll_cmd; /* the most recent Talk R0 command byte transmitted */
-static int autopoll_devs;  /* bits set are device addresses to be polled */
+static unsigned int autopoll_devs;  /* bits set are device addresses to poll */
 
 /* Check for MacII style ADB */
 static int macii_probe(void)
@@ -291,7 +291,7 @@ static int macii_autopoll(int devs)
local_irq_save(flags);
 
/* bit 1 == device 1, and so on. */
-   autopoll_devs = devs & 0xFFFE;
+   autopoll_devs = (unsigned int)devs & 0xFFFE;
 
if (!current_req) {
macii_queue_poll();
-- 
2.26.2



[PATCH 6/9] macintosh/via-macii: Use bool type for reading_reply variable

2020-06-27 Thread Finn Thain
Tested-by: Stan Johnson 
Signed-off-by: Finn Thain 
---
 drivers/macintosh/via-macii.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index 8d5ef77b4a435..e143ddb81de34 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -116,7 +116,7 @@ static struct adb_request *current_req; /* first request 
struct in the queue */
 static struct adb_request *last_req; /* last request struct in the queue */
 static unsigned char reply_buf[16];/* storage for autopolled replies */
 static unsigned char *reply_ptr; /* next byte in reply_buf or req->reply */
-static int reading_reply;/* store reply in reply_buf else req->reply */
+static bool reading_reply;   /* store reply in reply_buf else req->reply */
 static int data_index;  /* index of the next byte to send from req->data */
 static int reply_len; /* number of bytes received in reply_buf or req->reply */
 static int status;  /* VIA's ADB status bits captured upon interrupt */
@@ -394,7 +394,7 @@ static irqreturn_t macii_interrupt(int irq, void *arg)
WARN_ON((status & ST_MASK) != ST_IDLE);
 
reply_ptr = reply_buf;
-   reading_reply = 0;
+   reading_reply = false;
 
bus_timeout = false;
srq_asserted = false;
@@ -442,7 +442,7 @@ static irqreturn_t macii_interrupt(int irq, void *arg)
 */
macii_state = reading;
 
-   reading_reply = 0;
+   reading_reply = false;
reply_ptr = reply_buf;
*reply_ptr = last_talk_cmd;
reply_len = 1;
@@ -456,7 +456,7 @@ static irqreturn_t macii_interrupt(int irq, void *arg)
if (req->reply_expected) {
macii_state = reading;
 
-   reading_reply = 1;
+   reading_reply = true;
reply_ptr = req->reply;
*reply_ptr = req->data[1];
reply_len = 1;
@@ -466,7 +466,7 @@ static irqreturn_t macii_interrupt(int irq, void *arg)
} else if ((req->data[1] & OP_MASK) == TALK) {
macii_state = reading;
 
-   reading_reply = 0;
+   reading_reply = false;
reply_ptr = reply_buf;
*reply_ptr = req->data[1];
reply_len = 1;
-- 
2.26.2



[PATCH 5/9] macintosh/via-macii: Handle poll replies correctly

2020-06-27 Thread Finn Thain
Userspace applications may use /dev/adb to send Talk requests. Such
requests always have req->reply_expected == 1. The same is true of Talk
requests sent by the kernel, except for poll requests queued internally
by the via-macii driver. Those requests have req->reply_expected == 0.

Consequently, poll reply packets get treated like autopoll reply packets.
(It doesn't make sense to try to distinguish them.) Always enter 'reading'
state after a poll request, so that the reply gets collected and passed
to adb_input(), and none go missing.

All Talk replies passed to adb_input() come from polling or autopolling,
so call adb_input() with the autopoll parameter set to 1.

Fixes: d95fd5fce88f0 ("m68k: Mac II ADB fixes") # v5.0+
Tested-by: Stan Johnson 
Signed-off-by: Finn Thain 
---
 drivers/macintosh/via-macii.c | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index d29c87943ca46..8d5ef77b4a435 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -463,6 +463,21 @@ static irqreturn_t macii_interrupt(int irq, void *arg)
 
via[ACR] &= ~SR_OUT;
x = via[SR];
+   } else if ((req->data[1] & OP_MASK) == TALK) {
+   macii_state = reading;
+
+   reading_reply = 0;
+   reply_ptr = reply_buf;
+   *reply_ptr = req->data[1];
+   reply_len = 1;
+
+   via[ACR] &= ~SR_OUT;
+   x = via[SR];
+
+   req->complete = 1;
+   current_req = req->next;
+   if (req->done)
+   (*req->done)(req);
} else {
macii_state = idle;
 
@@ -510,8 +525,9 @@ static irqreturn_t macii_interrupt(int irq, void *arg)
current_req = req->next;
if (req->done)
(*req->done)(req);
-   } else if (reply_len && autopoll_devs) {
-   adb_input(reply_buf, reply_len, 0);
+   } else if (reply_len && autopoll_devs &&
+  reply_buf[0] == last_poll_cmd) {
+   adb_input(reply_buf, reply_len, 1);
}
break;
}
-- 
2.26.2



[PATCH 3/9] macintosh/via-macii: Handle /CTLR_IRQ signal correctly

2020-06-27 Thread Finn Thain
I'm told that the /CTLR_IRQ signal from the ADB transceiver gets
interpreted by MacOS to mean SRQ, bus timeout or end-of-packet depending
on the circumstances, and that Linux's via-macii driver does not
correctly interpret this signal.

Instead, the via-macii driver interprets certain received byte values
(0x00 and 0xFF) as signalling end of packet and bus timeout
(respectively). Problem is, those values can also appear under other
circumstances.

This patch changes the bus timeout, end of packet and SRQ detection logic
to bring it closer to the logic that MacOS reportedly uses.

Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2") # v5.0+
Reported-by: Mark Cave-Ayland 
Tested-by: Stan Johnson 
Signed-off-by: Finn Thain 
---
 drivers/macintosh/via-macii.c | 166 --
 1 file changed, 97 insertions(+), 69 deletions(-)

diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index d4f1a65c5f1fd..6a5cd7de05baf 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -80,6 +80,8 @@ static volatile unsigned char *via;
 /* ADB command byte structure */
 #define ADDR_MASK  0xF0
 #define CMD_MASK   0x0F
+#define OP_MASK0x0C
+#define TALK   0x0C
 
 static int macii_init_via(void);
 static void macii_start(void);
@@ -119,9 +121,10 @@ static int reading_reply;/* store reply in 
reply_buf else req->reply */
 static int data_index;  /* index of the next byte to send from req->data */
 static int reply_len; /* number of bytes received in reply_buf or req->reply */
 static int status;  /* VIA's ADB status bits captured upon interrupt */
-static int last_status;  /* status bits as at previous interrupt */
-static int srq_asserted; /* have to poll for the device that asserted it */
+static bool bus_timeout;   /* no data was sent by the device */
+static bool srq_asserted;/* have to poll for the device that asserted it */
 static u8 last_cmd;  /* the most recent command byte transmitted */
+static u8 last_talk_cmd;/* the most recent Talk command byte transmitted */
 static u8 last_poll_cmd; /* the most recent Talk R0 command byte transmitted */
 static int autopoll_devs;  /* bits set are device addresses to be polled */
 
@@ -170,7 +173,6 @@ static int macii_init_via(void)
 
/* Set up state: idle */
via[B] |= ST_IDLE;
-   last_status = via[B] & (ST_MASK | CTLR_IRQ);
 
/* Shift register on input */
via[ACR] = (via[ACR] & ~SR_CTRL) | SR_EXT;
@@ -336,13 +338,6 @@ static void macii_start(void)
 * And req->nbytes is the number of bytes of real data plus one.
 */
 
-   /* store command byte */
-   last_cmd = req->data[1];
-
-   /* If this is a Talk Register 0 command, store the command byte */
-   if ((last_cmd & CMD_MASK) == ADB_READREG(0, 0))
-   last_poll_cmd = last_cmd;
-
/* Output mode */
via[ACR] |= SR_OUT;
/* Load data */
@@ -352,6 +347,9 @@ static void macii_start(void)
 
macii_state = sending;
data_index = 2;
+
+   bus_timeout = false;
+   srq_asserted = false;
 }
 
 /*
@@ -360,15 +358,17 @@ static void macii_start(void)
  * generating shift register interrupts (SR_INT) for us. This means there has
  * to be activity on the ADB bus. The chip will poll to achieve this.
  *
- * The basic ADB state machine was left unchanged from the original MacII code
- * by Alan Cox, which was based on the CUDA driver for PowerMac.
- * The syntax of the ADB status lines is totally different on MacII,
- * though. MacII uses the states Command -> Even -> Odd -> Even ->...-> Idle
- * for sending and Idle -> Even -> Odd -> Even ->...-> Idle for receiving.
- * Start and end of a receive packet are signalled by asserting /IRQ on the
- * interrupt line (/IRQ means the CTLR_IRQ bit in port B; not to be confused
- * with the VIA shift register interrupt. /IRQ never actually interrupts the
- * processor, it's just an ordinary input.)
+ * The VIA Port B output signalling works as follows. After the ADB transceiver
+ * sees a transition on the PB4 and PB5 lines it will crank over the VIA shift
+ * register which eventually raises the SR_INT interrupt. The PB4/PB5 outputs
+ * are toggled with each byte as the ADB transaction progresses.
+ *
+ * Request with no reply expected (and empty transceiver buffer):
+ * CMD -> IDLE
+ * Request with expected reply packet (or with buffered autopoll packet):
+ * CMD -> EVEN -> ODD -> EVEN -> ... -> IDLE
+ * Unsolicited packet:
+ * IDLE -> EVEN -> ODD -> EVEN -> ... -> IDLE
  */
 static irqreturn_t macii_interrupt(int irq, void *arg)
 {
@@ -388,31 +388,31 @@ static irqreturn_t macii_interrupt(int irq, void *arg)
}
}
 
-   last_status = status;
status = via[B] & (ST_MASK | CTLR_IRQ);
 
switch (macii_state) {
case idle:
-   if (reading_reply) {
-   

[PATCH 2/9] macintosh/via-macii: Poll the device most likely to respond

2020-06-27 Thread Finn Thain
Poll the most recently polled device by default, rather than the lowest
device address that happens to be enabled in autopoll_devs. This improves
input latency. Re-use macii_queue_poll() rather than duplicate that logic.
This eliminates a static struct and function.

Fixes: d95fd5fce88f0 ("m68k: Mac II ADB fixes") # v5.0+
Tested-by: Stan Johnson 
Signed-off-by: Finn Thain 
---
 drivers/macintosh/via-macii.c | 99 +++
 1 file changed, 53 insertions(+), 46 deletions(-)

diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index 6aa903529570d..d4f1a65c5f1fd 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -77,6 +77,10 @@ static volatile unsigned char *via;
 #define ST_ODD 0x20/* ADB state: odd data byte */
 #define ST_IDLE0x30/* ADB state: idle, nothing to 
send */
 
+/* ADB command byte structure */
+#define ADDR_MASK  0xF0
+#define CMD_MASK   0x0F
+
 static int macii_init_via(void);
 static void macii_start(void);
 static irqreturn_t macii_interrupt(int irq, void *arg);
@@ -117,7 +121,8 @@ static int reply_len; /* number of bytes received in 
reply_buf or req->reply */
 static int status;  /* VIA's ADB status bits captured upon interrupt */
 static int last_status;  /* status bits as at previous interrupt */
 static int srq_asserted; /* have to poll for the device that asserted it */
-static int command_byte; /* the most recent command byte transmitted */
+static u8 last_cmd;  /* the most recent command byte transmitted */
+static u8 last_poll_cmd; /* the most recent Talk R0 command byte transmitted */
 static int autopoll_devs;  /* bits set are device addresses to be polled */
 
 /* Check for MacII style ADB */
@@ -179,35 +184,49 @@ static int macii_init_via(void)
 /* Send an ADB poll (Talk Register 0 command prepended to the request queue) */
 static void macii_queue_poll(void)
 {
-   /* No point polling the active device as it will never assert SRQ, so
-* poll the next device in the autopoll list. This could leave us
-* stuck in a polling loop if an unprobed device is asserting SRQ.
-* In theory, that could only happen if a device was plugged in after
-* probing started. Unplugging it again will break the cycle.
-* (Simply polling the next higher device often ends up polling almost
-* every device (after wrapping around), which takes too long.)
-*/
-   int device_mask;
-   int next_device;
static struct adb_request req;
+   unsigned char poll_command;
+   unsigned int poll_addr;
 
+   /* This only polls devices in the autopoll list, which assumes that
+* unprobed devices never assert SRQ. That could happen if a device was
+* plugged in after the adb bus scan. Unplugging it again will resolve
+* the problem. This behaviour is similar to MacOS.
+*/
if (!autopoll_devs)
return;
 
-   device_mask = (1 << (((command_byte & 0xF0) >> 4) + 1)) - 1;
-   if (autopoll_devs & ~device_mask)
-   next_device = ffs(autopoll_devs & ~device_mask) - 1;
-   else
-   next_device = ffs(autopoll_devs) - 1;
+   /* The device most recently polled may not be the best device to poll
+* right now. Some other device(s) may have signalled SRQ (the active
+* device won't do that). Or the autopoll list may have been changed.
+* Try polling the next higher address.
+*/
+   poll_addr = (last_poll_cmd & ADDR_MASK) >> 4;
+   if ((srq_asserted && last_cmd == last_poll_cmd) ||
+   !(autopoll_devs & (1 << poll_addr))) {
+   unsigned int higher_devs;
+
+   higher_devs = autopoll_devs & -(1 << (poll_addr + 1));
+   poll_addr = ffs(higher_devs ? higher_devs : autopoll_devs) - 1;
+   }
 
-   adb_request(, NULL, ADBREQ_NOSEND, 1, ADB_READREG(next_device, 0));
+   /* Send a Talk Register 0 command */
+   poll_command = ADB_READREG(poll_addr, 0);
+
+   /* No need to repeat this Talk command. The transceiver will do that
+* as long as it is idle.
+*/
+   if (poll_command == last_cmd)
+   return;
+
+   adb_request(, NULL, ADBREQ_NOSEND, 1, poll_command);
 
req.sent = 0;
req.complete = 0;
req.reply_len = 0;
req.next = current_req;
 
-   if (current_req != NULL) {
+   if (WARN_ON(current_req)) {
current_req = 
} else {
current_req = 
@@ -266,37 +285,22 @@ static int macii_write(struct adb_request *req)
 /* Start auto-polling */
 static int macii_autopoll(int devs)
 {
-   static struct adb_request req;
unsigned long flags;
-   int err = 0;
 
local_irq_save(flags);
 
/* bit 1 == device 1, and so on. */
autopoll_devs = devs & 0xFFFE;
 

[PATCH 1/9] macintosh/via-macii: Access autopoll_devs when inside lock

2020-06-27 Thread Finn Thain
The interrupt handler should be excluded when accessing the autopoll_devs
variable.

Fixes: d95fd5fce88f0 ("m68k: Mac II ADB fixes") # v5.0+
Tested-by: Stan Johnson 
Signed-off-by: Finn Thain 
---
 drivers/macintosh/via-macii.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index ac824d7b2dcfc..6aa903529570d 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -270,15 +270,12 @@ static int macii_autopoll(int devs)
unsigned long flags;
int err = 0;
 
+   local_irq_save(flags);
+
/* bit 1 == device 1, and so on. */
autopoll_devs = devs & 0xFFFE;
 
-   if (!autopoll_devs)
-   return 0;
-
-   local_irq_save(flags);
-
-   if (current_req == NULL) {
+   if (autopoll_devs && !current_req) {
/* Send a Talk Reg 0. The controller will repeatedly transmit
 * this as long as it is idle.
 */
-- 
2.26.2



Re: [PATCH 01/11] kexec_file: allow archs to handle special regions while locating memory hole

2020-06-27 Thread piliu
Hi Hari,

If get_exclude_memory_ranges() in [4/11] turns out to be unnecessary,
then this patch is redundant as well. As I understand it, memblock has
already achieved the purpose that get_exclude_memory_ranges() is meant
to serve.

Thanks,
Pingfan

On 06/27/2020 03:04 AM, Hari Bathini wrote:
> Some archs can have special memory regions, within the given memory
> range, which can't be used for the buffer in a kexec segment. As
> kexec_add_buffer() function is being called from generic code as well,
> add weak arch_kexec_add_buffer definition for archs to override & take
> care of special regions before trying to locate a memory hole.
> 
> Signed-off-by: Hari Bathini 
> ---
>  include/linux/kexec.h |5 +
>  kernel/kexec_file.c   |   37 +
>  2 files changed, 38 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index 1776eb2..1237682 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -195,6 +195,11 @@ int __weak arch_kexec_apply_relocations(struct 
> purgatory_info *pi,
>   const Elf_Shdr *relsec,
>   const Elf_Shdr *symtab);
>  
> +extern int arch_kexec_add_buffer(struct kexec_buf *kbuf);
> +
> +/* arch_kexec_add_buffer calls this when it is ready */
> +extern int __kexec_add_buffer(struct kexec_buf *kbuf);
> +
>  extern int kexec_add_buffer(struct kexec_buf *kbuf);
>  int kexec_locate_mem_hole(struct kexec_buf *kbuf);
>  
> diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
> index bb05fd5..a0b4f7f 100644
> --- a/kernel/kexec_file.c
> +++ b/kernel/kexec_file.c
> @@ -669,10 +669,6 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
>   */
>  int kexec_add_buffer(struct kexec_buf *kbuf)
>  {
> -
> - struct kexec_segment *ksegment;
> - int ret;
> -
>   /* Currently adding segment this way is allowed only in file mode */
>   if (!kbuf->image->file_mode)
>   return -EINVAL;
> @@ -696,6 +692,25 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
>   kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE);
>   kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
>  
> + return arch_kexec_add_buffer(kbuf);
> +}
> +
> +/**
> + * __kexec_add_buffer - arch_kexec_add_buffer would call this function after
> + *  updating kbuf, to place a buffer in a kexec segment.
> + * @kbuf:   Buffer contents and memory parameters.
> + *
> + * This function assumes that kexec_mutex is held.
> + * On successful return, @kbuf->mem will have the physical address of
> + * the buffer in memory.
> + *
> + * Return: 0 on success, negative errno on error.
> + */
> +int __kexec_add_buffer(struct kexec_buf *kbuf)
> +{
> + struct kexec_segment *ksegment;
> + int ret;
> +
>   /* Walk the RAM ranges and allocate a suitable range for the buffer */
>   ret = kexec_locate_mem_hole(kbuf);
>   if (ret)
> @@ -711,6 +726,20 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
>   return 0;
>  }
>  
> +/**
> + * arch_kexec_add_buffer - Some archs have memory regions within the given
> + * range that can't be used to place a kexec segment.
> + * Such archs can override this function to take care
> + * of them before trying to locate the memory hole.
> + * @kbuf:  Buffer contents and memory parameters.
> + *
> + * Return: 0 on success, negative errno on error.
> + */
> +int __weak arch_kexec_add_buffer(struct kexec_buf *kbuf)
> +{
> + return __kexec_add_buffer(kbuf);
> +}
> +
>  /* Calculate and store the digest of segments */
>  static int kexec_calculate_store_digests(struct kimage *image)
>  {
> 
> 
> ___
> kexec mailing list
> ke...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
> 



Re: [PATCH 04/11] ppc64/kexec_file: avoid stomping memory used by special regions

2020-06-27 Thread piliu
Hi Hari,

After a quick read through this series, I have a few questions/comments on
this patch for the time being. Please see the comments inline.

On 06/27/2020 03:05 AM, Hari Bathini wrote:
> crashkernel region could have an overlap with special memory regions
> like  opal, rtas, tce-table & such. These regions are referred to as
> exclude memory ranges. Set up these ranges during image probe in order
> to avoid them while finding the buffer for different kdump segments.
> Implement kexec_locate_mem_hole_ppc64() that locates a memory hole
> accounting for these ranges. Also, override arch_kexec_add_buffer()
> to locate a memory hole & later call __kexec_add_buffer() function
> with kbuf->mem set to skip the generic locate memory hole lookup.
> 
> Signed-off-by: Hari Bathini 
> ---
>  arch/powerpc/include/asm/crashdump-ppc64.h |   10 +
>  arch/powerpc/include/asm/kexec.h   |7 -
>  arch/powerpc/kexec/elf_64.c|7 +
>  arch/powerpc/kexec/file_load_64.c  |  292 
> 
>  4 files changed, 312 insertions(+), 4 deletions(-)
>  create mode 100644 arch/powerpc/include/asm/crashdump-ppc64.h
> 
> diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h 
> b/arch/powerpc/include/asm/crashdump-ppc64.h
> new file mode 100644
> index 000..3596c25
> --- /dev/null
> +++ b/arch/powerpc/include/asm/crashdump-ppc64.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +#ifndef _ARCH_POWERPC_KEXEC_CRASHDUMP_PPC64_H
> +#define _ARCH_POWERPC_KEXEC_CRASHDUMP_PPC64_H
> +
> +/* min & max addresses for kdump load segments */
> +#define KDUMP_BUF_MIN(crashk_res.start)
> +#define KDUMP_BUF_MAX((crashk_res.end < ppc64_rma_size) ? \
> +  crashk_res.end : (ppc64_rma_size - 1))
> +
> +#endif /* __ARCH_POWERPC_KEXEC_CRASHDUMP_PPC64_H */
> diff --git a/arch/powerpc/include/asm/kexec.h 
> b/arch/powerpc/include/asm/kexec.h
> index 7008ea1..bf47a01 100644
> --- a/arch/powerpc/include/asm/kexec.h
> +++ b/arch/powerpc/include/asm/kexec.h
> @@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long 
> indirection_page, unsigned long reboot_co
>  #ifdef CONFIG_KEXEC_FILE
>  extern const struct kexec_file_ops kexec_elf64_ops;
>  
> -#ifdef CONFIG_IMA_KEXEC
>  #define ARCH_HAS_KIMAGE_ARCH
>  
>  struct kimage_arch {
> + struct crash_mem *exclude_ranges;
> +
> +#ifdef CONFIG_IMA_KEXEC
>   phys_addr_t ima_buffer_addr;
>   size_t ima_buffer_size;
> -};
>  #endif
> +};
>  
>  int setup_purgatory(struct kimage *image, const void *slave_code,
>   const void *fdt, unsigned long kernel_load_addr,
> @@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void 
> *fdt,
>   unsigned long initrd_load_addr,
>   unsigned long initrd_len, const char *cmdline);
>  #endif /* CONFIG_PPC64 */
> +
>  #endif /* CONFIG_KEXEC_FILE */
>  
>  #else /* !CONFIG_KEXEC_CORE */
> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
> index 23ad04c..c695f94 100644
> --- a/arch/powerpc/kexec/elf_64.c
> +++ b/arch/powerpc/kexec/elf_64.c
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  static void *elf64_load(struct kimage *image, char *kernel_buf,
>   unsigned long kernel_len, char *initrd,
> @@ -46,6 +47,12 @@ static void *elf64_load(struct kimage *image, char 
> *kernel_buf,
>   if (ret)
>   goto out;
>  
> + if (image->type == KEXEC_TYPE_CRASH) {
> + /* min & max buffer values for kdump case */
> + kbuf.buf_min = pbuf.buf_min = KDUMP_BUF_MIN;
> + kbuf.buf_max = pbuf.buf_max = KDUMP_BUF_MAX;
> + }
> +
>   ret = kexec_elf_load(image, , _info, , _load_addr);
>   if (ret)
>   goto out;
> diff --git a/arch/powerpc/kexec/file_load_64.c 
> b/arch/powerpc/kexec/file_load_64.c
> index e6bff960..f1d7160 100644
> --- a/arch/powerpc/kexec/file_load_64.c
> +++ b/arch/powerpc/kexec/file_load_64.c
> @@ -17,6 +17,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  
>  const struct kexec_file_ops * const kexec_file_loaders[] = {
>   _elf64_ops,
> @@ -24,6 +26,247 @@ const struct kexec_file_ops * const kexec_file_loaders[] 
> = {
>  };
>  
>  /**
> + * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
> + * regions like opal/rtas, tce-table, initrd,
> + * kernel, htab which should be avoided while
> + * setting up kexec load segments.
> + * @mem_ranges:Range list to add the memory ranges to.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
Is it needed? See the comment below.
> +{
> + int ret;
> +
> + ret = add_tce_mem_ranges(mem_ranges);
> + if (ret)
> + goto out;
> +
> +

[PATCH] kbuild: introduce ccflags-remove-y and asflags-remove-y

2020-06-27 Thread Masahiro Yamada
CFLAGS_REMOVE_<file>.o works per object, that is, there is no
convenient way to filter out flags for every object in a directory.

Add ccflags-remove-y and asflags-remove-y to make this easy.

Use ccflags-remove-y to clean up some Makefiles.

Suggested-by: Sami Tolvanen 
Signed-off-by: Masahiro Yamada 
---

 arch/arm/boot/compressed/Makefile | 6 +-
 arch/powerpc/xmon/Makefile| 3 +--
 arch/sh/boot/compressed/Makefile  | 5 +
 kernel/trace/Makefile | 4 ++--
 lib/Makefile  | 5 +
 scripts/Makefile.lib  | 4 ++--
 6 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/arch/arm/boot/compressed/Makefile 
b/arch/arm/boot/compressed/Makefile
index 00602a6fba04..3d5691b23951 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -103,13 +103,9 @@ clean-files += piggy_data lib1funcs.S ashldi3.S 
bswapsdi2.S hyp-stub.S
 
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 
-ifeq ($(CONFIG_FUNCTION_TRACER),y)
-ORIG_CFLAGS := $(KBUILD_CFLAGS)
-KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
-endif
-
 ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin \
 -I$(obj) $(DISABLE_ARM_SSP_PER_TASK_PLUGIN)
+ccflags-remove-$(CONFIG_FUNCTION_TRACER) += -pg
 asflags-y := -DZIMAGE
 
 # Supply kernel BSS size to the decompressor via a linker symbol.
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 89c76ca35640..55cbcdd88ac0 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -7,8 +7,7 @@ UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
 
 # Disable ftrace for the entire directory
-ORIG_CFLAGS := $(KBUILD_CFLAGS)
-KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
+ccflags-remove-y += $(CC_FLAGS_FTRACE)
 
 ifdef CONFIG_CC_IS_CLANG
 # clang stores addresses on the stack causing the frame size to blow
diff --git a/arch/sh/boot/compressed/Makefile b/arch/sh/boot/compressed/Makefile
index ad0e2403e56f..589d2d8a573d 100644
--- a/arch/sh/boot/compressed/Makefile
+++ b/arch/sh/boot/compressed/Makefile
@@ -28,10 +28,7 @@ IMAGE_OFFSET := $(shell /bin/bash -c 'printf "0x%08x" \
$(CONFIG_BOOT_LINK_OFFSET)]')
 endif
 
-ifeq ($(CONFIG_MCOUNT),y)
-ORIG_CFLAGS := $(KBUILD_CFLAGS)
-KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
-endif
+ccflags-remove-$(CONFIG_MCOUNT) += -pg
 
 LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(IMAGE_OFFSET) -e startup \
   -T $(obj)/../../kernel/vmlinux.lds
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 6575bb0a0434..7492844a8b1b 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -2,9 +2,9 @@
 
 # Do not instrument the tracer itself:
 
+ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
+
 ifdef CONFIG_FUNCTION_TRACER
-ORIG_CFLAGS := $(KBUILD_CFLAGS)
-KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
 
 # Avoid recursion due to instrumentation.
 KCSAN_SANITIZE := n
diff --git a/lib/Makefile b/lib/Makefile
index b1c42c10073b..b2ed4beddd68 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -3,10 +3,7 @@
 # Makefile for some libs needed in the kernel.
 #
 
-ifdef CONFIG_FUNCTION_TRACER
-ORIG_CFLAGS := $(KBUILD_CFLAGS)
-KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
-endif
+ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
 
 # These files are disabled because they produce lots of non-interesting and/or
 # flaky coverage that is not a function of syscall inputs. For example,
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 99ac59c59826..5da420f13f9b 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -113,10 +113,10 @@ modfile_flags  = -DKBUILD_MODFILE=$(call 
stringify,$(modfile))
 
 orig_c_flags   = $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) \
  $(ccflags-y) $(CFLAGS_$(target-stem).o)
-_c_flags   = $(filter-out $(CFLAGS_REMOVE_$(target-stem).o), 
$(orig_c_flags))
+_c_flags   = $(filter-out $(ccflags-remove-y) 
$(CFLAGS_REMOVE_$(target-stem).o), $(orig_c_flags))
 orig_a_flags   = $(KBUILD_CPPFLAGS) $(KBUILD_AFLAGS) \
  $(asflags-y) $(AFLAGS_$(target-stem).o)
-_a_flags   = $(filter-out $(AFLAGS_REMOVE_$(target-stem).o), 
$(orig_a_flags))
+_a_flags   = $(filter-out $(asflags-remove-y) 
$(AFLAGS_REMOVE_$(target-stem).o), $(orig_a_flags))
 _cpp_flags = $(KBUILD_CPPFLAGS) $(cppflags-y) 
$(CPPFLAGS_$(target-stem).lds)
 
 #
-- 
2.25.1



[powerpc:next-test] BUILD SUCCESS 552e738289b6487cd82198369ddd00338b684c0b

2020-06-27 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  
next-test
branch HEAD: 552e738289b6487cd82198369ddd00338b684c0b  
powerpc/selftest/ptrace-pkey: IAMR and uamor cannot be updated by ptrace

elapsed time: 2179m

configs tested: 148
configs skipped: 8

The following configs have been built successfully.
More configs may be tested in the coming days.

arm defconfig
arm  allyesconfig
arm  allmodconfig
arm   allnoconfig
arm64allyesconfig
arm64   defconfig
arm64allmodconfig
arm64 allnoconfig
arc haps_hs_smp_defconfig
s390 allyesconfig
powerpc  g5_defconfig
mipsjmr3927_defconfig
sh   se7751_defconfig
arm   imx_v6_v7_defconfig
armxcep_defconfig
arm  pxa255-idp_defconfig
arm  tango4_defconfig
arm cm_x300_defconfig
sh microdev_defconfig
m68k allmodconfig
powerpc   ppc64_defconfig
h8300allyesconfig
arc defconfig
armlart_defconfig
m68k  atari_defconfig
armmini2440_defconfig
arm  pxa168_defconfig
arm pxa_defconfig
arm lpc18xx_defconfig
mips   ip27_defconfig
arm eseries_pxa_defconfig
mips  loongson3_defconfig
m68k alldefconfig
nios2 10m50_defconfig
mips decstation_defconfig
arm   versatile_defconfig
mips  ath25_defconfig
mipsnlm_xlp_defconfig
sparcallyesconfig
arm   netwinder_defconfig
mipsmaltaup_xpa_defconfig
powerpcgamecube_defconfig
ia64defconfig
h8300   h8s-sim_defconfig
sh  landisk_defconfig
sh  kfr2r09_defconfig
mips  maltasmvp_eva_defconfig
sh apsh4a3a_defconfig
openriscor1ksim_defconfig
arm axm55xx_defconfig
mips  pistachio_defconfig
arm  ixp4xx_defconfig
arm   spear13xx_defconfig
i386 alldefconfig
nds32 allnoconfig
sh   se7724_defconfig
mips loongson1b_defconfig
pariscallnoconfig
mips   jazz_defconfig
x86_64   alldefconfig
arm s3c2410_defconfig
powerpc mpc512x_defconfig
openrisc simple_smp_defconfig
sh   cayman_defconfig
arm bcm2835_defconfig
sh  sh7785lcr_32bit_defconfig
i386  allnoconfig
i386 allyesconfig
i386defconfig
i386  debian-10.3
ia64 allmodconfig
ia64  allnoconfig
ia64 allyesconfig
m68k  allnoconfig
m68k   sun3_defconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
nios2allyesconfig
openriscdefconfig
c6x  allyesconfig
c6x   allnoconfig
openrisc allyesconfig
nds32   defconfig
csky allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allmodconfig
xtensa  defconfig
arc  allyesconfig
sh   allmodconfig
shallnoconfig
microblazeallnoconfig
mips allyesconfig
mips  allnoconfig
mips allmodconfig
parisc  defconfig
parisc   allyesconfig
parisc   

[powerpc:fixes-test] BUILD SUCCESS 896066aa0685af3434637998b76218c2045142a8

2020-06-27 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  
fixes-test
branch HEAD: 896066aa0685af3434637998b76218c2045142a8  selftests/powerpc: Fix 
build failure in ebb tests

elapsed time: 2777m

configs tested: 159
configs skipped: 7

The following configs have been built successfully.
More configs may be tested in the coming days.

arm defconfig
arm  allyesconfig
arm  allmodconfig
arm   allnoconfig
arm64allyesconfig
arm64   defconfig
arm64allmodconfig
arm64 allnoconfig
arc haps_hs_smp_defconfig
s390 allyesconfig
powerpc  g5_defconfig
mipsjmr3927_defconfig
sh   se7751_defconfig
arm   imx_v6_v7_defconfig
armxcep_defconfig
arm  pxa255-idp_defconfig
arm  tango4_defconfig
arm cm_x300_defconfig
sh microdev_defconfig
m68k allmodconfig
powerpc   ppc64_defconfig
arm pxa_defconfig
arm lpc18xx_defconfig
mips   ip27_defconfig
arm eseries_pxa_defconfig
mips  loongson3_defconfig
m68k alldefconfig
nios2 10m50_defconfig
mips decstation_defconfig
arm   versatile_defconfig
mips  ath25_defconfig
mipsnlm_xlp_defconfig
sparcallyesconfig
arm   netwinder_defconfig
mipsmaltaup_xpa_defconfig
powerpcgamecube_defconfig
h8300   h8s-sim_defconfig
sh  landisk_defconfig
sh  kfr2r09_defconfig
mips  maltasmvp_eva_defconfig
sh apsh4a3a_defconfig
openriscor1ksim_defconfig
i386 alldefconfig
sh   se7724_defconfig
mips loongson1b_defconfig
nds32 allnoconfig
pariscallnoconfig
armlart_defconfig
mips   jazz_defconfig
x86_64   alldefconfig
arm s3c2410_defconfig
powerpc mpc512x_defconfig
openrisc simple_smp_defconfig
sh   cayman_defconfig
arm bcm2835_defconfig
sh  sh7785lcr_32bit_defconfig
i386  allnoconfig
i386 allyesconfig
i386defconfig
i386  debian-10.3
ia64 allmodconfig
ia64defconfig
ia64  allnoconfig
ia64 allyesconfig
m68k  allnoconfig
m68k   sun3_defconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
nios2allyesconfig
openriscdefconfig
c6x  allyesconfig
c6x   allnoconfig
openrisc allyesconfig
nds32   defconfig
csky allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
h8300allmodconfig
xtensa  defconfig
arc defconfig
arc  allyesconfig
sh   allmodconfig
shallnoconfig
microblazeallnoconfig
mips allyesconfig
mips  allnoconfig
mips allmodconfig
parisc  defconfig
parisc   allyesconfig
parisc   allmodconfig
powerpc defconfig
powerpc  allyesconfig
powerpc  rhel-kconfig
powerpc  allmodconfig
powerpc   allnoconfig
x86_64   randconfig-a004-20200624
x86_64   

[powerpc:merge] BUILD SUCCESS f66ebd048174992db612ec8157c83f6c6601e8b0

2020-06-27 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  
merge
branch HEAD: f66ebd048174992db612ec8157c83f6c6601e8b0  Automatic merge of 
'master', 'next' and 'fixes' (2020-06-27 21:56)

elapsed time: 822m

configs tested: 139
configs skipped: 6

The following configs have been built successfully.
More configs may be tested in the coming days.

arm defconfig
arm  allyesconfig
arm  allmodconfig
arm   allnoconfig
arm64allyesconfig
arm64   defconfig
arm64allmodconfig
arm64 allnoconfig
arc haps_hs_smp_defconfig
powerpc  g5_defconfig
mipsjmr3927_defconfig
sh   se7751_defconfig
arm   imx_v6_v7_defconfig
armxcep_defconfig
arm  pxa255-idp_defconfig
arm  tango4_defconfig
arm cm_x300_defconfig
sh microdev_defconfig
m68k allmodconfig
powerpc   ppc64_defconfig
h8300allyesconfig
arc defconfig
m68k  atari_defconfig
armmini2440_defconfig
arm  pxa168_defconfig
m68k alldefconfig
nios2 10m50_defconfig
mips decstation_defconfig
arm   versatile_defconfig
mips  ath25_defconfig
mipsnlm_xlp_defconfig
sparcallyesconfig
arm   netwinder_defconfig
mipsmaltaup_xpa_defconfig
powerpcgamecube_defconfig
ia64defconfig
h8300   h8s-sim_defconfig
sh  landisk_defconfig
sh  kfr2r09_defconfig
mips  maltasmvp_eva_defconfig
sh apsh4a3a_defconfig
openriscor1ksim_defconfig
i386 alldefconfig
sh   se7724_defconfig
mips loongson1b_defconfig
pariscallnoconfig
armlart_defconfig
mips  loongson3_defconfig
mips   jazz_defconfig
x86_64   alldefconfig
arm s3c2410_defconfig
powerpc mpc512x_defconfig
openrisc simple_smp_defconfig
sh   cayman_defconfig
arm bcm2835_defconfig
sh  sh7785lcr_32bit_defconfig
i386  allnoconfig
i386 allyesconfig
i386defconfig
i386  debian-10.3
ia64 allmodconfig
ia64  allnoconfig
ia64 allyesconfig
m68k  allnoconfig
m68k   sun3_defconfig
m68kdefconfig
m68k allyesconfig
nios2   defconfig
nios2allyesconfig
openriscdefconfig
c6x  allyesconfig
c6x   allnoconfig
openrisc allyesconfig
nds32   defconfig
nds32 allnoconfig
csky allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allmodconfig
xtensa  defconfig
arc  allyesconfig
sh   allmodconfig
shallnoconfig
microblazeallnoconfig
mips allyesconfig
mips  allnoconfig
mips allmodconfig
parisc  defconfig
parisc   allyesconfig
parisc   allmodconfig
powerpc  allyesconfig
powerpc  rhel-kconfig
powerpc  allmodconfig
powerpc   allnoconfig
powerpc defconfig
i386 randconfig-a002-20200624
i386 randconfig-a006-20200624
i386 randconfig-a003-20200624
i386 

Re: [PATCH 2/8] opeinrisc: switch to generic version of pte allocation

2020-06-27 Thread Stafford Horne
On Sat, Jun 27, 2020 at 05:34:47PM +0300, Mike Rapoport wrote:
> From: Mike Rapoport 
> 
> Replace pte_alloc_one(), pte_free() and pte_free_kernel() with the generic
> implementation. The only actual functional change is the addition of
> __GFP_ACCOUNT for the allocation of the user page tables.
> 
> The pte_alloc_one_kernel() is kept back because its implementation on
> openrisc is different than the generic one.
> 
> Signed-off-by: Mike Rapoport 

Thanks for this.

Acked-by: Stafford Horne 


Re: [PATCH 1/3] powerpc: inline doorbell sending functions

2020-06-27 Thread kernel test robot
Hi Nicholas,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on scottwood/next v5.8-rc2 next-20200626]
[cannot apply to kvm-ppc/kvm-ppc-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-pseries-IPI-doorbell-improvements/20200627-230544
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-c003-20200628 (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All error/warnings (new ones prefixed by >>):

   In file included from arch/powerpc/kernel/asm-offsets.c:38:
   arch/powerpc/include/asm/dbell.h: In function 'doorbell_global_ipi':
>> arch/powerpc/include/asm/dbell.h:114:12: error: implicit declaration of 
>> function 'get_hard_smp_processor_id'; did you mean 'raw_smp_processor_id'? 
>> [-Werror=implicit-function-declaration]
 114 |  u32 tag = get_hard_smp_processor_id(cpu);
 |^
 |raw_smp_processor_id
   arch/powerpc/include/asm/dbell.h: In function 'doorbell_try_core_ipi':
>> arch/powerpc/include/asm/dbell.h:146:28: error: implicit declaration of 
>> function 'cpu_sibling_mask'; did you mean 'cpu_online_mask'? 
>> [-Werror=implicit-function-declaration]
 146 |  if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
 |^~~~
 |cpu_online_mask
>> arch/powerpc/include/asm/dbell.h:146:28: warning: passing argument 2 of 
>> 'cpumask_test_cpu' makes pointer from integer without a cast 
>> [-Wint-conversion]
 146 |  if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
 |^~
 ||
 |int
   In file included from include/linux/workqueue.h:15,
from include/linux/rhashtable-types.h:15,
from include/linux/ipc.h:7,
from include/uapi/linux/sem.h:5,
from include/linux/sem.h:5,
from include/linux/compat.h:14,
from arch/powerpc/kernel/asm-offsets.c:14:
   include/linux/cpumask.h:365:67: note: expected 'const struct cpumask *' but 
argument is of type 'int'
 365 | static inline int cpumask_test_cpu(int cpu, const struct cpumask 
*cpumask)
 | 
~~^~~
   cc1: some warnings being treated as errors
   make[2]: *** [scripts/Makefile.build:114: arch/powerpc/kernel/asm-offsets.s] 
Error 1
   make[2]: Target '__build' not remade because of errors.
   make[1]: *** [Makefile:1175: prepare0] Error 2
   make[1]: Target 'prepare' not remade because of errors.
   make: *** [Makefile:185: __sub-make] Error 2
   make: Target 'prepare' not remade because of errors.

vim +114 arch/powerpc/include/asm/dbell.h

   100  
   101  /*
   102   * Doorbells must only be used if CPU_FTR_DBELL is available.
   103   * msgsnd is used in HV, and msgsndp is used in !HV.
   104   *
   105   * These should be used by platform code that is aware of restrictions.
   106   * Other arch code should use ->cause_ipi.
   107   *
   108   * doorbell_global_ipi() sends a dbell to any target CPU.
   109   * Must be used only by architectures that address msgsnd target
   110   * by PIR/get_hard_smp_processor_id.
   111   */
   112  static inline void doorbell_global_ipi(int cpu)
   113  {
 > 114  u32 tag = get_hard_smp_processor_id(cpu);
   115  
   116  kvmppc_set_host_ipi(cpu);
   117  /* Order previous accesses vs. msgsnd, which is treated as a 
store */
   118  ppc_msgsnd_sync();
   119  ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
   120  }
   121  
   122  /*
   123   * doorbell_core_ipi() sends a dbell to a target CPU in the same core.
   124   * Must be used only by architectures that address msgsnd target
   125   * by TIR/cpu_thread_in_core.
   126   */
   127  static inline void doorbell_core_ipi(int cpu)
   128  {
   129  u32 tag = cpu_thread_in_core(cpu);
   130  
   131  kvmppc_set_host_ipi(cpu);
   132  /* Order previous accesses vs. msgsnd, which is treated as a 
store */
   133  ppc_msgsnd_sync();
   134  ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
   135  }
   136  
   137  /*
   138   * Attempt to cause a core doorbell if destination is on the same core.
   139   * Returns 1 on success, 0 on failure.
   140   */
   141  static inline int doorbell_try_core_ipi(int cpu)
   142  {
   143  int 

Re: [PATCH 4/8] asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()

2020-06-27 Thread Matthew Wilcox
On Sat, Jun 27, 2020 at 05:34:49PM +0300, Mike Rapoport wrote:
> More elaborate versions on arm64 and x86 account memory for the user page
> tables and call to pgtable_pmd_page_ctor() as the part of PMD page
> initialization.
> 
> Move the arm64 version to include/asm-generic/pgalloc.h and use the generic
> version on several architectures.
> 
> The pgtable_pmd_page_ctor() is a NOP when ARCH_ENABLE_SPLIT_PMD_PTLOCK is
> not enabled, so there is no functional change for most architectures except
> for the addition of __GFP_ACCOUNT for allocation of user page tables.

Thanks for including this line; it reminded me that we're not setting
the PageTable flag on the page, nor accounting it to the zone page stats.
Hope you don't mind me tagging a patch to do that on as 9/8.

We could also do with a pud_page_[cd]tor and maybe even p4d/pgd versions.
But that brings me to the next question -- could/should some of this
be moved over to asm-generic/pgalloc.h?  The ctor/dtor aren't called
from anywhere else, and there's value to reducing the total amount of
code in mm.h, but then there's also value to keeping all the ifdef
ARCH_ENABLE_SPLIT_PMD_PTLOCK code together too.  So I'm a bit torn.
What do you think?


[PATCH 9/8] mm: Account PMD tables like PTE tables

2020-06-27 Thread Matthew Wilcox
We account the PTE level of the page tables to the process in order to
make smarter OOM decisions and help diagnose why memory is fragmented.
For these same reasons, we should account pages allocated for PMDs.
With larger process address spaces and ASLR, the number of PMDs in use
is higher than it used to be so the inaccuracy is starting to matter.

Signed-off-by: Matthew Wilcox (Oracle) 
---
 include/linux/mm.h | 24 
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b87310c10..b283e25fcffa 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2271,7 +2271,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct 
*mm, pmd_t *pmd)
return ptlock_ptr(pmd_to_page(pmd));
 }
 
-static inline bool pgtable_pmd_page_ctor(struct page *page)
+static inline bool pmd_ptlock_init(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
page->pmd_huge_pte = NULL;
@@ -2279,7 +2279,7 @@ static inline bool pgtable_pmd_page_ctor(struct page 
*page)
return ptlock_init(page);
 }
 
-static inline void pgtable_pmd_page_dtor(struct page *page)
+static inline void pmd_ptlock_free(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
VM_BUG_ON_PAGE(page->pmd_huge_pte, page);
@@ -2296,8 +2296,8 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct 
*mm, pmd_t *pmd)
return >page_table_lock;
 }
 
-static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; }
-static inline void pgtable_pmd_page_dtor(struct page *page) {}
+static inline bool pmd_ptlock_init(struct page *page) { return true; }
+static inline void pmd_ptlock_free(struct page *page) {}
 
 #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte)
 
@@ -2310,6 +2310,22 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, 
pmd_t *pmd)
return ptl;
 }
 
+static inline bool pgtable_pmd_page_ctor(struct page *page)
+{
+   if (!pmd_ptlock_init(page))
+   return false;
+   __SetPageTable(page);
+   inc_zone_page_state(page, NR_PAGETABLE);
+   return true;
+}
+
+static inline void pgtable_pmd_page_dtor(struct page *page)
+{
+   pmd_ptlock_free(page);
+   __ClearPageTable(page);
+   dec_zone_page_state(page, NR_PAGETABLE);
+}
+
 /*
  * No scalability reason to split PUD locks yet, but follow the same pattern
  * as the PMD locks to make it easier if we decide to.  The VM should not be
-- 
2.27.0



Re: [PATCH 0/8] mm: cleanup usage of <asm/pgalloc.h>

2020-06-27 Thread Matthew Wilcox
On Sat, Jun 27, 2020 at 05:34:45PM +0300, Mike Rapoport wrote:
> Most architectures have very similar versions of pXd_alloc_one() and
> pXd_free_one() for intermediate levels of page table. 
> These patches add generic versions of these functions in
> <asm-generic/pgalloc.h> and enable use of the generic functions where
> appropriate.

For the series:

Reviewed-by: Matthew Wilcox (Oracle) 


Re: [GIT PULL] Please pull powerpc/linux.git powerpc-5.8-4 tag

2020-06-27 Thread pr-tracker-bot
The pull request you sent on Sat, 27 Jun 2020 22:06:08 +1000:

> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
> tags/powerpc-5.8-4

has been merged into torvalds/linux.git:
https://git.kernel.org/torvalds/c/21d2f6850c09fdec730c11d35406da1dc541432d

Thank you!

-- 
Deet-doot-dot, I am a bot.
https://korg.wiki.kernel.org/userdoc/prtracker


[PATCH 3/3] powerpc/pseries: Add KVM guest doorbell restrictions

2020-06-27 Thread Nicholas Piggin
KVM guests have certain restrictions and performance quirks when
using doorbells. This patch tests for KVM environment in doorbell
setup, and optimises IPI performance:

 - PowerVM guests may now use doorbells even if they are secure.

 - KVM guests no longer use doorbells if XIVE is available.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/firmware.h   |  2 ++
 arch/powerpc/include/asm/kvm_para.h   | 26 ++--
 arch/powerpc/platforms/pseries/firmware.c | 14 +
 arch/powerpc/platforms/pseries/smp.c  | 38 ++-
 4 files changed, 42 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/include/asm/firmware.h 
b/arch/powerpc/include/asm/firmware.h
index 6003c2e533a0..4dadb84ff2b2 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -134,7 +134,9 @@ extern unsigned int __start___fw_ftr_fixup, 
__stop___fw_ftr_fixup;
 
 #ifdef CONFIG_PPC_PSERIES
 void pseries_probe_fw_features(void);
+bool is_kvm_guest(void);
 #else
+static inline bool is_kvm_guest(void) { return false; }
 static inline void pseries_probe_fw_features(void) { };
 #endif
 
diff --git a/arch/powerpc/include/asm/kvm_para.h 
b/arch/powerpc/include/asm/kvm_para.h
index 9c1f6b4b9bbf..744612054c94 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -8,35 +8,15 @@
 #ifndef __POWERPC_KVM_PARA_H__
 #define __POWERPC_KVM_PARA_H__
 
-#include 
-
-#ifdef CONFIG_KVM_GUEST
-
-#include 
-
-static inline int kvm_para_available(void)
-{
-   struct device_node *hyper_node;
-
-   hyper_node = of_find_node_by_path("/hypervisor");
-   if (!hyper_node)
-   return 0;
+#include 
 
-   if (!of_device_is_compatible(hyper_node, "linux,kvm"))
-   return 0;
-
-   return 1;
-}
-
-#else
+#include 
 
 static inline int kvm_para_available(void)
 {
-   return 0;
+   return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest();
 }
 
-#endif
-
 static inline unsigned int kvm_arch_para_features(void)
 {
unsigned long r;
diff --git a/arch/powerpc/platforms/pseries/firmware.c 
b/arch/powerpc/platforms/pseries/firmware.c
index 3e49cc23a97a..f58eb10011dd 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -184,3 +184,17 @@ void __init pseries_probe_fw_features(void)
 {
of_scan_flat_dt(probe_fw_features, NULL);
 }
+
+bool is_kvm_guest(void)
+{
+   struct device_node *hyper_node;
+
+   hyper_node = of_find_node_by_path("/hypervisor");
+   if (!hyper_node)
+   return 0;
+
+   if (!of_device_is_compatible(hyper_node, "linux,kvm"))
+   return 0;
+
+   return 1;
+}
diff --git a/arch/powerpc/platforms/pseries/smp.c 
b/arch/powerpc/platforms/pseries/smp.c
index 67e6ad5076ce..7af0003b40b6 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -236,24 +236,32 @@ static __init void pSeries_smp_probe(void)
if (!cpu_has_feature(CPU_FTR_SMT))
return;
 
-   /*
-* KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp faults
-* to the hypervisor which then reads the instruction from guest
-* memory. This can't be done if the guest is secure, so don't use
-* doorbells in secure guests.
-*
-* Under PowerVM, FSCR[MSGP] is enabled so doorbells could be used
-* by secure guests if we distinguished this from KVM.
-*/
-   if (is_secure_guest())
-   return;
+   if (is_kvm_guest()) {
+   /*
+* KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
+* faults to the hypervisor which then reads the instruction
+* from guest memory, which tends to be slower than using XIVE.
+*/
+   if (xive_enabled())
+   return;
+
+   /*
+* XICS hcalls aren't as fast, so we can use msgsndp (which
+* also helps exercise KVM emulation), however KVM can't
+* emulate secure guests because it can't read the instruction
+* out of their memory.
+*/
+   if (is_secure_guest())
+   return;
+   }
 
/*
-* The guest can use doobells for SMT sibling IPIs, which stay in
-* the core rather than going to the interrupt controller. This
-* tends to be slower under KVM where doorbells are emulated, but
-* faster for PowerVM where they're enabled.
+* Under PowerVM, FSCR[MSGP] is enabled as guest vCPU siblings are
+* gang scheduled on the same physical core, so doorbells are always
+* faster than the interrupt controller, and they can be used by
+* secure guests.
 */
+
ic_cause_ipi = smp_ops->cause_ipi;
smp_ops->cause_ipi = dbell_or_ic_cause_ipi;
 }
-- 
2.23.0



[PATCH 2/3] powerpc/pseries: Use doorbells even if XIVE is available

2020-06-27 Thread Nicholas Piggin
KVM supports msgsndp in guests by trapping and emulating the
instruction, so it was decided to always use XIVE for IPIs if it is
available. However on PowerVM systems, msgsndp can be used and gives
better performance. On large systems, high XIVE interrupt rates can
have sub-linear scaling, and using msgsndp can reduce the load on
the interrupt controller.

So switch to using core local doorbells even if XIVE is available.
This reduces performance for KVM guests with an SMT topology by
about 50% for ping-pong context switching between SMT vCPUs. An
option vector (or dt-cpu-ftrs) could be defined to disable msgsndp
to get KVM performance back.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/platforms/pseries/smp.c | 54 ++--
 1 file changed, 36 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/smp.c 
b/arch/powerpc/platforms/pseries/smp.c
index 6891710833be..67e6ad5076ce 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -188,13 +188,16 @@ static int pseries_smp_prepare_cpu(int cpu)
return 0;
 }
 
-static void smp_pseries_cause_ipi(int cpu)
+/* Cause IPI as setup by the interrupt controller (xics or xive) */
+static void (*ic_cause_ipi)(int cpu) __ro_after_init;
+
+/* Use msgsndp doorbells target is a sibling, else use interrupt controller */
+static void dbell_or_ic_cause_ipi(int cpu)
 {
-   /* POWER9 should not use this handler */
if (doorbell_try_core_ipi(cpu))
return;
 
-   icp_ops->cause_ipi(cpu);
+   ic_cause_ipi(cpu);
 }
 
 static int pseries_cause_nmi_ipi(int cpu)
@@ -218,26 +221,41 @@ static int pseries_cause_nmi_ipi(int cpu)
return 0;
 }
 
-static __init void pSeries_smp_probe_xics(void)
-{
-   xics_smp_probe();
-
-   if (cpu_has_feature(CPU_FTR_DBELL) && !is_secure_guest())
-   smp_ops->cause_ipi = smp_pseries_cause_ipi;
-   else
-   smp_ops->cause_ipi = icp_ops->cause_ipi;
-}
-
 static __init void pSeries_smp_probe(void)
 {
if (xive_enabled())
-   /*
-* Don't use P9 doorbells when XIVE is enabled. IPIs
-* using MMIOs should be faster
-*/
xive_smp_probe();
else
-   pSeries_smp_probe_xics();
+   xics_smp_probe();
+
+   /* No doorbell facility, must use the interrupt controller for IPIs */
+   if (!cpu_has_feature(CPU_FTR_DBELL))
+   return;
+
+   /* Doorbells can only be used for IPIs between SMT siblings */
+   if (!cpu_has_feature(CPU_FTR_SMT))
+   return;
+
+   /*
+* KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp faults
+* to the hypervisor which then reads the instruction from guest
+* memory. This can't be done if the guest is secure, so don't use
+* doorbells in secure guests.
+*
+* Under PowerVM, FSCR[MSGP] is enabled so doorbells could be used
+* by secure guests if we distinguished this from KVM.
+*/
+   if (is_secure_guest())
+   return;
+
+   /*
+* The guest can use doobells for SMT sibling IPIs, which stay in
+* the core rather than going to the interrupt controller. This
+* tends to be slower under KVM where doorbells are emulated, but
+* faster for PowerVM where they're enabled.
+*/
+   ic_cause_ipi = smp_ops->cause_ipi;
+   smp_ops->cause_ipi = dbell_or_ic_cause_ipi;
 }
 
 static struct smp_ops_t pseries_smp_ops = {
-- 
2.23.0



[PATCH 0/3] powerpc/pseries: IPI doorbell improvements

2020-06-27 Thread Nicholas Piggin
Thanks for the review, I think I incorporated all your comments, I
also did add KVM detection which avoids introducing a performance
regression.

Thanks,
Nick

Nicholas Piggin (3):
  powerpc: inline doorbell sending functions
  powerpc/pseries: Use doorbells even if XIVE is available
  powerpc/pseries: Add KVM guest doorbell restrictions

 arch/powerpc/include/asm/dbell.h  | 59 +++--
 arch/powerpc/include/asm/firmware.h   |  2 +
 arch/powerpc/include/asm/kvm_para.h   | 26 ++
 arch/powerpc/kernel/dbell.c   | 55 
 arch/powerpc/platforms/pseries/firmware.c | 14 +
 arch/powerpc/platforms/pseries/smp.c  | 62 ---
 6 files changed, 119 insertions(+), 99 deletions(-)

-- 
2.23.0



Re: [PATCH] powerpc/pseries: Use doorbells even if XIVE is available

2020-06-27 Thread Nicholas Piggin
Excerpts from Cédric Le Goater's message of June 26, 2020 5:17 pm:
> Adding David, 
> 
> On 6/25/20 3:11 AM, Michael Ellerman wrote:
>> Nicholas Piggin  writes:
>>> KVM supports msgsndp in guests by trapping and emulating the
>>> instruction, so it was decided to always use XIVE for IPIs if it is
>>> available. However on PowerVM systems, msgsndp can be used and gives
>>> better performance. On large systems, high XIVE interrupt rates can
>>> have sub-linear scaling, and using msgsndp can reduce the load on
>>> the interrupt controller.
>>>
>>> So switch to using core local doorbells even if XIVE is available.
>>> This reduces performance for KVM guests with an SMT topology by
>>> about 50% for ping-pong context switching between SMT vCPUs.
>> 
>> You have to take explicit steps to configure KVM in that way with qemu.
>> eg. "qemu .. -smp 8" will give you 8 SMT1 CPUs by default.
>> 
>>> An option vector (or dt-cpu-ftrs) could be defined to disable msgsndp
>>> to get KVM performance back.
> 
> An option vector would require a PAPR change. Unless the architecture 
> reserves some bits for the implementation, but I don't think so. Same
> for CAS.
> 
>> Qemu/KVM populates /proc/device-tree/hypervisor, so we *could* look at
>> that. Though adding PowerVM/KVM specific hacks is obviously a very
>> slippery slope.
> 
> QEMU could advertise a property "emulated-msgsndp", or something similar, 
> which would be interpreted by Linux as a CPU feature and taken into account 
> when doing the IPIs.

What I'm going to do is detect KVM here (we already have a KVM detection
test using that dt property). The IPI setup code already has KVM hacks 
in it, so I don't really see the problem with putting them behind a KVM
test.

I think doing cpu ftrs or some specific entry for msgsndp in particular
is the right way to go, but in the interests of making existing KVM work
I'll do this.

Thanks,
Nick


[PATCH 8/8] mm: move p?d_alloc_track to separate header file

2020-06-27 Thread Mike Rapoport
From: Joerg Roedel 

The functions are only used in two source files, both residing in mm/
subdirectory, so there is no need for them to be in the global <linux/mm.h>
header.  Move them to the new mm/pgalloc-track.h header and include it only
where needed.

[rppt: mv include/linux/pgalloc-track.h mm/]

Link: http://lkml.kernel.org/r/20200609120533.25867-1-j...@8bytes.org
Signed-off-by: Joerg Roedel 
Cc: Peter Zijlstra (Intel) 
Cc: Andy Lutomirski 
Cc: Abdul Haleem 
Cc: Satheesh Rajendran 
Cc: Stephen Rothwell 
Cc: Steven Rostedt (VMware) 
Cc: Mike Rapoport 
Cc: Christophe Leroy 
Signed-off-by: Mike Rapoport 
---
 include/linux/mm.h | 45 
 mm/ioremap.c   |  2 ++
 mm/pgalloc-track.h | 51 ++
 mm/vmalloc.c   |  1 +
 4 files changed, 54 insertions(+), 45 deletions(-)
 create mode 100644 mm/pgalloc-track.h

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b87310c10..5e878a3c7c57 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2093,51 +2093,11 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
p4d_t *p4d,
NULL : pud_offset(p4d, address);
 }
 
-static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
-unsigned long address,
-pgtbl_mod_mask *mod_mask)
-
-{
-   if (unlikely(pgd_none(*pgd))) {
-   if (__p4d_alloc(mm, pgd, address))
-   return NULL;
-   *mod_mask |= PGTBL_PGD_MODIFIED;
-   }
-
-   return p4d_offset(pgd, address);
-}
-
-static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
-unsigned long address,
-pgtbl_mod_mask *mod_mask)
-{
-   if (unlikely(p4d_none(*p4d))) {
-   if (__pud_alloc(mm, p4d, address))
-   return NULL;
-   *mod_mask |= PGTBL_P4D_MODIFIED;
-   }
-
-   return pud_offset(p4d, address);
-}
-
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long 
address)
 {
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
NULL: pmd_offset(pud, address);
 }
-
-static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
-unsigned long address,
-pgtbl_mod_mask *mod_mask)
-{
-   if (unlikely(pud_none(*pud))) {
-   if (__pmd_alloc(mm, pud, address))
-   return NULL;
-   *mod_mask |= PGTBL_PUD_MODIFIED;
-   }
-
-   return pmd_offset(pud, address);
-}
 #endif /* CONFIG_MMU */
 
 #if USE_SPLIT_PTE_PTLOCKS
@@ -2253,11 +2213,6 @@ static inline void pgtable_pte_page_dtor(struct page 
*page)
((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
NULL: pte_offset_kernel(pmd, address))
 
-#define pte_alloc_kernel_track(pmd, address, mask) \
-   ((unlikely(pmd_none(*(pmd))) && \
- (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
-   NULL: pte_offset_kernel(pmd, address))
-
 #if USE_SPLIT_PMD_PTLOCKS
 
 static struct page *pmd_to_page(pmd_t *pmd)
diff --git a/mm/ioremap.c b/mm/ioremap.c
index 5ee3526f71b8..5fa1ab41d152 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -13,6 +13,8 @@
 #include 
 #include 
 
+#include "pgalloc-track.h"
+
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
 static int __read_mostly ioremap_p4d_capable;
 static int __read_mostly ioremap_pud_capable;
diff --git a/mm/pgalloc-track.h b/mm/pgalloc-track.h
new file mode 100644
index ..1dcc865029a2
--- /dev/null
+++ b/mm/pgalloc-track.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PGALLLC_TRACK_H
+#define _LINUX_PGALLLC_TRACK_H
+
+#if defined(CONFIG_MMU)
+static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
+unsigned long address,
+pgtbl_mod_mask *mod_mask)
+{
+   if (unlikely(pgd_none(*pgd))) {
+   if (__p4d_alloc(mm, pgd, address))
+   return NULL;
+   *mod_mask |= PGTBL_PGD_MODIFIED;
+   }
+
+   return p4d_offset(pgd, address);
+}
+
+static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
+unsigned long address,
+pgtbl_mod_mask *mod_mask)
+{
+   if (unlikely(p4d_none(*p4d))) {
+   if (__pud_alloc(mm, p4d, address))
+   return NULL;
+   *mod_mask |= PGTBL_P4D_MODIFIED;
+   }
+
+   return pud_offset(p4d, address);
+}
+
+static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
+unsigned long address,
+pgtbl_mod_mask *mod_mask)
+{

[PATCH 6/8] asm-generic: pgalloc: provide generic pgd_free()

2020-06-27 Thread Mike Rapoport
From: Mike Rapoport 

Most architectures define pgd_free() as a wrapper for free_page().

Provide a generic version in asm-generic/pgalloc.h and enable its use for
most architectures.

Signed-off-by: Mike Rapoport 
---
 arch/alpha/include/asm/pgalloc.h  | 6 --
 arch/arm/include/asm/pgalloc.h| 1 +
 arch/arm64/include/asm/pgalloc.h  | 1 +
 arch/csky/include/asm/pgalloc.h   | 7 +--
 arch/hexagon/include/asm/pgalloc.h| 7 +--
 arch/ia64/include/asm/pgalloc.h   | 5 -
 arch/m68k/include/asm/sun3_pgalloc.h  | 7 +--
 arch/microblaze/include/asm/pgalloc.h | 6 --
 arch/mips/include/asm/pgalloc.h   | 5 -
 arch/nds32/mm/mm-nds32.c  | 2 ++
 arch/nios2/include/asm/pgalloc.h  | 7 +--
 arch/parisc/include/asm/pgalloc.h | 1 +
 arch/riscv/include/asm/pgalloc.h  | 5 -
 arch/sh/include/asm/pgalloc.h | 1 +
 arch/um/include/asm/pgalloc.h | 1 -
 arch/um/kernel/mem.c  | 5 -
 arch/x86/include/asm/pgalloc.h| 1 +
 arch/xtensa/include/asm/pgalloc.h | 5 -
 include/asm-generic/pgalloc.h | 7 +++
 19 files changed, 18 insertions(+), 62 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index 4834cd52e9d0..9c6a24fe493d 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -34,10 +34,4 @@ pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
-static inline void
-pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-   free_page((unsigned long)pgd);
-}
-
 #endif /* _ALPHA_PGALLOC_H */
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index c5bdfd404ea5..15f4674715f8 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -65,6 +65,7 @@ static inline void clean_pte_table(pte_t *pte)
 
 #define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
 #define __HAVE_ARCH_PTE_ALLOC_ONE
+#define __HAVE_ARCH_PGD_FREE
 #include 
 
 static inline pte_t *
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 0965945b595d..3c6a7f5988b1 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -13,6 +13,7 @@
 #include 
 #include 
 
+#define __HAVE_ARCH_PGD_FREE
 #include 
 
 #define PGD_SIZE   (PTRS_PER_PGD * sizeof(pgd_t))
diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index c7c1ed27e348..d58d8146b729 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -9,7 +9,7 @@
 #include 
 
 #define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
-#include/* for pte_{alloc,free}_one */
+#include 
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
@@ -42,11 +42,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct 
*mm)
return pte;
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-   free_pages((unsigned long)pgd, PGD_ORDER);
-}
-
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
pgd_t *ret;
diff --git a/arch/hexagon/include/asm/pgalloc.h 
b/arch/hexagon/include/asm/pgalloc.h
index cc9be514a676..f0c47e6a7427 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -11,7 +11,7 @@
 #include 
 #include 
 
-#include/* for pte_{alloc,free}_one */
+#include 
 
 extern unsigned long long kmap_generation;
 
@@ -41,11 +41,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
return pgd;
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-   free_page((unsigned long) pgd);
-}
-
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte)
 {
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 06f80358e20f..9601cfe83c94 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -29,11 +29,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
return (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-   free_page((unsigned long)pgd);
-}
-
 #if CONFIG_PGTABLE_LEVELS == 4
 static inline void
 p4d_populate(struct mm_struct *mm, p4d_t * p4d_entry, pud_t * pud)
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h 
b/arch/m68k/include/asm/sun3_pgalloc.h
index 11b95dadf7c0..000f64869b91 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -13,7 +13,7 @@
 
 #include 
 
-#include/* for pte_{alloc,free}_one */
+#include 
 
 extern const char bad_pmd_string[];
 
@@ -40,11 +40,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t 
*pmd, pgtable_t page
  */
 #define pmd_free(mm, x)do { } while (0)
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{

[PATCH 5/8] asm-generic: pgalloc: provide generic pud_alloc_one() and pud_free_one()

2020-06-27 Thread Mike Rapoport
From: Mike Rapoport 

Several architectures define pud_alloc_one() as a wrapper for
__get_free_page() and pud_free() as a wrapper for free_page().

Provide a generic implementation in asm-generic/pgalloc.h and use it where
appropriate.

Signed-off-by: Mike Rapoport 
---
 arch/arm64/include/asm/pgalloc.h | 11 ---
 arch/ia64/include/asm/pgalloc.h  |  9 -
 arch/mips/include/asm/pgalloc.h  |  6 +-
 arch/x86/include/asm/pgalloc.h   | 15 ---
 include/asm-generic/pgalloc.h| 30 ++
 5 files changed, 31 insertions(+), 40 deletions(-)

diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 7246d0a662e1..0965945b595d 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -37,17 +37,6 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t 
pmdp, pudval_t prot)
 
 #if CONFIG_PGTABLE_LEVELS > 3
 
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-   return (pud_t *)__get_free_page(GFP_PGTABLE_USER);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
-{
-   BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
-   free_page((unsigned long)pudp);
-}
-
 static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
 {
set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 5da1fc76477b..06f80358e20f 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -41,15 +41,6 @@ p4d_populate(struct mm_struct *mm, p4d_t * p4d_entry, pud_t 
* pud)
p4d_val(*p4d_entry) = __pa(pud);
 }
 
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-   return (pud_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
-   free_page((unsigned long)pud);
-}
 #define __pud_free_tlb(tlb, pud, address)  pud_free((tlb)->mm, pud)
 #endif /* CONFIG_PGTABLE_LEVELS == 4 */
 
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index eed1b3e8c642..e5a840910ce0 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -14,6 +14,7 @@
 #include 
 
 #define __HAVE_ARCH_PMD_ALLOC_ONE
+#define __HAVE_ARCH_PUD_ALLOC_ONE
 #include 
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
@@ -87,11 +88,6 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, 
unsigned long address)
return pud;
 }
 
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
-   free_pages((unsigned long)pud, PUD_ORDER);
-}
-
 static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
 {
set_p4d(p4d, __p4d((unsigned long)pud));
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 25feaa117c40..3d1085a14347 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -123,21 +123,6 @@ static inline void p4d_populate_safe(struct mm_struct *mm, 
p4d_t *p4d, pud_t *pu
set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
 }
 
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-   gfp_t gfp = GFP_KERNEL_ACCOUNT;
-
-   if (mm == _mm)
-   gfp &= ~__GFP_ACCOUNT;
-   return (pud_t *)get_zeroed_page(gfp);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
-   BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-   free_page((unsigned long)pud);
-}
-
 extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
 
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 1bc027891a00..d361574aaadf 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -145,6 +145,36 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t 
*pmd)
 
 #endif /* CONFIG_PGTABLE_LEVELS > 2 */
 
+#if CONFIG_PGTABLE_LEVELS > 3
+
+#ifndef __HAVE_ARCH_PUD_FREE
+/**
+ * pud_alloc_one - allocate a page for PUD-level page table
+ * @mm: the mm_struct of the current context
+ *
+ * Allocates a page using %GFP_PGTABLE_USER for user context and
+ * %GFP_PGTABLE_KERNEL for kernel context.
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+   gfp_t gfp = GFP_PGTABLE_USER;
+
+   if (mm == _mm)
+   gfp = GFP_PGTABLE_KERNEL;
+   return (pud_t *)get_zeroed_page(gfp);
+}
+#endif
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+   BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
+   free_page((unsigned long)pud);
+}
+
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+
 #endif /* CONFIG_MMU */
 
 #endif /* __ASM_GENERIC_PGALLOC_H */
-- 
2.26.2



[PATCH 3/8] xtensa: switch to generic version of pte allocation

2020-06-27 Thread Mike Rapoport
From: Mike Rapoport 

xtensa clears PTEs during allocation of the page tables and pte_clear()
sets the PTE to a non-zero value. Splitting ptes_clear() helper out of
pte_alloc_one() and pte_alloc_one_kernel() allows reuse of base generic
allocation methods (__pte_alloc_one() and __pte_alloc_one_kernel()) and the
common GFP mask for page table allocations.

The pte_free() and pte_free_kernel() implementations on xtensa are
identical to the generic ones and can be dropped.

Signed-off-by: Mike Rapoport 
---
 arch/xtensa/include/asm/pgalloc.h | 41 ++-
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/arch/xtensa/include/asm/pgalloc.h 
b/arch/xtensa/include/asm/pgalloc.h
index 1d38f0e755ba..60ee94b42850 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -8,9 +8,14 @@
 #ifndef _XTENSA_PGALLOC_H
 #define _XTENSA_PGALLOC_H
 
+#ifdef CONFIG_MMU
 #include 
 #include 
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include 
+
 /*
  * Allocating and freeing a pmd is trivial: the 1-entry pmd is
  * inside the pgd, so has no extra memory associated with it.
@@ -33,45 +38,37 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t 
*pgd)
free_page((unsigned long)pgd);
 }
 
+static inline void ptes_clear(pte_t *ptep)
+{
+   int i;
+
+   for (i = 0; i < PTRS_PER_PTE; i++)
+   pte_clear(NULL, 0, ptep + i);
+}
+
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
pte_t *ptep;
-   int i;
 
-   ptep = (pte_t *)__get_free_page(GFP_KERNEL);
+   ptep = (pte_t *)__pte_alloc_one_kernel(mm);
if (!ptep)
return NULL;
-   for (i = 0; i < 1024; i++)
-   pte_clear(NULL, 0, ptep + i);
+   ptes_clear(ptep);
return ptep;
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
-   pte_t *pte;
struct page *page;
 
-   pte = pte_alloc_one_kernel(mm);
-   if (!pte)
-   return NULL;
-   page = virt_to_page(pte);
-   if (!pgtable_pte_page_ctor(page)) {
-   __free_page(page);
+   page = __pte_alloc_one(mm, GFP_PGTABLE_USER);
+   if (!page)
return NULL;
-   }
+   ptes_clear(page_address(page));
return page;
 }
 
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_pte_page_dtor(pte);
-   __free_page(pte);
-}
 #define pmd_pgtable(pmd) pmd_page(pmd)
+#endif CONFIG_MMU
 
 #endif /* _XTENSA_PGALLOC_H */
-- 
2.26.2



[PATCH 2/8] openrisc: switch to generic version of pte allocation

2020-06-27 Thread Mike Rapoport
From: Mike Rapoport 

Replace pte_alloc_one(), pte_free() and pte_free_kernel() with the generic
implementation. The only actual functional change is the addition of
__GFP_ACCOUNT for the allocation of the user page tables.

The pte_alloc_one_kernel() is kept back because its implementation on
openrisc is different than the generic one.

Signed-off-by: Mike Rapoport 
---
 arch/openrisc/include/asm/pgalloc.h | 33 +++--
 1 file changed, 3 insertions(+), 30 deletions(-)

diff --git a/arch/openrisc/include/asm/pgalloc.h 
b/arch/openrisc/include/asm/pgalloc.h
index da12a4c38c4b..88820299ecc4 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -20,6 +20,9 @@
 #include 
 #include 
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#include 
+
 extern int mem_init_done;
 
 #define pmd_populate_kernel(mm, pmd, pte) \
@@ -61,38 +64,8 @@ extern inline pgd_t *pgd_alloc(struct mm_struct *mm)
 }
 #endif
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-   free_page((unsigned long)pgd);
-}
-
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-   pte = alloc_pages(GFP_KERNEL, 0);
-   if (!pte)
-   return NULL;
-   clear_page(page_address(pte));
-   if (!pgtable_pte_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-   return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
-   pgtable_pte_page_dtor(pte);
-   __free_page(pte);
-}
-
 #define __pte_free_tlb(tlb, pte, addr) \
 do {   \
pgtable_pte_page_dtor(pte); \
-- 
2.26.2



[PATCH 1/8] mm: remove unneeded includes of <asm/pgalloc.h>

2020-06-27 Thread Mike Rapoport
From: Mike Rapoport 

In most cases the <asm/pgalloc.h> header is required only for allocations
of page table memory. Most of the .c files that include that header do not
use symbols declared in <asm/pgalloc.h> and do not require that header.

As for the other header files that used to include <asm/pgalloc.h>, it is
possible to move that include into the .c file that actually uses symbols
from <asm/pgalloc.h> and drop the include from the header file.

The process was somewhat automated using

sed -i -E '/[<"]asm\/pgalloc\.h/d' \
$(grep -L -w -f /tmp/xx \
$(git grep -E -l '[<"]asm/pgalloc\.h'))

where /tmp/xx contains all the symbols defined in
arch/*/include/asm/pgalloc.h.

Signed-off-by: Mike Rapoport 
---
 arch/alpha/include/asm/tlbflush.h| 1 -
 arch/alpha/kernel/core_irongate.c| 1 -
 arch/alpha/kernel/core_marvel.c  | 1 -
 arch/alpha/kernel/core_titan.c   | 1 -
 arch/alpha/kernel/machvec_impl.h | 2 --
 arch/alpha/kernel/smp.c  | 1 -
 arch/alpha/mm/numa.c | 1 -
 arch/arc/mm/fault.c  | 1 -
 arch/arc/mm/init.c   | 1 -
 arch/arm/include/asm/tlb.h   | 1 -
 arch/arm/kernel/machine_kexec.c  | 1 -
 arch/arm/kernel/smp.c| 1 -
 arch/arm/kernel/suspend.c| 1 -
 arch/arm/mach-omap2/omap-mpuss-lowpower.c| 1 -
 arch/arm/mm/hugetlbpage.c| 1 -
 arch/arm/mm/mmu.c| 1 +
 arch/arm64/kernel/smp.c  | 1 -
 arch/arm64/mm/hugetlbpage.c  | 1 -
 arch/arm64/mm/ioremap.c  | 1 -
 arch/arm64/mm/mmu.c  | 1 +
 arch/csky/kernel/smp.c   | 1 -
 arch/ia64/include/asm/tlb.h  | 1 -
 arch/ia64/kernel/process.c   | 1 -
 arch/ia64/kernel/smp.c   | 1 -
 arch/ia64/kernel/smpboot.c   | 1 -
 arch/ia64/mm/contig.c| 1 -
 arch/ia64/mm/discontig.c | 1 -
 arch/ia64/mm/hugetlbpage.c   | 1 -
 arch/ia64/mm/tlb.c   | 1 -
 arch/m68k/include/asm/mmu_context.h  | 2 +-
 arch/m68k/kernel/dma.c   | 2 +-
 arch/m68k/kernel/traps.c | 3 +--
 arch/m68k/mm/cache.c | 2 +-
 arch/m68k/mm/fault.c | 1 -
 arch/m68k/mm/kmap.c  | 2 +-
 arch/m68k/mm/mcfmmu.c| 1 +
 arch/m68k/mm/memory.c| 1 -
 arch/m68k/sun3x/dvma.c   | 2 +-
 arch/microblaze/include/asm/tlbflush.h   | 1 -
 arch/microblaze/kernel/process.c | 1 -
 arch/microblaze/kernel/signal.c  | 1 -
 arch/mips/sgi-ip32/ip32-memory.c | 1 -
 arch/openrisc/include/asm/tlbflush.h | 1 -
 arch/openrisc/kernel/or32_ksyms.c| 1 -
 arch/parisc/include/asm/mmu_context.h| 1 -
 arch/parisc/kernel/cache.c   | 1 -
 arch/parisc/kernel/pci-dma.c | 1 -
 arch/parisc/kernel/process.c | 1 -
 arch/parisc/kernel/signal.c  | 1 -
 arch/parisc/kernel/smp.c | 1 -
 arch/parisc/mm/hugetlbpage.c | 1 -
 arch/parisc/mm/ioremap.c | 2 +-
 arch/powerpc/include/asm/tlb.h   | 1 -
 arch/powerpc/mm/book3s64/hash_hugetlbpage.c  | 1 -
 arch/powerpc/mm/book3s64/hash_pgtable.c  | 1 -
 arch/powerpc/mm/book3s64/hash_tlb.c  | 1 -
 arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 1 -
 arch/powerpc/mm/init_32.c| 1 -
 arch/powerpc/mm/kasan/8xx.c  | 1 -
 arch/powerpc/mm/kasan/book3s_32.c| 1 -
 arch/powerpc/mm/mem.c| 1 -
 arch/powerpc/mm/nohash/40x.c | 1 -
 arch/powerpc/mm/nohash/8xx.c | 1 -
 arch/powerpc/mm/nohash/fsl_booke.c   | 1 -
 arch/powerpc/mm/nohash/kaslr_booke.c | 1 -
 arch/powerpc/mm/pgtable.c| 1 -
 arch/powerpc/mm/pgtable_64.c | 1 -
 arch/powerpc/mm/ptdump/hashpagetable.c   | 2 +-
 arch/powerpc/mm/ptdump/ptdump.c  | 1 -
 arch/powerpc/platforms/pseries/cmm.c | 1 -
 arch/riscv/mm/fault.c| 1 -
 arch/s390/include/asm/tlb.h  | 1 -
 arch/s390/include/asm/tlbflush.h | 1 -
 arch/s390/kernel/machine_kexec.c | 1 -
 arch/s390/kernel/ptrace.c| 1 -
 arch/s390/kvm/diag.c | 1 -
 arch/s390/kvm/priv.c | 1 -
 arch/s390/kvm/pv.c   | 1 -
 arch/s390/mm/cmm.c   | 1 -
 arch/s390/mm/mmap.c  | 1 -
 arch/s390/mm/pgtable.c   | 1 -
 arch/sh/kernel/idle.c| 1 -
 

[PATCH 0/8] mm: cleanup usage of <asm/pgalloc.h>

2020-06-27 Thread Mike Rapoport
From: Mike Rapoport 

Hi,

Most architectures have very similar versions of pXd_alloc_one() and
pXd_free_one() for intermediate levels of page table. 
These patches add generic versions of these functions in
<asm-generic/pgalloc.h> and enable use of the generic functions where
appropriate.

In addition, functions declared and defined in <asm/pgalloc.h> headers
are used mostly by core mm and early mm initialization in arch and there is
no actual reason to have the <asm/pgalloc.h> included all over the place.
The first patch in this series removes unneeded includes of <asm/pgalloc.h>.

In the end it didn't work out as neatly as I hoped and moving
pXd_alloc_track() definitions to <asm-generic/pgalloc.h> would require
unnecessary changes to arches that have custom page table allocations, so
I've decided to move lib/ioremap.c to mm/ and make pgalloc-track.h local to
mm/.

Joerg Roedel (1):
  mm: move p?d_alloc_track to separate header file

Mike Rapoport (7):
  mm: remove unneeded includes of <asm/pgalloc.h>
  openrisc: switch to generic version of pte allocation
  xtensa: switch to generic version of pte allocation
  asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()
  asm-generic: pgalloc: provide generic pud_alloc_one() and pud_free_one()
  asm-generic: pgalloc: provide generic pgd_free()
  mm: move lib/ioremap.c to mm/

 arch/alpha/include/asm/pgalloc.h | 21 +
 arch/alpha/include/asm/tlbflush.h|  1 -
 arch/alpha/kernel/core_irongate.c|  1 -
 arch/alpha/kernel/core_marvel.c  |  1 -
 arch/alpha/kernel/core_titan.c   |  1 -
 arch/alpha/kernel/machvec_impl.h |  2 -
 arch/alpha/kernel/smp.c  |  1 -
 arch/alpha/mm/numa.c |  1 -
 arch/arc/mm/fault.c  |  1 -
 arch/arc/mm/init.c   |  1 -
 arch/arm/include/asm/pgalloc.h   | 12 +--
 arch/arm/include/asm/tlb.h   |  1 -
 arch/arm/kernel/machine_kexec.c  |  1 -
 arch/arm/kernel/smp.c|  1 -
 arch/arm/kernel/suspend.c|  1 -
 arch/arm/mach-omap2/omap-mpuss-lowpower.c|  1 -
 arch/arm/mm/hugetlbpage.c|  1 -
 arch/arm/mm/mmu.c|  1 +
 arch/arm64/include/asm/pgalloc.h | 39 +-
 arch/arm64/kernel/smp.c  |  1 -
 arch/arm64/mm/hugetlbpage.c  |  1 -
 arch/arm64/mm/ioremap.c  |  1 -
 arch/arm64/mm/mmu.c  |  1 +
 arch/csky/include/asm/pgalloc.h  |  7 +-
 arch/csky/kernel/smp.c   |  1 -
 arch/hexagon/include/asm/pgalloc.h   |  7 +-
 arch/ia64/include/asm/pgalloc.h  | 24 --
 arch/ia64/include/asm/tlb.h  |  1 -
 arch/ia64/kernel/process.c   |  1 -
 arch/ia64/kernel/smp.c   |  1 -
 arch/ia64/kernel/smpboot.c   |  1 -
 arch/ia64/mm/contig.c|  1 -
 arch/ia64/mm/discontig.c |  1 -
 arch/ia64/mm/hugetlbpage.c   |  1 -
 arch/ia64/mm/tlb.c   |  1 -
 arch/m68k/include/asm/mmu_context.h  |  2 +-
 arch/m68k/include/asm/sun3_pgalloc.h |  7 +-
 arch/m68k/kernel/dma.c   |  2 +-
 arch/m68k/kernel/traps.c |  3 +-
 arch/m68k/mm/cache.c |  2 +-
 arch/m68k/mm/fault.c |  1 -
 arch/m68k/mm/kmap.c  |  2 +-
 arch/m68k/mm/mcfmmu.c|  1 +
 arch/m68k/mm/memory.c|  1 -
 arch/m68k/sun3x/dvma.c   |  2 +-
 arch/microblaze/include/asm/pgalloc.h|  6 --
 arch/microblaze/include/asm/tlbflush.h   |  1 -
 arch/microblaze/kernel/process.c |  1 -
 arch/microblaze/kernel/signal.c  |  1 -
 arch/mips/include/asm/pgalloc.h  | 19 +
 arch/mips/sgi-ip32/ip32-memory.c |  1 -
 arch/nds32/mm/mm-nds32.c |  2 +
 arch/nios2/include/asm/pgalloc.h |  7 +-
 arch/openrisc/include/asm/pgalloc.h  | 33 +---
 arch/openrisc/include/asm/tlbflush.h |  1 -
 arch/openrisc/kernel/or32_ksyms.c|  1 -
 arch/parisc/include/asm/mmu_context.h|  1 -
 arch/parisc/include/asm/pgalloc.h| 12 +--
 arch/parisc/kernel/cache.c   |  1 -
 arch/parisc/kernel/pci-dma.c |  1 -
 arch/parisc/kernel/process.c |  1 -
 arch/parisc/kernel/signal.c  |  1 -
 arch/parisc/kernel/smp.c |  1 -
 arch/parisc/mm/hugetlbpage.c |  1 -
 arch/parisc/mm/ioremap.c |  2 +-
 arch/powerpc/include/asm/tlb.h   |  1 -
 arch/powerpc/mm/book3s64/hash_hugetlbpage.c  |  1 -
 arch/powerpc/mm/book3s64/hash_pgtable.c  |  1 -
 arch/powerpc/mm/book3s64/hash_tlb.c  |  1 -
 

[PATCH 5/5] exec: add a kernel_execveat helper

2020-06-27 Thread Christoph Hellwig
Add a kernel_execveat helper to execute a binary with kernel space argv
and envp pointers.  Switch executing init and user mode helpers to this
new helper instead of relying on the implicit set_fs(KERNEL_DS) for early
init code and kernel threads, and move the getname call into the
do_execve helper.

Signed-off-by: Christoph Hellwig 
---
 fs/exec.c   | 109 
 include/linux/binfmts.h |   6 +--
 init/main.c |   6 +--
 kernel/umh.c|   8 ++-
 4 files changed, 95 insertions(+), 34 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 34781db6bf6889..7923b8334ae600 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -435,6 +435,21 @@ static int count_strings(const char __user *const __user 
*argv)
return i;
 }
 
+static int count_kernel_strings(const char *const *argv)
+{
+   int i;
+
+   if (!argv)
+   return 0;
+
+   for (i = 0; argv[i]; i++) {
+   if (i >= MAX_ARG_STRINGS)
+   return -E2BIG;
+   }
+
+   return i;
+}
+
 static int check_arg_limit(struct linux_binprm *bprm)
 {
unsigned long limit, ptr_size;
@@ -611,6 +626,19 @@ int copy_string_kernel(const char *arg, struct 
linux_binprm *bprm)
 }
 EXPORT_SYMBOL(copy_string_kernel);
 
+static int copy_strings_kernel(int argc, const char *const *argv,
+   struct linux_binprm *bprm)
+{
+   int ret;
+
+   while (argc-- > 0) {
+   ret = copy_string_kernel(argv[argc], bprm);
+   if (ret)
+   break;
+   }
+   return ret;
+}
+
 #ifdef CONFIG_MMU
 
 /*
@@ -1793,9 +1821,11 @@ static int exec_binprm(struct linux_binprm *bprm)
return 0;
 }
 
-int do_execveat(int fd, struct filename *filename,
+static int __do_execveat(int fd, struct filename *filename,
const char __user *const __user *argv,
const char __user *const __user *envp,
+   const char *const *kernel_argv,
+   const char *const *kernel_envp,
int flags, struct file *file)
 {
char *pathbuf = NULL;
@@ -1876,16 +1906,30 @@ int do_execveat(int fd, struct filename *filename,
if (retval)
goto out_unmark;
 
-   bprm->argc = count_strings(argv);
-   if (bprm->argc < 0) {
-   retval = bprm->argc;
-   goto out;
-   }
+   if (unlikely(kernel_argv)) {
+   bprm->argc = count_kernel_strings(kernel_argv);
+   if (bprm->argc < 0) {
+   retval = bprm->argc;
+   goto out;
+   }
 
-   bprm->envc = count_strings(envp);
-   if (bprm->envc < 0) {
-   retval = bprm->envc;
-   goto out;
+   bprm->envc = count_kernel_strings(kernel_envp);
+   if (bprm->envc < 0) {
+   retval = bprm->envc;
+   goto out;
+   }
+   } else {
+   bprm->argc = count_strings(argv);
+   if (bprm->argc < 0) {
+   retval = bprm->argc;
+   goto out;
+   }
+
+   bprm->envc = count_strings(envp);
+   if (bprm->envc < 0) {
+   retval = bprm->envc;
+   goto out;
+   }
}
 
retval = check_arg_limit(bprm);
@@ -1902,13 +1946,22 @@ int do_execveat(int fd, struct filename *filename,
goto out;
 
bprm->exec = bprm->p;
-   retval = copy_strings(bprm->envc, envp, bprm);
-   if (retval < 0)
-   goto out;
 
-   retval = copy_strings(bprm->argc, argv, bprm);
-   if (retval < 0)
-   goto out;
+   if (unlikely(kernel_argv)) {
+   retval = copy_strings_kernel(bprm->envc, kernel_envp, bprm);
+   if (retval < 0)
+   goto out;
+   retval = copy_strings_kernel(bprm->argc, kernel_argv, bprm);
+   if (retval < 0)
+   goto out;
+   } else {
+   retval = copy_strings(bprm->envc, envp, bprm);
+   if (retval < 0)
+   goto out;
+   retval = copy_strings(bprm->argc, argv, bprm);
+   if (retval < 0)
+   goto out;
+   }
 
retval = exec_binprm(bprm);
if (retval < 0)
@@ -1959,6 +2012,23 @@ int do_execveat(int fd, struct filename *filename,
return retval;
 }
 
+static int do_execveat(int fd, const char *filename,
+  const char __user *const __user *argv,
+  const char __user *const __user *envp, int flags)
+{
+   int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+   struct filename *name = getname_flags(filename, lookup_flags, NULL);
+
+   return __do_execveat(fd, name, argv, envp, NULL, NULL, flags, NULL);
+}
+
+int kernel_execveat(int fd, const char 

[PATCH 3/5] exec: cleanup the count() function

2020-06-27 Thread Christoph Hellwig
Remove the max argument as it is hard wired to MAX_ARG_STRINGS, and
give the function a slightly less generic name.

Signed-off-by: Christoph Hellwig 
---
 fs/exec.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 4e5db0e35797a5..a5d91f8b1341d5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -407,9 +407,9 @@ get_user_arg_ptr(const char __user *const __user *argv, int 
nr)
 }
 
 /*
- * count() counts the number of strings in array ARGV.
+ * count_strings() counts the number of strings in array ARGV.
  */
-static int count(const char __user *const __user *argv, int max)
+static int count_strings(const char __user *const __user *argv)
 {
int i = 0;
 
@@ -423,7 +423,7 @@ static int count(const char __user *const __user *argv, int 
max)
if (IS_ERR(p))
return -EFAULT;
 
-   if (i >= max)
+   if (i >= MAX_ARG_STRINGS)
return -E2BIG;
++i;
 
@@ -441,11 +441,11 @@ static int prepare_arg_pages(struct linux_binprm *bprm,
 {
unsigned long limit, ptr_size;
 
-   bprm->argc = count(argv, MAX_ARG_STRINGS);
+   bprm->argc = count_strings(argv);
if (bprm->argc < 0)
return bprm->argc;
 
-   bprm->envc = count(envp, MAX_ARG_STRINGS);
+   bprm->envc = count_strings(envp);
if (bprm->envc < 0)
return bprm->envc;
 
-- 
2.26.2



[PATCH 1/5] exec: cleanup the execve wrappers

2020-06-27 Thread Christoph Hellwig
Remove a whole bunch of wrappers that eventually all call
__do_execve_file, and consolidate the execve helpers to:

  (1) __do_execveat, which is the lowest level helper implementing the
  actual functionality
  (2) do_execveat, which is used by all callers that want native
  pointers
  (3) do_compat_execve, which is used by all compat syscalls

Signed-off-by: Christoph Hellwig 
---
 fs/exec.c   | 98 +++--
 include/linux/binfmts.h | 12 ++---
 init/main.c |  7 +--
 kernel/umh.c| 16 +++
 4 files changed, 41 insertions(+), 92 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index e6e8a9a7032784..354fdaa536ae7d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1815,10 +1815,7 @@ static int exec_binprm(struct linux_binprm *bprm)
return 0;
 }
 
-/*
- * sys_execve() executes a new program.
- */
-static int __do_execve_file(int fd, struct filename *filename,
+static int __do_execveat(int fd, struct filename *filename,
struct user_arg_ptr argv,
struct user_arg_ptr envp,
int flags, struct file *file)
@@ -1972,74 +1969,16 @@ static int __do_execve_file(int fd, struct filename 
*filename,
return retval;
 }
 
-static int do_execveat_common(int fd, struct filename *filename,
- struct user_arg_ptr argv,
- struct user_arg_ptr envp,
- int flags)
-{
-   return __do_execve_file(fd, filename, argv, envp, flags, NULL);
-}
-
-int do_execve_file(struct file *file, void *__argv, void *__envp)
-{
-   struct user_arg_ptr argv = { .ptr.native = __argv };
-   struct user_arg_ptr envp = { .ptr.native = __envp };
-
-   return __do_execve_file(AT_FDCWD, NULL, argv, envp, 0, file);
-}
-
-int do_execve(struct filename *filename,
-   const char __user *const __user *__argv,
-   const char __user *const __user *__envp)
-{
-   struct user_arg_ptr argv = { .ptr.native = __argv };
-   struct user_arg_ptr envp = { .ptr.native = __envp };
-   return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
-}
-
 int do_execveat(int fd, struct filename *filename,
const char __user *const __user *__argv,
const char __user *const __user *__envp,
-   int flags)
+   int flags, struct file *file)
 {
struct user_arg_ptr argv = { .ptr.native = __argv };
struct user_arg_ptr envp = { .ptr.native = __envp };
 
-   return do_execveat_common(fd, filename, argv, envp, flags);
-}
-
-#ifdef CONFIG_COMPAT
-static int compat_do_execve(struct filename *filename,
-   const compat_uptr_t __user *__argv,
-   const compat_uptr_t __user *__envp)
-{
-   struct user_arg_ptr argv = {
-   .is_compat = true,
-   .ptr.compat = __argv,
-   };
-   struct user_arg_ptr envp = {
-   .is_compat = true,
-   .ptr.compat = __envp,
-   };
-   return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
-}
-
-static int compat_do_execveat(int fd, struct filename *filename,
- const compat_uptr_t __user *__argv,
- const compat_uptr_t __user *__envp,
- int flags)
-{
-   struct user_arg_ptr argv = {
-   .is_compat = true,
-   .ptr.compat = __argv,
-   };
-   struct user_arg_ptr envp = {
-   .is_compat = true,
-   .ptr.compat = __envp,
-   };
-   return do_execveat_common(fd, filename, argv, envp, flags);
+   return __do_execveat(fd, filename, argv, envp, flags, file);
 }
-#endif
 
 void set_binfmt(struct linux_binfmt *new)
 {
@@ -2070,7 +2009,7 @@ SYSCALL_DEFINE3(execve,
const char __user *const __user *, argv,
const char __user *const __user *, envp)
 {
-   return do_execve(getname(filename), argv, envp);
+   return do_execveat(AT_FDCWD, getname(filename), argv, envp, 0, NULL);
 }
 
 SYSCALL_DEFINE5(execveat,
@@ -2080,18 +2019,34 @@ SYSCALL_DEFINE5(execveat,
int, flags)
 {
int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+   struct filename *name = getname_flags(filename, lookup_flags, NULL);
 
-   return do_execveat(fd,
-  getname_flags(filename, lookup_flags, NULL),
-  argv, envp, flags);
+   return do_execveat(fd, name, argv, envp, flags, NULL);
 }
 
 #ifdef CONFIG_COMPAT
+static int do_compat_execve(int fd, struct filename *filename,
+   const compat_uptr_t __user *__argv,
+   const compat_uptr_t __user *__envp,
+   int flags)
+{
+   struct user_arg_ptr argv = {
+   .is_compat = true,
+   .ptr.compat = __argv,
+   };
+   struct user_arg_ptr envp = {
+   .is_compat = true,
+  

[PATCH 2/5] exec: simplify the compat syscall handling

2020-06-27 Thread Christoph Hellwig
The only difference between the compat exec* syscalls and their
native versions is the compat_ptr sign extension, and the fact that
the pointer arithmetics for the two dimensional arrays needs to use
the compat pointer size.  Instead of the compat wrappers and the
struct user_arg_ptr machinery just use in_compat_syscall() to do the
right thing for the compat case deep inside get_user_arg_ptr().

Signed-off-by: Christoph Hellwig 
---
 arch/arm64/include/asm/unistd32.h |   4 +-
 arch/mips/kernel/syscalls/syscall_n32.tbl |   4 +-
 arch/mips/kernel/syscalls/syscall_o32.tbl |   4 +-
 arch/parisc/kernel/syscalls/syscall.tbl   |   4 +-
 arch/powerpc/kernel/syscalls/syscall.tbl  |   4 +-
 arch/s390/kernel/syscalls/syscall.tbl |   4 +-
 arch/sparc/kernel/syscalls.S  |   4 +-
 arch/x86/entry/syscall_x32.c  |   7 ++
 arch/x86/entry/syscalls/syscall_32.tbl|   4 +-
 arch/x86/entry/syscalls/syscall_64.tbl|   4 +-
 fs/exec.c | 103 --
 include/linux/compat.h|   7 --
 include/uapi/asm-generic/unistd.h |   4 +-
 tools/include/uapi/asm-generic/unistd.h   |   4 +-
 .../arch/powerpc/entry/syscalls/syscall.tbl   |   4 +-
 .../perf/arch/s390/entry/syscalls/syscall.tbl |   4 +-
 .../arch/x86/entry/syscalls/syscall_64.tbl|   4 +-
 17 files changed, 56 insertions(+), 117 deletions(-)

diff --git a/arch/arm64/include/asm/unistd32.h 
b/arch/arm64/include/asm/unistd32.h
index 6d95d0c8bf2f47..141f5d2ff1c34f 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -33,7 +33,7 @@ __SYSCALL(__NR_link, sys_link)
 #define __NR_unlink 10
 __SYSCALL(__NR_unlink, sys_unlink)
 #define __NR_execve 11
-__SYSCALL(__NR_execve, compat_sys_execve)
+__SYSCALL(__NR_execve, sys_execve)
 #define __NR_chdir 12
 __SYSCALL(__NR_chdir, sys_chdir)
/* 13 was sys_time */
@@ -785,7 +785,7 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create)
 #define __NR_bpf 386
 __SYSCALL(__NR_bpf, sys_bpf)
 #define __NR_execveat 387
-__SYSCALL(__NR_execveat, compat_sys_execveat)
+__SYSCALL(__NR_execveat, sys_execveat)
 #define __NR_userfaultfd 388
 __SYSCALL(__NR_userfaultfd, sys_userfaultfd)
 #define __NR_membarrier 389
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl 
b/arch/mips/kernel/syscalls/syscall_n32.tbl
index f777141f52568f..e861b5ab7179c9 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -64,7 +64,7 @@
 54 n32 getsockopt  compat_sys_getsockopt
 55 n32 clone   __sys_clone
 56 n32 fork__sys_fork
-57 n32 execve  compat_sys_execve
+57 n32 execve  sys_execve
 58 n32 exitsys_exit
 59 n32 wait4   compat_sys_wait4
 60 n32 killsys_kill
@@ -328,7 +328,7 @@
 317n32 getrandom   sys_getrandom
 318n32 memfd_createsys_memfd_create
 319n32 bpf sys_bpf
-320n32 execveatcompat_sys_execveat
+320n32 execveatsys_execveat
 321n32 userfaultfd sys_userfaultfd
 322n32 membarrier  sys_membarrier
 323n32 mlock2  sys_mlock2
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl 
b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 13280625d312e9..bba80f74e9968e 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -18,7 +18,7 @@
 8  o32 creat   sys_creat
 9  o32 linksys_link
 10 o32 unlink  sys_unlink
-11 o32 execve  sys_execve  
compat_sys_execve
+11 o32 execve  sys_execve
 12 o32 chdir   sys_chdir
 13 o32 timesys_time32
 14 o32 mknod   sys_mknod
@@ -367,7 +367,7 @@
 353o32 getrandom   sys_getrandom
 354o32 memfd_createsys_memfd_create
 355o32 bpf sys_bpf
-356o32 execveatsys_execveat
compat_sys_execveat
+356o32 execveatsys_execveat
 357o32 userfaultfd sys_userfaultfd
 358o32 membarrier  sys_membarrier
 359o32 mlock2  sys_mlock2
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl 

properly support exec with kernel pointers v3

2020-06-27 Thread Christoph Hellwig
Hi all,

this series first cleans up the exec code and then adds proper
kernel_execveat and kernel_wait callers instead of relying on the fact
that the early init code and kernel threads implicitly run with
the address limit set to KERNEL_DS.

Note that the cleanup removes the compat execve(at) handlers entirely, as
we can handle the compat difference very nicely in a unified codebase.
x32 needs two hacky #defines for that for now, although those can go
away if the x32 syscall rework from Brian gets merged.

I think this is ready to get picked up.  What would the best tree be?
Most importantly, a git tree would be good, as I have other work building
on top of it.


Changes since v2:
 - drop the kernel_wait addition, as this interacts with a series
   from Luis and should be merged together with that one

Changes since v1:
 - remove a pointless ifdef from get_user_arg_ptr
 - remove the need for a compat syscall handler for x32


Diffstat:
 arch/arm64/include/asm/unistd32.h  |4 
 arch/mips/kernel/syscalls/syscall_n32.tbl  |4 
 arch/mips/kernel/syscalls/syscall_o32.tbl  |4 
 arch/parisc/kernel/syscalls/syscall.tbl|4 
 arch/powerpc/kernel/syscalls/syscall.tbl   |4 
 arch/s390/kernel/syscalls/syscall.tbl  |4 
 arch/sparc/kernel/syscalls.S   |4 
 arch/x86/entry/syscall_x32.c   |7 
 arch/x86/entry/syscalls/syscall_32.tbl |4 
 arch/x86/entry/syscalls/syscall_64.tbl |4 
 fs/exec.c  |  248 -
 include/linux/binfmts.h|   10 
 include/linux/compat.h |7 
 include/uapi/asm-generic/unistd.h  |4 
 init/main.c|5 
 kernel/umh.c   |   14 -
 tools/include/uapi/asm-generic/unistd.h|4 
 tools/perf/arch/powerpc/entry/syscalls/syscall.tbl |4 
 tools/perf/arch/s390/entry/syscalls/syscall.tbl|4 
 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl  |4 
 20 files changed, 149 insertions(+), 198 deletions(-)


[PATCH 2/2] powerpc/mm/book3s64/pkeys: Rename is_pkey_enabled()

2020-06-27 Thread Aneesh Kumar K.V
Rename is_pkey_enabled() to is_pkey_masked() to better indicate that
this check is to make sure the key is available for userspace usage. For it to
be made available both the bits in UAMOR should be set to 1 (0b11).

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/book3s64/pkeys.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index ca5fcb4bff32..70d760ade922 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -206,18 +206,16 @@ static inline void write_uamor(u64 value)
mtspr(SPRN_UAMOR, value);
 }
 
-static bool is_pkey_enabled(int pkey)
+static bool is_pkey_masked(int pkey)
 {
u64 uamor = read_uamor();
u64 pkey_bits = 0x3ul << pkeyshift(pkey);
u64 uamor_pkey_bits = (uamor & pkey_bits);
 
/*
-* Both the bits in UAMOR corresponding to the key should be set or
-* reset.
+* Both the bits in UAMOR corresponding to the key should be set
 */
-   WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits));
-   return !!(uamor_pkey_bits);
+   return (uamor_pkey_bits != pkey_bits);
 }
 
 static inline void init_amr(int pkey, u8 init_bits)
@@ -246,7 +244,7 @@ int __arch_set_user_pkey_access(struct task_struct *tsk, 
int pkey,
u64 new_amr_bits = 0x0ul;
u64 new_iamr_bits = 0x0ul;
 
-   if (!is_pkey_enabled(pkey))
+   if (is_pkey_masked(pkey))
return -EINVAL;
 
if (init_val & PKEY_DISABLE_EXECUTE) {
-- 
2.26.2



[PATCH 1/2] powerpc/mm/book3s64/pkeys: make pkey access check work on execute_only_key

2020-06-27 Thread Aneesh Kumar K.V
pkey_access_permitted() should not check whether the pkey is available in UAMOR.
The kernel needs to do that check only while allocating keys. This also makes
sure execute_only_key which is marked as non-manageable via UAMOR gives the
right access check return w.r.t pkey_access_permitted().

This fixes the page fault loop when using PROT_EXEC as below

addr = mmap(0, page_sz, PROT_EXEC, MAP_FILE | MAP_PRIVATE, fildes, 0);
x = *addr;

Fixes: c46241a370a6 ("powerpc/pkeys: Check vma before returning key fault error 
to the user")

Reported-by: Jan Stancek 
Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/mm/book3s64/pkeys.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 1199fc2bfaec..ca5fcb4bff32 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -353,9 +353,6 @@ static bool pkey_access_permitted(int pkey, bool write, 
bool execute)
int pkey_shift;
u64 amr;
 
-   if (!is_pkey_enabled(pkey))
-   return true;
-
pkey_shift = pkeyshift(pkey);
if (execute && !(read_iamr() & (IAMR_EX_BIT << pkey_shift)))
return true;
-- 
2.26.2



Re: [PATCH 02/11] powerpc/kexec_file: mark PPC64 specific code

2020-06-27 Thread Christophe Leroy




Le 26/06/2020 à 21:04, Hari Bathini a écrit :

Some of the kexec_file_load code isn't PPC64 specific. Move PPC64
specific code from kexec/file_load.c to kexec/file_load_64.c. Also,
rename purgatory/trampoline.S to purgatory/trampoline_64.S in the
same spirit.


At the time being, CONFIG_KEXEC_FILE depends on PPC64.
Are you planning to make it work on PPC32 as well ?
Otherwise I don't understand the purpose of this patch.

Also, what is being done in this patch seems to go far beyond what you 
describe above. It is probably worth splitting into several patches with 
proper explanation.


Christophe



Signed-off-by: Hari Bathini 
---
  arch/powerpc/include/asm/kexec.h   |   11 +++
  arch/powerpc/kexec/Makefile|2 -
  arch/powerpc/kexec/elf_64.c|7 +-
  arch/powerpc/kexec/file_load.c |   37 ++
  arch/powerpc/kexec/file_load_64.c  |  108 ++
  arch/powerpc/purgatory/Makefile|4 +
  arch/powerpc/purgatory/trampoline.S|  117 
  arch/powerpc/purgatory/trampoline_64.S |  117 
  8 files changed, 248 insertions(+), 155 deletions(-)
  create mode 100644 arch/powerpc/kexec/file_load_64.c
  delete mode 100644 arch/powerpc/purgatory/trampoline.S
  create mode 100644 arch/powerpc/purgatory/trampoline_64.S

diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index c684768..7008ea1 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -114,8 +114,17 @@ int setup_purgatory(struct kimage *image, const void 
*slave_code,
unsigned long fdt_load_addr);
  int setup_new_fdt(const struct kimage *image, void *fdt,
  unsigned long initrd_load_addr, unsigned long initrd_len,
- const char *cmdline);
+ const char *cmdline, int *node);
  int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size);
+
+#ifdef CONFIG_PPC64
+int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr);
+int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
+   unsigned long initrd_load_addr,
+   unsigned long initrd_len, const char *cmdline);
+#endif /* CONFIG_PPC64 */
  #endif /* CONFIG_KEXEC_FILE */
  
  #else /* !CONFIG_KEXEC_CORE */

diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 86380c6..67c3553 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -7,7 +7,7 @@ obj-y   += core.o crash.o core_$(BITS).o
  
  obj-$(CONFIG_PPC32)		+= relocate_32.o
  
-obj-$(CONFIG_KEXEC_FILE)	+= file_load.o elf_$(BITS).o

+obj-$(CONFIG_KEXEC_FILE)   += file_load.o file_load_$(BITS).o elf_$(BITS).o
  
  ifdef CONFIG_HAVE_IMA_KEXEC

  ifdef CONFIG_IMA
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 3072fd6..23ad04c 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -88,7 +88,8 @@ static void *elf64_load(struct kimage *image, char 
*kernel_buf,
goto out;
}
  
-	ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);

+   ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr,
+ initrd_len, cmdline);
if (ret)
goto out;
  
@@ -107,8 +108,8 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,

pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
  
  	slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;

-   ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
- fdt_load_addr);
+   ret = setup_purgatory_ppc64(image, slave_code, fdt, kernel_load_addr,
+   fdt_load_addr);
if (ret)
pr_err("Error setting up the purgatory.\n");
  
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c

index 143c917..99a2c4d 100644
--- a/arch/powerpc/kexec/file_load.c
+++ b/arch/powerpc/kexec/file_load.c
@@ -1,6 +1,6 @@
  // SPDX-License-Identifier: GPL-2.0-only
  /*
- * ppc64 code to implement the kexec_file_load syscall
+ * powerpc code to implement the kexec_file_load syscall
   *
   * Copyright (C) 2004  Adam Litke (a...@us.ibm.com)
   * Copyright (C) 2004  IBM Corp.
@@ -16,26 +16,10 @@
  
  #include 

  #include 
-#include 
  #include 
  #include 
  
-#define SLAVE_CODE_SIZE		256

-
-const struct kexec_file_ops * const kexec_file_loaders[] = {
-   _elf64_ops,
-   NULL
-};
-
-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
- unsigned long buf_len)
-{
-   /* We don't support crash kernels yet. */
-   if (image->type == KEXEC_TYPE_CRASH)
- 

Re: [PATCH 01/11] kexec_file: allow archs to handle special regions while locating memory hole

2020-06-27 Thread Christophe Leroy




Le 26/06/2020 à 21:04, Hari Bathini a écrit :

Some archs can have special memory regions, within the given memory
range, which can't be used for the buffer in a kexec segment. As
kexec_add_buffer() function is being called from generic code as well,
add weak arch_kexec_add_buffer definition for archs to override & take
care of special regions before trying to locate a memory hole.

Signed-off-by: Hari Bathini 
---
  include/linux/kexec.h |5 +
  kernel/kexec_file.c   |   37 +
  2 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 1776eb2..1237682 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -195,6 +195,11 @@ int __weak arch_kexec_apply_relocations(struct 
purgatory_info *pi,
const Elf_Shdr *relsec,
const Elf_Shdr *symtab);
  
+extern int arch_kexec_add_buffer(struct kexec_buf *kbuf);

+


extern keyword is useless here, please remove (checkpatch also complains 
about it usually).



+/* arch_kexec_add_buffer calls this when it is ready */
+extern int __kexec_add_buffer(struct kexec_buf *kbuf);
+


same


  extern int kexec_add_buffer(struct kexec_buf *kbuf);
  int kexec_locate_mem_hole(struct kexec_buf *kbuf);
  
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c

index bb05fd5..a0b4f7f 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -669,10 +669,6 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
   */
  int kexec_add_buffer(struct kexec_buf *kbuf)
  {
-
-   struct kexec_segment *ksegment;
-   int ret;
-
/* Currently adding segment this way is allowed only in file mode */
if (!kbuf->image->file_mode)
return -EINVAL;
@@ -696,6 +692,25 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE);
kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
  
+	return arch_kexec_add_buffer(kbuf);

+}
+
+/**
+ * __kexec_add_buffer - arch_kexec_add_buffer would call this function after
+ *  updating kbuf, to place a buffer in a kexec segment.
+ * @kbuf:   Buffer contents and memory parameters.
+ *
+ * This function assumes that kexec_mutex is held.
+ * On successful return, @kbuf->mem will have the physical address of
+ * the buffer in memory.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int __kexec_add_buffer(struct kexec_buf *kbuf)
+{
+   struct kexec_segment *ksegment;
+   int ret;
+
/* Walk the RAM ranges and allocate a suitable range for the buffer */
ret = kexec_locate_mem_hole(kbuf);
if (ret)
@@ -711,6 +726,20 @@ int kexec_add_buffer(struct kexec_buf *kbuf)
return 0;
  }
  
+/**

+ * arch_kexec_add_buffer - Some archs have memory regions within the given
+ * range that can't be used to place a kexec segment.
+ * Such archs can override this function to take care
+ * of them before trying to locate the memory hole.
+ * @kbuf:  Buffer contents and memory parameters.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int __weak arch_kexec_add_buffer(struct kexec_buf *kbuf)
+{
+   return __kexec_add_buffer(kbuf);
+}
+
  /* Calculate and store the digest of segments */
  static int kexec_calculate_store_digests(struct kimage *image)
  {



Christophe