Re: INFO: task hung in do_read_cache_page (3)

2020-05-10 Thread syzbot
syzbot has found a reproducer for the following crash on:

HEAD commit:e99332e7 gcc-10: mark more functions __init to avoid secti..
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=1344fb1410
kernel config:  https://syzkaller.appspot.com/x/.config?x=8a96cf498e199d8b
dashboard link: https://syzkaller.appspot.com/bug?extid=518c54e255b5031adde4
compiler:   gcc (GCC) 9.0.0 20181231 (experimental)
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=146e45ec10
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=16a410

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+518c54e255b5031adde4@syzkaller.appspotmail.com

INFO: task syz-executor928:7064 blocked for more than 143 seconds.
  Not tainted 5.7.0-rc4-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
syz-executor928 D26720  7064   7053 0x4004
Call Trace:
 schedule+0xd0/0x2a0 kernel/sched/core.c:4158
 io_schedule+0x17/0x60 kernel/sched/core.c:5801
 wait_on_page_bit_common mm/filemap.c:1153 [inline]
 wait_on_page_bit mm/filemap.c:1202 [inline]
 wait_on_page_locked include/linux/pagemap.h:528 [inline]
 do_read_cache_page+0x648/0x1810 mm/filemap.c:2814
 read_mapping_page include/linux/pagemap.h:397 [inline]
 read_part_sector+0xf6/0x600 block/partitions/core.c:643
 adfspart_check_ICS+0x9d/0xc80 block/partitions/acorn.c:360
 check_partition block/partitions/core.c:140 [inline]
 blk_add_partitions+0x474/0xe50 block/partitions/core.c:571
 bdev_disk_changed+0x1fb/0x380 fs/block_dev.c:1543
 __blkdev_get+0x130c/0x1530 fs/block_dev.c:1681
 blkdev_get+0x41/0x2b0 fs/block_dev.c:1748
 blkdev_open+0x21d/0x2b0 fs/block_dev.c:1887
 do_dentry_open+0x4ba/0x1290 fs/open.c:797
 do_open fs/namei.c:3229 [inline]
 path_openat+0x1e59/0x27d0 fs/namei.c:3346
 do_filp_open+0x192/0x260 fs/namei.c:3373
 do_sys_openat2+0x585/0x7d0 fs/open.c:1148
 do_sys_open+0xc3/0x140 fs/open.c:1164
 do_syscall_64+0xf6/0x7d0 arch/x86/entry/common.c:295
 entry_SYSCALL_64_after_hwframe+0x49/0xb3
RIP: 0033:0x405a71
Code: Bad RIP value.
RSP: 002b:7f26eda2b830 EFLAGS: 0293 ORIG_RAX: 0002
RAX: ffda RBX: 6667 RCX: 00405a71
RDX:  RSI:  RDI: 7f26eda2b8d0
RBP: 006dbc40 R08: 000f R09: 7f26eda2c700
R10: 7f26eda2c9d0 R11: 0293 R12: 006dbc4c
R13: 7ffcaf21940f R14: 7f26eda2c9c0 R15: 20c49ba5e353f7cf

Showing all locks held in the system:
1 lock held by khungtaskd/1139:
 #0: 899bebc0 (rcu_read_lock){}-{1:2}, at: 
debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:5754
2 locks held by in:imklog/6726:
 #0: 88809eddc3f0 (&f->f_pos_lock){+.+.}-{3:3}, at: __fdget_pos+0xe9/0x100 
fs/file.c:826
 #1: 8880aa001818 (>list_lock){-.-.}-{2:2}, at: syslog_print 
kernel/printk/printk.c:1392 [inline]
 #1: 8880aa001818 (>list_lock){-.-.}-{2:2}, at: do_syslog 
kernel/printk/printk.c:1557 [inline]
 #1: 8880aa001818 (>list_lock){-.-.}-{2:2}, at: 
do_syslog+0x632/0x16d0 kernel/printk/printk.c:1531
1 lock held by syz-executor928/7064:
 #0: 888088cbb380 (&bdev->bd_mutex){+.+.}-{3:3}, at: 
__blkdev_get+0x179/0x1530 fs/block_dev.c:1600

=

NMI backtrace for cpu 1
CPU: 1 PID: 1139 Comm: khungtaskd Not tainted 5.7.0-rc4-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x188/0x20d lib/dump_stack.c:118
 nmi_cpu_backtrace.cold+0x70/0xb1 lib/nmi_backtrace.c:101
 nmi_trigger_cpumask_backtrace+0x231/0x27e lib/nmi_backtrace.c:62
 trigger_all_cpu_backtrace include/linux/nmi.h:146 [inline]
 check_hung_uninterruptible_tasks kernel/hung_task.c:205 [inline]
 watchdog+0xa8c/0x1010 kernel/hung_task.c:289
 kthread+0x388/0x470 kernel/kthread.c:268
 ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352
Sending NMI from CPU 1 to CPUs 0:
NMI backtrace for cpu 0 skipped: idling at native_safe_halt+0xe/0x10 
arch/x86/include/asm/irqflags.h:60



[PATCH v3 0/2] serial: lantiq: Make driver modular & console configurable

2020-05-10 Thread Rahul Tanwar
Patch 1 makes the Lantiq UART driver's use as console selectable/configurable.
Patch 2 adds changes so the driver can be compiled as a module.

v3:
- Fix a section mismatch warning. Reported-by: kbuild test robot.

v2:
- Split into two patches: one for console configuration and one for the
  modular driver. (Greg KH)

v1:
- Initial version.


Rahul Tanwar (2):
  serial: lantiq: Make UART's use as console selectable
  serial: lantiq: Make driver modular

 drivers/tty/serial/Kconfig  | 13 +++--
 drivers/tty/serial/lantiq.c | 40 +++-
 2 files changed, 46 insertions(+), 7 deletions(-)

-- 
2.11.0



[PATCH v3 1/2] serial: lantiq: Make UART's use as console selectable

2020-05-10 Thread Rahul Tanwar
The Lantiq UART driver can be used for the system console. Add changes to
make this driver's use as console selectable/configurable.

Signed-off-by: Rahul Tanwar 
---
 drivers/tty/serial/Kconfig  |  9 -
 drivers/tty/serial/lantiq.c | 11 ++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 4b0a7b98f8c7..bb4009a1135f 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1037,10 +1037,17 @@ config SERIAL_LANTIQ
bool "Lantiq serial driver"
depends on (LANTIQ || X86) || COMPILE_TEST
select SERIAL_CORE
+   help
+ Support for UART on Lantiq and Intel SoCs.
+
+config SERIAL_LANTIQ_CONSOLE
+   bool "Console on Lantiq UART"
+   depends on SERIAL_LANTIQ=y
select SERIAL_CORE_CONSOLE
select SERIAL_EARLYCON
help
- Support for console and UART on Lantiq SoCs.
+ Select this option if you would like to use a Lantiq UART as the
+ system console.
 
 config SERIAL_QE
tristate "Freescale QUICC Engine serial port support"
diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c
index c5e46ff972e4..d3b62a1be6ad 100644
--- a/drivers/tty/serial/lantiq.c
+++ b/drivers/tty/serial/lantiq.c
@@ -597,6 +597,7 @@ static const struct uart_ops lqasc_pops = {
.verify_port =  lqasc_verify_port,
 };
 
+#ifdef CONFIG_SERIAL_LANTIQ_CONSOLE
 static void
 lqasc_console_putchar(struct uart_port *port, int ch)
 {
@@ -705,6 +706,14 @@ lqasc_serial_early_console_setup(struct earlycon_device 
*device,
 OF_EARLYCON_DECLARE(lantiq, "lantiq,asc", lqasc_serial_early_console_setup);
 OF_EARLYCON_DECLARE(lantiq, "intel,lgm-asc", lqasc_serial_early_console_setup);
 
+#define LANTIQ_SERIAL_CONSOLE  (&lqasc_console)
+
+#else
+
+#define LANTIQ_SERIAL_CONSOLE  NULL
+
+#endif /* CONFIG_SERIAL_LANTIQ_CONSOLE */
+
 static struct uart_driver lqasc_reg = {
.owner =THIS_MODULE,
.driver_name =  DRVNAME,
@@ -712,7 +721,7 @@ static struct uart_driver lqasc_reg = {
.major =0,
.minor =0,
.nr =   MAXPORTS,
-   .cons = &lqasc_console,
+   .cons = LANTIQ_SERIAL_CONSOLE,
 };
 
 static int fetch_irq_lantiq(struct device *dev, struct ltq_uart_port *ltq_port)
-- 
2.11.0



[PATCH v3 2/2] serial: lantiq: Make driver modular

2020-05-10 Thread Rahul Tanwar
Add changes so the Lantiq serial driver can be compiled as a module.

Signed-off-by: Rahul Tanwar 
---
 drivers/tty/serial/Kconfig  |  4 +++-
 drivers/tty/serial/lantiq.c | 29 +
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index bb4009a1135f..c0681da66653 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1034,11 +1034,13 @@ config SERIAL_SIFIVE_CONSOLE
  boot time.)
 
 config SERIAL_LANTIQ
-   bool "Lantiq serial driver"
+   tristate "Lantiq serial driver"
depends on (LANTIQ || X86) || COMPILE_TEST
select SERIAL_CORE
help
  Support for UART on Lantiq and Intel SoCs.
+ To compile this driver as a module, select M here. The
+ module will be called lantiq.
 
 config SERIAL_LANTIQ_CONSOLE
bool "Console on Lantiq UART"
diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c
index d3b62a1be6ad..62813e421f12 100644
--- a/drivers/tty/serial/lantiq.c
+++ b/drivers/tty/serial/lantiq.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -823,8 +824,7 @@ static void free_irq_intel(struct uart_port *port)
free_irq(ltq_port->common_irq, port);
 }
 
-static int __init
-lqasc_probe(struct platform_device *pdev)
+static int lqasc_probe(struct platform_device *pdev)
 {
struct device_node *node = pdev->dev.of_node;
struct ltq_uart_port *ltq_port;
@@ -908,6 +908,13 @@ lqasc_probe(struct platform_device *pdev)
return ret;
 }
 
+static int lqasc_remove(struct platform_device *pdev)
+{
+   struct uart_port *port = platform_get_drvdata(pdev);
+
+   return uart_remove_one_port(&lqasc_reg, port);
+}
+
 static const struct ltq_soc_data soc_data_lantiq = {
.fetch_irq = fetch_irq_lantiq,
.request_irq = request_irq_lantiq,
@@ -925,8 +932,11 @@ static const struct of_device_id ltq_asc_match[] = {
{ .compatible = "intel,lgm-asc", .data = &soc_data_intel },
{},
 };
+MODULE_DEVICE_TABLE(of, ltq_asc_match);
 
 static struct platform_driver lqasc_driver = {
+   .probe  = lqasc_probe,
+   .remove = lqasc_remove,
.driver = {
.name   = DRVNAME,
.of_match_table = ltq_asc_match,
@@ -942,10 +952,21 @@ init_lqasc(void)
if (ret != 0)
return ret;
 
-   ret = platform_driver_probe(&lqasc_driver, lqasc_probe);
+   ret = platform_driver_register(&lqasc_driver);
if (ret != 0)
uart_unregister_driver(&lqasc_reg);
 
return ret;
 }
-device_initcall(init_lqasc);
+
+static void __exit exit_lqasc(void)
+{
+   platform_driver_unregister(&lqasc_driver);
+   uart_unregister_driver(&lqasc_reg);
+}
+
+module_init(init_lqasc);
+module_exit(exit_lqasc);
+
+MODULE_DESCRIPTION("Serial driver for Lantiq & Intel gateway SoCs");
+MODULE_LICENSE("GPL v2");
-- 
2.11.0



Re: [net-next PATCH v3 4/5] net: phy: Introduce fwnode_get_phy_id()

2020-05-10 Thread Calvin Johnson
Thanks Andrew and Jeremy for the detailed discussion!

On Fri, May 08, 2020 at 08:13:01PM +0200, Andrew Lunn wrote:
> > > It does have a numeric version defined for EISA types. OTOH I suspect that
> > > you're right. If there were a "PHY\VEN_ID_" definition, it may 
> > > not
> > > be ideal to parse it. Instead the normal ACPI model of exactly matching 
> > > the
> > > complete string in the phy driver might be more appropriate.
> > 
> > IMO, it should be fine to parse the string to extract the phy_id. Is there 
> > any
> > reason why we cannot do this?
> 
> Some background here, about what the PHY core does.
> 
> PHYs have two ID registers. These contain the vendor, device, and often
> revision of the PHY. Only the vendor part is standardised, vendors can
> decide how to use the device part, but it is common for the lowest
> nibble to be revision. The core will read these ID registers, and then
> go through all the PHY drivers registered and ask them if they support
> this ID. The drivers provide a table of IDs and masks. The mask is
> applied, and then if the ID matches, the driver is used. The mask
> allows the revision to be ignored, etc.
> 
> There is a very small number of devices where the vendor messed up,
> and did not put valid contents in the ID registers. In such cases, we
> can read the IDs from device tree. These are then used in exactly the
> same way as if they were read from the device.
> 
> If you want the ACPI model to be used, an exact match on the string,
> you are going to have to modify the core and the drivers. They
> currently don't have any string, and have no idea about different
> revisions which are out in the wild.
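
For illustration, here is a minimal sketch of the ID/mask matching Andrew
describes, i.e. how a PHY driver advertises which IDs it handles. The numeric
IDs below are hypothetical; they only stand in for the OUI-derived values read
from PHY registers 2 and 3:

/*
 * Hedged sketch of a PHY driver ID table.  The core reads MII_PHYSID1 and
 * MII_PHYSID2, applies each driver's phy_id_mask, and binds the first
 * driver whose masked phy_id matches.  Masking out the low nibble lets one
 * entry cover all silicon revisions.
 */
#include <linux/module.h>
#include <linux/phy.h>

static struct phy_driver example_phy_driver[] = {
	{
		.phy_id		= 0x00112230,	/* hypothetical vendor/device ID */
		.phy_id_mask	= 0xfffffff0,	/* ignore the revision nibble */
		.name		= "Example Gigabit PHY",
		/* .config_init, .config_aneg, ... would follow here */
	},
};
module_phy_driver(example_phy_driver);

static const struct mdio_device_id example_phy_tbl[] = {
	{ 0x00112230, 0xfffffff0 },
	{ }
};
MODULE_DEVICE_TABLE(mdio, example_phy_tbl);
MODULE_LICENSE("GPL");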

I don't think ACPI mandates that the OS driver use an exact string match and not
parse the string.

First of all, I would suggest that we use the "compatible" property instead of _CID.
I'm not sure of a reason why we cannot. This will simplify the implementation of the
fwnode APIs.

I've already pointed out a couple of ASL files in Tianocore where it is already
used. One example:
https://github.com/tianocore/edk2-platforms/blob/master/Silicon/Marvell/Armada7k8k/AcpiTables/Armada80x0McBin/Dsdt.asl#L280

Even if we use _CID, I'm not sure we are prohibited from extracting characters
(phy_id) from it.
If we decide to use _CID, then we need to define somewhere and standardize
exactly how we are going to use it. I'm not sure where we can do this.

> 
> > > Similarly to how I suspect the next patch's use of "compatible" isn't 
> > > ideal
> > > either, because whether a device is c45 or not, should tend to be fixed 
> > > to a
> > > particular vendor/device implementation and not a firmware provided
> > > property.
> 
> Not exactly true. It is the combination of can the bus master do C45
> and can the device do C45. Unfortunately, we have no knowledge of the
> bus masters capabilities, if it can do C45. And many MDIO drivers will
> do a C22 transaction when asked to perform a C45 transaction. All new
> submissions for MDIO drivers i ask for EOPNOTSUPP to be returned if
> C45 is not supported. But we cannot rely on that. Too much history.
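
A minimal sketch of that EOPNOTSUPP convention for an MDIO bus driver's read
callback (the register access itself is a placeholder, not a real device):

#include <linux/mdio.h>
#include <linux/phy.h>

static int example_mdio_read(struct mii_bus *bus, int addr, int regnum)
{
	/* Reject Clause 45 accesses instead of silently doing Clause 22. */
	if (regnum & MII_ADDR_C45)
		return -EOPNOTSUPP;

	/* ... perform the real Clause 22 read here ... */
	return 0xffff;		/* placeholder value */
}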

Makes sense to me.

> > 
> > I tend to agree with you on this. Even for DT, ideal case, IMO should be:
> > 
> > 1) mdiobus_scan scans the mdiobus for c22 devices by reading phy id from
> > registers 2 and 3
> > 2) if not found scan for c45 devices <= looks like this is missing in Linux
> > 3) look for phy_id from compatible string.
> 
> It is somewhat more complex, in that there are a small number of
> devices which will respond to both C22 and C45. Generally, you want to
> use C45 if supported. So you would want to do the C45 scan first. But
> then the earlier problem comes to play, you have no idea if the bus
> master actually correctly supports C45.
> 
> Given the issues, we assume all bus masters and PHY devices are C22
> unless DT says the bus master and PHY combination is compatible with
> C45.

Makes sense to me.


[PATCH] mailbox: no error log in mbox_client_txdone() for tx done by irq

2020-05-10 Thread joe_zhuchg
From: Joe Zhu 

The client does not know and does not care how the controller implements TX done.
mbox_client_txdone() may therefore be called even when the controller uses the IRQ method.

Signed-off-by: Joe Zhu 
---
 drivers/mailbox/mailbox.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
index 0b821a5b2db8..116124adf188 100644
--- a/drivers/mailbox/mailbox.c
+++ b/drivers/mailbox/mailbox.c
@@ -189,7 +189,9 @@ EXPORT_SYMBOL_GPL(mbox_chan_txdone);
 void mbox_client_txdone(struct mbox_chan *chan, int r)
 {
if (unlikely(!(chan->txdone_method & TXDONE_BY_ACK))) {
-   dev_err(chan->mbox->dev, "Client can't run the TX ticker\n");
+   if (unlikely(!(chan->txdone_method & TXDONE_BY_IRQ)))
+   dev_err(chan->mbox->dev,
+  "Client can't run the TX ticker\n");
return;
}
 
-- 
2.17.1



[PATCH v1 net-next 0/3] net: dsa: felix: tc taprio and CBS offload support

2020-05-10 Thread Xiaoliang Yang
This patch series supports tc taprio and CBS hardware offload according
to IEEE 802.1Qbv and IEEE 802.1Qav on VSC9959.

Xiaoliang Yang (3):
  net: dsa: felix: qos classified based on pcp
  net: dsa: felix: Configure Time-Aware Scheduler via taprio offload
  net: dsa: felix: add support Credit Based Shaper(CBS) for hardware
offload

 drivers/net/dsa/ocelot/felix.c |  16 +-
 drivers/net/dsa/ocelot/felix.h |   6 +
 drivers/net/dsa/ocelot/felix_vsc9959.c | 215 -
 3 files changed, 235 insertions(+), 2 deletions(-)

-- 
2.17.1



[PATCH v1 net-next 2/3] net: dsa: felix: Configure Time-Aware Scheduler via taprio offload

2020-05-10 Thread Xiaoliang Yang
The Ocelot VSC9959 switch supports time-based egress shaping in hardware
according to IEEE 802.1Qbv. This patch adds support for TAS configuration
on the egress ports of the VSC9959 switch.

The felix driver is an instance of the Ocelot family, with a DSA front-end.
The patch uses the tc taprio hardware offload to set up the TAS function in
the felix driver.

Signed-off-by: Xiaoliang Yang 
Reviewed-by: Vladimir Oltean 
---
 drivers/net/dsa/ocelot/felix.c |  10 +-
 drivers/net/dsa/ocelot/felix.h |   5 +
 drivers/net/dsa/ocelot/felix_vsc9959.c | 140 +
 3 files changed, 154 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 0afdc6fc3f57..edd693d59b8e 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -237,6 +237,10 @@ static void felix_phylink_mac_config(struct dsa_switch 
*ds, int port,
 
if (felix->info->pcs_init)
felix->info->pcs_init(ocelot, port, link_an_mode, state);
+
+   if (felix->info->port_sched_speed_set)
+   felix->info->port_sched_speed_set(ocelot, port,
+ state->speed);
 }
 
 static void felix_phylink_mac_an_restart(struct dsa_switch *ds, int port)
@@ -710,7 +714,7 @@ static void felix_port_policer_del(struct dsa_switch *ds, 
int port)
ocelot_port_policer_del(ocelot, port);
 }
 
-static const struct dsa_switch_ops felix_switch_ops = {
+static struct dsa_switch_ops felix_switch_ops = {
.get_tag_protocol   = felix_get_tag_protocol,
.setup  = felix_setup,
.teardown   = felix_teardown,
@@ -827,6 +831,9 @@ static int felix_pci_probe(struct pci_dev *pdev,
 
ocelot->ptp = 1;
 
+   if (felix->info->port_setup_tc)
+   felix_switch_ops.port_setup_tc = felix->info->port_setup_tc;
+
ds = kzalloc(sizeof(struct dsa_switch), GFP_KERNEL);
if (!ds) {
err = -ENOMEM;
@@ -836,6 +843,7 @@ static int felix_pci_probe(struct pci_dev *pdev,
 
ds->dev = &pdev->dev;
ds->num_ports = felix->info->num_ports;
+   ds->num_tx_queues = felix->info->num_tx_queues;
ds->ops = &felix_switch_ops;
ds->priv = ocelot;
felix->ds = ds;
diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h
index 0d4ec34309c7..24b13526fcf2 100644
--- a/drivers/net/dsa/ocelot/felix.h
+++ b/drivers/net/dsa/ocelot/felix.h
@@ -20,6 +20,7 @@ struct felix_info {
const struct ocelot_stat_layout *stats_layout;
unsigned intnum_stats;
int num_ports;
+   int num_tx_queues;
struct vcap_field   *vcap_is2_keys;
struct vcap_field   *vcap_is2_actions;
const struct vcap_props *vcap;
@@ -36,6 +37,10 @@ struct felix_info {
int (*prevalidate_phy_mode)(struct ocelot *ocelot, int port,
phy_interface_t phy_mode);
void(*port_qos_map_init)(struct ocelot *ocelot, int port);
+   int (*port_setup_tc)(struct dsa_switch *ds, int port,
+enum tc_setup_type type, void *type_data);
+   void(*port_sched_speed_set)(struct ocelot *ocelot, int port,
+   u32 speed);
 };
 
 extern struct felix_info   felix_info_vsc9959;
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c 
b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 5c931fb3e4cd..ccbd875c7a47 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -3,10 +3,13 @@
  * Copyright 2018-2019 NXP Semiconductors
  */
 #include 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include "felix.h"
@@ -28,6 +31,8 @@
 #define USXGMII_LPA_DUPLEX(lpa)(((lpa) & GENMASK(12, 12)) >> 
12)
 #define USXGMII_LPA_SPEED(lpa) (((lpa) & GENMASK(11, 9)) >> 9)
 
+#define VSC9959_TAS_GCL_ENTRY_MAX  63
+
 enum usxgmii_speed {
USXGMII_SPEED_10= 0,
USXGMII_SPEED_100   = 1,
@@ -1231,6 +1236,138 @@ static void vsc9959_port_qos_map_init(struct ocelot 
*ocelot, int port)
}
 }
 
+static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
+   u32 speed)
+{
+   ocelot_rmw_rix(ocelot,
+  QSYS_TAG_CONFIG_LINK_SPEED(speed),
+  QSYS_TAG_CONFIG_LINK_SPEED_M,
+  QSYS_TAG_CONFIG, port);
+}
+
+static void vsc9959_new_base_time(struct ocelot *ocelot, ktime_t base_time,
+ u64 cycle_time,
+ struct timespec64 *new_base_ts)
+{
+   struct timespec64 ts;
+   ktime_t new_base_time;
+   ktime_t current_time;
+
+   ocelot_ptp_gettime64(&ocelot->ptp_info, &ts);
+   current_time = 

[PATCH v1 net-next 3/3] net: dsa: felix: add support Credit Based Shaper(CBS) for hardware offload

2020-05-10 Thread Xiaoliang Yang
The VSC9959 hardware supports the Credit Based Shaper (CBS), which is part
of IEEE 802.1Qav. This patch supports sch_cbs configuration for VSC9959.

Signed-off-by: Xiaoliang Yang 
---
 drivers/net/dsa/ocelot/felix_vsc9959.c | 52 +-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c 
b/drivers/net/dsa/ocelot/felix_vsc9959.c
index ccbd875c7a47..d8d1657ee8ba 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -208,7 +208,7 @@ static const u32 vsc9959_qsys_regmap[] = {
REG(QSYS_QMAXSDU_CFG_6, 0x00f62c),
REG(QSYS_QMAXSDU_CFG_7, 0x00f648),
REG(QSYS_PREEMPTION_CFG,0x00f664),
-   REG_RESERVED(QSYS_CIR_CFG),
+   REG(QSYS_CIR_CFG,   0x00),
REG(QSYS_EIR_CFG,   0x04),
REG(QSYS_SE_CFG,0x08),
REG(QSYS_SE_DWRR_CFG,   0x0c),
@@ -1354,6 +1354,54 @@ static int vsc9959_qos_port_tas_set(struct ocelot 
*ocelot, int port,
return ret;
 }
 
+int vsc9959_qos_port_cbs_set(struct dsa_switch *ds, int port,
+struct tc_cbs_qopt_offload *cbs_qopt)
+{
+   struct ocelot *ocelot = ds->priv;
+   int port_ix = port * 8 + cbs_qopt->queue;
+   u32 cbs = 0;
+   u32 cir = 0;
+
+   if (cbs_qopt->queue >= ds->num_tx_queues)
+   return -EINVAL;
+
+   if (!cbs_qopt->enable) {
+   ocelot_write_gix(ocelot, QSYS_CIR_CFG_CIR_RATE(0) |
+QSYS_CIR_CFG_CIR_BURST(0),
+QSYS_CIR_CFG, port_ix);
+
+   ocelot_rmw_gix(ocelot, 0, QSYS_SE_CFG_SE_AVB_ENA,
+  QSYS_SE_CFG, port_ix);
+
+   return 0;
+   }
+
+   /* Rate unit is 100 kbps */
+   cir = DIV_ROUND_UP(cbs_qopt->idleslope, 100);
+   cir = (cir ? cir : 1);
+   cir = min_t(u32, GENMASK(14, 0), cir);
+   /* Burst unit is 4kB */
+   cbs = DIV_ROUND_UP(cbs_qopt->hicredit, 4096);
+   /* Avoid using zero burst size */
+   cbs = (cbs ? cbs : 1);
+   cbs = min_t(u32, GENMASK(5, 0), cbs);
+   ocelot_write_gix(ocelot,
+QSYS_CIR_CFG_CIR_RATE(cir) |
+QSYS_CIR_CFG_CIR_BURST(cbs),
+QSYS_CIR_CFG,
+port_ix);
+
+   ocelot_rmw_gix(ocelot,
+  QSYS_SE_CFG_SE_FRM_MODE(0) |
+  QSYS_SE_CFG_SE_AVB_ENA,
+  QSYS_SE_CFG_SE_AVB_ENA |
+  QSYS_SE_CFG_SE_FRM_MODE_M,
+  QSYS_SE_CFG,
+  port_ix);
+
+   return 0;
+}
+
 static int vsc9959_port_setup_tc(struct dsa_switch *ds, int port,
 enum tc_setup_type type,
 void *type_data)
@@ -1363,6 +1411,8 @@ static int vsc9959_port_setup_tc(struct dsa_switch *ds, 
int port,
switch (type) {
case TC_SETUP_QDISC_TAPRIO:
return vsc9959_qos_port_tas_set(ocelot, port, type_data);
+   case TC_SETUP_QDISC_CBS:
+   return vsc9959_qos_port_cbs_set(ds, port, type_data);
default:
return -EOPNOTSUPP;
}
-- 
2.17.1
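
For reference, the unit conversions performed by vsc9959_qos_port_cbs_set()
above, worked through on illustrative numbers (assuming, as the comments in
the patch indicate, that idleslope arrives in kbit/s and hicredit in bytes):

#include <linux/kernel.h>

static void cbs_unit_example(void)
{
	/* Illustrative numbers only: 98 Mbit/s idleslope, 12 KiB hicredit. */
	u32 idleslope = 98000;			/* kbit/s */
	u32 hicredit  = 12288;			/* bytes  */
	u32 cir, cbs;

	cir = DIV_ROUND_UP(idleslope, 100);	/* = 980, in 100 kbit/s units */
	cbs = DIV_ROUND_UP(hicredit, 4096);	/* = 3, in 4 KiB units        */

	/* The driver then clamps cir to GENMASK(14, 0) = 32767 and cbs to
	 * GENMASK(5, 0) = 63, the width of the hardware register fields.
	 */
}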



[PATCH v1 net-next 1/3] net: dsa: felix: qos classified based on pcp

2020-05-10 Thread Xiaoliang Yang
Set the default QoS classification based on the PCP and DEI fields of the
VLAN tag; after that, frames can be classified to different QoS classes
based on the PCP tag. If there is no VLAN tag, or VLAN is ignored, the
port default QoS is used.

Signed-off-by: Xiaoliang Yang 
---
 drivers/net/dsa/ocelot/felix.c |  6 ++
 drivers/net/dsa/ocelot/felix.h |  1 +
 drivers/net/dsa/ocelot/felix_vsc9959.c | 23 +++
 3 files changed, 30 insertions(+)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index a2dfd73f8a1a..0afdc6fc3f57 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -547,6 +547,12 @@ static int felix_setup(struct dsa_switch *ds)
ocelot_configure_cpu(ocelot, port,
 OCELOT_TAG_PREFIX_NONE,
 OCELOT_TAG_PREFIX_LONG);
+
+   /* Set the default QoS Classification based on PCP and DEI
+* bits of vlan tag.
+*/
+   if (felix->info->port_qos_map_init)
+   felix->info->port_qos_map_init(ocelot, port);
}
 
/* Include the CPU port module in the forwarding mask for unknown
diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h
index b94386fa8d63..0d4ec34309c7 100644
--- a/drivers/net/dsa/ocelot/felix.h
+++ b/drivers/net/dsa/ocelot/felix.h
@@ -35,6 +35,7 @@ struct felix_info {
  struct phylink_link_state *state);
int (*prevalidate_phy_mode)(struct ocelot *ocelot, int port,
phy_interface_t phy_mode);
+   void(*port_qos_map_init)(struct ocelot *ocelot, int port);
 };
 
 extern struct felix_info   felix_info_vsc9959;
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c 
b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 1c56568d5aca..5c931fb3e4cd 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -4,6 +4,7 @@
  */
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1209,6 +1210,27 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot)
mdiobus_unregister(felix->imdio);
 }
 
+static void vsc9959_port_qos_map_init(struct ocelot *ocelot, int port)
+{
+   int i;
+
+   ocelot_rmw_gix(ocelot,
+  ANA_PORT_QOS_CFG_QOS_PCP_ENA,
+  ANA_PORT_QOS_CFG_QOS_PCP_ENA,
+  ANA_PORT_QOS_CFG,
+  port);
+
+   for (i = 0; i < FELIX_NUM_TC * 2; i++) {
+   ocelot_rmw_ix(ocelot,
+ (ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL & i) |
+ ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL(i),
+ ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL |
+ ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL_M,
+ ANA_PORT_PCP_DEI_MAP,
+ port, i);
+   }
+}
+
 struct felix_info felix_info_vsc9959 = {
.target_io_res  = vsc9959_target_io_res,
.port_io_res= vsc9959_port_io_res,
@@ -1232,4 +1254,5 @@ struct felix_info felix_info_vsc9959 = {
.pcs_an_restart = vsc9959_pcs_an_restart,
.pcs_link_state = vsc9959_pcs_link_state,
.prevalidate_phy_mode   = vsc9959_prevalidate_phy_mode,
+   .port_qos_map_init  = vsc9959_port_qos_map_init,
 };
-- 
2.17.1



Re: [PATCH] dmaengine: at_hdmac: Replace zero-length array with flexible-array

2020-05-10 Thread Ludovic Desroches
On Thu, May 07, 2020 at 02:00:38PM -0500, Gustavo A. R. Silva wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the 
> content is safe
> 
> The current codebase makes use of the zero-length array language
> extension to the C90 standard, but the preferred mechanism to declare
> variable-length types such as these ones is a flexible array member[1][2],
> introduced in C99:
> 
> struct foo {
> int stuff;
> struct boo array[];
> };
> 
> By making use of the mechanism above, we will get a compiler warning
> in case the flexible array does not occur last in the structure, which
> will help us prevent some kind of undefined behavior bugs from being
> inadvertently introduced[3] to the codebase from now on.
> 
> Also, notice that, dynamic memory allocations won't be affected by
> this change:
> 
> "Flexible array members have incomplete type, and so the sizeof operator
> may not be applied. As a quirk of the original implementation of
> zero-length arrays, sizeof evaluates to zero."[1]
> 
> sizeof(flexible-array-member) triggers a warning because flexible array
> members have incomplete type[1]. There are some instances of code in
> which the sizeof operator is being incorrectly/erroneously applied to
> zero-length arrays and the result is zero. Such instances may be hiding
> some bugs. So, this work (flexible-array member conversions) will also
> help to get completely rid of those sorts of issues.
> 
> This issue was found with the help of Coccinelle.
> 
> [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
> [2] https://github.com/KSPP/linux/issues/21
> [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")
> 
> Signed-off-by: Gustavo A. R. Silva 
Acked-by: Ludovic Desroches

Thanks
> ---
>  drivers/dma/at_hdmac_regs.h |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
> index 397692e937b3..80fc2fe8c77e 100644
> --- a/drivers/dma/at_hdmac_regs.h
> +++ b/drivers/dma/at_hdmac_regs.h
> @@ -331,7 +331,7 @@ struct at_dma {
> struct dma_pool *dma_desc_pool;
> struct dma_pool *memset_pool;
> /* AT THE END channels table */
> -   struct at_dma_chan  chan[0];
> +   struct at_dma_chan  chan[];
>  };
> 
>  #definedma_readl(atdma, name) \
> 
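
As a side illustration of why the flexible-array form is preferred, here is a
sketch of how a structure ending in a flexible array member is typically
allocated. The helper name and channel-count parameter are hypothetical and
not a quote of the at_hdmac driver; struct at_dma is assumed visible from
at_hdmac_regs.h as shown in the patch:

#include <linux/overflow.h>
#include <linux/slab.h>

/*
 * struct_size() computes sizeof(*atdma) + nr_channels * sizeof(atdma->chan[0])
 * with overflow checking.  With a flexible array member, an accidental
 * sizeof(atdma->chan) no longer silently evaluates to zero; it fails to
 * compile, which is exactly the class of bug the conversion guards against.
 */
static struct at_dma *example_at_dma_alloc(unsigned int nr_channels)
{
	struct at_dma *atdma;

	atdma = kzalloc(struct_size(atdma, chan, nr_channels), GFP_KERNEL);
	return atdma;
}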


Re: [PATCH] dmaengine: at_xdmac: Replace zero-length array with flexible-array

2020-05-10 Thread Ludovic Desroches
On Thu, May 07, 2020 at 02:00:46PM -0500, Gustavo A. R. Silva wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the 
> content is safe
> 
> The current codebase makes use of the zero-length array language
> extension to the C90 standard, but the preferred mechanism to declare
> variable-length types such as these ones is a flexible array member[1][2],
> introduced in C99:
> 
> struct foo {
> int stuff;
> struct boo array[];
> };
> 
> By making use of the mechanism above, we will get a compiler warning
> in case the flexible array does not occur last in the structure, which
> will help us prevent some kind of undefined behavior bugs from being
> inadvertently introduced[3] to the codebase from now on.
> 
> Also, notice that, dynamic memory allocations won't be affected by
> this change:
> 
> "Flexible array members have incomplete type, and so the sizeof operator
> may not be applied. As a quirk of the original implementation of
> zero-length arrays, sizeof evaluates to zero."[1]
> 
> sizeof(flexible-array-member) triggers a warning because flexible array
> members have incomplete type[1]. There are some instances of code in
> which the sizeof operator is being incorrectly/erroneously applied to
> zero-length arrays and the result is zero. Such instances may be hiding
> some bugs. So, this work (flexible-array member conversions) will also
> help to get completely rid of those sorts of issues.
> 
> This issue was found with the help of Coccinelle.
> 
> [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
> [2] https://github.com/KSPP/linux/issues/21
> [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")
> 
> Signed-off-by: Gustavo A. R. Silva 
Acked-by: Ludovic Desroches

Ludovic Desroches
> ---
>  drivers/dma/at_xdmac.c |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
> index bb0eaf38b594..fd92f048c491 100644
> --- a/drivers/dma/at_xdmac.c
> +++ b/drivers/dma/at_xdmac.c
> @@ -212,7 +212,7 @@ struct at_xdmac {
> struct clk  *clk;
> u32 save_gim;
> struct dma_pool *at_xdmac_desc_pool;
> -   struct at_xdmac_chanchan[0];
> +   struct at_xdmac_chanchan[];
>  };
> 
> 
> 


Re: [PATCH 00/15][RFC] Add regulator devfreq support to Panfrost

2020-05-10 Thread Tomeu Vizoso

On 5/10/20 6:55 PM, Clément Péron wrote:

Hi,

This series cleans up and adds regulator support to Panfrost devfreq.
This is mostly based on comments for the freshly introduced lima
devfreq.

We need to add regulator support because on Allwinner the GPU OPP
table defines both frequencies and voltages.

First patches [01-08] should not change the actual behavior
and introduce a proper panfrost_devfreq struct.

Patches after that are WIP and add regulator support.

However I got several issues: first, we need to avoid getting the regulator
if devfreq gets the regulator by itself, but as of today the OPP
framework only gets and doesn't enable the regulator...
A hack for now is to add regulator-always-on in the device tree.

Then when I enable devfreq I get several faults like the ones below.
I'm a total noob on GPU sched/faults and couldn't be helpful with this.


Do you know at which frequencies the faults happen? From what I can
see, it's just the GPU behaving erratically, and the CPU reading random
values from the GPU registers. Given the subject of this series, I guess
the GPU isn't getting enough power.


There could be a problem with the OPP table, might be a good idea to see 
what levels are problematic and try with a more conservative table.


Besides that, there could be a problem with clock frequency changes, or 
voltage changes. It may take some time for the final state to be stable, 
depending on how the regulation happens.


Thanks,

Tomeu





I got this running glmark2 on T720 (Allwinner H6) with Mesa 20.0.5.
# glmark2-es2-drm
===
 glmark2 2017.07
===
 OpenGL Information
 GL_VENDOR: Panfrost
 GL_RENDERER:   Mali T720 (Panfrost)
 GL_VERSION:OpenGL ES 2.0 Mesa 20.0.5
===

[   93.550063] panfrost 180.gpu: GPU Fault 0x0088 (UNKNOWN) at 
0x80117100
[   94.045401] panfrost 180.gpu: gpu sched timeout, js=0, config=0x3700, 
status=0x8, head=0x21d6c00, tail=0x21d6c00, sched_job=e3c2132f

[  328.871070] panfrost 180.gpu: Unhandled Page fault in AS0 at VA 
0x
[  328.871070] Reason: TODO
[  328.871070] raw fault status: 0xAA0003C2
[  328.871070] decoded fault status: SLAVE FAULT
[  328.871070] exception type 0xC2: TRANSLATION_FAULT_LEVEL2
[  328.871070] access type 0x3: WRITE
[  328.871070] source id 0xAA00
[  329.373327] panfrost 180.gpu: gpu sched timeout, js=1, config=0x3700, 
status=0x8, head=0xa1a4900, tail=0xa1a4900, sched_job=7ac31097
[  329.386527] panfrost 180.gpu: js fault, js=0, status=DATA_INVALID_FAULT, 
head=0xa1a4c00, tail=0xa1a4c00
[  329.396293] panfrost 180.gpu: gpu sched timeout, js=0, config=0x3700, 
status=0x58, head=0xa1a4c00, tail=0xa1a4c00, sched_job=04c90381
[  329.411521] panfrost 180.gpu: Unhandled Page fault in AS0 at VA 
0x
[  329.411521] Reason: TODO
[  329.411521] raw fault status: 0xAA0003C2
[  329.411521] decoded fault status: SLAVE FAULT
[  329.411521] exception type 0xC2: TRANSLATION_FAULT_LEVEL2
[  329.411521] access type 0x3: WRITE
[  329.411521] source id 0xAA00

Thanks for your reviews and help on this series,
Clement

Clément Péron (15):
   drm/panfrost: avoid static declaration
   drm/panfrost: clean headers in devfreq
   drm/panfrost: don't use pfdevfreq.busy_count to know if hw is idle
   drm/panfrost: introduce panfrost_devfreq struct
   drm/panfrost: use spinlock instead of atomic
   drm/panfrost: properly handle error in probe
   drm/panfrost: use device_property_present to check for OPP
   drm/panfrost: move devfreq_init()/fini() in device
   drm/panfrost: dynamically alloc regulators
   drm/panfrost: add regulators to devfreq
   drm/panfrost: set devfreq clock name
   arm64: defconfig: Enable devfreq cooling device
   arm64: dts: allwinner: h6: Add cooling map for GPU
   [DO NOT MERGE] arm64: dts: allwinner: h6: Add GPU OPP table
   [DO NOT MERGE] arm64: dts: allwinner: force GPU regulator to be always

  .../dts/allwinner/sun50i-h6-beelink-gs1.dts   |   1 +
  arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi  | 102 ++
  arch/arm64/configs/defconfig  |   1 +
  drivers/gpu/drm/panfrost/panfrost_devfreq.c   | 190 --
  drivers/gpu/drm/panfrost/panfrost_devfreq.h   |  32 ++-
  drivers/gpu/drm/panfrost/panfrost_device.c|  56 --
  drivers/gpu/drm/panfrost/panfrost_device.h|  14 +-
  drivers/gpu/drm/panfrost/panfrost_drv.c   |  15 +-
  drivers/gpu/drm/panfrost/panfrost_job.c   |  10 +-
  9 files changed, 310 insertions(+), 111 deletions(-)



[rcu:rcu/dev] BUILD SUCCESS 13e69ca01ce1621ce74248bda86cfad47fa5a0fa

2020-05-10 Thread kbuild test robot
-period sleeps to idle priority  
 0 0 0  
 0  825613e73129 rcu-tasks: Convert sleeps to idle priority   
 0 0 0  
 0  373b78add5ef fs/btrfs: Add cond_resched() for try_release_extent_mapping( 
-1 0 0
-136  13e69ca01ce1 locking/osq_lock: Annotate a data race in osq_lock   
   -93   -93   +83  
ae83d0b416db..13e69ca01ce1 (ALL COMMITS)  
==

elapsed time: 622m

configs tested: 106
configs skipped: 1

The following configs have been built successfully.
More configs may be tested in the coming days.

arm defconfig
arm  allyesconfig
arm  allmodconfig
arm   allnoconfig
arm64allyesconfig
arm64   defconfig
arm64allmodconfig
arm64 allnoconfig
sparcallyesconfig
m68k allyesconfig
parisc   allyesconfig
i386  allnoconfig
i386 allyesconfig
i386defconfig
i386  debian-10.3
ia64 allmodconfig
ia64defconfig
ia64  allnoconfig
ia64 allyesconfig
m68k allmodconfig
m68k  allnoconfig
m68k   sun3_defconfig
m68kdefconfig
nios2   defconfig
nios2allyesconfig
openriscdefconfig
c6x  allyesconfig
c6x   allnoconfig
openrisc allyesconfig
nds32   defconfig
nds32 allnoconfig
csky allyesconfig
cskydefconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
h8300allmodconfig
xtensa  defconfig
arc defconfig
arc  allyesconfig
sh   allmodconfig
shallnoconfig
microblazeallnoconfig
mips allyesconfig
mips  allnoconfig
mips allmodconfig
pariscallnoconfig
parisc  defconfig
parisc   allmodconfig
powerpc defconfig
powerpc  allyesconfig
powerpc  rhel-kconfig
powerpc  allmodconfig
powerpc   allnoconfig
i386 randconfig-a006-20200511
i386 randconfig-a005-20200511
i386 randconfig-a003-20200511
i386 randconfig-a001-20200511
i386 randconfig-a004-20200511
i386 randconfig-a002-20200511
i386 randconfig-a006-20200510
i386 randconfig-a005-20200510
i386 randconfig-a003-20200510
i386 randconfig-a001-20200510
i386 randconfig-a004-20200510
i386 randconfig-a002-20200510
x86_64   randconfig-a005-20200511
x86_64   randconfig-a003-20200511
x86_64   randconfig-a006-20200511
x86_64   randconfig-a004-20200511
x86_64   randconfig-a001-20200511
x86_64   randconfig-a002-20200511
x86_64   randconfig-a016-20200511
x86_64   randconfig-a012-20200511
x86_64   randconfig-a014-20200511
i386 randconfig-a012-20200511
i386 randconfig-a016-20200511
i386 randconfig-a014-20200511
i386 randconfig-a011-20200511
i386 randconfig-a013-20200511
i386 randconfig-a015-20200511
riscvallyesconfig
riscv allnoconfig
riscv   defconfig
riscvallmodconfig
s390 allyesconf

Re: [PATCH 11/15] maccess: remove strncpy_from_unsafe

2020-05-10 Thread Masami Hiramatsu
On Wed,  6 May 2020 08:22:19 +0200
Christoph Hellwig  wrote:

> All three callers really should try the explicit kernel and user
> copies instead.  One has already deprecated the somewhat dangerous
> either kernel or user address concept, the other two still need to
> follow up eventually.
> 
> Signed-off-by: Christoph Hellwig 

This looks good to me.

Reviewed-by: Masami Hiramatsu 

Thank you,

> ---
>  include/linux/uaccess.h |  1 -
>  kernel/trace/bpf_trace.c| 40 ++---
>  kernel/trace/trace_kprobe.c |  5 -
>  mm/maccess.c| 39 +---
>  4 files changed, 33 insertions(+), 52 deletions(-)
> 
> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> index f8c47395a92df..09d6e358883cc 100644
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -311,7 +311,6 @@ extern long probe_user_read(void *dst, const void __user 
> *src, size_t size);
>  extern long notrace probe_kernel_write(void *dst, const void *src, size_t 
> size);
>  extern long notrace probe_user_write(void __user *dst, const void *src, 
> size_t size);
>  
> -extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long 
> count);
>  extern long strncpy_from_kernel_unsafe(char *dst, const void *unsafe_addr,
>  long count);
>  extern long strncpy_from_user_unsafe(char *dst, const void __user 
> *unsafe_addr,
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index e4e202f433903..ffe841433caa1 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -229,9 +229,10 @@ bpf_probe_read_kernel_str_common(void *dst, u32 size, 
> const void *unsafe_ptr,
>   int ret = security_locked_down(LOCKDOWN_BPF_READ);
>  
>   if (unlikely(ret < 0))
> - goto out;
> + goto fail;
> +
>   /*
> -  * The strncpy_from_unsafe_*() call will likely not fill the entire
> +  * The strncpy_from_*_unsafe() call will likely not fill the entire
>* buffer, but that's okay in this circumstance as we're probing
>* arbitrary memory anyway similar to bpf_probe_read_*() and might
>* as well probe the stack. Thus, memory is explicitly cleared
> @@ -239,11 +240,18 @@ bpf_probe_read_kernel_str_common(void *dst, u32 size, 
> const void *unsafe_ptr,
>* code altogether don't copy garbage; otherwise length of string
>* is returned that can be used for bpf_perf_event_output() et al.
>*/
> - ret = compat ? strncpy_from_unsafe(dst, unsafe_ptr, size) :
> -   strncpy_from_kernel_unsafe(dst, unsafe_ptr, size);
> - if (unlikely(ret < 0))
> -out:
> - memset(dst, 0, size);
> + ret = strncpy_from_kernel_unsafe(dst, unsafe_ptr, size);
> + if (unlikely(ret < 0)) {
> + if (compat)
> + ret = strncpy_from_user_unsafe(dst,
> + (__force const void __user *)unsafe_ptr,
> + size);
> + if (ret < 0)
> + goto fail;
> + }
> + return 0;
> +fail:
> + memset(dst, 0, size);
>   return ret;
>  }
>  
> @@ -321,6 +329,17 @@ static const struct bpf_func_proto 
> *bpf_get_probe_write_proto(void)
>   return &bpf_probe_write_user_proto;
>  }
>  
> +#define BPF_STRNCPY_LEN 64
> +
> +static void bpf_strncpy(char *buf, long unsafe_addr)
> +{
> + buf[0] = 0;
> + if (strncpy_from_kernel_unsafe(buf, (void *)unsafe_addr,
> + BPF_STRNCPY_LEN))
> + strncpy_from_user_unsafe(buf, (void __user *)unsafe_addr,
> + BPF_STRNCPY_LEN);
> +}
> +
>  /*
>   * Only limited trace_printk() conversion specifiers allowed:
>   * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
> @@ -332,7 +351,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, 
> u64, arg1,
>   int mod[3] = {};
>   int fmt_cnt = 0;
>   u64 unsafe_addr;
> - char buf[64];
> + char buf[BPF_STRNCPY_LEN];
>   int i;
>  
>   /*
> @@ -387,10 +406,7 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, 
> u64, arg1,
>   arg3 = (long) buf;
>   break;
>   }
> - buf[0] = 0;
> - strncpy_from_unsafe(buf,
> - (void *) (long) unsafe_addr,
> - sizeof(buf));
> + bpf_strncpy(buf, unsafe_addr);
>   }
>   continue;
>   }
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index a7f43c7ec9880..525d12137325c 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -1238,7 +1238,10 @@ fetch_store_string(unsigned long addr, void *dest, 
> void 

Re: [PATCH 12/15] maccess: always use strict semantics for probe_kernel_read

2020-05-10 Thread Masami Hiramatsu
On Mon, 11 May 2020 14:05:36 +0900
Masami Hiramatsu  wrote:

> Hi Christoph,
> 
> At first, thank you for your work on cleaning up these functions!
> 
> On Wed,  6 May 2020 08:22:20 +0200
> Christoph Hellwig  wrote:
> 
> > Except for historical confusion in the kprobes/uprobes and bpf tracers
> > there is no good reason to ever allow user memory accesses from
> > probe_kernel_read.
> 
> Yes, thus now trace_kprobe supports "ustring" type for accessing
> user space memory. (If the address spaces are overlapped, we have
> no way to distinguish whether an address is kernel or user)
> 
> >  Make the tracers fall back to a probe_user_read
> > if the probe_kernel_read fails to keep the core API clean.
> 
> trace_kprobe doesn't need to fall back. The user must specify whether
> the probe should read from user space or kernel space. This is
> because it has fetch_store_string_user() and probe_mem_read_user()
> variants.

Hmm, wait, I changed my mind. The "string" type currently supports
kernel and user (on some archs, e.g. x86), there is no reason to
restrict it. So let's keep the behavior.
Only if users want to trace "user" data, they can use "ustring".

Reviewed-by: Masami Hiramatsu 

Thank you,


> 
> Thank you,
> 
> 
> > 
> > Signed-off-by: Christoph Hellwig 
> > ---
> >  arch/parisc/lib/memcpy.c|  3 +--
> >  arch/um/kernel/maccess.c|  3 +--
> >  arch/x86/mm/maccess.c   |  5 +
> >  include/linux/uaccess.h |  4 +---
> >  kernel/trace/bpf_trace.c| 20 +--
> >  kernel/trace/trace_kprobe.c | 11 ++-
> >  mm/maccess.c| 39 ++---
> >  7 files changed, 34 insertions(+), 51 deletions(-)
> > 
> > diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
> > index 5ef648bd33119..9fe662b3b5604 100644
> > --- a/arch/parisc/lib/memcpy.c
> > +++ b/arch/parisc/lib/memcpy.c
> > @@ -57,8 +57,7 @@ void * memcpy(void * dst,const void *src, size_t count)
> >  EXPORT_SYMBOL(raw_copy_in_user);
> >  EXPORT_SYMBOL(memcpy);
> >  
> > -bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t 
> > size,
> > -   bool strict)
> > +bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t 
> > size)
> >  {
> > if ((unsigned long)unsafe_src < PAGE_SIZE)
> > return false;
> > diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c
> > index 90a1bec923158..734f3d7e57c0f 100644
> > --- a/arch/um/kernel/maccess.c
> > +++ b/arch/um/kernel/maccess.c
> > @@ -7,8 +7,7 @@
> >  #include 
> >  #include 
> >  
> > -bool probe_kernel_read_allowed(void *dst, const void *src, size_t size,
> > -   bool strict)
> > +bool probe_kernel_read_allowed(void *dst, const void *src, size_t size)
> >  {
> > void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE);
> >  
> > diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
> > index 5c323ab187b27..a1bd81677aa72 100644
> > --- a/arch/x86/mm/maccess.c
> > +++ b/arch/x86/mm/maccess.c
> > @@ -26,10 +26,7 @@ static __always_inline bool invalid_probe_range(u64 
> > vaddr)
> >  }
> >  #endif
> >  
> > -bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t 
> > size,
> > -   bool strict)
> > +bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t 
> > size)
> >  {
> > -   if (!strict)
> > -   return true;
> > return !invalid_probe_range((unsigned long)unsafe_src);
> >  }
> > diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> > index 09d6e358883cc..99e2c2a41164a 100644
> > --- a/include/linux/uaccess.h
> > +++ b/include/linux/uaccess.h
> > @@ -301,11 +301,9 @@ copy_struct_from_user(void *dst, size_t ksize, const 
> > void __user *src,
> > return 0;
> >  }
> >  
> > -bool probe_kernel_read_allowed(void *dst, const void *unsafe_src,
> > -   size_t size, bool strict);
> > +bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t 
> > size);
> >  
> >  extern long probe_kernel_read(void *dst, const void *src, size_t size);
> > -extern long probe_kernel_read_strict(void *dst, const void *src, size_t 
> > size);
> >  extern long probe_user_read(void *dst, const void __user *src, size_t 
> > size);
> >  
> >  extern long notrace probe_kernel_write(void *dst, const void *src, size_t 
> > size);
> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > index ffe841433caa1..f694befe8ec9b 100644
> > --- a/kernel/trace/bpf_trace.c
> > +++ b/kernel/trace/bpf_trace.c
> > @@ -183,12 +183,20 @@ bpf_probe_read_kernel_common(void *dst, u32 size, 
> > const void *unsafe_ptr,
> > int ret = security_locked_down(LOCKDOWN_BPF_READ);
> >  
> > if (unlikely(ret < 0))
> > -   goto out;
> > -   ret = compat ? probe_kernel_read(dst, unsafe_ptr, size) :
> > - probe_kernel_read_strict(dst, unsafe_ptr, size);
> > -   if (unlikely(ret < 0))
> > -out:
> > -   memset(dst, 0, size);
> > +   goto fail;
> > +
> > +   ret = 

Re: [PATCH 00/14] Modularize schedutil

2020-05-10 Thread Viresh Kumar
On 08-05-20, 13:26, Peter Zijlstra wrote:
> At the very least there's that interactive governor that's really
> popular with Android. But IIRC there's a whole scala of home-brew
> governors and tweaks out there.

I removed interactive governor from Android long time back :)

-- 
viresh


Re: [PATCH 12/15] maccess: always use strict semantics for probe_kernel_read

2020-05-10 Thread Masami Hiramatsu
Hi Christoph,

At first, thank you for your work on cleaning up these functions!

On Wed,  6 May 2020 08:22:20 +0200
Christoph Hellwig  wrote:

> Except for historical confusion in the kprobes/uprobes and bpf tracers
> there is no good reason to ever allow user memory accesses from
> probe_kernel_read.

Yes, thus now trace_kprobe supports "ustring" type for accessing
user space memory. (If the address spaces are overlapped, we have
no way to distinguish whether an address is kernel or user)

>  Make the tracers fall back to a probe_user_read
> if the probe_kernel_read fails to keep the core API clean.

trace_kprobe doesn't need to fall back. The user must specify whether
the probe should read from user space or kernel space. This is
because it has fetch_store_string_user() and probe_mem_read_user()
variants.

Thank you,


> 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/parisc/lib/memcpy.c|  3 +--
>  arch/um/kernel/maccess.c|  3 +--
>  arch/x86/mm/maccess.c   |  5 +
>  include/linux/uaccess.h |  4 +---
>  kernel/trace/bpf_trace.c| 20 +--
>  kernel/trace/trace_kprobe.c | 11 ++-
>  mm/maccess.c| 39 ++---
>  7 files changed, 34 insertions(+), 51 deletions(-)
> 
> diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
> index 5ef648bd33119..9fe662b3b5604 100644
> --- a/arch/parisc/lib/memcpy.c
> +++ b/arch/parisc/lib/memcpy.c
> @@ -57,8 +57,7 @@ void * memcpy(void * dst,const void *src, size_t count)
>  EXPORT_SYMBOL(raw_copy_in_user);
>  EXPORT_SYMBOL(memcpy);
>  
> -bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t 
> size,
> - bool strict)
> +bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t 
> size)
>  {
>   if ((unsigned long)unsafe_src < PAGE_SIZE)
>   return false;
> diff --git a/arch/um/kernel/maccess.c b/arch/um/kernel/maccess.c
> index 90a1bec923158..734f3d7e57c0f 100644
> --- a/arch/um/kernel/maccess.c
> +++ b/arch/um/kernel/maccess.c
> @@ -7,8 +7,7 @@
>  #include 
>  #include 
>  
> -bool probe_kernel_read_allowed(void *dst, const void *src, size_t size,
> - bool strict)
> +bool probe_kernel_read_allowed(void *dst, const void *src, size_t size)
>  {
>   void *psrc = (void *)rounddown((unsigned long)src, PAGE_SIZE);
>  
> diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
> index 5c323ab187b27..a1bd81677aa72 100644
> --- a/arch/x86/mm/maccess.c
> +++ b/arch/x86/mm/maccess.c
> @@ -26,10 +26,7 @@ static __always_inline bool invalid_probe_range(u64 vaddr)
>  }
>  #endif
>  
> -bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t 
> size,
> - bool strict)
> +bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t 
> size)
>  {
> - if (!strict)
> - return true;
>   return !invalid_probe_range((unsigned long)unsafe_src);
>  }
> diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
> index 09d6e358883cc..99e2c2a41164a 100644
> --- a/include/linux/uaccess.h
> +++ b/include/linux/uaccess.h
> @@ -301,11 +301,9 @@ copy_struct_from_user(void *dst, size_t ksize, const 
> void __user *src,
>   return 0;
>  }
>  
> -bool probe_kernel_read_allowed(void *dst, const void *unsafe_src,
> - size_t size, bool strict);
> +bool probe_kernel_read_allowed(void *dst, const void *unsafe_src, size_t 
> size);
>  
>  extern long probe_kernel_read(void *dst, const void *src, size_t size);
> -extern long probe_kernel_read_strict(void *dst, const void *src, size_t 
> size);
>  extern long probe_user_read(void *dst, const void __user *src, size_t size);
>  
>  extern long notrace probe_kernel_write(void *dst, const void *src, size_t 
> size);
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index ffe841433caa1..f694befe8ec9b 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -183,12 +183,20 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const 
> void *unsafe_ptr,
>   int ret = security_locked_down(LOCKDOWN_BPF_READ);
>  
>   if (unlikely(ret < 0))
> - goto out;
> - ret = compat ? probe_kernel_read(dst, unsafe_ptr, size) :
> -   probe_kernel_read_strict(dst, unsafe_ptr, size);
> - if (unlikely(ret < 0))
> -out:
> - memset(dst, 0, size);
> + goto fail;
> +
> + ret = probe_kernel_read(dst, unsafe_ptr, size);
> + if (unlikely(ret < 0)) {
> + if (compat)
> + ret = probe_user_read(dst,
> + (__force const void __user *)unsafe_ptr, size);
> + if (unlikely(ret < 0))
> + goto fail;
> + }
> +
> + return 0;
> +fail:
> + memset(dst, 0, size);
>   return ret;
>  }
>  
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 525d12137325c..1300c9fd5c755 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ 

Re: [PATCH v10 00/18] Enable FSGSBASE instructions

2020-05-10 Thread Sasha Levin

On Sun, May 10, 2020 at 05:50:28PM -0700, Andi Kleen wrote:

So this is a check that checks if you're running in user mode if
you have a debug trap with single step, but somehow it triggered
for a user segment.

Probably the regs got corrupted.

Sasha, I suspect you're missing a mov %rsp,%rdi somewhere in the
debug entry path that sets up the regs argument for the C code.


... Ah never mind. Thomas has a better explanation.


FWIW, this series was heavily tested for the past few months to the
point that we're comfortable in enabling it for 3rd party users on
Azure:
https://bugs.launchpad.net/ubuntu/+source/linux-azure/+bug/1877425.

--
Thanks,
Sasha


linux-next: manual merge of the keys tree with Linus' tree

2020-05-10 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the keys tree got a conflict in:

  fs/splice.c

between commit:

  90da2e3f25c8 ("splice: move f_mode checks to do_{splice,tee}()")

from Linus' tree and commit:

  549d46d3827d ("pipe: Add general notification queue support")

from the keys tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc fs/splice.c
index fd0a1e7e5959,6e6ea30c72b4..
--- a/fs/splice.c
+++ b/fs/splice.c
@@@ -1118,12 -1118,8 +1118,12 @@@ long do_splice(struct file *in, loff_t 
loff_t offset;
long ret;
  
 +  if (unlikely(!(in->f_mode & FMODE_READ) ||
 +   !(out->f_mode & FMODE_WRITE)))
 +  return -EBADF;
 +
-   ipipe = get_pipe_info(in);
-   opipe = get_pipe_info(out);
+   ipipe = get_pipe_info(in, true);
+   opipe = get_pipe_info(out, true);
  
if (ipipe && opipe) {
if (off_in || off_out)
@@@ -1757,14 -1766,10 +1757,17 @@@ static int link_pipe(struct pipe_inode_
  static long do_tee(struct file *in, struct file *out, size_t len,
   unsigned int flags)
  {
-   struct pipe_inode_info *ipipe = get_pipe_info(in);
-   struct pipe_inode_info *opipe = get_pipe_info(out);
 -  struct pipe_inode_info *ipipe = get_pipe_info(in, true);
 -  struct pipe_inode_info *opipe = get_pipe_info(out, true);
++  struct pipe_inode_info *ipipe;
++  struct pipe_inode_info *opipe;
int ret = -EINVAL;
  
 +  if (unlikely(!(in->f_mode & FMODE_READ) ||
 +   !(out->f_mode & FMODE_WRITE)))
 +  return -EBADF;
 +
++  ipipe = get_pipe_info(in, true);
++  opipe = get_pipe_info(out, true);
++
/*
 * Duplicate the contents of ipipe to opipe without actually
 * copying the data.




[PATCH v12 13/18] x86/fsgsbase/64: Use FSGSBASE instructions on thread copy and ptrace

2020-05-10 Thread Sasha Levin
From: "Chang S. Bae" 

When FSGSBASE is enabled, copying threads and reading FS/GS base using
ptrace must read the actual values.

When copying a thread, use fsgs_save() and copy the saved values. For
ptrace, the bases must be read from memory regardless of the selector
if FSGSBASE is enabled.

Suggested-by: Andy Lutomirski 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
---
 arch/x86/kernel/process.c| 9 +
 arch/x86/kernel/process_64.c | 6 --
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9da70b279dad8..31dd24f9c8d8e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -140,10 +140,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned 
long sp,
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
 #ifdef CONFIG_X86_64
-   savesegment(gs, p->thread.gsindex);
-   p->thread.gsbase = p->thread.gsindex ? 0 : current->thread.gsbase;
-   savesegment(fs, p->thread.fsindex);
-   p->thread.fsbase = p->thread.fsindex ? 0 : current->thread.fsbase;
+   save_fsgs(current);
+   p->thread.fsindex = current->thread.fsindex;
+   p->thread.fsbase = current->thread.fsbase;
+   p->thread.gsindex = current->thread.gsindex;
+   p->thread.gsbase = current->thread.gsbase;
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
 #else
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4be88124d81ea..57cdbbb0381ac 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -346,7 +346,8 @@ unsigned long x86_fsbase_read_task(struct task_struct *task)
 
if (task == current)
fsbase = x86_fsbase_read_cpu();
-   else if (task->thread.fsindex == 0)
+   else if (static_cpu_has(X86_FEATURE_FSGSBASE) ||
+(task->thread.fsindex == 0))
fsbase = task->thread.fsbase;
else
fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex);
@@ -360,7 +361,8 @@ unsigned long x86_gsbase_read_task(struct task_struct *task)
 
if (task == current)
gsbase = x86_gsbase_read_cpu_inactive();
-   else if (task->thread.gsindex == 0)
+   else if (static_cpu_has(X86_FEATURE_FSGSBASE) ||
+(task->thread.gsindex == 0))
gsbase = task->thread.gsbase;
else
gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex);
-- 
2.20.1



[PATCH v12 06/18] x86/entry/64: Introduce the FIND_PERCPU_BASE macro

2020-05-10 Thread Sasha Levin
From: "Chang S. Bae" 

GS base is used to find per-CPU data in the kernel. But when GS base is
unknown, the per-CPU base can be found from the per_cpu_offset table with a
CPU NR.  The CPU NR is extracted from the limit field of the CPUNODE entry
in GDT, or by the RDPID instruction. This is a prerequisite for using
FSGSBASE in the low level entry code.

Also, add a GAS-compatible RDPID macro, as binutils 2.21 does not support
the instruction; support was only added in binutils 2.27.
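
As a rough illustration (not part of the patch; the function name below is
made up for this sketch), the lookup performed by the GET_PERCPU_BASE macro
is equivalent to the following C once the CPU/node value has been obtained
via RDPID or the CPUNODE segment limit:

	/* Sketch only -- the real implementation is the assembly macro below. */
	static unsigned long find_percpu_base(unsigned long cpu_and_node)
	{
		/* The low bits carry the CPU NR, the upper bits the node. */
		unsigned long cpu = cpu_and_node & VDSO_CPUNODE_MASK;

		/* Per-CPU offset table, indexed by CPU NR. */
		return __per_cpu_offset[cpu];
	}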

Suggested-by: H. Peter Anvin 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
Cc: Vegard Nossum 
---
 arch/x86/entry/calling.h| 34 ++
 arch/x86/include/asm/inst.h | 15 +++
 2 files changed, 49 insertions(+)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 1c7f13bb67286..29982fe140541 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
 
@@ -349,6 +350,39 @@ For 32-bit we have the following conventions - kernel is 
built with
 #endif
 .endm
 
+#ifdef CONFIG_SMP
+
+/*
+ * CPU/node NR is loaded from the limit (size) field of a special segment
+ * descriptor entry in GDT.
+ */
+.macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req
+   movq    $__CPUNODE_SEG, \reg
+   lsl \reg, \reg
+.endm
+
+/*
+ * Fetch the per-CPU GS base value for this processor and put it in @reg.
+ * We normally use %gs for accessing per-CPU data, but we are setting up
+ * %gs here and obviously can not use %gs itself to access per-CPU data.
+ */
+.macro GET_PERCPU_BASE reg:req
+   ALTERNATIVE \
+   "LOAD_CPU_AND_NODE_SEG_LIMIT \reg", \
+   "RDPID  \reg", \
+   X86_FEATURE_RDPID
+   andq    $VDSO_CPUNODE_MASK, \reg
+   movq    __per_cpu_offset(, \reg, 8), \reg
+.endm
+
+#else
+
+.macro GET_PERCPU_BASE reg:req
+   movq    pcpu_unit_offsets(%rip), \reg
+.endm
+
+#endif /* CONFIG_SMP */
+
 /*
  * This does 'call enter_from_user_mode' unless we can avoid it based on
  * kernel config or using the static jump infrastructure.
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
index f5a796da07f88..d063841a17e39 100644
--- a/arch/x86/include/asm/inst.h
+++ b/arch/x86/include/asm/inst.h
@@ -306,6 +306,21 @@
.endif
MODRM 0xc0 movq_r64_xmm_opd1 movq_r64_xmm_opd2
.endm
+
+.macro RDPID opd
+   REG_TYPE rdpid_opd_type \opd
+   .if rdpid_opd_type == REG_TYPE_R64
+   R64_NUM rdpid_opd \opd
+   .else
+   R32_NUM rdpid_opd \opd
+   .endif
+   .byte 0xf3
+   .if rdpid_opd > 7
+   PFX_REX rdpid_opd 0
+   .endif
+   .byte 0x0f, 0xc7
+   MODRM 0xc0 rdpid_opd 0x7
+.endm
 #endif
 
 #endif
-- 
2.20.1



[PATCH v12 01/18] x86/ptrace: Prevent ptrace from clearing the FS/GS selector

2020-05-10 Thread Sasha Levin
From: "Chang S. Bae" 

When a ptracer writes a ptracee's FS/GS base with a different value, the
selector is also cleared. While this behavior is incorrect as the selector
should be preserved, most userspace applications did not notice that as
they do not use non-zero segments to begin with.

Instead, with this patch, when a ptracer sets the tracee's base we let it
do so without clearing the selector.

The change above means that for a tracee that already has a selector set,
an attempt to set the base will not stick - the change won't survive and
the effective value will instead be derived from the selector. As with the
above, we haven't found userspace that would be affected by this change.

Suggested-by: Andy Lutomirski 
Signed-off-by: Chang S. Bae 
[sasha: rewrite commit message]
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
---
 arch/x86/kernel/ptrace.c | 17 ++---
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index f0e1ddbc2fd78..cc56efb75d275 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -380,25 +380,12 @@ static int putreg(struct task_struct *child,
case offsetof(struct user_regs_struct,fs_base):
if (value >= TASK_SIZE_MAX)
return -EIO;
-   /*
-* When changing the FS base, use do_arch_prctl_64()
-* to set the index to zero and to set the base
-* as requested.
-*
-* NB: This behavior is nonsensical and likely needs to
-* change when FSGSBASE support is added.
-*/
-   if (child->thread.fsbase != value)
-   return do_arch_prctl_64(child, ARCH_SET_FS, value);
+   x86_fsbase_write_task(child, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
-   /*
-* Exactly the same here as the %fs handling above.
-*/
if (value >= TASK_SIZE_MAX)
return -EIO;
-   if (child->thread.gsbase != value)
-   return do_arch_prctl_64(child, ARCH_SET_GS, value);
+   x86_gsbase_write_task(child, value);
return 0;
 #endif
}
-- 
2.20.1



[PATCH v12 14/18] x86/speculation/swapgs: Check FSGSBASE in enabling SWAPGS mitigation

2020-05-10 Thread Sasha Levin
From: Tony Luck 

Before enabling FSGSBASE the kernel could safely assume that the content
of GS base was a user address. Thus any speculative access as the result
of a mispredicted branch controlling the execution of SWAPGS would be to
a user address. So systems with speculation-proof SMAP did not need to
add additional LFENCE instructions to mitigate.

With FSGSBASE enabled a hostile user can set GS base to a kernel address.
So they can make the kernel speculatively access data they wish to leak
via a side channel. This means that SMAP provides no protection.

Add FSGSBASE as an additional condition to enable the fence-based SWAPGS
mitigation.

Signed-off-by: Tony Luck 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
---
 arch/x86/kernel/cpu/bugs.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index ed54b3b21c396..487603ea51cd1 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -450,14 +450,12 @@ static void __init spectre_v1_select_mitigation(void)
 * If FSGSBASE is enabled, the user can put a kernel address in
 * GS, in which case SMAP provides no protection.
 *
-* [ NOTE: Don't check for X86_FEATURE_FSGSBASE until the
-* FSGSBASE enablement patches have been merged. ]
-*
 * If FSGSBASE is disabled, the user can only put a user space
 * address in GS.  That makes an attack harder, but still
 * possible if there's no SMAP protection.
 */
-   if (!smap_works_speculatively()) {
+   if (boot_cpu_has(X86_FEATURE_FSGSBASE) ||
+   !smap_works_speculatively()) {
/*
 * Mitigation can be provided from SWAPGS itself or
 * PTI as the CR3 write in the Meltdown mitigation
-- 
2.20.1



[PATCH v12 02/18] selftests/x86/fsgsbase: Test GS selector on ptracer-induced GS base write

2020-05-10 Thread Sasha Levin
From: "Chang S. Bae" 

The test validates that the selector is not changed when a ptracer writes
the ptracee's GS base.

Originally-by: Andy Lutomirski 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
---
 tools/testing/selftests/x86/fsgsbase.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/x86/fsgsbase.c 
b/tools/testing/selftests/x86/fsgsbase.c
index 15a329da59fa3..950a48b2e3662 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -465,7 +465,7 @@ static void test_ptrace_write_gsbase(void)
wait(&status);
 
if (WSTOPSIG(status) == SIGTRAP) {
-   unsigned long gs, base;
+   unsigned long gs;
unsigned long gs_offset = USER_REGS_OFFSET(gs);
unsigned long base_offset = USER_REGS_OFFSET(gs_base);
 
@@ -481,7 +481,6 @@ static void test_ptrace_write_gsbase(void)
err(1, "PTRACE_POKEUSER");
 
gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
-   base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
 
/*
 * In a non-FSGSBASE system, the nonzero selector will load
@@ -489,11 +488,21 @@ static void test_ptrace_write_gsbase(void)
 * selector value is changed or not by the GSBASE write in
 * a ptracer.
 */
-   if (gs == 0 && base == 0xFF) {
-   printf("[OK]\tGS was reset as expected\n");
-   } else {
+   if (gs != *shared_scratch) {
nerrs++;
-   printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 
0xFF)\n", gs, base);
+   printf("[FAIL]\tGS changed to %lx\n", gs);
+
+   /*
+* On older kernels, poking a nonzero value into the
+* base would zero the selector.  On newer kernels,
+* this behavior has changed -- poking the base
+* changes only the base and, if FSGSBASE is not
+* available, this may have no effect.
+*/
+   if (gs == 0)
+   printf("\tNote: this is expected behavior on 
older kernels.\n");
+   } else {
+   printf("[OK]\tGS remained 0x%hx\n", *shared_scratch);
}
}
 
-- 
2.20.1



[PATCH v12 08/18] x86/entry/64: Document GSBASE handling in the paranoid path

2020-05-10 Thread Sasha Levin
From: "Chang S. Bae" 

On FSGSBASE systems, the way to handle GS base in the paranoid path is
different from the existing SWAPGS-based entry/exit path handling. Document
the reason and what has to be done for FSGSBASE enabled systems.

Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
---
 Documentation/x86/entry_64.rst | 9 +
 1 file changed, 9 insertions(+)

diff --git a/Documentation/x86/entry_64.rst b/Documentation/x86/entry_64.rst
index a48b3f6ebbe87..0499a40723af3 100644
--- a/Documentation/x86/entry_64.rst
+++ b/Documentation/x86/entry_64.rst
@@ -108,3 +108,12 @@ We try to only use IST entries and the paranoid entry code 
for vectors
 that absolutely need the more expensive check for the GS base - and we
 generate all 'normal' entry points with the regular (faster) paranoid=0
 variant.
+
+On FSGSBASE systems, however, user space can set GS without kernel
+interaction. It means the value of GS base itself does not imply anything,
+whether a kernel value or a user space value. So, there is no longer a safe
+way to check whether the exception is entering from user mode or kernel
+mode in the paranoid entry code path. So the GS base value needs to be read
+out, saved and the kernel GS base value written. On exit, the saved GS base
+value needs to be restored unconditionally. The non-paranoid entry/exit
+code still uses SWAPGS unconditionally as the state is known.
-- 
2.20.1



[PATCH v2] spi: sun6i: Add support for GPIO chip select lines

2020-05-10 Thread Alistair Francis
Set use_gpio_descriptors as true to support using generic GPIO
lines for the chip select.

Signed-off-by: Alistair Francis 
---
v2:
 - Use use_gpio_descriptors instead of spi_setup

 drivers/spi/spi-sun6i.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index ec7967be9e2f..ecea15534c42 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -470,6 +470,7 @@ static int sun6i_spi_probe(struct platform_device *pdev)
 
master->max_speed_hz = 100 * 1000 * 1000;
master->min_speed_hz = 3 * 1000;
+   master->use_gpio_descriptors = true;
master->set_cs = sun6i_spi_set_cs;
master->transfer_one = sun6i_spi_transfer_one;
master->num_chipselect = 4;
-- 
2.26.2



[PATCH v12 09/18] x86/fsgsbase/64: Add intrinsics for FSGSBASE instructions

2020-05-10 Thread Sasha Levin
From: Andi Kleen 

[ luto: Rename the variables from FS and GS to FSBASE and GSBASE and
  make <asm/fsgsbase.h> safe to include on 32-bit kernels. ]

Signed-off-by: Andi Kleen 
Signed-off-by: Andy Lutomirski 
Signed-off-by: Chang S. Bae 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Sasha Levin 
Reviewed-by: Andy Lutomirski 
Reviewed-by: Andi Kleen 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: H. Peter Anvin 
Cc: Andi Kleen 
---
 arch/x86/include/asm/fsgsbase.h | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index bca4c743de77c..fdd1177499b40 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -19,6 +19,36 @@ extern unsigned long x86_gsbase_read_task(struct task_struct 
*task);
 extern void x86_fsbase_write_task(struct task_struct *task, unsigned long 
fsbase);
 extern void x86_gsbase_write_task(struct task_struct *task, unsigned long 
gsbase);
 
+/* Must be protected by X86_FEATURE_FSGSBASE check. */
+
+static __always_inline unsigned long rdfsbase(void)
+{
+   unsigned long fsbase;
+
+   asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory");
+
+   return fsbase;
+}
+
+static __always_inline unsigned long rdgsbase(void)
+{
+   unsigned long gsbase;
+
+   asm volatile("rdgsbase %0" : "=r" (gsbase) :: "memory");
+
+   return gsbase;
+}
+
+static __always_inline void wrfsbase(unsigned long fsbase)
+{
+   asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory");
+}
+
+static __always_inline void wrgsbase(unsigned long gsbase)
+{
+   asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory");
+}
+
 /* Helper functions for reading/writing FS/GS base */
 
 static inline unsigned long x86_fsbase_read_cpu(void)
-- 
2.20.1



[PATCH v12 11/18] x86/fsgsbase/64: Use FSGSBASE in switch_to() if available

2020-05-10 Thread Sasha Levin
From: Andy Lutomirski 

With the new FSGSBASE instructions, FS/GS base can be efficiently read
and written in __switch_to(). Use that capability to preserve the full
state.

This will enable user code to do whatever it wants with the new
instructions without any kernel-induced gotchas.  (There can still be
architectural gotchas: movl %gs,%eax; movl %eax,%gs may change GS base
if WRGSBASE was used, but users are expected to read the CPU manual
before doing things like that.)

This is a considerable speedup. It seems to save about 100 cycles per
context switch compared to the baseline 4.6-rc1 behavior on a Skylake
laptop.

[ chang: 5~10% performance improvements were seen by a context switch
  benchmark that ran threads with different FS/GS base values (to the
  baseline 4.16). ]

Signed-off-by: Andy Lutomirski 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
---
 arch/x86/kernel/process_64.c | 34 --
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index aaa65f284b9b9..e066750be89a0 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -199,8 +199,18 @@ static __always_inline void save_fsgs(struct task_struct 
*task)
 {
savesegment(fs, task->thread.fsindex);
savesegment(gs, task->thread.gsindex);
-   save_base_legacy(task, task->thread.fsindex, FS);
-   save_base_legacy(task, task->thread.gsindex, GS);
+   if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+   /*
+* If FSGSBASE is enabled, we can't make any useful guesses
+* about the base, and user code expects us to save the current
+* value.  Fortunately, reading the base directly is efficient.
+*/
+   task->thread.fsbase = rdfsbase();
+   task->thread.gsbase = x86_gsbase_read_cpu_inactive();
+   } else {
+   save_base_legacy(task, task->thread.fsindex, FS);
+   save_base_legacy(task, task->thread.gsindex, GS);
+   }
 }
 
 #if IS_ENABLED(CONFIG_KVM)
@@ -279,10 +289,22 @@ static __always_inline void load_seg_legacy(unsigned 
short prev_index,
 static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
  struct thread_struct *next)
 {
-   load_seg_legacy(prev->fsindex, prev->fsbase,
-   next->fsindex, next->fsbase, FS);
-   load_seg_legacy(prev->gsindex, prev->gsbase,
-   next->gsindex, next->gsbase, GS);
+   if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+   /* Update the FS and GS selectors if they could have changed. */
+   if (unlikely(prev->fsindex || next->fsindex))
+   loadseg(FS, next->fsindex);
+   if (unlikely(prev->gsindex || next->gsindex))
+   loadseg(GS, next->gsindex);
+
+   /* Update the bases. */
+   wrfsbase(next->fsbase);
+   x86_gsbase_write_cpu_inactive(next->gsbase);
+   } else {
+   load_seg_legacy(prev->fsindex, prev->fsbase,
+   next->fsindex, next->fsbase, FS);
+   load_seg_legacy(prev->gsindex, prev->gsbase,
+   next->gsindex, next->gsbase, GS);
+   }
 }
 
 static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
-- 
2.20.1



[PATCH v12 16/18] x86/fsgsbase/64: Enable FSGSBASE on 64bit by default and add a chicken bit

2020-05-10 Thread Sasha Levin
From: Andy Lutomirski 

Now that FSGSBASE is fully supported, remove unsafe_fsgsbase, enable
FSGSBASE by default, and add nofsgsbase to disable it.

While this changes userspace visible ABI, we could not find a project
that would be affected by this. A few projects were contacted for input
and ack:

- 5-level EPT: 
http://lkml.kernel.org/r/9ddf602b-6c8b-8c1e-ab46-07ed12366...@redhat.com
- rr: https://mail.mozilla.org/pipermail/rr-dev/2018-March/000616.html
- CRIU: https://lists.openvz.org/pipermail/criu/2018-March/040654.html

Signed-off-by: Andy Lutomirski 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
---
 .../admin-guide/kernel-parameters.txt |  3 +-
 arch/x86/kernel/cpu/common.c  | 32 ---
 2 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index af3aaade195b8..1924845c879c2 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3033,8 +3033,7 @@
no5lvl  [X86-64] Disable 5-level paging mode. Forces
kernel to use 4-level paging instead.
 
-   unsafe_fsgsbase [X86] Allow FSGSBASE instructions.  This will be
-   replaced with a nofsgsbase flag.
+   nofsgsbase  [X86] Disables FSGSBASE instructions.
 
no_console_suspend
[HW] Never suspend the console
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4224760c74e27..0d480cbadc7dc 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -418,21 +418,21 @@ static void __init setup_cr_pinning(void)
static_key_enable(&cr_pinning.key);
 }
 
-/*
- * Temporary hack: FSGSBASE is unsafe until a few kernel code paths are
- * updated. This allows us to get the kernel ready incrementally.
- *
- * Once all the pieces are in place, these will go away and be replaced with
- * a nofsgsbase chicken flag.
- */
-static bool unsafe_fsgsbase;
-
-static __init int setup_unsafe_fsgsbase(char *arg)
+static __init int x86_nofsgsbase_setup(char *arg)
 {
-   unsafe_fsgsbase = true;
+   /* Require an exact match without trailing characters. */
+   if (strlen(arg))
+   return 0;
+
+   /* Do not emit a message if the feature is not present. */
+   if (!boot_cpu_has(X86_FEATURE_FSGSBASE))
+   return 1;
+
+   setup_clear_cpu_cap(X86_FEATURE_FSGSBASE);
+   pr_info("FSGSBASE disabled via kernel command line\n");
return 1;
 }
-__setup("unsafe_fsgsbase", setup_unsafe_fsgsbase);
+__setup("nofsgsbase", x86_nofsgsbase_setup);
 
 /*
  * Protection Keys are not available in 32-bit mode.
@@ -1495,12 +1495,8 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_umip(c);
 
/* Enable FSGSBASE instructions if available. */
-   if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
-   if (unsafe_fsgsbase)
-   cr4_set_bits(X86_CR4_FSGSBASE);
-   else
-   clear_cpu_cap(c, X86_FEATURE_FSGSBASE);
-   }
+   if (cpu_has(c, X86_FEATURE_FSGSBASE))
+   cr4_set_bits(X86_CR4_FSGSBASE);
 
/*
 * The vendor-specific functions might have changed features.
-- 
2.20.1



[PATCH v12 15/18] selftests/x86/fsgsbase: Test ptracer-induced GS base write with FSGSBASE

2020-05-10 Thread Sasha Levin
From: "Chang S. Bae" 

This validates that GS selector and base are independently preserved in
ptrace commands.

Suggested-by: Andy Lutomirski 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
---
 tools/testing/selftests/x86/fsgsbase.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/x86/fsgsbase.c 
b/tools/testing/selftests/x86/fsgsbase.c
index 950a48b2e3662..9a4349813a30a 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -465,7 +465,7 @@ static void test_ptrace_write_gsbase(void)
wait(&status);
 
if (WSTOPSIG(status) == SIGTRAP) {
-   unsigned long gs;
+   unsigned long gs, base;
unsigned long gs_offset = USER_REGS_OFFSET(gs);
unsigned long base_offset = USER_REGS_OFFSET(gs_base);
 
@@ -481,6 +481,7 @@ static void test_ptrace_write_gsbase(void)
err(1, "PTRACE_POKEUSER");
 
gs = ptrace(PTRACE_PEEKUSER, child, gs_offset, NULL);
+   base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
 
/*
 * In a non-FSGSBASE system, the nonzero selector will load
@@ -501,8 +502,14 @@ static void test_ptrace_write_gsbase(void)
 */
if (gs == 0)
printf("\tNote: this is expected behavior on 
older kernels.\n");
+   } else if (have_fsgsbase && (base != 0xFF)) {
+   nerrs++;
+   printf("[FAIL]\tGSBASE changed to %lx\n", base);
} else {
-   printf("[OK]\tGS remained 0x%hx\n", *shared_scratch);
+   printf("[OK]\tGS remained 0x%hx", *shared_scratch);
+   if (have_fsgsbase)
+   printf(" and GSBASE changed to 0xFF");
+   printf("\n");
}
}
 
-- 
2.20.1



[PATCH v12 05/18] x86/entry/64: Switch CR3 before SWAPGS in paranoid entry

2020-05-10 Thread Sasha Levin
From: "Chang S. Bae" 

When FSGSBASE is enabled, the GS base handling in paranoid entry will need
to retrieve the kernel GS base which requires that the kernel page table is
active.

As the CR3 switch to the kernel page tables (PTI is active) does not depend
on kernel GS base, move the CR3 switch in front of the GS base handling.

Comment the EBX content while at it.

No functional change.

Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
Cc: Vegard Nossum 
---
 arch/x86/entry/entry_64.S | 31 +++
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 0da56e6791b73..3ac1313724eaa 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1220,15 +1220,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
cld
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
-   movl    $1, %ebx
-   movl    $MSR_GS_BASE, %ecx
-   rdmsr
-   testl   %edx, %edx
-   js  1f  /* negative -> in kernel */
-   SWAPGS
-   xorl    %ebx, %ebx
 
-1:
/*
 * Always stash CR3 in %r14.  This value will be restored,
 * verbatim, at exit.  Needed if paranoid_entry interrupted
@@ -1238,16 +1230,31 @@ SYM_CODE_START_LOCAL(paranoid_entry)
 * This is also why CS (stashed in the "iret frame" by the
 * hardware at entry) can not be used: this may be a return
 * to kernel code, but with a user CR3 value.
+*
+* Switching CR3 does not depend on kernel GS base so it can
+* be done before switching to the kernel GS base. This is
+* required for FSGSBASE because the kernel GS base has to
+* be retrieved from a kernel internal table.
 */
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 
+   /* EBX = 1 -> kernel GSBASE active, no restore required */
+   movl    $1, %ebx
/*
-* The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
-* unconditional CR3 write, even in the PTI case.  So do an lfence
-* to prevent GS speculation, regardless of whether PTI is enabled.
+* The kernel-enforced convention is a negative GS base indicates
+* a kernel value. No SWAPGS needed on entry and exit.
 */
-   FENCE_SWAPGS_KERNEL_ENTRY
+   movl    $MSR_GS_BASE, %ecx
+   rdmsr
+   testl   %edx, %edx
+   jns .Lparanoid_entry_swapgs
+   ret
 
+.Lparanoid_entry_swapgs:
+   SWAPGS
+   FENCE_SWAPGS_KERNEL_ENTRY
+   /* EBX = 0 -> SWAPGS required on exit */
+   xorl    %ebx, %ebx
ret
 SYM_CODE_END(paranoid_entry)
 
-- 
2.20.1



[PATCH v12 07/18] x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit

2020-05-10 Thread Sasha Levin
From: "Chang S. Bae" 

Without FSGSBASE, user space cannot change GS base other than through a
PRCTL. The kernel enforces that the user space GS base value is positive
as negative values are used for detecting the kernel space GS base value
in the paranoid entry code.

If FSGSBASE is enabled, user space can set arbitrary GS base values without
kernel intervention, including negative ones, which breaks the paranoid
entry assumptions.

To avoid this, paranoid entry needs to unconditionally save the current
GS base value independent of the interrupted context, retrieve and write
the kernel GS base and unconditionally restore the saved value on exit.
The restore happens either in paranoid exit or in the special exit path of
the NMI low level code.

All other entry code paths which use unconditional SWAPGS are not affected
as they do not depend on the actual content.

The new logic for paranoid entry, when FSGSBASE is enabled, removes SWAPGS
and replaces with unconditional WRGSBASE. Hence no fences are needed.
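
In rough C-like pseudocode, the FSGSBASE flavour of the paranoid path
described above behaves as follows (sketch only; this_cpu_kernel_gsbase()
is a made-up placeholder for the per-CPU lookup, and the real logic is the
entry assembly in this patch):

	unsigned long paranoid_entry_gsbase(void)
	{
		/* Save whatever is there -- no assumption about user or kernel. */
		unsigned long saved = rdgsbase();

		/* Unconditionally install this CPU's kernel GS base. */
		wrgsbase(this_cpu_kernel_gsbase());

		return saved;	/* stashed in %rbx for the exit path */
	}

	void paranoid_exit_gsbase(unsigned long saved)
	{
		/* Unconditional restore: no SWAPGS, hence no fence needed. */
		wrgsbase(saved);
	}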

Suggested-by: H. Peter Anvin 
Suggested-by: Andy Lutomirski 
Suggested-by: Thomas Gleixner 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Acked-by: Tom Lendacky 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
Cc: Tom Lendacky 
Cc: Vegard Nossum 
---
 arch/x86/entry/calling.h  |  6 +++
 arch/x86/entry/entry_64.S | 78 ++-
 2 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 29982fe140541..6dc2702a939c7 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -342,6 +342,12 @@ For 32-bit we have the following conventions - kernel is 
built with
 #endif
 .endm
 
+.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
+   rdgsbase \save_reg
+   GET_PERCPU_BASE \scratch_reg
+   wrgsbase \scratch_reg
+.endm
+
 #endif /* CONFIG_X86_64 */
 
 .macro STACKLEAK_ERASE
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3ac1313724eaa..c2c4e063c406d 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -38,6 +38,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "calling.h"
@@ -1211,9 +1212,14 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
 #endif
 
 /*
- * Save all registers in pt_regs, and switch gs if needed.
- * Use slow, but surefire "are we in kernel?" check.
- * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ * Save all registers in pt_regs. Return GS base related information
+ * in EBX depending on the availability of the FSGSBASE instructions:
+ *
+ * FSGSBASE    R/EBX
+ * N           0 -> SWAPGS on exit
+ *             1 -> no SWAPGS on exit
+ *
+ * Y           GS base value at entry, must be restored in paranoid_exit
  */
 SYM_CODE_START_LOCAL(paranoid_entry)
UNWIND_HINT_FUNC
@@ -1238,7 +1244,29 @@ SYM_CODE_START_LOCAL(paranoid_entry)
 */
SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 
-   /* EBX = 1 -> kernel GSBASE active, no restore required */
+   /*
+* Handling GS base depends on the availability of FSGSBASE.
+*
+* Without FSGSBASE the kernel enforces that negative GS base
+* values indicate kernel GS base. With FSGSBASE no assumptions
+* can be made about the GS base value when entering from user
+* space.
+   */
+   ALTERNATIVE "jmp .Lparanoid_entry_checkgs", "", X86_FEATURE_FSGSBASE
+
+   /*
+* Read the current GS base and store it in %rbx unconditionally,
+* retrieve and set the current CPUs kernel GS base. The stored value
+* has to be restored in paranoid_exit unconditionally.
+*
+* This unconditional write of GS base ensures no subsequent load
+* based on a mispredicted GS base.
+*/
+   SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
+   ret
+
+.Lparanoid_entry_checkgs:
+   /* EBX = 1 -> kernel GS base active, no restore required */
movl    $1, %ebx
/*
 * The kernel-enforced convention is a negative GS base indicates
@@ -1265,10 +1293,17 @@ SYM_CODE_END(paranoid_entry)
  *
  * We may be returning to very strange contexts (e.g. very early
  * in syscall entry), so checking for preemption here would
- * be complicated.  Fortunately, we there's no good reason
- * to try to handle preemption here.
+ * be complicated.  Fortunately, there's no good reason to try
+ * to handle preemption here.
+ *
+ * R/EBX contains the GS base related information depending on the
+ * availability of the FSGSBASE instructions:
+ *
+ * FSGSBASE    R/EBX
+ * N           0 -> SWAPGS on exit
+ *             1 -> no SWAPGS on exit
  *
- * On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
+ * Y           User space GS base, must be 

[PATCH v12 10/18] x86/fsgsbase/64: Enable FSGSBASE instructions in helper functions

2020-05-10 Thread Sasha Levin
From: "Chang S. Bae" 

Add CPU feature conditional FS/GS base access to the relevant helper
functions. That allows accelerating certain FS/GS base operations in
subsequent changes.

Note that, while possible, the user space entry/exit GS base operations are
not going to use the new FSGSBASE instructions. The reason is that it would
require additional storage for the user space value which adds more
complexity to the low level code and experiments have shown marginal
benefit. This may be revisited later but for now the SWAPGS based handling
in the entry code is preserved except for the paranoid entry/exit code.

Suggested-by: Tony Luck 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
Cc: Andrew Cooper 
---
 arch/x86/include/asm/fsgsbase.h | 27 +++
 arch/x86/kernel/process_64.c| 58 +
 2 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index fdd1177499b40..aefd53767a5d4 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -49,35 +49,32 @@ static __always_inline void wrgsbase(unsigned long gsbase)
asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory");
 }
 
+#include 
+
 /* Helper functions for reading/writing FS/GS base */
 
 static inline unsigned long x86_fsbase_read_cpu(void)
 {
unsigned long fsbase;
 
-   rdmsrl(MSR_FS_BASE, fsbase);
+   if (static_cpu_has(X86_FEATURE_FSGSBASE))
+   fsbase = rdfsbase();
+   else
+   rdmsrl(MSR_FS_BASE, fsbase);
 
return fsbase;
 }
 
-static inline unsigned long x86_gsbase_read_cpu_inactive(void)
-{
-   unsigned long gsbase;
-
-   rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
-
-   return gsbase;
-}
-
 static inline void x86_fsbase_write_cpu(unsigned long fsbase)
 {
-   wrmsrl(MSR_FS_BASE, fsbase);
+   if (static_cpu_has(X86_FEATURE_FSGSBASE))
+   wrfsbase(fsbase);
+   else
+   wrmsrl(MSR_FS_BASE, fsbase);
 }
 
-static inline void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
-{
-   wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
-}
+extern unsigned long x86_gsbase_read_cpu_inactive(void);
+extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
 
 #endif /* CONFIG_X86_64 */
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 5ef9d8f25b0e8..aaa65f284b9b9 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -328,6 +328,64 @@ static unsigned long x86_fsgsbase_read_task(struct 
task_struct *task,
return base;
 }
 
+unsigned long x86_gsbase_read_cpu_inactive(void)
+{
+   unsigned long gsbase;
+
+   if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+   bool need_restore = false;
+   unsigned long flags;
+
+   /*
+* We read the inactive GS base value by swapping
+* to make it the active one. But we cannot allow
+* an interrupt while we switch to and from.
+*/
+   if (!irqs_disabled()) {
+   local_irq_save(flags);
+   need_restore = true;
+   }
+
+   native_swapgs();
+   gsbase = rdgsbase();
+   native_swapgs();
+
+   if (need_restore)
+   local_irq_restore(flags);
+   } else {
+   rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
+   }
+
+   return gsbase;
+}
+
+void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
+{
+   if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+   bool need_restore = false;
+   unsigned long flags;
+
+   /*
+* We write the inactive GS base value by swapping
+* to make it the active one. But we cannot allow
+* an interrupt while we switch to and from.
+*/
+   if (!irqs_disabled()) {
+   local_irq_save(flags);
+   need_restore = true;
+   }
+
+   native_swapgs();
+   wrgsbase(gsbase);
+   native_swapgs();
+
+   if (need_restore)
+   local_irq_restore(flags);
+   } else {
+   wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+   }
+}
+
 unsigned long x86_fsbase_read_task(struct task_struct *task)
 {
unsigned long fsbase;
-- 
2.20.1



[PATCH v12 12/18] x86/fsgsbase/64: move save_fsgs to header file

2020-05-10 Thread Sasha Levin
Given copy_thread_tls() is now shared between 32 and 64 bit and we need
to use save_fsgs() there, move it to a header file.

Signed-off-by: Sasha Levin 
---
 arch/x86/kernel/process.h| 72 
 arch/x86/kernel/process_64.c | 68 --
 2 files changed, 72 insertions(+), 68 deletions(-)

diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h
index 1d0797b2338a2..2360d340cbf00 100644
--- a/arch/x86/kernel/process.h
+++ b/arch/x86/kernel/process.h
@@ -37,3 +37,75 @@ static inline void switch_to_extra(struct task_struct *prev,
 prev_tif & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev, next);
 }
+
+#ifdef CONFIG_X86_64
+
+enum which_selector {
+   FS,
+   GS
+};
+
+/*
+ * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
+ * not available.  The goal is to be reasonably fast on non-FSGSBASE systems.
+ * It's forcibly inlined because it'll generate better code and this function
+ * is hot.
+ */
+static __always_inline void save_base_legacy(struct task_struct *prev_p,
+ unsigned short selector,
+ enum which_selector which)
+{
+   if (likely(selector == 0)) {
+   /*
+* On Intel (without X86_BUG_NULL_SEG), the segment base could
+* be the pre-existing saved base or it could be zero.  On AMD
+* (with X86_BUG_NULL_SEG), the segment base could be almost
+* anything.
+*
+* This branch is very hot (it's hit twice on almost every
+* context switch between 64-bit programs), and avoiding
+* the RDMSR helps a lot, so we just assume that whatever
+* value is already saved is correct.  This matches historical
+* Linux behavior, so it won't break existing applications.
+*
+* To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
+* report that the base is zero, it needs to actually be zero:
+* see the corresponding logic in load_seg_legacy.
+*/
+   } else {
+   /*
+* If the selector is 1, 2, or 3, then the base is zero on
+* !X86_BUG_NULL_SEG CPUs and could be anything on
+* X86_BUG_NULL_SEG CPUs.  In the latter case, Linux
+* has never attempted to preserve the base across context
+* switches.
+*
+* If selector > 3, then it refers to a real segment, and
+* saving the base isn't necessary.
+*/
+   if (which == FS)
+   prev_p->thread.fsbase = 0;
+   else
+   prev_p->thread.gsbase = 0;
+   }
+}
+
+static __always_inline void save_fsgs(struct task_struct *task)
+{
+   savesegment(fs, task->thread.fsindex);
+   savesegment(gs, task->thread.gsindex);
+   if (static_cpu_has(X86_FEATURE_FSGSBASE)) {
+   /*
+* If FSGSBASE is enabled, we can't make any useful guesses
+* about the base, and user code expects us to save the current
+* value.  Fortunately, reading the base directly is efficient.
+*/
+   task->thread.fsbase = rdfsbase();
+   task->thread.gsbase = x86_gsbase_read_cpu_inactive();
+   } else {
+   save_base_legacy(task, task->thread.fsindex, FS);
+   save_base_legacy(task, task->thread.gsindex, GS);
+   }
+}
+
+#endif
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e066750be89a0..4be88124d81ea 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -145,74 +145,6 @@ void release_thread(struct task_struct *dead_task)
WARN_ON(dead_task->mm);
 }
 
-enum which_selector {
-   FS,
-   GS
-};
-
-/*
- * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
- * not available.  The goal is to be reasonably fast on non-FSGSBASE systems.
- * It's forcibly inlined because it'll generate better code and this function
- * is hot.
- */
-static __always_inline void save_base_legacy(struct task_struct *prev_p,
-unsigned short selector,
-enum which_selector which)
-{
-   if (likely(selector == 0)) {
-   /*
-* On Intel (without X86_BUG_NULL_SEG), the segment base could
-* be the pre-existing saved base or it could be zero.  On AMD
-* (with X86_BUG_NULL_SEG), the segment base could be almost
-* anything.
-*
-* This branch is very hot (it's hit twice on almost every
-* context switch between 64-bit 

[PATCH v12 17/18] x86/elf: Enumerate kernel FSGSBASE capability in AT_HWCAP2

2020-05-10 Thread Sasha Levin
From: Andi Kleen 

The kernel needs to explicitly enable FSGSBASE. So, the application needs
to know if it can safely use these instructions. Just looking at the CPUID
bit is not enough because it may be running in a kernel that does not
enable the instructions.

One way for the application would be to just try and catch the SIGILL.
But that is difficult to do in libraries which may not want to overwrite
the signal handlers of the main application.

Enumerate the enabled FSGSBASE capability in bit 1 of AT_HWCAP2 in the ELF
aux vector. AT_HWCAP2 is already used by PPC for similar purposes.

The application can access it open coded or by using the getauxval()
function in newer versions of glibc.
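
For illustration, a minimal user-space check using glibc's getauxval()
could look like the sketch below (not part of the patch; the fallback
define simply mirrors the bit added to hwcap2.h in this change):

	#include <stdio.h>
	#include <sys/auxv.h>

	#ifndef HWCAP2_FSGSBASE
	#define HWCAP2_FSGSBASE	(1 << 1)	/* bit 1 of AT_HWCAP2 */
	#endif

	int main(void)
	{
		unsigned long hwcap2 = getauxval(AT_HWCAP2);

		if (hwcap2 & HWCAP2_FSGSBASE)
			puts("kernel has enabled the FSGSBASE instructions");
		else
			puts("FSGSBASE not enabled, fall back to arch_prctl()");
		return 0;
	}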

Signed-off-by: Andi Kleen 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
---
 arch/x86/include/uapi/asm/hwcap2.h | 3 +++
 arch/x86/kernel/cpu/common.c   | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/hwcap2.h 
b/arch/x86/include/uapi/asm/hwcap2.h
index 8b2effe6efb82..5fdfcb47000f9 100644
--- a/arch/x86/include/uapi/asm/hwcap2.h
+++ b/arch/x86/include/uapi/asm/hwcap2.h
@@ -5,4 +5,7 @@
 /* MONITOR/MWAIT enabled in Ring 3 */
 #define HWCAP2_RING3MWAIT  (1 << 0)
 
+/* Kernel allows FSGSBASE instructions available in Ring 3 */
+#define HWCAP2_FSGSBASE        BIT(1)
+
 #endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0d480cbadc7dc..b5a086ea34258 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1495,8 +1495,10 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_umip(c);
 
/* Enable FSGSBASE instructions if available. */
-   if (cpu_has(c, X86_FEATURE_FSGSBASE))
+   if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
cr4_set_bits(X86_CR4_FSGSBASE);
+   elf_hwcap2 |= HWCAP2_FSGSBASE;
+   }
 
/*
 * The vendor-specific functions might have changed features.
-- 
2.20.1



[PATCH v12 04/18] x86/entry/64: Clean up paranoid exit

2020-05-10 Thread Sasha Levin
From: Andy Lutomirski 

All that paranoid exit needs to do is to disable IRQs, handle IRQ tracing,
then restore CR3, and restore GS base. Simply do those actions in that
order. This cleans up the spaghetti code.

Signed-off-by: Andy Lutomirski 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
Cc: Vegard Nossum 
---
 arch/x86/entry/entry_64.S | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 3063aa9090f9a..0da56e6791b73 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1266,19 +1266,25 @@ SYM_CODE_END(paranoid_entry)
 SYM_CODE_START_LOCAL(paranoid_exit)
UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
+
+   /*
+* The order of operations is important. IRQ tracing requires
+* kernel GS base and CR3. RESTORE_CR3 requires kernel GS base.
+*
+* NB to anyone to try to optimize this code: this code does
+* not execute at all for exceptions from user mode. Those
+* exceptions go through error_exit instead.
+*/
TRACE_IRQS_OFF_DEBUG
-   testl   %ebx, %ebx  /* swapgs needed? */
-   jnz .Lparanoid_exit_no_swapgs
-   TRACE_IRQS_IRETQ
-   /* Always restore stashed CR3 value (see paranoid_entry) */
-   RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
+   RESTORE_CR3 scratch_reg=%rax save_reg=%r14
+
+   /* If EBX is 0, SWAPGS is required */
+   testl   %ebx, %ebx
+   jnz restore_regs_and_return_to_kernel
+
+   /* We are returning to a context with user GS base */
SWAPGS_UNSAFE_STACK
jmp restore_regs_and_return_to_kernel
-.Lparanoid_exit_no_swapgs:
-   TRACE_IRQS_IRETQ_DEBUG
-   /* Always restore stashed CR3 value (see paranoid_entry) */
-   RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
-   jmp restore_regs_and_return_to_kernel
 SYM_CODE_END(paranoid_exit)
 
 /*
-- 
2.20.1



[PATCH v12 03/18] x86/cpu: Add 'unsafe_fsgsbase' to enable CR4.FSGSBASE

2020-05-10 Thread Sasha Levin
From: Andy Lutomirski 

This is temporary.  It will allow the next few patches to be tested
incrementally.

Setting unsafe_fsgsbase is a root hole.  Don't do it.

Signed-off-by: Andy Lutomirski 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
---
 .../admin-guide/kernel-parameters.txt |  3 +++
 arch/x86/kernel/cpu/common.c  | 24 +++
 2 files changed, 27 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt 
b/Documentation/admin-guide/kernel-parameters.txt
index 7bc83f3d9bdfe..af3aaade195b8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3033,6 +3033,9 @@
no5lvl  [X86-64] Disable 5-level paging mode. Forces
kernel to use 4-level paging instead.
 
+   unsafe_fsgsbase [X86] Allow FSGSBASE instructions.  This will be
+   replaced with a nofsgsbase flag.
+
no_console_suspend
[HW] Never suspend the console
Disable suspending of consoles during suspend and
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index bed0cb83fe245..4224760c74e27 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -418,6 +418,22 @@ static void __init setup_cr_pinning(void)
static_key_enable(&cr_pinning.key);
 }
 
+/*
+ * Temporary hack: FSGSBASE is unsafe until a few kernel code paths are
+ * updated. This allows us to get the kernel ready incrementally.
+ *
+ * Once all the pieces are in place, these will go away and be replaced with
+ * a nofsgsbase chicken flag.
+ */
+static bool unsafe_fsgsbase;
+
+static __init int setup_unsafe_fsgsbase(char *arg)
+{
+   unsafe_fsgsbase = true;
+   return 1;
+}
+__setup("unsafe_fsgsbase", setup_unsafe_fsgsbase);
+
 /*
  * Protection Keys are not available in 32-bit mode.
  */
@@ -1478,6 +1494,14 @@ static void identify_cpu(struct cpuinfo_x86 *c)
setup_smap(c);
setup_umip(c);
 
+   /* Enable FSGSBASE instructions if available. */
+   if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
+   if (unsafe_fsgsbase)
+   cr4_set_bits(X86_CR4_FSGSBASE);
+   else
+   clear_cpu_cap(c, X86_FEATURE_FSGSBASE);
+   }
+
/*
 * The vendor-specific functions might have changed features.
 * Now we do "generic changes."
-- 
2.20.1



[PATCH v12 18/18] Documentation/x86/64: Add documentation for GS/FS addressing mode

2020-05-10 Thread Sasha Levin
From: Thomas Gleixner 

Explain how the GS/FS based addressing can be utilized in user space
applications along with the differences between the generic prctl() based
GS/FS base control and the FSGSBASE version available on newer CPUs.

Originally-by: Andi Kleen 
Signed-off-by: Thomas Gleixner 
Signed-off-by: Chang S. Bae 
Signed-off-by: Sasha Levin 
Reviewed-by: Tony Luck 
Reviewed-by: Randy Dunlap 
Cc: Thomas Gleixner 
Cc: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: H. Peter Anvin 
Cc: Dave Hansen 
Cc: Tony Luck 
Cc: Andi Kleen 
Cc: Randy Dunlap 
Cc: Jonathan Corbet 
---
 Documentation/x86/x86_64/fsgs.rst  | 199 +
 Documentation/x86/x86_64/index.rst |   1 +
 2 files changed, 200 insertions(+)
 create mode 100644 Documentation/x86/x86_64/fsgs.rst

diff --git a/Documentation/x86/x86_64/fsgs.rst 
b/Documentation/x86/x86_64/fsgs.rst
new file mode 100644
index 0..50960e09e1f66
--- /dev/null
+++ b/Documentation/x86/x86_64/fsgs.rst
@@ -0,0 +1,199 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Using FS and GS segments in user space applications
+===
+
+The x86 architecture supports segmentation. Instructions which access
+memory can use segment register based addressing mode. The following
+notation is used to address a byte within a segment:
+
+  Segment-register:Byte-address
+
+The segment base address is added to the Byte-address to compute the
+resulting virtual address which is accessed. This allows to access multiple
+instances of data with the identical Byte-address, i.e. the same code. The
+selection of a particular instance is purely based on the base-address in
+the segment register.
+
+In 32-bit mode the CPU provides 6 segments, which also support segment
+limits. The limits can be used to enforce address space protections.
+
+In 64-bit mode the CS/SS/DS/ES segments are ignored and the base address is
+always 0 to provide a full 64bit address space. The FS and GS segments are
+still functional in 64-bit mode.
+
+Common FS and GS usage
+--
+
+The FS segment is commonly used to address Thread Local Storage (TLS). FS
+is usually managed by runtime code or a threading library. Variables
+declared with the '__thread' storage class specifier are instantiated per
+thread and the compiler emits the FS: address prefix for accesses to these
+variables. Each thread has its own FS base address so common code can be
+used without complex address offset calculations to access the per thread
+instances. Applications should not use FS for other purposes when they use
+runtimes or threading libraries which manage the per thread FS.
+
+The GS segment has no common use and can be used freely by
+applications. GCC and Clang support GS based addressing via address space
+identifiers.
+
+Reading and writing the FS/GS base address
+--
+
+There exist two mechanisms to read and write the FS/GS base address:
+
+ - the arch_prctl() system call
+
+ - the FSGSBASE instruction family
+
+Accessing FS/GS base with arch_prctl()
+--
+
+ The arch_prctl(2) based mechanism is available on all 64-bit CPUs and all
+ kernel versions.
+
+ Reading the base:
+
+   arch_prctl(ARCH_GET_FS, &fsbase);
+   arch_prctl(ARCH_GET_GS, &gsbase);
+
+ Writing the base:
+
+   arch_prctl(ARCH_SET_FS, fsbase);
+   arch_prctl(ARCH_SET_GS, gsbase);
+
+ The ARCH_SET_GS prctl may be disabled depending on kernel configuration
+ and security settings.
+
+Accessing FS/GS base with the FSGSBASE instructions
+---
+
+ With the Ivy Bridge CPU generation Intel introduced a new set of
+ instructions to access the FS and GS base registers directly from user
+ space. These instructions are also supported on AMD Family 17H CPUs. The
+ following instructions are available:
+
+  === ===
+  RDFSBASE %reg   Read the FS base register
+  RDGSBASE %reg   Read the GS base register
+  WRFSBASE %reg   Write the FS base register
+  WRGSBASE %reg   Write the GS base register
+  === ===
+
+ The instructions avoid the overhead of the arch_prctl() syscall and allow
+ more flexible usage of the FS/GS addressing modes in user space
+ applications. This does not prevent conflicts between threading libraries
+ and runtimes which utilize FS and applications which want to use it for
+ their own purpose.
+
+FSGSBASE instructions enablement
+
+ The instructions are enumerated in CPUID leaf 7, bit 0 of EBX. If
+ available /proc/cpuinfo shows 'fsgsbase' in the flag entry of the CPUs.
+
+ The availability of the instructions does not enable them
+ automatically. The kernel has to enable them explicitly in CR4. The
+ reason for this is that older kernels make assumptions about the values in
+ the GS register and enforce them when GS base is set via
+ arch_prctl(). Allowing user 
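
As a rough user-space illustration of the direct access this document
describes (sketch only, not part of the patch): a program built with
gcc -mfsgsbase can use the compiler intrinsics, provided it only does so
when AT_HWCAP2 advertises FSGSBASE, since the instructions raise SIGILL
otherwise:

	#include <stdio.h>
	#include <immintrin.h>	/* _readgsbase_u64()/_writegsbase_u64() */

	int main(void)
	{
		unsigned long base;

		_writegsbase_u64(0x100000);	/* WRGSBASE: GS is otherwise unused here */
		base = _readgsbase_u64();	/* RDGSBASE */
		printf("GS base is now %#lx\n", base);
		return 0;
	}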

[PATCH v12 00/18] Enable FSGSBASE instructions

2020-05-10 Thread Sasha Levin
Benefits:
Currently a user process that wishes to read or write the FS/GS base must
make a system call. But recent X86 processors have added new instructions
for use in 64-bit mode that allow direct access to the FS and GS segment
base addresses.  The operating system controls whether applications can
use these instructions with a %cr4 control bit.

In addition to benefits to applications, performance improvements to the
OS context switch code are possible by making use of these instructions. A
third party reported promising performance numbers from their
initial benchmarking of the previous version of this patch series [9].

Enablement check:
The kernel provides information about the enabled state of FSGSBASE to
applications using the ELF_AUX vector. If the HWCAP2_FSGSBASE bit is set in
the AUX vector, the kernel has FSGSBASE instructions enabled and
applications can use them.

Kernel changes:
Major changes made in the kernel are in context switch, the paranoid path,
and ptrace. In a context switch, a task's FS/GS base is now preserved
regardless of its selector. In the paranoid path, GS base is unconditionally
overwritten with the kernel GS base on entry and the original GS base is
restored on exit. Ptrace now allows the FS/GS index and base values to
diverge.

Security:
For mitigating the Spectre v1 SWAPGS issue, LFENCE instructions were added
on most kernel entries. Those patches relied on the previous behavior that
user space could not load a kernel address into the GS base. This series
changes that assumption, since the user can now load any address into GS base.
The changes to the kernel entry path in this patch series take account of
the SWAPGS issue.

Changes from v11:

 - Rebase to v5.7-rc5, fix 32bit compilation error.


Andi Kleen (2):
  x86/fsgsbase/64: Add intrinsics for FSGSBASE instructions
  x86/elf: Enumerate kernel FSGSBASE capability in AT_HWCAP2

Andy Lutomirski (4):
  x86/cpu: Add 'unsafe_fsgsbase' to enable CR4.FSGSBASE
  x86/entry/64: Clean up paranoid exit
  x86/fsgsbase/64: Use FSGSBASE in switch_to() if available
  x86/fsgsbase/64: Enable FSGSBASE on 64bit by default and add a chicken
bit

Chang S. Bae (9):
  x86/ptrace: Prevent ptrace from clearing the FS/GS selector
  selftests/x86/fsgsbase: Test GS selector on ptracer-induced GS base
write
  x86/entry/64: Switch CR3 before SWAPGS in paranoid entry
  x86/entry/64: Introduce the FIND_PERCPU_BASE macro
  x86/entry/64: Handle FSGSBASE enabled paranoid entry/exit
  x86/entry/64: Document GSBASE handling in the paranoid path
  x86/fsgsbase/64: Enable FSGSBASE instructions in helper functions
  x86/fsgsbase/64: Use FSGSBASE instructions on thread copy and ptrace
  selftests/x86/fsgsbase: Test ptracer-induced GS base write with
FSGSBASE

Sasha Levin (1):
  x86/fsgsbase/64: move save_fsgs to header file

Thomas Gleixner (1):
  Documentation/x86/64: Add documentation for GS/FS addressing mode

Tony Luck (1):
  x86/speculation/swapgs: Check FSGSBASE in enabling SWAPGS mitigation

 .../admin-guide/kernel-parameters.txt |   2 +
 Documentation/x86/entry_64.rst|   9 +
 Documentation/x86/x86_64/fsgs.rst | 199 ++
 Documentation/x86/x86_64/index.rst|   1 +
 arch/x86/entry/calling.h  |  40 
 arch/x86/entry/entry_64.S | 131 +---
 arch/x86/include/asm/fsgsbase.h   |  45 +++-
 arch/x86/include/asm/inst.h   |  15 ++
 arch/x86/include/uapi/asm/hwcap2.h|   3 +
 arch/x86/kernel/cpu/bugs.c|   6 +-
 arch/x86/kernel/cpu/common.c  |  22 ++
 arch/x86/kernel/process.c |   9 +-
 arch/x86/kernel/process.h |  72 +++
 arch/x86/kernel/process_64.c  | 142 +++--
 arch/x86/kernel/ptrace.c  |  17 +-
 tools/testing/selftests/x86/fsgsbase.c|  24 ++-
 16 files changed, 608 insertions(+), 129 deletions(-)
 create mode 100644 Documentation/x86/x86_64/fsgs.rst

-- 
2.20.1



Re: [PATCH] spi: sun6i: Add support for GPIO chip select lines

2020-05-10 Thread Alistair
On Wed, May 6, 2020, at 1:27 AM, Geert Uytterhoeven wrote:
> Hi Alistair,
> 
> On Wed, May 6, 2020 at 3:41 AM Alistair Francis  
> wrote:
> > Add a setup function that can be used to support using generic GPIO
> > lines for the chip select.
> >
> > Signed-off-by: Alistair Francis 
> > ---
> > drivers/spi/spi-sun6i.c | 27 +++
> > 1 file changed, 27 insertions(+)
> >
> > diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
> > index ec7967be9e2f..fd4e19434942 100644
> > --- a/drivers/spi/spi-sun6i.c
> > +++ b/drivers/spi/spi-sun6i.c
> > @@ -10,6 +10,7 @@
> > #include 
> > #include 
> > #include 
> > +#include 
> > #include 
> > #include 
> > #include 
> > @@ -171,6 +172,31 @@ static inline void sun6i_spi_fill_fifo(struct 
> > sun6i_spi *sspi, int len)
> > }
> > }
> >
> > +static int sun6i_spi_setup(struct spi_device *spi)
> > +{
> > + int ret;
> > +
> > + /* sanity check for native cs */
> > + if (spi->mode & SPI_NO_CS)
> > + return 0;
> > + if (gpio_is_valid(spi->cs_gpio)) {
> > + /* with gpio-cs set the GPIO to the correct level
> > + * and as output (in case the dt has the gpio not configured
> > + * as output but native cs)
> > + */
> > + ret = gpio_direction_output(spi->cs_gpio,
> > + (spi->mode & SPI_CS_HIGH) ? 0 : 1);
> > + if (ret)
> > + dev_err(>dev,
> > + "could not set gpio %i as output: %i\n",
> > + spi->cs_gpio, ret);
> > +
> > + return ret;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > static void sun6i_spi_set_cs(struct spi_device *spi, bool enable)
> > {
> > struct sun6i_spi *sspi = spi_master_get_devdata(spi->master);
> > @@ -470,6 +496,7 @@ static int sun6i_spi_probe(struct platform_device *pdev)
> >
> > master->max_speed_hz = 100 * 1000 * 1000;
> > master->min_speed_hz = 3 * 1000;
> > + master->setup = sun6i_spi_setup;
> > master->set_cs = sun6i_spi_set_cs;
> > master->transfer_one = sun6i_spi_transfer_one;
> > master->num_chipselect = 4;
> 
> Can't you just set
> 
>  master->use_gpio_descriptors = true;
> 
> instead and be done with it?
> Then drivers/spi/spi.c:spi_get_gpio_descs() will configure the GPIO line
> as output for you.

Yep, it looks like that works. Sending a v2.

Alistair

> 
> Gr{oetje,eeting}s,
> 
>  Geert
> 
> -- 
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- 
> ge...@linux-m68k.org
> 
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like 
> that.
>  -- Linus Torvalds
> 


[PATCH 06/19] ipv4: do compat setsockopt for MCAST_MSFILTER directly

2020-05-10 Thread Al Viro
From: Al Viro 

Parallel to what the native setsockopt() does, except that unlike
the native setsockopt() we do not use memdup_user() - we want
the sockaddr_storage fields properly aligned, so we allocate
4 bytes more and copy compat_group_filter at offset 4,
which yields the proper alignment.
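
The offset-by-4 trick works because kmalloc() returns (at least) 8-byte
aligned memory on 64-bit and the compat layout places gf_group 4 bytes into
the structure. Roughly (illustrative sketch of the arithmetic, not
additional patch code):

	void *p = kmalloc(optlen + 4, GFP_KERNEL);	/* p is 8-byte aligned */
	struct compat_group_filter *gf32 = p + 4;	/* struct starts at offset 4 */
	/*
	 * offsetof(struct compat_group_filter, gf_group) == 4, so
	 * &gf32->gf_group sits at p + 8 -- an 8-byte aligned
	 * sockaddr_storage -- and gf_slist further down lands on an
	 * 8-byte boundary as well.
	 */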

Signed-off-by: Al Viro 
---
 net/ipv4/ip_sockglue.c | 48 +++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8c14a474870d..dc1f5276be4e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1286,9 +1286,55 @@ int compat_ip_setsockopt(struct sock *sk, int level, int 
optname,
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_BLOCK_SOURCE:
case MCAST_UNBLOCK_SOURCE:
-   case MCAST_MSFILTER:
return compat_mc_setsockopt(sk, level, optname, optval, optlen,
ip_setsockopt);
+   case MCAST_MSFILTER:
+   {
+   const int size0 = offsetof(struct compat_group_filter, gf_slist);
+   struct compat_group_filter *gf32;
+   void *p;
+   int n;
+
+   if (optlen < size0)
+   return -EINVAL;
+   if (optlen > sysctl_optmem_max - 4)
+   return -ENOBUFS;
+
+   p = kmalloc(optlen + 4, GFP_KERNEL);
+   if (!p)
+   return -ENOMEM;
+   gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+   if (copy_from_user(gf32, optval, optlen)) {
+   err = -EFAULT;
+   goto mc_msf_out;
+   }
+
+   n = gf32->gf_numsrc;
+   /* numsrc >= (4G-140)/128 overflow in 32 bits */
+   if (n >= 0x1ffffff) {
+   err = -ENOBUFS;
+   goto mc_msf_out;
+   }
+   if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) {
+   err = -EINVAL;
+   goto mc_msf_out;
+   }
+
+   rtnl_lock();
+   lock_sock(sk);
+   /* numsrc >= (4G-140)/128 overflow in 32 bits */
+   if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+   err = -ENOBUFS;
+   else
+   err = set_mcast_msfilter(sk, gf32->gf_interface,
+n, gf32->gf_fmode,
+>gf_group, 
gf32->gf_slist);
+   release_sock(sk);
+   rtnl_unlock();
+mc_msf_out:
+   kfree(p);
+   return err;
+   }
}
 
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
-- 
2.11.0



Re: [PATCH v11 00/18] Enable FSGSBASE instructions

2020-05-10 Thread Sasha Levin

On Sun, May 10, 2020 at 05:53:19PM -0700, Andi Kleen wrote:

My interest in this is that we have a few workloads that value the
ability to access FS/GS base directly and show nice performance


Can you please share some rough numbers, Sasha?


I don't have any recent numbers around these - this series effectively
enables certain workloads rather than just improve the performance
somewhat so benchmarking for exact numbers isn't too interesting here.


I would expect everything that does a lot of context switches
to benefit automatically, apart from the new free register (which
requires enabling, but also has great potential)


And even more so when these registers are actually being used for the
purpose they were designed for (this is in the context of secure
computing/enclaves/etc).

--
Thanks,
Sasha


[PATCH 04/19] get rid of compat_mc_getsockopt()

2020-05-10 Thread Al Viro
From: Al Viro 

now we can do MCAST_MSFILTER in compat ->getsockopt() without
playing silly buggers with copying things back and forth.
We can form a native struct group_filter (sans the variable-length
tail) on stack, pass that + pointer to the tail of original request
to the helper doing the bulk of the work, then do the rest of
copyout - same as the native getsockopt() does.

Signed-off-by: Al Viro 
---
 include/net/compat.h |  3 --
 net/compat.c | 79 
 net/ipv4/ip_sockglue.c   | 44 +--
 net/ipv6/ipv6_sockglue.c | 41 +++--
 4 files changed, 79 insertions(+), 88 deletions(-)

diff --git a/include/net/compat.h b/include/net/compat.h
index 9f4a56c5671e..b6043e759cde 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -52,9 +52,6 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct 
sock *,
 int compat_mc_setsockopt(struct sock *, int, int, char __user *, unsigned int,
 int (*)(struct sock *, int, int, char __user *,
 unsigned int));
-int compat_mc_getsockopt(struct sock *, int, int, char __user *, int __user *,
-int (*)(struct sock *, int, int, char __user *,
-int __user *));
 
 struct compat_group_req {
__u32gr_interface;
diff --git a/net/compat.c b/net/compat.c
index 06af69e7b408..6191481b5ef0 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -537,85 +537,6 @@ int compat_mc_setsockopt(struct sock *sock, int level, int 
optname,
 }
 EXPORT_SYMBOL(compat_mc_setsockopt);
 
-int compat_mc_getsockopt(struct sock *sock, int level, int optname,
-   char __user *optval, int __user *optlen,
-   int (*getsockopt)(struct sock *, int, int, char __user *, int __user *))
-{
-   struct compat_group_filter __user *gf32 = (void __user *)optval;
-   struct group_filter __user *kgf;
-   int __user  *koptlen;
-   u32 interface, fmode, numsrc;
-   int klen, ulen, err;
-
-   if (optname != MCAST_MSFILTER)
-   return getsockopt(sock, level, optname, optval, optlen);
-
-   koptlen = compat_alloc_user_space(sizeof(*koptlen));
-   if (!access_ok(optlen, sizeof(*optlen)) ||
-   __get_user(ulen, optlen))
-   return -EFAULT;
-
-   /* adjust len for pad */
-   klen = ulen + sizeof(*kgf) - sizeof(*gf32);
-
-   if (klen < GROUP_FILTER_SIZE(0))
-   return -EINVAL;
-
-   if (!access_ok(koptlen, sizeof(*koptlen)) ||
-   __put_user(klen, koptlen))
-   return -EFAULT;
-
-   /* have to allow space for previous compat_alloc_user_space, too */
-   kgf = compat_alloc_user_space(klen+sizeof(*optlen));
-
-   if (!access_ok(gf32, __COMPAT_GF0_SIZE) ||
-   __get_user(interface, &gf32->gf_interface) ||
-   __get_user(fmode, &gf32->gf_fmode) ||
-   __get_user(numsrc, &gf32->gf_numsrc) ||
-   __put_user(interface, &kgf->gf_interface) ||
-   __put_user(fmode, &kgf->gf_fmode) ||
-   __put_user(numsrc, &kgf->gf_numsrc) ||
-   copy_in_user(&kgf->gf_group, &gf32->gf_group,
-   sizeof(kgf->gf_group)))
-   return -EFAULT;
-
-   err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen);
-   if (err)
-   return err;
-
-   if (!access_ok(koptlen, sizeof(*koptlen)) ||
-   __get_user(klen, koptlen))
-   return -EFAULT;
-
-   ulen = klen - (sizeof(*kgf)-sizeof(*gf32));
-
-   if (!access_ok(optlen, sizeof(*optlen)) ||
-   __put_user(ulen, optlen))
-   return -EFAULT;
-
-   if (!access_ok(kgf, klen) ||
-   !access_ok(gf32, ulen) ||
-   __get_user(interface, &kgf->gf_interface) ||
-   __get_user(fmode, &kgf->gf_fmode) ||
-   __get_user(numsrc, &kgf->gf_numsrc) ||
-   __put_user(interface, &gf32->gf_interface) ||
-   __put_user(fmode, &gf32->gf_fmode) ||
-   __put_user(numsrc, &gf32->gf_numsrc))
-   return -EFAULT;
-   if (numsrc) {
-   int copylen;
-
-   klen -= GROUP_FILTER_SIZE(0);
-   copylen = numsrc * sizeof(gf32->gf_slist[0]);
-   if (copylen > klen)
-   copylen = klen;
-   if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen))
-   return -EFAULT;
-   }
-   return err;
-}
-EXPORT_SYMBOL(compat_mc_getsockopt);
-
-
 /* Argument list sizes for compat_sys_socketcall */
 #define AL(x) ((x) * sizeof(u32))
 static unsigned char nas[21] = {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8616c38bd420..6bdaf43236ea 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1606,9 +1606,47 @@ int compat_ip_getsockopt(struct sock *sk, int level, int 
optname,
 {
int err;
 
-   if (optname == MCAST_MSFILTER)
-   return compat_mc_getsockopt(sk, level, optname, 

[PATCH 03/19] ip*_mc_gsfget(): lift copyout of struct group_filter into callers

2020-05-10 Thread Al Viro
From: Al Viro 

pass the userland pointer to the array in its tail, so that part
gets copied out by our functions; copyout of everything else is
done in the callers.  Rationale: reuse for compat; the array
is the same in native and compat, the layout of parts before it
is different for compat.
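
On the ipv4 getsockopt() side the resulting caller shape is roughly the
following (a sketch only - the corresponding hunk is truncated below, and the
error/length handling is simplified):

	struct group_filter __user *p = (void __user *)optval;
	struct group_filter gsf;
	const int size0 = offsetof(struct group_filter, gf_slist);
	int err;

	if (copy_from_user(&gsf, p, size0))        /* caller copies the fixed head */
		return -EFAULT;
	err = ip_mc_gsfget(sk, &gsf, p->gf_slist); /* helper copies only the array */
	if (!err && copy_to_user(p, &gsf, size0))  /* caller copies the head back  */
		err = -EFAULT;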

Signed-off-by: Al Viro 
---
 include/linux/igmp.h |  2 +-
 include/net/ipv6.h   |  2 +-
 net/ipv4/igmp.c  | 18 +-
 net/ipv4/ip_sockglue.c   | 19 ++-
 net/ipv6/ipv6_sockglue.c | 18 ++
 net/ipv6/mcast.c | 10 +++---
 6 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index faa6586a5783..64ce8cd1cfaf 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -123,7 +123,7 @@ extern int ip_mc_msfilter(struct sock *sk, struct 
ip_msfilter *msf,int ifindex);
 extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
struct ip_msfilter __user *optval, int __user *optlen);
 extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
-   struct group_filter __user *optval, int __user *optlen);
+   struct sockaddr_storage __user *p);
 extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt,
  int dif, int sdif);
 extern void ip_mc_init_dev(struct in_device *);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 955badd1e8ff..900cc66311c4 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1136,7 +1136,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
  struct group_source_req *pgsr);
 int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
 int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
- struct group_filter __user *optval, int __user *optlen);
+ struct sockaddr_storage __user *p);
 
 #ifdef CONFIG_PROC_FS
 int ac6_proc_init(struct net *net);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 47f0502b2101..7b272bbed2b4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2565,9 +2565,9 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 }
 
 int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
-   struct group_filter __user *optval, int __user *optlen)
+   struct sockaddr_storage __user *p)
 {
-   int err, i, count, copycount;
+   int i, count, copycount;
struct sockaddr_in *psin;
__be32 addr;
struct ip_mc_socklist *pmc;
@@ -2583,37 +2583,29 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter 
*gsf,
if (!ipv4_is_multicast(addr))
return -EINVAL;
 
-   err = -EADDRNOTAVAIL;
-
for_each_pmc_rtnl(inet, pmc) {
if (pmc->multi.imr_multiaddr.s_addr == addr &&
pmc->multi.imr_ifindex == gsf->gf_interface)
break;
}
if (!pmc)   /* must have a prior join */
-   goto done;
+   return -EADDRNOTAVAIL;
gsf->gf_fmode = pmc->sfmode;
psl = rtnl_dereference(pmc->sflist);
count = psl ? psl->sl_count : 0;
copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
gsf->gf_numsrc = count;
-   if (put_user(GROUP_FILTER_SIZE(copycount), optlen) ||
-   copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
-   return -EFAULT;
-   }
-   for (i = 0; i < copycount; i++) {
+   for (i = 0; i < copycount; i++, p++) {
struct sockaddr_storage ss;
 
psin = (struct sockaddr_in *)&ss;
memset(&ss, 0, sizeof(ss));
psin->sin_family = AF_INET;
psin->sin_addr.s_addr = psl->sl_addr[i];
-   if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss)))
+   if (copy_to_user(p, &ss, sizeof(ss)))
return -EFAULT;
}
return 0;
-done:
-   return err;
 }
 
 /*
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8f550cf4c1c0..8616c38bd420 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1473,19 +1473,28 @@ static int do_ip_getsockopt(struct sock *sk, int level, 
int optname,
}
case MCAST_MSFILTER:
{
+   struct group_filter __user *p = (void __user *)optval;
struct group_filter gsf;
+   const int size0 = offsetof(struct group_filter, gf_slist);
+   int num;
 
-   if (len < GROUP_FILTER_SIZE(0)) {
+   if (len < size0) {
err = -EINVAL;
goto out;
}
-   if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
+   if (copy_from_user(&gsf, p, size0)) {
err = -EFAULT;
goto out;
}
-   err = ip_mc_gsfget(sk, &gsf,
-  (struct group_filter __user 

Re: [PATCH] selftests:mptcp: fix empty optstring

2020-05-10 Thread Li Zhijian

ping


On 4/2/20 2:52 PM, Li Zhijian wrote:

From: Li Zhijian 

Signed-off-by: Li Zhijian 
---
  tools/testing/selftests/net/mptcp/pm_netlink.sh | 3 +--
  1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh 
b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 9172746b6cf0..8c7998c64d9e 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -8,8 +8,7 @@ usage() {
echo "Usage: $0 [ -h ]"
  }
  
-

-while getopts "$optstring" option;do
+while getopts "h" option;do
case "$option" in
"h")
usage $0






[PATCH 01/19] lift compat definitions of mcast [sg]etsockopt requests into net/compat.h

2020-05-10 Thread Al Viro
From: Al Viro 

We want to get rid of compat_mc_[sg]etsockopt() and to have that stuff
handled without compat_alloc_user_space(), extra copying through
userland, etc.  To do that we'll need ipv4 and ipv6 instances of
->compat_[sg]etsockopt() to manipulate the 32bit variants of mcast
requests, so we need to move the definitions of those out of net/compat.c
and into a public header.

This patch just does a mechanical move to include/net/compat.h

Signed-off-by: Al Viro 
---
 include/net/compat.h | 24 
 net/compat.c | 25 -
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/include/net/compat.h b/include/net/compat.h
index e341260642fe..9f4a56c5671e 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -56,4 +56,28 @@ int compat_mc_getsockopt(struct sock *, int, int, char 
__user *, int __user *,
 int (*)(struct sock *, int, int, char __user *,
 int __user *));
 
+struct compat_group_req {
+   __u32 gr_interface;
+   struct __kernel_sockaddr_storage gr_group
+   __aligned(4);
+} __packed;
+
+struct compat_group_source_req {
+   __u32 gsr_interface;
+   struct __kernel_sockaddr_storage gsr_group
+   __aligned(4);
+   struct __kernel_sockaddr_storage gsr_source
+   __aligned(4);
+} __packed;
+
+struct compat_group_filter {
+   __u32 gf_interface;
+   struct __kernel_sockaddr_storage gf_group
+   __aligned(4);
+   __u32 gf_fmode;
+   __u32 gf_numsrc;
+   struct __kernel_sockaddr_storage gf_slist[1]
+   __aligned(4);
+} __packed;
+
 #endif /* NET_COMPAT_H */
diff --git a/net/compat.c b/net/compat.c
index 4bed96e84d9a..06af69e7b408 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -447,34 +447,9 @@ COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, 
int, optname,
return __compat_sys_getsockopt(fd, level, optname, optval, optlen);
 }
 
-struct compat_group_req {
-   __u32gr_interface;
-   struct __kernel_sockaddr_storage gr_group
-   __aligned(4);
-} __packed;
-
-struct compat_group_source_req {
-   __u32gsr_interface;
-   struct __kernel_sockaddr_storage gsr_group
-   __aligned(4);
-   struct __kernel_sockaddr_storage gsr_source
-   __aligned(4);
-} __packed;
-
-struct compat_group_filter {
-   __u32gf_interface;
-   struct __kernel_sockaddr_storage gf_group
-   __aligned(4);
-   __u32gf_fmode;
-   __u32gf_numsrc;
-   struct __kernel_sockaddr_storage gf_slist[1]
-   __aligned(4);
-} __packed;
-
 #define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \
sizeof(struct __kernel_sockaddr_storage))
 
-
 int compat_mc_setsockopt(struct sock *sock, int level, int optname,
char __user *optval, unsigned int optlen,
int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int))
-- 
2.11.0



[PATCH 07/19] ip6_mc_msfilter(): pass the address list separately

2020-05-10 Thread Al Viro
From: Al Viro 

that way we'll be able to reuse it for compat case

Signed-off-by: Al Viro 
---
 include/net/ipv6.h   | 3 ++-
 net/ipv6/ipv6_sockglue.c | 2 +-
 net/ipv6/mcast.c | 7 ---
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 900cc66311c4..901c78b117a1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1134,7 +1134,8 @@ struct group_filter;
 
 int ip6_mc_source(int add, int omode, struct sock *sk,
  struct group_source_req *pgsr);
-int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
+int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
+ struct sockaddr_storage *list);
 int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
  struct sockaddr_storage __user *p);
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 3f127c6b3c22..33efc9112259 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -780,7 +780,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, 
int optname,
retv = -EINVAL;
break;
}
-   retv = ip6_mc_msfilter(sk, gsf);
+   retv = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
kfree(gsf);
 
break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 97d796c7d6c0..7e12d2114158 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -457,7 +457,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
return err;
 }
 
-int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
+int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
+   struct sockaddr_storage *list)
 {
const struct in6_addr *group;
struct ipv6_mc_socklist *pmc;
@@ -509,10 +510,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter 
*gsf)
goto done;
}
newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc;
-   for (i = 0; i < newpsl->sl_count; ++i) {
+   for (i = 0; i < newpsl->sl_count; ++i, ++list) {
struct sockaddr_in6 *psin6;
 
-   psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i];
+   psin6 = (struct sockaddr_in6 *)list;
newpsl->sl_addr[i] = psin6->sin6_addr;
}
err = ip6_mc_add_src(idev, group, gsf->gf_fmode,
-- 
2.11.0



[PATCH 09/19] ipv[46]: do compat setsockopt for MCAST_{JOIN,LEAVE}_GROUP directly

2020-05-10 Thread Al Viro
From: Al Viro 

direct parallel to the way these two are handled in the native
->setsockopt() instances - the helpers that do the real work
are already separated and can be reused as-is in this case.

Signed-off-by: Al Viro 
---
 net/ipv4/ip_sockglue.c   | 31 +++
 net/ipv6/ipv6_sockglue.c | 28 
 2 files changed, 59 insertions(+)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index dc1f5276be4e..937f39906419 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1282,6 +1282,37 @@ int compat_ip_setsockopt(struct sock *sk, int level, int 
optname,
switch (optname) {
case MCAST_JOIN_GROUP:
case MCAST_LEAVE_GROUP:
+   {
+   struct compat_group_req __user *gr32 = (void __user *)optval;
+   struct group_req greq;
+   struct sockaddr_in *psin = (struct sockaddr_in *)&greq.gr_group;
+   struct ip_mreqn mreq;
+
+   if (optlen < sizeof(struct compat_group_req))
+   return -EINVAL;
+
+   if (get_user(greq.gr_interface, &gr32->gr_interface) ||
+   copy_from_user(&greq.gr_group, &gr32->gr_group,
+   sizeof(greq.gr_group)))
+   return -EFAULT;
+
+   if (psin->sin_family != AF_INET)
+   return -EINVAL;
+
+   memset(&mreq, 0, sizeof(mreq));
+   mreq.imr_multiaddr = psin->sin_addr;
+   mreq.imr_ifindex = greq.gr_interface;
+
+   rtnl_lock();
+   lock_sock(sk);
+   if (optname == MCAST_JOIN_GROUP)
+   err = ip_mc_join_group(sk, &mreq);
+   else
+   err = ip_mc_leave_group(sk, &mreq);
+   release_sock(sk);
+   rtnl_unlock();
+   return err;
+   }
case MCAST_JOIN_SOURCE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_BLOCK_SOURCE:
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 220087bfd17c..b386a2b3668c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -976,6 +976,34 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int 
optname,
switch (optname) {
case MCAST_JOIN_GROUP:
case MCAST_LEAVE_GROUP:
+   {
+   struct compat_group_req __user *gr32 = (void __user *)optval;
+   struct group_req greq;
+   struct sockaddr_in6 *psin6 = (struct sockaddr_in6 *)&greq.gr_group;
+
+   if (optlen < sizeof(struct compat_group_req))
+   return -EINVAL;
+
+   if (get_user(greq.gr_interface, &gr32->gr_interface) ||
+   copy_from_user(&greq.gr_group, &gr32->gr_group,
+   sizeof(greq.gr_group)))
+   return -EFAULT;
+
+   if (greq.gr_group.ss_family != AF_INET6)
+   return -EADDRNOTAVAIL;
+
+   rtnl_lock();
+   lock_sock(sk);
+   if (optname == MCAST_JOIN_GROUP)
+   err = ipv6_sock_mc_join(sk, greq.gr_interface,
+   &psin6->sin6_addr);
+   else
+   err = ipv6_sock_mc_drop(sk, greq.gr_interface,
+   &psin6->sin6_addr);
+   release_sock(sk);
+   rtnl_unlock();
+   return err;
+   }
case MCAST_JOIN_SOURCE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_BLOCK_SOURCE:
-- 
2.11.0



[PATCH 11/19] ipv6: take handling of group_source_req options into a helper

2020-05-10 Thread Al Viro
From: Al Viro 

Signed-off-by: Al Viro 
---
 net/ipv6/ipv6_sockglue.c | 65 +++-
 1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b386a2b3668c..fc525ad9ed3c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -136,6 +136,41 @@ static bool setsockopt_needs_rtnl(int optname)
return false;
 }
 
+static int do_ipv6_mcast_group_source(struct sock *sk, int optname,
+ struct group_source_req *greqs)
+{
+   int omode, add;
+
+   if (greqs->gsr_group.ss_family != AF_INET6 ||
+   greqs->gsr_source.ss_family != AF_INET6)
+   return -EADDRNOTAVAIL;
+
+   if (optname == MCAST_BLOCK_SOURCE) {
+   omode = MCAST_EXCLUDE;
+   add = 1;
+   } else if (optname == MCAST_UNBLOCK_SOURCE) {
+   omode = MCAST_EXCLUDE;
+   add = 0;
+   } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
+   struct sockaddr_in6 *psin6;
+   int retv;
+
+   psin6 = (struct sockaddr_in6 *)&greqs->gsr_group;
+   retv = ipv6_sock_mc_join_ssm(sk, greqs->gsr_interface,
+   &psin6->sin6_addr,
+MCAST_INCLUDE);
+   /* prior join w/ different source is ok */
+   if (retv && retv != -EADDRINUSE)
+   return retv;
+   omode = MCAST_INCLUDE;
+   add = 1;
+   } else /* MCAST_LEAVE_SOURCE_GROUP */ {
+   omode = MCAST_INCLUDE;
+   add = 0;
+   }
+   return ip6_mc_source(add, omode, sk, greqs);
+}
+
 static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
 {
@@ -715,7 +750,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, 
int optname,
case MCAST_UNBLOCK_SOURCE:
{
struct group_source_req greqs;
-   int omode, add;
 
if (optlen < sizeof(struct group_source_req))
goto e_inval;
@@ -723,34 +757,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, 
int optname,
retv = -EFAULT;
break;
}
-   if (greqs.gsr_group.ss_family != AF_INET6 ||
-   greqs.gsr_source.ss_family != AF_INET6) {
-   retv = -EADDRNOTAVAIL;
-   break;
-   }
-   if (optname == MCAST_BLOCK_SOURCE) {
-   omode = MCAST_EXCLUDE;
-   add = 1;
-   } else if (optname == MCAST_UNBLOCK_SOURCE) {
-   omode = MCAST_EXCLUDE;
-   add = 0;
-   } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
-   struct sockaddr_in6 *psin6;
-
-   psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
-   retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface,
-   &psin6->sin6_addr,
-MCAST_INCLUDE);
-   /* prior join w/ different source is ok */
-   if (retv && retv != -EADDRINUSE)
-   break;
-   omode = MCAST_INCLUDE;
-   add = 1;
-   } else /* MCAST_LEAVE_SOURCE_GROUP */ {
-   omode = MCAST_INCLUDE;
-   add = 0;
-   }
-   retv = ip6_mc_source(add, omode, sk, &greqs);
+   retv = do_ipv6_mcast_group_source(sk, optname, &greqs);
break;
}
case MCAST_MSFILTER:
-- 
2.11.0



[PATCH 19/19] atm: switch do_atmif_sioc() to direct use of atm_dev_ioctl()

2020-05-10 Thread Al Viro
From: Al Viro 

Signed-off-by: Al Viro 
---
 net/atm/ioctl.c | 25 -
 1 file changed, 4 insertions(+), 21 deletions(-)

diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 52f2c77e656f..838ebf0cabbf 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -286,30 +286,13 @@ static int do_atm_iobuf(struct socket *sock, unsigned int 
cmd,
 static int do_atmif_sioc(struct socket *sock, unsigned int cmd,
 unsigned long arg)
 {
-   struct atmif_sioc __user *sioc;
-   struct compat_atmif_sioc __user *sioc32;
+   struct compat_atmif_sioc __user *sioc32 = compat_ptr(arg);
+   int number;
u32 data;
-   void __user *datap;
-   int err;
 
-   sioc = compat_alloc_user_space(sizeof(*sioc));
-   sioc32 = compat_ptr(arg);
-
-   if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) ||
-   get_user(data, &sioc32->arg))
-   return -EFAULT;
-   datap = compat_ptr(data);
-   if (put_user(datap, &sioc->arg))
+   if (get_user(data, &sioc32->arg) || get_user(number, &sioc32->number))
return -EFAULT;
-
-   err = do_vcc_ioctl(sock, cmd, (unsigned long) sioc, 0);
-
-   if (!err) {
-   if (copy_in_user(&sioc32->length, &sioc->length,
-sizeof(int)))
-   err = -EFAULT;
-   }
-   return err;
+   return atm_dev_ioctl(cmd, compat_ptr(data), &sioc32->length, number, 0);
 }
 
 static int do_atm_ioctl(struct socket *sock, unsigned int cmd32,
-- 
2.11.0



[PATCH 10/19] ipv4: take handling of group_source_req options into a helper

2020-05-10 Thread Al Viro
From: Al Viro 

Signed-off-by: Al Viro 
---
 net/ipv4/ip_sockglue.c | 83 ++
 1 file changed, 44 insertions(+), 39 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 937f39906419..4f412b0bdda4 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -624,6 +624,49 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
return -EADDRNOTAVAIL;
 }
 
+static int do_mcast_group_source(struct sock *sk, int optname,
+struct group_source_req *greqs)
+{
+   struct ip_mreq_source mreqs;
+   struct sockaddr_in *psin;
+   int omode, add, err;
+
+   if (greqs->gsr_group.ss_family != AF_INET ||
+   greqs->gsr_source.ss_family != AF_INET)
+   return -EADDRNOTAVAIL;
+
+   psin = (struct sockaddr_in *)&greqs->gsr_group;
+   mreqs.imr_multiaddr = psin->sin_addr.s_addr;
+   psin = (struct sockaddr_in *)&greqs->gsr_source;
+   mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
+   mreqs.imr_interface = 0; /* use index for mc_source */
+
+   if (optname == MCAST_BLOCK_SOURCE) {
+   omode = MCAST_EXCLUDE;
+   add = 1;
+   } else if (optname == MCAST_UNBLOCK_SOURCE) {
+   omode = MCAST_EXCLUDE;
+   add = 0;
+   } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
+   struct ip_mreqn mreq;
+
+   psin = (struct sockaddr_in *)&greqs->gsr_group;
+   mreq.imr_multiaddr = psin->sin_addr;
+   mreq.imr_address.s_addr = 0;
+   mreq.imr_ifindex = greqs->gsr_interface;
+   err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
+   if (err && err != -EADDRINUSE)
+   return err;
+   greqs->gsr_interface = mreq.imr_ifindex;
+   omode = MCAST_INCLUDE;
+   add = 1;
+   } else /* MCAST_LEAVE_SOURCE_GROUP */ {
+   omode = MCAST_INCLUDE;
+   add = 0;
+   }
+   return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface);
+}
+
 static int do_ip_setsockopt(struct sock *sk, int level,
int optname, char __user *optval, unsigned int 
optlen)
 {
@@ -1066,9 +1109,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case MCAST_UNBLOCK_SOURCE:
{
struct group_source_req greqs;
-   struct ip_mreq_source mreqs;
-   struct sockaddr_in *psin;
-   int omode, add;
 
if (optlen != sizeof(struct group_source_req))
goto e_inval;
@@ -1076,42 +1116,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EFAULT;
break;
}
-   if (greqs.gsr_group.ss_family != AF_INET ||
-   greqs.gsr_source.ss_family != AF_INET) {
-   err = -EADDRNOTAVAIL;
-   break;
-   }
-   psin = (struct sockaddr_in *)&greqs.gsr_group;
-   mreqs.imr_multiaddr = psin->sin_addr.s_addr;
-   psin = (struct sockaddr_in *)&greqs.gsr_source;
-   mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
-   mreqs.imr_interface = 0; /* use index for mc_source */
-
-   if (optname == MCAST_BLOCK_SOURCE) {
-   omode = MCAST_EXCLUDE;
-   add = 1;
-   } else if (optname == MCAST_UNBLOCK_SOURCE) {
-   omode = MCAST_EXCLUDE;
-   add = 0;
-   } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
-   struct ip_mreqn mreq;
-
-   psin = (struct sockaddr_in *)&greqs.gsr_group;
-   mreq.imr_multiaddr = psin->sin_addr;
-   mreq.imr_address.s_addr = 0;
-   mreq.imr_ifindex = greqs.gsr_interface;
-   err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
-   if (err && err != -EADDRINUSE)
-   break;
-   greqs.gsr_interface = mreq.imr_ifindex;
-   omode = MCAST_INCLUDE;
-   add = 1;
-   } else /* MCAST_LEAVE_SOURCE_GROUP */ {
-   omode = MCAST_INCLUDE;
-   add = 0;
-   }
-   err = ip_mc_source(add, omode, sk, &mreqs,
-  greqs.gsr_interface);
+   err = do_mcast_group_source(sk, optname, &greqs);
break;
}
case MCAST_MSFILTER:
-- 
2.11.0



[PATCH 14/19] batadv_socket_read(): get rid of pointless access_ok()

2020-05-10 Thread Al Viro
From: Al Viro 

address is passed only to copy_to_user()

Signed-off-by: Al Viro 
---
 net/batman-adv/icmp_socket.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index ccb535c77e5d..8bdabc03b0b2 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -135,9 +135,6 @@ static ssize_t batadv_socket_read(struct file *file, char 
__user *buf,
if (!buf || count < sizeof(struct batadv_icmp_packet))
return -EINVAL;
 
-   if (!access_ok(buf, count))
-   return -EFAULT;
-
error = wait_event_interruptible(socket_client->queue_wait,
 socket_client->queue_len);
 
-- 
2.11.0



[PATCH 13/19] get rid of compat_mc_setsockopt()

2020-05-10 Thread Al Viro
From: Al Viro 

not used anymore

Signed-off-by: Al Viro 
---
 include/net/compat.h |  4 ---
 net/compat.c | 90 
 2 files changed, 94 deletions(-)

diff --git a/include/net/compat.h b/include/net/compat.h
index b6043e759cde..2de4dfcdc11f 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -49,10 +49,6 @@ int put_cmsg_compat(struct msghdr*, int, int, int, void *);
 int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *,
 unsigned char *, int);
 
-int compat_mc_setsockopt(struct sock *, int, int, char __user *, unsigned int,
-int (*)(struct sock *, int, int, char __user *,
-unsigned int));
-
 struct compat_group_req {
__u32gr_interface;
struct __kernel_sockaddr_storage gr_group
diff --git a/net/compat.c b/net/compat.c
index 6191481b5ef0..a47ce3a6e7af 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -447,96 +447,6 @@ COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, 
int, optname,
return __compat_sys_getsockopt(fd, level, optname, optval, optlen);
 }
 
-#define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \
-   sizeof(struct __kernel_sockaddr_storage))
-
-int compat_mc_setsockopt(struct sock *sock, int level, int optname,
-   char __user *optval, unsigned int optlen,
-   int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int))
-{
-   char __user *koptval = optval;
-   int koptlen = optlen;
-
-   switch (optname) {
-   case MCAST_JOIN_GROUP:
-   case MCAST_LEAVE_GROUP:
-   {
-   struct compat_group_req __user *gr32 = (void __user *)optval;
-   struct group_req __user *kgr =
-   compat_alloc_user_space(sizeof(struct group_req));
-   u32 interface;
-
-   if (!access_ok(gr32, sizeof(*gr32)) ||
-   !access_ok(kgr, sizeof(struct group_req)) ||
-   __get_user(interface, &gr32->gr_interface) ||
-   __put_user(interface, &kgr->gr_interface) ||
-   copy_in_user(&kgr->gr_group, &gr32->gr_group,
-   sizeof(kgr->gr_group)))
-   return -EFAULT;
-   koptval = (char __user *)kgr;
-   koptlen = sizeof(struct group_req);
-   break;
-   }
-   case MCAST_JOIN_SOURCE_GROUP:
-   case MCAST_LEAVE_SOURCE_GROUP:
-   case MCAST_BLOCK_SOURCE:
-   case MCAST_UNBLOCK_SOURCE:
-   {
-   struct compat_group_source_req __user *gsr32 = (void __user 
*)optval;
-   struct group_source_req __user *kgsr = compat_alloc_user_space(
-   sizeof(struct group_source_req));
-   u32 interface;
-
-   if (!access_ok(gsr32, sizeof(*gsr32)) ||
-   !access_ok(kgsr,
-   sizeof(struct group_source_req)) ||
-   __get_user(interface, &gsr32->gsr_interface) ||
-   __put_user(interface, &kgsr->gsr_interface) ||
-   copy_in_user(&kgsr->gsr_group, &gsr32->gsr_group,
-   sizeof(kgsr->gsr_group)) ||
-   copy_in_user(&kgsr->gsr_source, &gsr32->gsr_source,
-   sizeof(kgsr->gsr_source)))
-   return -EFAULT;
-   koptval = (char __user *)kgsr;
-   koptlen = sizeof(struct group_source_req);
-   break;
-   }
-   case MCAST_MSFILTER:
-   {
-   struct compat_group_filter __user *gf32 = (void __user *)optval;
-   struct group_filter __user *kgf;
-   u32 interface, fmode, numsrc;
-
-   if (!access_ok(gf32, __COMPAT_GF0_SIZE) ||
-   __get_user(interface, &gf32->gf_interface) ||
-   __get_user(fmode, &gf32->gf_fmode) ||
-   __get_user(numsrc, &gf32->gf_numsrc))
-   return -EFAULT;
-   koptlen = optlen + sizeof(struct group_filter) -
-   sizeof(struct compat_group_filter);
-   if (koptlen < GROUP_FILTER_SIZE(numsrc))
-   return -EINVAL;
-   kgf = compat_alloc_user_space(koptlen);
-   if (!access_ok(kgf, koptlen) ||
-   __put_user(interface, &kgf->gf_interface) ||
-   __put_user(fmode, &kgf->gf_fmode) ||
-   __put_user(numsrc, &kgf->gf_numsrc) ||
-   copy_in_user(&kgf->gf_group, &gf32->gf_group,
-   sizeof(kgf->gf_group)) ||
-   (numsrc && copy_in_user(kgf->gf_slist, gf32->gf_slist,
-   numsrc * sizeof(kgf->gf_slist[0]
-   return -EFAULT;
-   koptval = (char __user *)kgf;
-   break;
-   }
-
-   default:
-   break;
-   }
-   

[PATCH 15/19] atm: separate ATM_GETNAMES handling from the rest of atm_dev_ioctl()

2020-05-10 Thread Al Viro
From: Al Viro 

atm_dev_ioctl() does copyin in two different ways - one for
ATM_GETNAMES, another for everything else.  Start with separating
the former into a new helper (atm_getnames()).  The next step
will be to lift the copyin into the callers.

Signed-off-by: Al Viro 
---
 net/atm/ioctl.c |  6 +++-
 net/atm/resources.c | 88 +++--
 net/atm/resources.h |  1 +
 3 files changed, 51 insertions(+), 44 deletions(-)

diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index d955b683aa7c..0b4b07740fe4 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -162,7 +162,11 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int 
cmd,
if (error != -ENOIOCTLCMD)
goto done;
 
-   error = atm_dev_ioctl(cmd, argp, compat);
+   if (cmd == ATM_GETNAMES) {
+   error = atm_getnames(argp, compat);
+   } else {
+   error = atm_dev_ioctl(cmd, argp, compat);
+   }
 
 done:
return error;
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 889349c6d90d..a2ab75929eec 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -193,61 +193,63 @@ static int fetch_stats(struct atm_dev *dev, struct 
atm_dev_stats __user *arg,
return error ? -EFAULT : 0;
 }
 
-int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
+int atm_getnames(void __user *arg, int compat)
 {
void __user *buf;
-   int error, len, number, size = 0;
+   int error, len, size = 0;
struct atm_dev *dev;
struct list_head *p;
int *tmp_buf, *tmp_p;
-   int __user *sioc_len;
int __user *iobuf_len;
 
-   switch (cmd) {
-   case ATM_GETNAMES:
-   if (IS_ENABLED(CONFIG_COMPAT) && compat) {
+   if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
-   struct compat_atm_iobuf __user *ciobuf = arg;
-   compat_uptr_t cbuf;
-   iobuf_len = &ciobuf->length;
-   if (get_user(cbuf, &ciobuf->buffer))
-   return -EFAULT;
-   buf = compat_ptr(cbuf);
+   struct compat_atm_iobuf __user *ciobuf = arg;
+   compat_uptr_t cbuf;
+   iobuf_len = &ciobuf->length;
+   if (get_user(cbuf, &ciobuf->buffer))
+   return -EFAULT;
+   buf = compat_ptr(cbuf);
 #endif
-   } else {
-   struct atm_iobuf __user *iobuf = arg;
-   iobuf_len = &iobuf->length;
-   if (get_user(buf, &iobuf->buffer))
-   return -EFAULT;
-   }
-   if (get_user(len, iobuf_len))
+   } else {
+   struct atm_iobuf __user *iobuf = arg;
+   iobuf_len = &iobuf->length;
+   if (get_user(buf, &iobuf->buffer))
return -EFAULT;
-   mutex_lock(&atm_dev_mutex);
-   list_for_each(p, &atm_devs)
-   size += sizeof(int);
-   if (size > len) {
-   mutex_unlock(&atm_dev_mutex);
-   return -E2BIG;
-   }
-   tmp_buf = kmalloc(size, GFP_ATOMIC);
-   if (!tmp_buf) {
-   mutex_unlock(&atm_dev_mutex);
-   return -ENOMEM;
-   }
-   tmp_p = tmp_buf;
-   list_for_each(p, &atm_devs) {
-   dev = list_entry(p, struct atm_dev, dev_list);
-   *tmp_p++ = dev->number;
-   }
+   }
+   if (get_user(len, iobuf_len))
+   return -EFAULT;
+   mutex_lock(&atm_dev_mutex);
+   list_for_each(p, &atm_devs)
+   size += sizeof(int);
+   if (size > len) {
mutex_unlock(&atm_dev_mutex);
-   error = ((copy_to_user(buf, tmp_buf, size)) ||
-put_user(size, iobuf_len))
-   ? -EFAULT : 0;
-   kfree(tmp_buf);
-   return error;
-   default:
-   break;
+   return -E2BIG;
}
+   tmp_buf = kmalloc(size, GFP_ATOMIC);
+   if (!tmp_buf) {
+   mutex_unlock(&atm_dev_mutex);
+   return -ENOMEM;
+   }
+   tmp_p = tmp_buf;
+   list_for_each(p, &atm_devs) {
+   dev = list_entry(p, struct atm_dev, dev_list);
+   *tmp_p++ = dev->number;
+   }
+   mutex_unlock(&atm_dev_mutex);
+   error = ((copy_to_user(buf, tmp_buf, size)) ||
+put_user(size, iobuf_len))
+   ? -EFAULT : 0;
+   kfree(tmp_buf);
+   return error;
+}
+
+int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
+{
+   void __user *buf;
+   int error, len, number, size = 0;
+   struct atm_dev *dev;
+   int __user *sioc_len;
 
if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 

[PATCH 12/19] handle the group_source_req options directly

2020-05-10 Thread Al Viro
From: Al Viro 

Native ->setsockopt() handling of these options (MCAST_..._SOURCE_GROUP
and MCAST_{,UN}BLOCK_SOURCE) consists of copyin + call of a helper that
does the actual work.  The only change needed for ->compat_setsockopt()
is a slightly different copyin - the helpers can be reused as-is.

Signed-off-by: Al Viro 
---
 net/ipv4/ip_sockglue.c   | 23 +--
 net/ipv6/ipv6_sockglue.c | 23 +--
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 4f412b0bdda4..8a3b879dc0f5 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1322,8 +1322,27 @@ int compat_ip_setsockopt(struct sock *sk, int level, int 
optname,
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_BLOCK_SOURCE:
case MCAST_UNBLOCK_SOURCE:
-   return compat_mc_setsockopt(sk, level, optname, optval, optlen,
-   ip_setsockopt);
+   {
+   struct compat_group_source_req __user *gsr32 = (void __user 
*)optval;
+   struct group_source_req greqs;
+
+   if (optlen != sizeof(struct compat_group_source_req))
+   return -EINVAL;
+
+   if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) ||
+   copy_from_user(&greqs.gsr_group, &gsr32->gsr_group,
+   sizeof(greqs.gsr_group)) ||
+   copy_from_user(&greqs.gsr_source, &gsr32->gsr_source,
+   sizeof(greqs.gsr_source)))
+   return -EFAULT;
+
+   rtnl_lock();
+   lock_sock(sk);
+   err = do_mcast_group_source(sk, optname, &greqs);
+   release_sock(sk);
+   rtnl_unlock();
+   return err;
+   }
case MCAST_MSFILTER:
{
const int size0 = offsetof(struct compat_group_filter, 
gf_slist);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index fc525ad9ed3c..4c9a9f2f83cf 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1015,8 +1015,27 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, 
int optname,
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_BLOCK_SOURCE:
case MCAST_UNBLOCK_SOURCE:
-   return compat_mc_setsockopt(sk, level, optname, optval, optlen,
-   ipv6_setsockopt);
+   {
+   struct compat_group_source_req __user *gsr32 = (void __user 
*)optval;
+   struct group_source_req greqs;
+
+   if (optlen < sizeof(struct compat_group_source_req))
+   return -EINVAL;
+
+   if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) ||
+   copy_from_user(&greqs.gsr_group, &gsr32->gsr_group,
+   sizeof(greqs.gsr_group)) ||
+   copy_from_user(&greqs.gsr_source, &gsr32->gsr_source,
+   sizeof(greqs.gsr_source)))
+   return -EFAULT;
+
+   rtnl_lock();
+   lock_sock(sk);
+   err = do_ipv6_mcast_group_source(sk, optname, &greqs);
+   release_sock(sk);
+   rtnl_unlock();
+   return err;
+   }
case MCAST_MSFILTER:
{
const int size0 = offsetof(struct compat_group_filter, 
gf_slist);
-- 
2.11.0



[PATCH 18/19] atm: lift copyin from atm_dev_ioctl()

2020-05-10 Thread Al Viro
From: Al Viro 

Signed-off-by: Al Viro 
---
 net/atm/ioctl.c | 25 -
 net/atm/resources.c | 35 +--
 net/atm/resources.h |  4 ++--
 3 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index fdd0e3434523..52f2c77e656f 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -182,7 +182,30 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int 
cmd,
}
error = atm_getnames(buf, len);
} else {
-   error = atm_dev_ioctl(cmd, argp, compat);
+   int number;
+
+   if (IS_ENABLED(CONFIG_COMPAT) && compat) {
+#ifdef CONFIG_COMPAT
+   struct compat_atmif_sioc __user *csioc = argp;
+   compat_uptr_t carg;
+
+   len = &csioc->length;
+   if (get_user(carg, &csioc->arg))
+   return -EFAULT;
+   buf = compat_ptr(carg);
+   if (get_user(number, &csioc->number))
+   return -EFAULT;
+#endif
+   } else {
+   struct atmif_sioc __user *sioc = argp;
+
+   len = &sioc->length;
+   if (get_user(buf, &sioc->arg))
+   return -EFAULT;
+   if (get_user(number, &sioc->number))
+   return -EFAULT;
+   }
+   error = atm_dev_ioctl(cmd, buf, len, number, compat);
}
 
 done:
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 5507cc608969..94bdc6527ee8 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -227,39 +227,14 @@ int atm_getnames(void __user *buf, int __user *iobuf_len)
return error;
 }
 
-int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
+int atm_dev_ioctl(unsigned int cmd, void __user *buf, int __user *sioc_len,
+ int number, int compat)
 {
-   void __user *buf;
-   int error, len, number, size = 0;
+   int error, len, size = 0;
struct atm_dev *dev;
-   int __user *sioc_len;
 
-   if (IS_ENABLED(CONFIG_COMPAT) && compat) {
-#ifdef CONFIG_COMPAT
-   struct compat_atmif_sioc __user *csioc = arg;
-   compat_uptr_t carg;
-
-   sioc_len = &csioc->length;
-   if (get_user(carg, &csioc->arg))
-   return -EFAULT;
-   buf = compat_ptr(carg);
-
-   if (get_user(len, &csioc->length))
-   return -EFAULT;
-   if (get_user(number, &csioc->number))
-   return -EFAULT;
-#endif
-   } else {
-   struct atmif_sioc __user *sioc = arg;
-
-   sioc_len = &sioc->length;
-   if (get_user(buf, &sioc->arg))
-   return -EFAULT;
-   if (get_user(len, &sioc->length))
-   return -EFAULT;
-   if (get_user(number, &sioc->number))
-   return -EFAULT;
-   }
+   if (get_user(len, sioc_len))
+   return -EFAULT;
 
dev = try_then_request_module(atm_dev_lookup(number), "atm-device-%d",
  number);
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 5e2c68d37d63..4a0839e92ff3 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -15,8 +15,8 @@ extern struct list_head atm_devs;
 extern struct mutex atm_dev_mutex;
 
 int atm_getnames(void __user *buf, int __user *iobuf_len);
-int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat);
-
+int atm_dev_ioctl(unsigned int cmd, void __user *buf, int __user *sioc_len,
+ int number, int compat);
 
 #ifdef CONFIG_PROC_FS
 
-- 
2.11.0



[PATCH 17/19] atm: switch do_atm_iobuf() to direct use of atm_getnames()

2020-05-10 Thread Al Viro
From: Al Viro 

... and sod the compat_alloc_user_space() with its complications

Signed-off-by: Al Viro 
---
 net/atm/ioctl.c | 25 +++--
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index e239cebf48da..fdd0e3434523 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -251,32 +251,13 @@ static struct {
 static int do_atm_iobuf(struct socket *sock, unsigned int cmd,
unsigned long arg)
 {
-   struct atm_iobuf __user *iobuf;
-   struct compat_atm_iobuf __user *iobuf32;
+   struct compat_atm_iobuf __user *iobuf32 = compat_ptr(arg);
u32 data;
-   void __user *datap;
-   int len, err;
-
-   iobuf = compat_alloc_user_space(sizeof(*iobuf));
-   iobuf32 = compat_ptr(arg);
 
-   if (get_user(len, &iobuf32->length) ||
-   get_user(data, &iobuf32->buffer))
-   return -EFAULT;
-   datap = compat_ptr(data);
-   if (put_user(len, &iobuf->length) ||
-   put_user(datap, &iobuf->buffer))
+   if (get_user(data, &iobuf32->buffer))
return -EFAULT;
 
-   err = do_vcc_ioctl(sock, cmd, (unsigned long) iobuf, 0);
-
-   if (!err) {
-   if (copy_in_user(&iobuf32->length, &iobuf->length,
-sizeof(int)))
-   err = -EFAULT;
-   }
-
-   return err;
+   return atm_getnames(&iobuf32->length, compat_ptr(data));
 }
 
 static int do_atmif_sioc(struct socket *sock, unsigned int cmd,
-- 
2.11.0



[PATCH 05/19] set_mcast_msfilter(): take the guts of setsockopt(MCAST_MSFILTER) into a helper

2020-05-10 Thread Al Viro
From: Al Viro 

Signed-off-by: Al Viro 
---
 net/ipv4/ip_sockglue.c | 73 +++---
 1 file changed, 40 insertions(+), 33 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 6bdaf43236ea..8c14a474870d 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -587,6 +587,43 @@ static bool setsockopt_needs_rtnl(int optname)
return false;
 }
 
+static int set_mcast_msfilter(struct sock *sk, int ifindex,
+ int numsrc, int fmode,
+ struct sockaddr_storage *group,
+ struct sockaddr_storage *list)
+{
+   int msize = IP_MSFILTER_SIZE(numsrc);
+   struct ip_msfilter *msf;
+   struct sockaddr_in *psin;
+   int err, i;
+
+   msf = kmalloc(msize, GFP_KERNEL);
+   if (!msf)
+   return -ENOBUFS;
+
+   psin = (struct sockaddr_in *)group;
+   if (psin->sin_family != AF_INET)
+   goto Eaddrnotavail;
+   msf->imsf_multiaddr = psin->sin_addr.s_addr;
+   msf->imsf_interface = 0;
+   msf->imsf_fmode = fmode;
+   msf->imsf_numsrc = numsrc;
+   for (i = 0; i < numsrc; ++i) {
+   psin = (struct sockaddr_in *)&list[i];
+
+   if (psin->sin_family != AF_INET)
+   goto Eaddrnotavail;
+   msf->imsf_slist[i] = psin->sin_addr.s_addr;
+   }
+   err = ip_mc_msfilter(sk, msf, ifindex);
+   kfree(msf);
+   return err;
+
+Eaddrnotavail:
+   kfree(msf);
+   return -EADDRNOTAVAIL;
+}
+
 static int do_ip_setsockopt(struct sock *sk, int level,
int optname, char __user *optval, unsigned int 
optlen)
 {
@@ -1079,10 +1116,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
}
case MCAST_MSFILTER:
{
-   struct sockaddr_in *psin;
-   struct ip_msfilter *msf = NULL;
struct group_filter *gsf = NULL;
-   int msize, i, ifindex;
 
if (optlen < GROUP_FILTER_SIZE(0))
goto e_inval;
@@ -1095,7 +1129,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = PTR_ERR(gsf);
break;
}
-
/* numsrc >= (4G-140)/128 overflow in 32 bits */
if (gsf->gf_numsrc >= 0x1ff ||
gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
@@ -1106,36 +1139,10 @@ static int do_ip_setsockopt(struct sock *sk, int level,
err = -EINVAL;
goto mc_msf_out;
}
-   msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
-   msf = kmalloc(msize, GFP_KERNEL);
-   if (!msf) {
-   err = -ENOBUFS;
-   goto mc_msf_out;
-   }
-   ifindex = gsf->gf_interface;
-   psin = (struct sockaddr_in *)&gsf->gf_group;
-   if (psin->sin_family != AF_INET) {
-   err = -EADDRNOTAVAIL;
-   goto mc_msf_out;
-   }
-   msf->imsf_multiaddr = psin->sin_addr.s_addr;
-   msf->imsf_interface = 0;
-   msf->imsf_fmode = gsf->gf_fmode;
-   msf->imsf_numsrc = gsf->gf_numsrc;
-   err = -EADDRNOTAVAIL;
-   for (i = 0; i < gsf->gf_numsrc; ++i) {
-   psin = (struct sockaddr_in *)&gsf->gf_slist[i];
-
-   if (psin->sin_family != AF_INET)
-   goto mc_msf_out;
-   msf->imsf_slist[i] = psin->sin_addr.s_addr;
-   }
-   kfree(gsf);
-   gsf = NULL;
-
-   err = ip_mc_msfilter(sk, msf, ifindex);
+   err = set_mcast_msfilter(sk, gsf->gf_interface,
+   gsf->gf_numsrc, gsf->gf_fmode,
+   &gsf->gf_group, gsf->gf_slist);
 mc_msf_out:
-   kfree(msf);
kfree(gsf);
break;
}
-- 
2.11.0



[PATCH 08/19] ipv6: do compat setsockopt for MCAST_MSFILTER directly

2020-05-10 Thread Al Viro
From: Al Viro 

similar to the ipv4 counterpart of that patch - the same
trick used to align the tail array properly.

Signed-off-by: Al Viro 
---
 net/ipv6/ipv6_sockglue.c | 48 +++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 33efc9112259..220087bfd17c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -980,9 +980,55 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int 
optname,
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_BLOCK_SOURCE:
case MCAST_UNBLOCK_SOURCE:
-   case MCAST_MSFILTER:
return compat_mc_setsockopt(sk, level, optname, optval, optlen,
ipv6_setsockopt);
+   case MCAST_MSFILTER:
+   {
+   const int size0 = offsetof(struct compat_group_filter, 
gf_slist);
+   struct compat_group_filter *gf32;
+   void *p;
+   int n;
+
+   if (optlen < size0)
+   return -EINVAL;
+   if (optlen > sysctl_optmem_max - 4)
+   return -ENOBUFS;
+
+   p = kmalloc(optlen + 4, GFP_KERNEL);
+   if (!p)
+   return -ENOMEM;
+
+   gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+   if (copy_from_user(gf32, optval, optlen)) {
+   err = -EFAULT;
+   goto mc_msf_out;
+   }
+
+   n = gf32->gf_numsrc;
+   /* numsrc >= (4G-140)/128 overflow in 32 bits */
+   if (n >= 0x1ffU ||
+   n > sysctl_mld_max_msf) {
+   err = -ENOBUFS;
+   goto mc_msf_out;
+   }
+   if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) 
{
+   err = -EINVAL;
+   goto mc_msf_out;
+   }
+
+   rtnl_lock();
+   lock_sock(sk);
+   err = ip6_mc_msfilter(sk, &(struct group_filter){
+   .gf_interface = gf32->gf_interface,
+   .gf_group = gf32->gf_group,
+   .gf_fmode = gf32->gf_fmode,
+   .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
+   release_sock(sk);
+   rtnl_unlock();
+mc_msf_out:
+   kfree(p);
+   return err;
+   }
}
 
err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
-- 
2.11.0



[PATCH 16/19] atm: move copyin from atm_getnames() into the caller

2020-05-10 Thread Al Viro
From: Al Viro 

Signed-off-by: Al Viro 
---
 net/atm/ioctl.c | 19 ++-
 net/atm/resources.c | 19 +--
 net/atm/resources.h |  2 +-
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 0b4b07740fe4..e239cebf48da 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -56,6 +56,8 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
int error;
struct list_head *pos;
void __user *argp = (void __user *)arg;
+   void __user *buf;
+   int __user *len;
 
vcc = ATM_SD(sock);
switch (cmd) {
@@ -163,7 +165,22 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int 
cmd,
goto done;
 
if (cmd == ATM_GETNAMES) {
-   error = atm_getnames(argp, compat);
+   if (IS_ENABLED(CONFIG_COMPAT) && compat) {
+#ifdef CONFIG_COMPAT
+   struct compat_atm_iobuf __user *ciobuf = argp;
+   compat_uptr_t cbuf;
+   len = &ciobuf->length;
+   if (get_user(cbuf, &ciobuf->buffer))
+   return -EFAULT;
+   buf = compat_ptr(cbuf);
+#endif
+   } else {
+   struct atm_iobuf __user *iobuf = argp;
+   len = &iobuf->length;
+   if (get_user(buf, &iobuf->buffer))
+   return -EFAULT;
+   }
+   error = atm_getnames(buf, len);
} else {
error = atm_dev_ioctl(cmd, argp, compat);
}
diff --git a/net/atm/resources.c b/net/atm/resources.c
index a2ab75929eec..5507cc608969 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -193,30 +193,13 @@ static int fetch_stats(struct atm_dev *dev, struct 
atm_dev_stats __user *arg,
return error ? -EFAULT : 0;
 }
 
-int atm_getnames(void __user *arg, int compat)
+int atm_getnames(void __user *buf, int __user *iobuf_len)
 {
-   void __user *buf;
int error, len, size = 0;
struct atm_dev *dev;
struct list_head *p;
int *tmp_buf, *tmp_p;
-   int __user *iobuf_len;
 
-   if (IS_ENABLED(CONFIG_COMPAT) && compat) {
-#ifdef CONFIG_COMPAT
-   struct compat_atm_iobuf __user *ciobuf = arg;
-   compat_uptr_t cbuf;
-   iobuf_len = &ciobuf->length;
-   if (get_user(cbuf, &ciobuf->buffer))
-   return -EFAULT;
-   buf = compat_ptr(cbuf);
-#endif
-   } else {
-   struct atm_iobuf __user *iobuf = arg;
-   iobuf_len = &iobuf->length;
-   if (get_user(buf, &iobuf->buffer))
-   return -EFAULT;
-   }
if (get_user(len, iobuf_len))
return -EFAULT;
mutex_lock(&atm_dev_mutex);
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 18f8e5948ce4..5e2c68d37d63 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -14,7 +14,7 @@
 extern struct list_head atm_devs;
 extern struct mutex atm_dev_mutex;
 
-int atm_getnames(void __user *arg, int compat);
+int atm_getnames(void __user *buf, int __user *iobuf_len);
 int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat);
 
 
-- 
2.11.0



[PATCH 02/19] compat_ip{,v6}_setsockopt(): enumerate MCAST_... options explicitly

2020-05-10 Thread Al Viro
From: Al Viro 

We want to check if optname is among the MCAST_... ones; do that as
an explicit switch.

Signed-off-by: Al Viro 
---
 net/ipv4/ip_sockglue.c   | 10 +-
 net/ipv6/ipv6_sockglue.c | 10 +-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index aa3fd61818c4..8f550cf4c1c0 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1272,9 +1272,17 @@ int compat_ip_setsockopt(struct sock *sk, int level, int 
optname,
if (level != SOL_IP)
return -ENOPROTOOPT;
 
-   if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
+   switch (optname) {
+   case MCAST_JOIN_GROUP:
+   case MCAST_LEAVE_GROUP:
+   case MCAST_JOIN_SOURCE_GROUP:
+   case MCAST_LEAVE_SOURCE_GROUP:
+   case MCAST_BLOCK_SOURCE:
+   case MCAST_UNBLOCK_SOURCE:
+   case MCAST_MSFILTER:
return compat_mc_setsockopt(sk, level, optname, optval, optlen,
ip_setsockopt);
+   }
 
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
 #ifdef CONFIG_NETFILTER
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 18d05403d3b5..1b4ad4f8dc42 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -973,9 +973,17 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int 
optname,
if (level != SOL_IPV6)
return -ENOPROTOOPT;
 
-   if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
+   switch (optname) {
+   case MCAST_JOIN_GROUP:
+   case MCAST_LEAVE_GROUP:
+   case MCAST_JOIN_SOURCE_GROUP:
+   case MCAST_LEAVE_SOURCE_GROUP:
+   case MCAST_BLOCK_SOURCE:
+   case MCAST_UNBLOCK_SOURCE:
+   case MCAST_MSFILTER:
return compat_mc_setsockopt(sk, level, optname, optval, optlen,
ipv6_setsockopt);
+   }
 
err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
 #ifdef CONFIG_NETFILTER
-- 
2.11.0



[RFC][PATCHES] uaccess-related stuff in net/*

2020-05-10 Thread Al Viro
Assorted uaccess-related work in net/*.  First, there's
getting rid of compat_alloc_user_space() mess in MCAST_...
[gs]etsockopt() - no need to play with copying to/from temporary
object on userland stack, etc., when ->compat_[sg]etsockopt()
instances in question can easily do everything without that.
That's the first 13 patches.  Then there's a trivial bit in
net/batman-adv (completely unrelated to everything else) and
finally getting the atm compat ioctls into simpler shape.
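
The common theme in the MCAST_... patches, shown here as a simplified
before/after sketch (condensed from the MCAST_JOIN_GROUP handling in the
patches below, not an actual hunk):

	/* before: build a second __user object via compat_alloc_user_space()
	 * and feed it to the native ->setsockopt() callback */
	struct compat_group_req __user *gr32 = (void __user *)optval;
	struct group_req __user *kgr =
		compat_alloc_user_space(sizeof(struct group_req));
	u32 interface;

	if (__get_user(interface, &gr32->gr_interface) ||
	    __put_user(interface, &kgr->gr_interface) ||
	    copy_in_user(&kgr->gr_group, &gr32->gr_group, sizeof(kgr->gr_group)))
		return -EFAULT;
	return setsockopt(sk, level, optname, (char __user *)kgr,
			  sizeof(struct group_req));

	/* after: copy the 32bit layout straight into a kernel-space object and
	 * call the same helpers the native path uses, with no bouncing through
	 * userland memory */
	struct group_req greq;

	if (get_user(greq.gr_interface, &gr32->gr_interface) ||
	    copy_from_user(&greq.gr_group, &gr32->gr_group, sizeof(greq.gr_group)))
		return -EFAULT;
	/* ... then ip_mc_join_group()/ip_mc_leave_group() under rtnl and the
	 * socket lock, as in the ipv4/ipv6 patches below */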

Please, review and comment.  Individual patches in followups,
the entire branch (on top of current net/master) is in
git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git #uaccess.net

Shortlog:
Al Viro (19):
  lift compat definitions of mcast [sg]etsockopt requests into net/compat.h
  compat_ip{,v6}_setsockopt(): enumerate MCAST_... options explicitly
  ip*_mc_gsfget(): lift copyout of struct group_filter into callers
  get rid of compat_mc_getsockopt()
  set_mcast_msfilter(): take the guts of setsockopt(MCAST_MSFILTER) into a 
helper
  ipv4: do compat setsockopt for MCAST_MSFILTER directly
  ip6_mc_msfilter(): pass the address list separately
  ipv6: do compat setsockopt for MCAST_MSFILTER directly
  ipv[46]: do compat setsockopt for MCAST_{JOIN,LEAVE}_GROUP directly
  ipv4: take handling of group_source_req options into a helper
  ipv6: take handling of group_source_req options into a helper
  handle the group_source_req options directly
  get rid of compat_mc_setsockopt()
  batadv_socket_read(): get rid of pointless access_ok()
  atm: separate ATM_GETNAMES handling from the rest of atm_dev_ioctl()
  atm: move copyin from atm_getnames() into the caller
  atm: switch do_atm_iobuf() to direct use of atm_getnames()
  atm: lift copyin from atm_dev_ioctl()
  atm: switch do_atmif_sioc() to direct use of atm_dev_ioctl()
Diffstat:
 include/linux/igmp.h |   2 +-
 include/net/compat.h |  29 +++-
 include/net/ipv6.h   |   5 +-
 net/atm/ioctl.c  |  96 +++--
 net/atm/resources.c  | 108 +-
 net/atm/resources.h  |   5 +-
 net/batman-adv/icmp_socket.c |   3 -
 net/compat.c | 194 -
 net/ipv4/igmp.c  |  18 +--
 net/ipv4/ip_sockglue.c   | 329 ---
 net/ipv6/ipv6_sockglue.c | 233 --
 net/ipv6/mcast.c |  17 +--
 12 files changed, 567 insertions(+), 472 deletions(-)


[PATCH] vfio/pci: fix memory leaks of eventfd ctx

2020-05-10 Thread Qian Cai
Finishing a qemu-kvm run (-device vfio-pci,host=0001:01:00.0) triggers a few
memory leaks after a while because vfio_pci_set_ctx_trigger_single()
calls eventfd_ctx_fdget() without the matching eventfd_ctx_put() later.
Fix it by calling eventfd_ctx_put() on those contexts in
vfio_pci_release() before vfio_device_release().

unreferenced object 0xebff008981cc2b00 (size 128):
  comm "qemu-kvm", pid 4043, jiffies 4294994816 (age 9796.310s)
  hex dump (first 32 bytes):
01 00 00 00 6b 6b 6b 6b 00 00 00 00 ad 4e ad de  .N..
ff ff ff ff 6b 6b 6b 6b ff ff ff ff ff ff ff ff  
  backtrace:
[<917e8f8d>] slab_post_alloc_hook+0x74/0x9c
[] kmem_cache_alloc_trace+0x2b4/0x3d4
[<5fcec025>] do_eventfd+0x54/0x1ac
[<82791a69>] __arm64_sys_eventfd2+0x34/0x44
[] do_el0_svc+0x128/0x1dc
[] el0_sync_handler+0xd0/0x268
[] el0_sync+0x164/0x180
unreferenced object 0x29ff008981cc4180 (size 128):
  comm "qemu-kvm", pid 4043, jiffies 4294994818 (age 9796.290s)
  hex dump (first 32 bytes):
01 00 00 00 6b 6b 6b 6b 00 00 00 00 ad 4e ad de  .N..
ff ff ff ff 6b 6b 6b 6b ff ff ff ff ff ff ff ff  
  backtrace:
[<917e8f8d>] slab_post_alloc_hook+0x74/0x9c
[] kmem_cache_alloc_trace+0x2b4/0x3d4
[<5fcec025>] do_eventfd+0x54/0x1ac
[<82791a69>] __arm64_sys_eventfd2+0x34/0x44
[] do_el0_svc+0x128/0x1dc
[] el0_sync_handler+0xd0/0x268
[] el0_sync+0x164/0x180

Signed-off-by: Qian Cai 
---
 drivers/vfio/pci/vfio_pci.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 6c6b37b5c04e..080e6608f297 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -519,6 +519,10 @@ static void vfio_pci_release(void *device_data)
vfio_pci_vf_token_user_add(vdev, -1);
vfio_spapr_pci_eeh_release(vdev->pdev);
vfio_pci_disable(vdev);
+   if (vdev->err_trigger)
+   eventfd_ctx_put(vdev->err_trigger);
+   if (vdev->req_trigger)
+   eventfd_ctx_put(vdev->req_trigger);
}
 
mutex_unlock(&vdev->reflck->lock);
-- 
2.21.0 (Apple Git-122.2)



Re: [patch V4 part 3 12/29] x86/entry/common: Provide idtentry_enter/exit()

2020-05-10 Thread Andy Lutomirski
On Tue, May 5, 2020 at 7:15 AM Thomas Gleixner  wrote:
>
> Provide functions which handle the low level entry and exit similar to
> enter/exit from user mode.
>

> +
> +/**
> + * idtentry_exit - Common code to handle return from exceptions
> + * @regs:  Pointer to pt_regs (exception entry regs)
> + *
> + * Depending on the return target (kernel/user) this runs the necessary
> + * preemption and work checks if possible and required and returns to
> + * the caller with interrupts disabled and no further work pending.
> + *
> + * This is the last action before returning to the low level ASM code which
> + * just needs to return to the appropriate context.
> + *
> + * Invoked by all exception/interrupt IDTENTRY handlers which are not
> + * returning through the paranoid exit path (all except NMI, #DF and the IST
> + * variants of #MC and #DB).

The paranoid-exit bit is not really relevant.  The important part is
which stack we're on.  See below.

> + */
> +void noinstr idtentry_exit(struct pt_regs *regs)
> +{
> +   lockdep_assert_irqs_disabled();

How about:

#ifdef CONFIG_DEBUG_ENTRY
WARN_ON_ONCE(!on_thread_stack());
#endif

> +
> +   /* Check whether this returns to user mode */
> +   if (user_mode(regs)) {
> +   prepare_exit_to_usermode(regs);
> +   } else if (regs->flags & X86_EFLAGS_IF) {
> +   /* Check kernel preemption, if enabled */
> +   if (IS_ENABLED(CONFIG_PREEMPTION)) {
> +   /*
> +* This needs to be done very carefully.
> +* idtentry_enter() invoked rcu_irq_enter(). This
> +* needs to undone before scheduling.
> +*
> +* Preemption is disabled inside of RCU idle
> +* sections. When the task returns from
> +* preempt_schedule_irq(), RCU is still watching.
> +*
> +* rcu_irq_exit_preempt() has additional state
> +* checking if CONFIG_PROVE_RCU=y
> +*/
> +   if (!preempt_count()) {
> +   instr_begin();
> +   rcu_irq_exit_preempt();
> +   if (need_resched())
> +   preempt_schedule_irq();

This is an excellent improvement.  Thanks!

> +   /* Covers both tracing and lockdep */
> +   trace_hardirqs_on();
> +   instr_end();
> +   return;
> +   }
> +   }
> +   instr_begin();
> +   /* Tell the tracer that IRET will enable interrupts */
> +   trace_hardirqs_on_prepare();

Why is trace_hardirqs_on() okay above but not here?  Is it that we
know we weren't RCU-quiescent if we had preemption and IF on?  But
even this code path came from an IF-on context.  I'm confused.  Maybe
some comments as to why this case seems to be ordered so differently
from the !preempt_count() case would be helpful.

> +   lockdep_hardirqs_on_prepare(CALLER_ADDR0);
> +   instr_end();
> +   rcu_irq_exit();
> +   lockdep_hardirqs_on(CALLER_ADDR0);
> +   } else {
> +   /* IRQ flags state is correct already. Just tell RCU */
> +   rcu_irq_exit();
> +   }
> +}
> --- a/arch/x86/include/asm/idtentry.h
> +++ b/arch/x86/include/asm/idtentry.h
> @@ -7,6 +7,9 @@
>
>  #ifndef __ASSEMBLY__
>
> +void idtentry_enter(struct pt_regs *regs);
> +void idtentry_exit(struct pt_regs *regs);
> +
>  /**
>   * DECLARE_IDTENTRY - Declare functions for simple IDT entry points
>   *   No error code pushed by hardware
>


linux-next: build failure after merge of the block tree

2020-05-10 Thread Stephen Rothwell
Hi all,

After merging the block tree, today's linux-next build (x86_64
allmodconfig) failed like this:

drivers/block/aoe/aoeblk.c: In function 'aoeblk_gdalloc':
drivers/block/aoe/aoeblk.c:410:21: error: 'struct backing_dev_info' has no 
member named 'name'
  410 |  q->backing_dev_info->name = "aoe";
  | ^~

Caused by commit

  1cd925d58385 ("bdi: remove the name field in struct backing_dev_info")

I applied the following patch for today.

From: Stephen Rothwell 
Date: Mon, 11 May 2020 14:19:30 +1000
Subject: [PATCH] bdi: fix up for "remove the name field in struct
 backing_dev_info"

Signed-off-by: Stephen Rothwell 
---
 drivers/block/aoe/aoeblk.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index a27804d71e12..5ca7216e9e01 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -407,7 +407,6 @@ aoeblk_gdalloc(void *vp)
WARN_ON(d->gd);
WARN_ON(d->flags & DEVFL_UP);
blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
-   q->backing_dev_info->name = "aoe";
q->backing_dev_info->ra_pages = READ_AHEAD / PAGE_SIZE;
d->bufpool = mp;
d->blkq = gd->queue = q;
-- 
2.26.2

-- 
Cheers,
Stephen Rothwell


pgpnLWgiYehwS.pgp
Description: OpenPGP digital signature


[PATCH] kbuild: make module name conflict fatal error

2020-05-10 Thread Masahiro Yamada
I think all the warnings have been fixed by now. Make it a fatal error.

Check it before modpost because we need to stop building *.ko files.
Also, pass modules.order via a script parameter.

Signed-off-by: Masahiro Yamada 
---

 Makefile |  7 +--
 scripts/modules-check.sh | 16 +---
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index 1162cb04860c..a2e4d0945500 100644
--- a/Makefile
+++ b/Makefile
@@ -1328,9 +1328,12 @@ all: modules
 # using awk while concatenating to the final file.
 
 PHONY += modules
-modules: $(if $(KBUILD_BUILTIN),vmlinux) modules.order
+modules: $(if $(KBUILD_BUILTIN),vmlinux) modules_check
$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost
-   $(Q)$(CONFIG_SHELL) $(srctree)/scripts/modules-check.sh
+
+PHONY += modules_check
+modules_check: modules.order
+   $(Q)$(CONFIG_SHELL) $(srctree)/scripts/modules-check.sh $<
 
 modules.order: descend
$(Q)$(AWK) '!x[$$0]++' $(addsuffix /$@, $(build-dirs)) > $@
diff --git a/scripts/modules-check.sh b/scripts/modules-check.sh
index f51f446707b8..43de226071ae 100755
--- a/scripts/modules-check.sh
+++ b/scripts/modules-check.sh
@@ -3,14 +3,24 @@
 
 set -e
 
+if [ $# != 1 ]; then
+   echo "Usage: $0 " >& 2
+   exit 1
+fi
+
+exit_code=0
+
 # Check uniqueness of module names
 check_same_name_modules()
 {
-   for m in $(sed 's:.*/::' modules.order | sort | uniq -d)
+   for m in $(sed 's:.*/::' $1 | sort | uniq -d)
do
-   echo "warning: same module names found:" >&2
+   echo "error: the following would cause module name conflict:" 
>&2
sed -n "/\/$m/s:^:  :p" modules.order >&2
+   exit_code=1
done
 }
 
-check_same_name_modules
+check_same_name_modules "$1"
+
+exit $exit_code
-- 
2.25.1



Re: [LKP] Re: [pipe] f2af7d90e2: xfstests.btrfs.052.fail

2020-05-10 Thread Li Zhijian

Hi Matthew

With a quick look into the dmesg, it looks like this commit broke the
preparation of the LKP tests:

[   32.677588] install debs round two: dpkg -i --force-confdef 
--force-depends /opt/deb/gawk_1%3a4.1.4+dfsg-1_amd64.deb

[ 32.677593]-
[   32.697180] tar: ./control: Cannot write: Invalid argument
[ 32.697184]-
[   32.705025] tar: ./md5sums: Cannot write: Invalid argument
[ 32.705030]-
[   32.710034] tar: ./postinst: Cannot write: Invalid argument
[ 32.710039]-
[   32.743721] tar: ./prerm: Cannot write: Invalid argument

I tried the apt command; it also failed with this commit:
root@vm-snb-186 ~# apt update
Ign:1 http://linux-ftp.sh.intel.com/pub/mirrors/debian stretch InRelease
Get:2 http://linux-ftp.sh.intel.com/pub/mirrors/debian testing InRelease 
[116 kB]

Ign:2 http://linux-ftp.sh.intel.com/pub/mirrors/debian testing InRelease
Get:3 http://linux-ftp.sh.intel.com/pub/mirrors/debian unstable 
InRelease [146 kB]

Ign:3 http://linux-ftp.sh.intel.com/pub/mirrors/debian unstable InRelease
Get:4 http://linux-ftp.sh.intel.com/pub/mirrors/debian stretch Release 
[118 kB]

Err:4 http://linux-ftp.sh.intel.com/pub/mirrors/debian stretch Release
  Error writing to output file - write (22: Invalid argument)
Get:5 http://linux-ftp.sh.intel.com/pub/mirrors/debian testing Release 
[115 kB]

Err:5 http://linux-ftp.sh.intel.com/pub/mirrors/debian testing Release
  Error writing to output file - write (22: Invalid argument)
Get:6 http://linux-ftp.sh.intel.com/pub/mirrors/debian unstable Release 
[145 kB]
Get:7 http://linux-ftp.sh.intel.com/pub/mirrors/debian unstable 
Release.gpg [1601 B]

Err:7 http://linux-ftp.sh.intel.com/pub/mirrors/debian unstable Release.gpg
  Error writing to output file - write (22: Invalid argument)
Reading package lists... Done
E: The repository 'http://linux-ftp.sh.intel.com/pub/mirrors/debian 
stretch Release' does not have a Release file.
N: Updating from such a repository can't be done securely, and is 
therefore disabled by default.
N: See apt-secure(8) manpage for repository creation and user 
configuration details.
E: The repository 'http://linux-ftp.sh.intel.com/pub/mirrors/debian 
testing Release' does not have a Release file.
N: Updating from such a repository can't be done securely, and is 
therefore disabled by default.
N: See apt-secure(8) manpage for repository creation and user 
configuration details.
E: The repository 'http://linux-ftp.sh.intel.com/pub/mirrors/debian 
unstable Release' is not signed.
N: Updating from such a repository can't be done securely, and is 
therefore disabled by default.
N: See apt-secure(8) manpage for repository creation and user 
configuration details.


Everything works well without f2af7d90e2 in our environment.


Thanks


On 5/11/20 9:16 AM, Matthew Wilcox wrote:

On Mon, May 11, 2020 at 09:09:57AM +0800, kernel test robot wrote:

 --- tests/btrfs/095.out2020-04-09 10:45:28.0 +0800
 +++ /lkp/benchmarks/xfstests/results//btrfs/095.out.bad2020-05-06 
21:13:51.276485703 +0800
 @@ -1,35 +1,9 @@
  QA output created by 095
 -Blocks modified: [135 - 164]
 -Blocks modified: [768 - 792]
 +awk: line 19: function strtonum never defined
 +awk: line 19: function strtonum never defined
 +awk: line 19: function strtonum never defined
 +awk: line 19: function strtonum never defined

This looks like a problem with the test setup.
___
LKP mailing list -- l...@lists.01.org
To unsubscribe send an email to lkp-le...@lists.01.org




Re: linux-next: build warning after merge of the drm tree

2020-05-10 Thread Randy Dunlap
On 5/10/20 8:47 PM, Stephen Rothwell wrote:
> Hi all,
> 
> After merging the drm tree, today's linux-next build (x86_64 allmodconfig)
> produced this warning:
> 
> WARNING: modpost: missing MODULE_LICENSE() in 
> drivers/gpu/drm/panel/panel-visionox-rm69299.o
> 
> Introduced by commit
> 
>   c7f66d32dd43 ("drm/panel: add support for rm69299 visionox panel")
> 

I posted a patch last week:

https://lore.kernel.org/dri-devel/bbb7b3b3-9968-9a1f-8ef6-2e8e3be99...@infradead.org/

-- 
~Randy



Re: [PATCH V3 1/3] arm64/mm: Drop __HAVE_ARCH_HUGE_PTEP_GET

2020-05-10 Thread Anshuman Khandual



On 05/09/2020 03:39 AM, Mike Kravetz wrote:
> On 5/7/20 8:07 PM, Anshuman Khandual wrote:
>> Platform specific huge_ptep_get() is required only when fetching the huge
>> PTE involves more than just dereferencing the page table pointer. This is
>> not the case on the arm64 platform. Hence huge_ptep_get() can be dropped along
>> with its __HAVE_ARCH_HUGE_PTEP_GET subscription. Before that, it updates
>> the generic huge_ptep_get() with READ_ONCE() which will prevent known page
>> table issues with THP on arm64.
>>
>> https://lore.kernel.org/r/1506527369-19535-1-git-send-email-will.dea...@arm.com/
>>
>> Cc: Catalin Marinas 
>> Cc: Will Deacon 
>> Cc: Andrew Morton 
>> Cc: linux-arm-ker...@lists.infradead.org
>> Cc: linux...@kvack.org
>> Cc: linux-kernel@vger.kernel.org
>> Signed-off-by: Anshuman Khandual 
>> ---
>>  arch/arm64/include/asm/hugetlb.h | 6 --
>>  include/asm-generic/hugetlb.h| 2 +-
>>  2 files changed, 1 insertion(+), 7 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/hugetlb.h 
>> b/arch/arm64/include/asm/hugetlb.h
>> index 2eb6c234d594..b88878ddc88b 100644
>> --- a/arch/arm64/include/asm/hugetlb.h
>> +++ b/arch/arm64/include/asm/hugetlb.h
>> @@ -17,12 +17,6 @@
>>  extern bool arch_hugetlb_migration_supported(struct hstate *h);
>>  #endif
>>  
>> -#define __HAVE_ARCH_HUGE_PTEP_GET
>> -static inline pte_t huge_ptep_get(pte_t *ptep)
>> -{
>> -return READ_ONCE(*ptep);
>> -}
>> -
>>  static inline int is_hugepage_only_range(struct mm_struct *mm,
>>   unsigned long addr, unsigned long len)
>>  {
>> diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
>> index 822f433ac95c..40f85decc2ee 100644
>> --- a/include/asm-generic/hugetlb.h
>> +++ b/include/asm-generic/hugetlb.h
>> @@ -122,7 +122,7 @@ static inline int huge_ptep_set_access_flags(struct 
>> vm_area_struct *vma,
>>  #ifndef __HAVE_ARCH_HUGE_PTEP_GET
>>  static inline pte_t huge_ptep_get(pte_t *ptep)
>>  {
>> -return *ptep;
>> +return READ_ONCE(*ptep);
>>  }
>>  #endif
> 
> I know you made this change in response to Will's comment.  And, since
> changes were made to consistently use READ_ONCE in arm64 code, it makes
> sense for that architecture.
> 
> However, with this change to generic code, you introduce READ_ONCE to
> other architectures where it was not used before.  Could this possibly
> introduce inconsistencies in their use of READ_ONCE?  To be honest, I
> am not very good at identifying any possible issues this could cause.
> However, it does seem possible.

Could you please give some more details? Is there any particular problem
that might be caused by this new READ_ONCE() here, which you are
concerned about? READ_ONCE() is already used in multiple places
in core MM that cannot be configured out (like mm/gup.c). It is used
in core HugeTLB (mm/hugetlb.c) as well. AFAICS, there is no standard
for using READ_ONCE() while walking page table entries; we have examples
in core MM for both ways.
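
To make the difference concrete, a minimal, architecture-neutral sketch of the
two fetches being compared (illustrative only, not a proposal):

	#include <linux/compiler.h>
	#include <linux/mm.h>

	/*
	 * READ_ONCE() forces a single, non-torn load of the entry, so the
	 * compiler cannot re-fetch it and a concurrent modification (e.g. a
	 * THP split/collapse) is not observed as a half-updated value.
	 */
	static inline pte_t example_huge_ptep_get(pte_t *ptep)
	{
		return READ_ONCE(*ptep);
	}

	/* plain dereference: fine only if the entry cannot change under us */
	static inline pte_t example_huge_ptep_get_plain(pte_t *ptep)
	{
		return *ptep;
	}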

> 
> Will was nervous about dropping this from arm64.  I'm just a little nervous
> about adding it to other architectures.
> 
AFAICS, __HAVE_ARCH_HUGE_PTEP_GET should be used on a platform only when
a HugeTLB entry cannot be constructed by simply dereferencing a page table
entry, as is the case with ARM (32-bit). Using READ_ONCE() while dereferencing
is really not a special case that needs __HAVE_ARCH_HUGE_PTEP_GET. Moving
READ_ONCE() into the generic definition solves the problem while also taking
care of a known problem on arm64. IMHO, it seems like the right thing to do
unless another problem pops up somewhere else because of READ_ONCE().


[PATCH 1/2] kbuild: add this-makefile as a shorthand for $(lastword $(MAKEFILE_LIST))

2020-05-10 Thread Masahiro Yamada
Make it clearer, and self-documenting.

Signed-off-by: Masahiro Yamada 
---

 Makefile | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Makefile b/Makefile
index 119a08c90abb..1a4977ad7cd9 100644
--- a/Makefile
+++ b/Makefile
@@ -157,12 +157,14 @@ MAKEFLAGS += --include-dir=$(abs_srctree)
 need-sub-make := 1
 endif
 
+this-makefile := $(lastword $(MAKEFILE_LIST))
+
 ifneq ($(filter 3.%,$(MAKE_VERSION)),)
 # 'MAKEFLAGS += -rR' does not immediately become effective for GNU Make 3.x
 # We need to invoke sub-make to avoid implicit rules in the top Makefile.
 need-sub-make := 1
 # Cancel implicit rules for this Makefile.
-$(lastword $(MAKEFILE_LIST)): ;
+$(this-makefile): ;
 endif
 
 export abs_srctree abs_objtree
@@ -172,7 +174,7 @@ ifeq ($(need-sub-make),1)
 
 PHONY += $(MAKECMDGOALS) sub-make
 
-$(filter-out _all sub-make $(lastword $(MAKEFILE_LIST)), $(MAKECMDGOALS)) 
_all: sub-make
+$(filter-out _all sub-make $(this-makefile), $(MAKECMDGOALS)) _all: sub-make
@:
 
 # Invoke a second make in the output directory, passing relevant variables
-- 
2.25.1



[PATCH 2/2] kbuild: error out if targets prefixed with '__' are directly run

2020-05-10 Thread Masahiro Yamada
Some targets are internal-use only.

It is tedious to care about "what if __build_one_by_one is contained
in $(MAKECMDGOALS)?" etc.

Prefix internal targets with double underscores. Stop parsing Makefile
if they are directly run.

Signed-off-by: Masahiro Yamada 
---

 Makefile | 23 +--
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index 1a4977ad7cd9..1162cb04860c 100644
--- a/Makefile
+++ b/Makefile
@@ -11,9 +11,12 @@ NAME = Kleptomaniac Octopus
 # Comments in this file are targeted only to the developer, do not
 # expect to learn how to build the kernel reading this file.
 
+$(if $(filter __%, $(MAKECMDGOALS)), \
+   $(error targets prefixed with '__' are only for internal use))
+
 # That's our default target when none is given on the command line
-PHONY := _all
-_all:
+PHONY := __all
+__all:
 
 # We are using a recursive build, so we need to do a little thinking
 # to get the ordering right.
@@ -172,13 +175,13 @@ export sub_make_done := 1
 
 ifeq ($(need-sub-make),1)
 
-PHONY += $(MAKECMDGOALS) sub-make
+PHONY += $(MAKECMDGOALS) __sub-make
 
-$(filter-out _all sub-make $(this-makefile), $(MAKECMDGOALS)) _all: sub-make
+$(filter-out $(this-makefile), $(MAKECMDGOALS)) __all: __sub-make
@:
 
 # Invoke a second make in the output directory, passing relevant variables
-sub-make:
+__sub-make:
$(Q)$(MAKE) -C $(abs_objtree) -f $(abs_srctree)/Makefile $(MAKECMDGOALS)
 
 endif # need-sub-make
@@ -323,7 +326,7 @@ ifdef mixed-build
 
 PHONY += $(MAKECMDGOALS) __build_one_by_one
 
-$(filter-out __build_one_by_one, $(MAKECMDGOALS)): __build_one_by_one
+$(MAKECMDGOALS): __build_one_by_one
@:
 
 __build_one_by_one:
@@ -598,12 +601,12 @@ else #!config-build
 # targets and others. In general all targets except *config targets.
 
 # If building an external module we do not care about the all: rule
-# but instead _all depend on modules
+# but instead __all depend on modules
 PHONY += all
 ifeq ($(KBUILD_EXTMOD),)
-_all: all
+__all: all
 else
-_all: modules
+__all: modules
 endif
 
 # Decide whether to build built-in, modular, or both.
@@ -625,7 +628,7 @@ endif
 # in addition to whatever we do anyway.
 # Just "make" or "make all" shall build modules as well
 
-ifneq ($(filter all _all modules nsdeps,$(MAKECMDGOALS)),)
+ifneq ($(filter all modules nsdeps,$(MAKECMDGOALS)),)
   KBUILD_MODULES := 1
 endif
 
-- 
2.25.1



linux-next: build warning after merge of the drm tree

2020-05-10 Thread Stephen Rothwell
Hi all,

After merging the drm tree, today's linux-next build (x86_64 allmodconfig)
produced this warning:

WARNING: modpost: missing MODULE_LICENSE() in 
drivers/gpu/drm/panel/panel-visionox-rm69299.o

Introduced by commit

  c7f66d32dd43 ("drm/panel: add support for rm69299 visionox panel")

-- 
Cheers,
Stephen Rothwell


pgp28GH6j19De.pgp
Description: OpenPGP digital signature


[PATCH] driver/hwmon/nct6775: Use kobj_to_dev() API

2020-05-10 Thread zhouchuangao
Use kobj_to_dev() API instead of container_of().

Signed-off-by: zhouchuangao 
---
 drivers/hwmon/nct6775.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c
index 7efa6bf..e7e1ddc 100644
--- a/drivers/hwmon/nct6775.c
+++ b/drivers/hwmon/nct6775.c
@@ -2047,7 +2047,7 @@ store_temp_beep(struct device *dev, struct 
device_attribute *attr,
 static umode_t nct6775_in_is_visible(struct kobject *kobj,
 struct attribute *attr, int index)
 {
-   struct device *dev = container_of(kobj, struct device, kobj);
+   struct device *dev = kobj_to_dev(kobj);
struct nct6775_data *data = dev_get_drvdata(dev);
int in = index / 5; /* voltage index */
 
@@ -2253,7 +2253,7 @@ store_fan_pulses(struct device *dev, struct 
device_attribute *attr,
 static umode_t nct6775_fan_is_visible(struct kobject *kobj,
  struct attribute *attr, int index)
 {
-   struct device *dev = container_of(kobj, struct device, kobj);
+   struct device *dev = kobj_to_dev(kobj);
struct nct6775_data *data = dev_get_drvdata(dev);
int fan = index / 6;/* fan index */
int nr = index % 6; /* attribute index */
@@ -2440,7 +2440,7 @@ store_temp_type(struct device *dev, struct 
device_attribute *attr,
 static umode_t nct6775_temp_is_visible(struct kobject *kobj,
   struct attribute *attr, int index)
 {
-   struct device *dev = container_of(kobj, struct device, kobj);
+   struct device *dev = kobj_to_dev(kobj);
struct nct6775_data *data = dev_get_drvdata(dev);
int temp = index / 10;  /* temp index */
int nr = index % 10;/* attribute index */
@@ -3257,7 +3257,7 @@ store_auto_temp(struct device *dev, struct 
device_attribute *attr,
 static umode_t nct6775_pwm_is_visible(struct kobject *kobj,
  struct attribute *attr, int index)
 {
-   struct device *dev = container_of(kobj, struct device, kobj);
+   struct device *dev = kobj_to_dev(kobj);
struct nct6775_data *data = dev_get_drvdata(dev);
int pwm = index / 36;   /* pwm index */
int nr = index % 36;/* attribute index */
@@ -3459,7 +3459,7 @@ static SENSOR_DEVICE_ATTR(beep_enable, S_IWUSR | S_IRUGO, 
show_beep,
 static umode_t nct6775_other_is_visible(struct kobject *kobj,
struct attribute *attr, int index)
 {
-   struct device *dev = container_of(kobj, struct device, kobj);
+   struct device *dev = kobj_to_dev(kobj);
struct nct6775_data *data = dev_get_drvdata(dev);
 
if (index == 0 && !data->have_vid)
-- 
2.7.4



Re: [PATCH v6 1/9] w1_therm: adding code comments and code reordering

2020-05-10 Thread Randy Dunlap
Hi,

A few more comments here (inline):

On 5/10/20 7:15 AM, Akira Shimahara wrote:

>  drivers/w1/slaves/w1_therm.c | 398 ---
>  1 file changed, 232 insertions(+), 166 deletions(-)
> 
> diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
> index 18f08d7..890cf09 100644
> --- a/drivers/w1/slaves/w1_therm.c
> +++ b/drivers/w1/slaves/w1_therm.c
> @@ -41,42 +41,55 @@
>  static int w1_strong_pullup = 1;
>  module_param_named(strong_pullup, w1_strong_pullup, int, 0);
>  
> +/* Helpers Macros */
> +
> +/* return the address of the refcnt in the family data */
> +#define THERM_REFCNT(family_data) \
> + (&((struct w1_therm_family_data *)family_data)->refcnt)
> +
> +/* Structs definition */
> +
> +/**
> + * struct w1_therm_family_converter - bind device specific functions
> + * @broken: flag for non registred families

non-registered

> + * @reserved: not used here
> + * @f: pointer to the device binding structure
> + * @convert: pointer to the device conversion function
> + * @precision: pointer to the device precicion function

precision

> + * @eeprom: pointer to eeprom function
> + */
> +struct w1_therm_family_converter {
> + u8  broken;
> + u16 reserved;
> + struct w1_family*f;
> + int (*convert)(u8 rom[9]);
> + int (*precision)(struct device *device, int val);
> + int (*eeprom)(struct device *device);
> +};
> +
> +/**
> + * struct w1_therm_family_data - device data
> + * @rom: ROM id of the device
> + * @refcnt: ref count
> + */
>  struct w1_therm_family_data {
>   uint8_t rom[9];

Why is "rom" 9 bytes in length?  Does it come from some
spec or standard?  Can it be a macro instead of an arbitrary
magic number?

>   atomic_t refcnt;
>  };
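
A sketch of what that suggestion could look like (the macro name is made up;
the 9 presumably covers the 8 scratchpad data bytes plus the CRC byte on these
sensors):

	/* hypothetical name, only illustrating "macro instead of magic number" */
	#define W1_THERM_ROM_SIZE	9	/* 8 scratchpad bytes + CRC */

	struct w1_therm_family_data {
		uint8_t rom[W1_THERM_ROM_SIZE];
		atomic_t refcnt;
	};
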
>  
> +/**
> + * struct therm_info - store temperature reading
> + * @rom: readen device data

read

> + * @crc: computed crc from rom
> + * @verdict: 1 crc checked, 0 crc not matching
> + */
>  struct therm_info {
>   u8 rom[9];
>   u8 crc;
>   u8 verdict;
>  };
>  
...

>  
> +/* Interface Functions declaration */
> +
> +/**
> + * w1_therm_add_slave() - Called when a new slave is discovered
> + * @sl: slave just discovered by the master.
> + *
> + * Called by the master when the slave is discovered on the bus.Used to

   bus. Used to

> + * initialized slave state before the beginning of any communication.

  initialize

> + *
> + * Return: 0 - If success, negative kernel code otherwise
> + */
> +static int w1_therm_add_slave(struct w1_slave *sl);
> +
> +/**
> + * w1_therm_remove_slave() - Called when a slave is removed
> + * @sl: slave to be removed.
> + *
> + * Called by the master when the slave is considered not to be on the bus
> + * anymore. Used to free memory.
> + */
> +static void w1_therm_remove_slave(struct w1_slave *sl);
> +
> +/* Family attributes */
> +
>  static struct attribute *w1_therm_attrs[] = {
>   &dev_attr_w1_slave.attr,
>   NULL,
> @@ -101,6 +140,8 @@ static struct attribute *w1_ds28ea00_attrs[] = {
>   NULL,
>  };
>  
> +/* Attribute groups */
> +
>  ATTRIBUTE_GROUPS(w1_therm);
>  ATTRIBUTE_GROUPS(w1_ds28ea00);
>  
> @@ -154,6 +195,8 @@ static const struct hwmon_chip_info w1_chip_info = {
>  #define W1_CHIPINFO  NULL
>  #endif
>  
> +/* Family operations */
> +
>  static struct w1_family_ops w1_therm_fops = {
>   .add_slave  = w1_therm_add_slave,
>   .remove_slave   = w1_therm_remove_slave,
> @@ -168,6 +211,8 @@ static struct w1_family_ops w1_ds28ea00_fops = {
>   .chip_info  = W1_CHIPINFO,
>  };
>  
> +/* Family binding operations struct */
> +
>  static struct w1_family w1_therm_family_DS18S20 = {
>   .fid = W1_THERM_DS18S20,
>   .fops = &w1_therm_fops,
...

> @@ -407,6 +332,7 @@ error:
>   return ret;
>  }
>  
> +/* The return value is millidegrees Centigrade. */

   Celsius. */

>  static inline int w1_DS18B20_convert_temp(u8 rom[9])
>  {
>   s16 t = le16_to_cpup((__le16 *)rom);
> @@ -414,6 +340,7 @@ static inline int w1_DS18B20_convert_temp(u8 rom[9])
>   return t*1000/16;
>  }
>  
> +/* The return value is millidegrees Centigrade. */

ditto.

>  static inline int w1_DS18S20_convert_temp(u8 rom[9])

9?

...

> @@ -564,6 +529,81 @@ error:
>   return ret;
>  }
>  
> +static inline int w1_therm_eeprom(struct device *device)
> +{
> + struct w1_slave *sl = dev_to_w1_slave(device);
> + struct w1_master *dev = sl->master;
> + u8 rom[9], external_power;

9?


thanks.
-- 
~Randy



Re: [PATCH v6 5/9] w1_therm: adding resolution sysfs entry

2020-05-10 Thread Randy Dunlap
Hi,

The kernel-doc comment changes look good.  Thanks for doing that.


On 5/10/20 7:17 AM, Akira Shimahara wrote:

> diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c
> index 08579dc..b1734ae 100644
> --- a/drivers/w1/slaves/w1_therm.c
> +++ b/drivers/w1/slaves/w1_therm.c
> @@ -50,12 +50,24 @@ module_param_named(strong_pullup, w1_strong_pullup, int, 
> 0);
>  
>  /* Helpers Macros */
>  
> +/* return a pointer on the slave w1_therm_family_converter struct:
> + * always test family data existence before
> + */

/*
 * Kernel multi-line comment coding style (except for networking source code)
 * is like this.
 */

(in multiple places)

> +/**
> + * write_scratchpad() - write nb_bytes in the device RAM
> + * @sl: pointer to the slave to write in
> + * @data: pointer to an array of 3 bytes, as 3 bytes MUST be written
> + * @nb_bytes: Nb bytes to be written (2 for DS18S20, 3 for other devices)

If Nb means Number, please spell it out.

> + *
> + * Return: 0 if success, -kernel error code otherwise
> + */
> +static int write_scratchpad(struct w1_slave *sl, const u8 *data, u8 
> nb_bytes);
> +
>  /**
>   * read_powermode() - Query the power mode of the slave
>   * @sl: slave to retrieve the power mode


thanks.
-- 
~Randy



Re: [PATCH v2 0/5] mtd: spi-nor: Add support for Octal 8D-8D-8D mode

2020-05-10 Thread masonccyang


Hi Vignesh,

> >>>
> >>> Our mx25uw51245g supports BFPT DWORD-18,19 and 20 data and xSPI 
> > profile 
> >>> 1.0,
> >>> and it comply with BFPT DWORD-19, octal mode enable sequences by 
write 
> > CFG 
> >>> Reg2 
> >>> with instruction 0x72. Therefore, I can't apply your patches.
> >>
> >> I didn't mean apply my patches directly. I meant more along the lines 
of 
> > 
> >> edit your patches to work on top of my series. It should be as easy 
as 
> >> adding your flash's fixup hooks and its octal DTR enable hook, but if 
my 
> > 
> >> series is missing something you need (like complete Profile 1.0 
parsing, 
> > 
> >> which I left out because I wanted to be conservative and didn't see 
any 
> >> immediate use-case for us), let me know, and we can work together to 
> >> address it.
> > 
> > yes,sure!
> > let's work together to upstream the Octal 8D-8D-8D driver to mainline.
> > 
> > The main concern is where and how to enable xSPI octal mode?
> > 
> > Vignesh don't agree to enable it in fixup hooks and that's why I 
patched
> > it to spi_nor_late_init_params() and confirmed the device support xSPI 

> > Octal mode after BFPT DWORD-19 and xSPI pf 1.0 have been parsed.
> > 
> 
> My suggestion was to use SFDP wherever possible.. E.g: it is possible to
> get opcode extension type from BFPT...
> 
> But using BFPT DWORD-19 is not correct for switching to 8D-8D-8D mode:
> 
> Per JESD216D.01 Bits 22:20 of  19th DWORD of BFPT:
> 
> Octal Enable Requirements:
> 
> This field describes whether the device contains a Octal Enable bit used
> to enable 1-1-8 and 1-
> 8-8 octal read or octal program operations.
> 
> So, this cannot be used for enabling 8D-8D-8D mode... Flashes that only
> support 1S-1S-1S and 8D-8D-8D will set this field to 0.

Yes, you are right: the bits 22~20 you mentioned are the 1-1-8 and 1-8-8
mode enable requirements, and they are zero if the flash only supports
1S-1S-1S, 8S-8S-8S and 8D-8D-8D, just like the mx25xx series.

Bits 8~4 describe the 8S-8S-8S and 8D-8D-8D mode enable sequences, and
I have handled those in this patch set.

From bits 8~4 in the 19th DWORD of the BFPT, the driver knows whether to
enable 8S-8S-8S or 8D-8D-8D either by issuing two instructions (06h and E8h)
or by writing CFG Reg 2.

The mx25xx series enables Octal 8S-8S-8S/8D-8D-8D mode by writing CFG Reg 2.
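
As a rough illustration of that distinction (the mask and helper names are
made up; only the bit positions discussed here, 22:20 for the 1-1-8/1-8-8
enable requirements and 8:4 for the octal mode enable sequences, are taken
from this thread):

	#include <linux/bits.h>
	#include <linux/bitfield.h>
	#include <linux/types.h>

	#define EX_BFPT_DW19_OCTAL_EN_REQ	GENMASK(22, 20)	/* 1-1-8 / 1-8-8 only */
	#define EX_BFPT_DW19_OCTAL_EN_SEQ	GENMASK(8, 4)	/* 8S-8S-8S / 8D-8D-8D */

	static bool ex_has_octal_enable_seq(u32 bfpt_dword19)
	{
		/*
		 * Bits 22:20 may legitimately be zero on a flash that only
		 * supports 1S-1S-1S, 8S-8S-8S and 8D-8D-8D, so they cannot
		 * be used to decide about 8D mode; bits 8:4 (or the dedicated
		 * "Command Sequences to Change to Octal DDR mode" table)
		 * describe how the octal modes are actually enabled.
		 */
		return FIELD_GET(EX_BFPT_DW19_OCTAL_EN_SEQ, bfpt_dword19) != 0;
	}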


> 
> There is a separate table to enable 8D mode called
> "Command Sequences to Change to Octal DDR (8D-8D-8D) mode". But if flash
> does not have the table or has bad data, fixup hook is the only way...
> 
> If mx25* supports above table, please build on top of Pratyush's series
> to add support for parsing this table. Otherwise, macronix would have to
> use a fixup hook too...

The mx25xx series definitely supports the "Command Sequences to Change to
Octal DDR (8D-8D-8D) mode" table. I will add parsing for it in the next version.

For the mx25* series, a fixup hook will then only set up the device-specific
dummy cycles for the various frequencies after the xSPI 1.0 table has been parsed.


thanks for your time & comments.
Mason





linux-next: manual merge of the crypto tree with Linus' tree

2020-05-10 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the crypto tree got conflicts in:

  crypto/lrw.c
  crypto/xts.c

between commit:

  1a263ae60b04 ("gcc-10: avoid shadowing standard library 'free()' in crypto")

from Linus' tree and commit:

  d099ea6e6fde ("crypto - Avoid free() namespace collision")

from the crypto tree.

I fixed it up (I just used the versions from the crypto tree) and can
carry the fix as necessary. This is now fixed as far as linux-next is
concerned, but any non trivial conflicts should be mentioned to your
upstream maintainer when your tree is submitted for merging.  You may
also want to consider cooperating with the maintainer of the conflicting
tree to minimise any particularly complex conflicts.

-- 
Cheers,
Stephen Rothwell


pgpQXFYh2AKcT.pgp
Description: OpenPGP digital signature


Re: [PATCH V2 6/8] phy: tegra: xusb: Add support for charger detect

2020-05-10 Thread Nagarjuna Kristam




On 04-05-2020 21:20, Thierry Reding wrote:


On Mon, May 04, 2020 at 02:32:51PM +0530, Nagarjuna Kristam wrote:

On 28-04-2020 16:25, Thierry Reding wrote:

On Wed, Apr 15, 2020 at 01:55:06PM +0530, Nagarjuna Kristam wrote:

[...]

diff --git a/drivers/phy/tegra/xusb-tegra-cd.c 
b/drivers/phy/tegra/xusb-tegra-cd.c
+static void tegra_xusb_padctl_utmi_pad_dcd(struct tegra_xusb_padctl *padctl,
+ u32 index)
+{
+   u32 value;
+   int dcd_timeout_ms = 0;
+   bool ret = false;
+
+   /* Turn on IDP_SRC */
+   value = padctl_readl(padctl, USB2_BATTERY_CHRG_OTGPADX_CTL0(index));
+   value |= OP_I_SRC_EN;
+   padctl_writel(padctl, value, USB2_BATTERY_CHRG_OTGPADX_CTL0(index));
+
+   /* Turn on D- pull-down resistor */
+   value = padctl_readl(padctl, USB2_BATTERY_CHRG_OTGPADX_CTL1(index));
+   value |= USBON_RPD_OVRD_VAL;
+   padctl_writel(padctl, value, USB2_BATTERY_CHRG_OTGPADX_CTL1(index));
+
+   /* Wait for TDCD_DBNC */
+   usleep_range(1, 12);

  From the comment this looks like we're waiting for some hardware
condition. Can we somehow obtain this rather than implementing a fixed
sleep? Especially since the range here is so large.


As per the data sheet, we need to wait for 10 microseconds as settle time.

Okay, so TDCD_DBNC is a value that comes from a timing diagram in a
datasheet? Seems fine to leave it as-is then. Perhaps add parentheses
and mention which exact datasheet that's from, and perhaps which figure
so that people can more easily reference it. Provided there is a
publicly available datasheet, of course.

Will update the reference to the table in the data sheet where these values are
recommended. It's part of the BC 1.2 spec from USB.



Actually, one other thing: If the data sheet says to wait 10 us, why do
you use an upper range of 120 us? Shouldn't a range of 10-20 us be good
enough?
Yes, will reduce it to 20 us.
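
If the datasheet value really is the 10 us settle time, the resulting change
would presumably be no more than this (values taken from the discussion above,
not from a posted patch):

	/* Wait for TDCD_DBNC settle time, >= 10 us per the datasheet */
	usleep_range(10, 20);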


-Nagarjuna


Re: [PATCH 2/5] exec: Directly call security_bprm_set_creds from __do_execve_file

2020-05-10 Thread Kees Cook
On Sat, May 09, 2020 at 02:41:17PM -0500, Eric W. Biederman wrote:
> 
> Now that security_bprm_set_creds is no longer responsible for calling
> cap_bprm_set_creds, security_bprm_set_creds only does something for
> the primary file that is being executed (not any interpreters it may
> have).  Therefore call security_bprm_set_creds from __do_execve_file,
> instead of from prepare_binprm so that it is only called once, and
> remove the now unnecessary called_set_creds field of struct binprm.
> 
> Signed-off-by: "Eric W. Biederman" 
> ---
>  fs/exec.c  | 11 +--
>  include/linux/binfmts.h|  6 --
>  security/apparmor/domain.c |  3 ---
>  security/selinux/hooks.c   |  2 --
>  security/smack/smack_lsm.c |  3 ---
>  security/tomoyo/tomoyo.c   |  6 --
>  6 files changed, 5 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index 765bfd51a546..635b5085050c 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -1635,12 +1635,6 @@ int prepare_binprm(struct linux_binprm *bprm)
>  
>   bprm_fill_uid(bprm);
>  
> - /* fill in binprm security blob */
> - retval = security_bprm_set_creds(bprm);
> - if (retval)
> - return retval;
> - bprm->called_set_creds = 1;
> -
>   retval = cap_bprm_set_creds(bprm);
>   if (retval)
>   return retval;
> @@ -1858,6 +1852,11 @@ static int __do_execve_file(int fd, struct filename 
> *filename,
>   if (retval < 0)
>   goto out;
>  
> + /* fill in binprm security blob */
> + retval = security_bprm_set_creds(bprm);
> + if (retval)
> + goto out;
> +
>   retval = prepare_binprm(bprm);
>   if (retval < 0)
>   goto out;
> 

Here I go with a Sunday night review, so hopefully I'm thinking better
than Friday night's review, but I *think* this patch is broken from
the LSM point of view, in that security_bprm_set_creds() is getting
called _before_ the creds actually get fully set (in prepare_binprm()
by the calls to bprm_fill_uid(), cap_bprm_set_creds(), and
check_unsafe_exec()).

As a specific example, see the setting of LSM_UNSAFE_NO_NEW_PRIVS in
bprm->unsafe during check_unsafe_exec(), which must happen after
bprm_fill_uid(bprm) and cap_bprm_set_creds(bprm), to have a "true" view
of the execution privileges. Apparmor checks for this flag in its
security_bprm_set_creds() hook. Similarly do selinux, smack, etc...

The security_bprm_set_creds() boundary for LSM is to see the "final"
state of the process privileges, and that needs to happen after
bprm_fill_uid(), cap_bprm_set_creds(), and check_unsafe_exec() have all
finished.
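
Put as a sketch, the ordering being asked for above is roughly the following
(pseudo-level C: the wrapper function is hypothetical and the helpers are
static to fs/exec.c, so this only shows the sequence, it is not meant to
compile standalone):

	static int example_bprm_cred_order(struct linux_binprm *bprm)
	{
		int retval;

		bprm_fill_uid(bprm);			/* resolve setuid/setgid effects */

		retval = cap_bprm_set_creds(bprm);	/* capability transitions */
		if (retval)
			return retval;

		check_unsafe_exec(bprm);		/* may set LSM_UNSAFE_NO_NEW_PRIVS in bprm->unsafe */

		return security_bprm_set_creds(bprm);	/* LSMs must see the final state from all of the above */
	}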

So, as it stands, I don't think this will work, but perhaps it can still
be rearranged to avoid the called_set_creds silliness. I'll look more
this week...

-Kees

-- 
Kees Cook


Re: [PATCH V3 2/3] mm/hugetlb: Define a generic fallback for is_hugepage_only_range()

2020-05-10 Thread Anshuman Khandual



On 05/09/2020 03:52 AM, Mike Kravetz wrote:
> On 5/7/20 8:07 PM, Anshuman Khandual wrote:
>> There are multiple similar definitions for is_hugepage_only_range() on
>> various platforms. Lets just add it's generic fallback definition for
>> platforms that do not override. This help reduce code duplication.
>>
>> Cc: Russell King 
>> Cc: Catalin Marinas 
>> Cc: Will Deacon 
>> Cc: Tony Luck 
>> Cc: Fenghua Yu 
>> Cc: Thomas Bogendoerfer 
>> Cc: "James E.J. Bottomley" 
>> Cc: Helge Deller 
>> Cc: Benjamin Herrenschmidt 
>> Cc: Paul Mackerras 
>> Cc: Michael Ellerman 
>> Cc: Paul Walmsley 
>> Cc: Palmer Dabbelt 
>> Cc: Heiko Carstens 
>> Cc: Vasily Gorbik 
>> Cc: Christian Borntraeger 
>> Cc: Yoshinori Sato 
>> Cc: Rich Felker 
>> Cc: "David S. Miller" 
>> Cc: Thomas Gleixner 
>> Cc: Ingo Molnar 
>> Cc: Borislav Petkov 
>> Cc: "H. Peter Anvin" 
>> Cc: Mike Kravetz 
>> Cc: Andrew Morton 
>> Cc: x...@kernel.org
>> Cc: linux-arm-ker...@lists.infradead.org
>> Cc: linux-i...@vger.kernel.org
>> Cc: linux-m...@vger.kernel.org
>> Cc: linux-par...@vger.kernel.org
>> Cc: linuxppc-...@lists.ozlabs.org
>> Cc: linux-ri...@lists.infradead.org
>> Cc: linux-s...@vger.kernel.org
>> Cc: linux...@vger.kernel.org
>> Cc: sparcli...@vger.kernel.org
>> Cc: linux...@kvack.org
>> Cc: linux-a...@vger.kernel.org
>> Cc: linux-kernel@vger.kernel.org
>> Signed-off-by: Anshuman Khandual 
>> ---
>>  arch/arm/include/asm/hugetlb.h | 6 --
>>  arch/arm64/include/asm/hugetlb.h   | 6 --
>>  arch/ia64/include/asm/hugetlb.h| 1 +
>>  arch/mips/include/asm/hugetlb.h| 7 ---
>>  arch/parisc/include/asm/hugetlb.h  | 6 --
>>  arch/powerpc/include/asm/hugetlb.h | 1 +
>>  arch/riscv/include/asm/hugetlb.h   | 6 --
>>  arch/s390/include/asm/hugetlb.h| 7 ---
>>  arch/sh/include/asm/hugetlb.h  | 6 --
>>  arch/sparc/include/asm/hugetlb.h   | 6 --
>>  arch/x86/include/asm/hugetlb.h | 6 --
>>  include/linux/hugetlb.h| 9 +
>>  12 files changed, 11 insertions(+), 56 deletions(-)
>>
> 
>> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
>> index 43a1cef8f0f1..c01c0c6f7fd4 100644
>> --- a/include/linux/hugetlb.h
>> +++ b/include/linux/hugetlb.h
>> @@ -591,6 +591,15 @@ static inline unsigned int blocks_per_huge_page(struct 
>> hstate *h)
>>  
>>  #include 
>>  
>> +#ifndef is_hugepage_only_range
>> +static inline int is_hugepage_only_range(struct mm_struct *mm,
>> +unsigned long addr, unsigned long len)
>> +{
>> +return 0;
>> +}
>> +#define is_hugepage_only_range is_hugepage_only_range
>> +#endif
>> +
>>  #ifndef arch_make_huge_pte
>>  static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct 
>> *vma,
>> struct page *page, int writable)
>>
> 
> Did you try building without CONFIG_HUGETLB_PAGE defined?  I'm guessing

Yes I did for multiple platforms (s390, arm64, ia64, x86, powerpc etc).

> that you need a stub for is_hugepage_only_range().  Or, perhaps add this
> to asm-generic/hugetlb.h?
> 
There is already a stub (include/linux/hugetlb.h) when !CONFIG_HUGETLB_PAGE.


Re: [PATCH v2 4/6] dmaengine: dw: Print warning if multi-block is unsupported

2020-05-10 Thread Serge Semin
On Fri, May 08, 2020 at 10:06:22PM +0300, Andy Shevchenko wrote:
> On Fri, May 08, 2020 at 12:53:34PM +0100, Mark Brown wrote:
> > On Fri, May 08, 2020 at 02:26:04PM +0300, Andy Shevchenko wrote:
> > > On Fri, May 08, 2020 at 01:53:02PM +0300, Serge Semin wrote:
> > 
> > > > Multi-block support provides a way to map the kernel-specific SG-table 
> > > > so
> > > > the DW DMA device would handle it as a whole instead of handling the
> > > > SG-list items or so called LLP block items one by one. So if true LLP
> > > > list isn't supported by the DW DMA engine, then soft-LLP mode will be
> > > > utilized to load and execute each LLP-block one by one. A problem may
> > > > happen for multi-block DMA slave transfers, when the slave device 
> > > > buffers
> > > > (for example Tx and Rx FIFOs) depend on each other and have size smaller
> > > > than the block size. In this case writing data to the DMA slave Tx 
> > > > buffer
> > > > may cause the Rx buffer overflow if Rx DMA channel is paused to
> > > > reinitialize the DW DMA controller with a next Rx LLP item. In 
> > > > particular
> > > > We've discovered this problem in the framework of the DW APB SPI device
> > 
> > > Mark, do we have any adjustment knobs in SPI core to cope with this?
> > 
> > Frankly I'm not sure I follow what the issue is - is an LLP block item
> > different from a SG list entry?  As far as I can tell the problem is
> > that the DMA controller does not support chaining transactions together
> > and possibly also has a limit on the transfer size?  Or possibly some
> > issue with the DMA controller locking the CPU out of the I/O bus for
> > noticable periods?  I can't really think what we could do about that if
> > the issue is transfer sizes, that just seems like hardware which is
> > never going to work reliably.  If the issue is not being able to chain
> > transfers then possibly an option to linearize messages into a single
> > transfer as suggested to cope with PIO devices with ill considered
> > automated chip select handling, though at some point you have to worry
> > about the cost of the memcpy() vs the cost of just doing PIO.
> 
> My understanding is that the programmed transfers (as separate items in the SG list)
> can get desynchronized due to the LLP emulation in the DMA driver. And the suggestion
> probably is that using only single-entry (block) SG lists will do the trick (I
> guess we can configure the SPI core to change or not change CS between them).

CS has nothing to do with this. The problem is purely in the LLP emulation and the Tx
channel being enabled before the Rx channel is initialized during the next LLP
reload. Yes, if the Tx and Rx SG/LLP lists consist of a single item, then
there is no problem. Though it would be good to fix the issue in general instead
of imposing such a fatal restriction. If we had some fence blocking one channel
before the other is reinitialized, the problem could theoretically be solved.

It could be an "interdependent DMA channels" functionality. If two channels are
interdependent, then the Rx channel could pause the Tx channel while it's in the
IRQ handling procedure (or at some other point... call a callback?). This !might!
fix the problem, but with no 100% guarantee of success. It will work only if the
IRQ handler is executed with small latency, so the Tx channel is paused before
the Rx FIFO has been filled and overrun.

Another solution could be to reinitialize the interdependent channels
synchronously: the Tx channel stops and waits until the Rx channel has finished
retrieving data from the SPI Rx FIFO. Though this solution implies the Tx and Rx
SG/LLP items have buffers of the same size.

Although I don't really like the idea of spending time on developing either of
these solutions.
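
A very rough sketch of that first idea (every name below is hypothetical, not
the dw_dmac API, and whether the pause lands before the Rx FIFO overruns is
exactly the latency concern above):

	struct ex_dma_chan {
		struct ex_dma_chan *sibling;	/* paired, interdependent channel (Tx for an Rx channel) */
		/* ... */
	};

	void ex_pause_chan(struct ex_dma_chan *c);		/* suspend transfers on a channel */
	void ex_resume_chan(struct ex_dma_chan *c);
	void ex_load_next_llp_block(struct ex_dma_chan *c);	/* soft-LLP reload */

	static void ex_rx_block_complete_irq(struct ex_dma_chan *rxc)
	{
		struct ex_dma_chan *txc = rxc->sibling;

		if (txc)
			ex_pause_chan(txc);	/* fence Tx while Rx is being reprogrammed */

		ex_load_next_llp_block(rxc);	/* program and start the next Rx block */

		if (txc)
			ex_resume_chan(txc);	/* Rx is armed again, Tx may refill the FIFO */
	}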

> 
> > > > working in conjunction with DW DMA. Since there is no comprehensive way 
> > > > to
> > > > fix it right now lets at least print a warning for the first found
> > > > multi-blockless DW DMAC channel. This shall point a developer to the
> > > > possible cause of the problem if one would experience a sudden data 
> > > > loss.
> > 
> > I thought from the description of the SPI driver I just reviewed that
> > this hardware didn't have DMA?  Or are there separate blocks in the
> > hardware that have a more standard instantiation of the DesignWare SPI
> > controller with DMA attached?
> 
> I speculate that the right words there should be 'we don't enable DMA right 
> now
> due to some issues' (see above).

That's your speculation, and it's kind of offensive to implicitly imply I was
lying. If our System SPI controller had DMA, I would have said so and would
have made it supported in the driver and probably wouldn't bother with a
dedicated driver development. Again the Baikal-T1 System Boot SPI controller
doesn't have DMA, doesn't have IRQ, is equipped with only 8 bytes FIFO, is
embedded into the Boot Controller, provides a dirmap interface to an SPI flash
and so on. Baikal-T1 has also got two more normal DW APB SSI interfaces 

Re: [PATCH v8 09/10] scsi: ufs-exynos: add UFS host support for Exynos SoCs

2020-05-10 Thread Randy Dunlap
On 5/10/20 7:00 PM, Alim Akhtar wrote:
> diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig
> index e2005aeddc2d..cc7e29c8c24f 100644
> --- a/drivers/scsi/ufs/Kconfig
> +++ b/drivers/scsi/ufs/Kconfig
> @@ -160,3 +160,15 @@ config SCSI_UFS_BSG
>  
> Select this if you need a bsg device node for your UFS controller.
> If unsure, say N.
> +
> +config SCSI_UFS_EXYNOS
> + bool "EXYNOS specific hooks to UFS controller platform driver"
> + depends on SCSI_UFSHCD_PLATFORM && ARCH_EXYNOS || COMPILE_TEST

Since && has higher precedence than ||, I am thinking that this should be

depends on SCSI_UFSHCD_PLATFORM && (ARCH_EXYNOS || COMPILE_TEST)

> + select PHY_SAMSUNG_UFS
> + help
> +   This selects the EXYNOS specific additions to UFSHCD platform driver.
> +   UFS host on EXYNOS includes HCI and UNIPRO layer, and associates with
> +   UFS-PHY driver.
> +
> +   Select this if you have UFS host controller on EXYNOS chipset.
> +   If unsure, say N.


-- 
~Randy



Re: [PATCH v2 00/91] drm/vc4: Support BCM2711 Display Pipelin

2020-05-10 Thread Jian-Hong Pan
Jian-Hong Pan  wrote on Friday, May 8, 2020 at 2:20 PM:
>
> Maxime Ripard  wrote on Friday, May 8, 2020 at 1:22 AM:
> >
> > On Mon, May 04, 2020 at 02:35:08PM +0800, Jian-Hong Pan wrote:
> > > Maxime Ripard  wrote on Wednesday, April 29, 2020 at 12:21 AM:
> > > >
> > > > Hi,
> > > >
> > > > On Mon, Apr 27, 2020 at 03:23:42PM +0800, Jian-Hong Pan wrote:
> > > > > Hi Maxime,
> > > > >
> > > > > Thanks for your V2 patch series!  I'm testing it.
> > > > >
> > > > > This patch series is applied upon mainline kernel 5.7-rc2 cleanly and 
> > > > > built.
> > > > > System can boot into console text mode, but no graphic UI.
> > > > >
> > > > > Get the error in vc5_hdmi_phy_init(), and full dmesg is at [1]:
> > > > >
> > > > > [5.587543] vc4_hdmi fef00700.hdmi: Unknown register ID 46
> > > > > [5.587700] debugfs: Directory 'fef00700.hdmi' with parent 
> > > > > 'vc4-hdmi' already present!
> > > > > [5.588070] vc4_hdmi fef00700.hdmi: vc4-hdmi-hifi <-> 
> > > > > fef00700.hdmi mapping ok
> > > > > [5.588076] vc4_hdmi fef00700.hdmi: ASoC: no DMI vendor name!
> > > > > [5.588263] vc4-drm gpu: bound fef00700.hdmi (ops vc4_hdmi_ops)
> > > > > [5.588299] vc4_hdmi fef05700.hdmi: Unknown register ID 46
> > > > > [5.588373] debugfs: Directory 'vc4-hdmi' with parent 'asoc' 
> > > > > already present!
> > > > > [5.588673] vc4_hdmi fef05700.hdmi: vc4-hdmi-hifi <-> 
> > > > > fef05700.hdmi mapping ok
> > > > > [5.588677] vc4_hdmi fef05700.hdmi: ASoC: no DMI vendor name!
> > > > > [5.588809] vc4-drm gpu: bound fef05700.hdmi (ops vc4_hdmi_ops)
> > > > > [5.588854] vc4-drm gpu: bound fe806000.vec (ops vc4_vec_ops)
> > > > > [5.588897] vc4-drm gpu: bound fe004000.txp (ops vc4_txp_ops)
> > > > > [5.588934] vc4-drm gpu: bound fe40.hvs (ops vc4_hvs_ops)
> > > > > [5.588990] vc4-drm gpu: bound fe206000.pixelvalve (ops 
> > > > > vc4_crtc_ops)
> > > > > [5.589030] vc4-drm gpu: bound fe207000.pixelvalve (ops 
> > > > > vc4_crtc_ops)
> > > > > [5.589074] vc4-drm gpu: bound fe20a000.pixelvalve (ops 
> > > > > vc4_crtc_ops)
> > > > > [5.589106] vc4-drm gpu: bound fe216000.pixelvalve (ops 
> > > > > vc4_crtc_ops)
> > > > > [5.589145] vc4-drm gpu: bound fec12000.pixelvalve (ops 
> > > > > vc4_crtc_ops)
> > > > > [5.589294] checking generic (3e513000 6d8c00) vs hw (0 
> > > > > )
> > > > > [5.589297] fb0: switching to vc4drmfb from simple
> > > > > [5.589433] Console: switching to colour dummy device 80x25
> > > > > [5.589481] [drm] Supports vblank timestamp caching Rev 2 
> > > > > (21.10.2013).
> > > > > [5.589816] [drm] Initialized vc4 0.0.0 20140616 for gpu on minor 0
> > > > > [5.601079] [ cut here ]
> > > > > [5.601095] WARNING: CPU: 2 PID: 127 at 
> > > > > drivers/gpu/drm/vc4/vc4_hdmi_phy.c:413 vc5_hdmi_phy_init+0x7ac/0x2078
> > > > > [5.601097] Modules linked in:
> > > > > [5.601103] CPU: 2 PID: 127 Comm: kworker/2:1 Not tainted 
> > > > > 5.7.0-rc2-00091-ga181df59a930 #7
> > > > > [5.601105] Hardware name: Raspberry Pi 4 Model B (DT)
> > > > > [5.601112] Workqueue: events deferred_probe_work_func
> > > > > [5.601116] pstate: 2005 (nzCv daif -PAN -UAO)
> > > > > [5.601119] pc : vc5_hdmi_phy_init+0x7ac/0x2078
> > > > > [5.601123] lr : vc4_hdmi_encoder_enable+0x1b8/0x1ac0
> > > > > [5.601124] sp : 80001217b410
> > > > > [5.601126] x29: 80001217b410 x28: ec6370f0
> > > > > [5.601129] x27: f650d400 x26: 8a50
> > > > > [5.601132] x25: 8000113b4ac0 x24: 2060
> > > > > [5.601135] x23: 0a50 x22: 0300
> > > > > [5.601137] x21: 08d9ee20 x20: ec535080
> > > > > [5.601140] x19: 00010989e7c0 x18: 
> > > > > [5.601142] x17: 0001 x16: 5207
> > > > > [5.601145] x15: 4932ad293c92 x14: 0137
> > > > > [5.601147] x13: 800010015000 x12: 0001
> > > > > [5.601150] x11: 0001 x10: 
> > > > > [5.601152] x9 :  x8 : 800010015038
> > > > > [5.601154] x7 : 0001 x6 : 80001217b368
> > > > > [5.601157] x5 :  x4 : 004c
> > > > > [5.601159] x3 :  x2 : 8000113b4ac0
> > > > > [5.601162] x1 : 8000120c5f44 x0 : dc8984ff
> > > > > [5.601164] Call trace:
> > > > > [5.601169]  vc5_hdmi_phy_init+0x7ac/0x2078
> > > > > [5.601172]  vc4_hdmi_encoder_enable+0x1b8/0x1ac0
> > > > > [5.601176]  drm_atomic_helper_commit_modeset_enables+0x224/0x248
> > > > > [5.601179]  vc4_atomic_complete_commit+0x400/0x558
> > > > > [5.601182]  vc4_atomic_commit+0x1e0/0x200
> > > > > [5.601185]  drm_atomic_commit+0x4c/0x60
> > > > > [5.601190]  drm_client_modeset_commit_atomic.isra.0+0x17c/0x238
> > > > > [5.601192]  drm_client_modeset_commit_locked+0x5c/0x198
> > > > > [5.601195]  

Re: [PATCH net v3] hinic: fix a bug of ndo_stop

2020-05-10 Thread Jakub Kicinski
On Sun, 10 May 2020 19:01:08 + Luo bin wrote:
> If some function in the ndo_stop path returns failure because of a
> hardware fault, we must go on executing the remaining steps rather than
> return failure directly, otherwise we will cause a memory leak. Also bump
> the timeout for SET_FUNC_STATE to ensure that the cmd won't return failure
> when the hw is busy. Otherwise the hw may stomp on host memory if we free
> memory regardless of the return value of SET_FUNC_STATE.
> 
> Fixes: 51ba902a16e6 ("net-next/hinic: Initialize hw interface")
> Signed-off-by: Luo bin 

Applied, thank you.


linux-next: build warning after merge of the ipsec-next tree

2020-05-10 Thread Stephen Rothwell
Hi all,

After merging the ipsec-next tree, today's linux-next build (arm
multi_v7_defconfig) produced this warning:

net/ipv4/xfrm4_output.c: In function '__xfrm4_output':
net/ipv4/xfrm4_output.c:19:21: warning: unused variable 'x' [-Wunused-variable]
   19 |  struct xfrm_state *x = skb_dst(skb)->xfrm;
  | ^

Introduced by commit

  2ab6096db2f1 ("xfrm: remove output_finish indirection from xfrm_state_afinfo")

-- 
Cheers,
Stephen Rothwell


pgpCPEdUsIFs4.pgp
Description: OpenPGP digital signature


linux-next: manual merge of the ipsec-next tree with Linus' tree

2020-05-10 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the ipsec-next tree got conflicts in:

  net/ipv4/xfrm4_output.c
  net/ipv6/xfrm6_output.c

between commit:

  0c922a4850eb ("xfrm: Always set XFRM_TRANSFORMED in xfrm{4,6}_output_finish")

from Linus' tree and commit:

  2ab6096db2f1 ("xfrm: remove output_finish indirection from xfrm_state_afinfo")

from the ipsec-next tree.

I fixed it up (I used the latter versions of these files and then added
the following patch) and can carry the fix as necessary. This is now fixed
as far as linux-next is concerned, but any non trivial conflicts should
be mentioned to your upstream maintainer when your tree is submitted for
merging.  You may also want to consider cooperating with the maintainer
of the conflicting tree to minimise any particularly complex conflicts.

From: Stephen Rothwell 
Date: Mon, 11 May 2020 12:57:24 +1000
Subject: [PATCH] xfrm: merge fixup for "remove output_finish indirection from 
xfrm_state_afinfo"

Signed-off-by: Stephen Rothwell 
---
 net/xfrm/xfrm_output.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 886a9b284b3a..0f4b3a5e02ba 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -574,16 +574,12 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
switch (x->outer_mode.family) {
case AF_INET:
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-#ifdef CONFIG_NETFILTER
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
-#endif
break;
case AF_INET6:
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
 
-#ifdef CONFIG_NETFILTER
IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-#endif
break;
}
 
-- 
2.26.2

-- 
Cheers,
Stephen Rothwell


pgpc3BxMZ5ZR1.pgp
Description: OpenPGP digital signature


[PATCH v5 16/16] powerpc/watchpoint/xmon: Support 2nd DAWR

2020-05-10 Thread Ravi Bangoria
Add support for 2nd DAWR in xmon. With this, we can have two
simultaneous breakpoints from xmon.

Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/xmon/xmon.c | 101 ++-
 1 file changed, 69 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 99e9138661e4..01da49b666db 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -111,7 +111,7 @@ struct bpt {
 
 #define NBPTS  256
 static struct bpt bpts[NBPTS];
-static struct bpt dabr;
+static struct bpt dabr[HBP_NUM_MAX];
 static struct bpt *iabr;
 static unsigned bpinstr = 0x7fe8;  /* trap */
 
@@ -787,10 +787,17 @@ static int xmon_sstep(struct pt_regs *regs)
 
 static int xmon_break_match(struct pt_regs *regs)
 {
+   int i;
+
if ((regs->msr & (MSR_IR|MSR_PR|MSR_64BIT)) != (MSR_IR|MSR_64BIT))
return 0;
-   if (dabr.enabled == 0)
-   return 0;
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (dabr[i].enabled)
+   goto found;
+   }
+   return 0;
+
+found:
xmon_core(regs, 0);
return 1;
 }
@@ -929,13 +936,16 @@ static void insert_bpts(void)
 
 static void insert_cpu_bpts(void)
 {
+   int i;
struct arch_hw_breakpoint brk;
 
-   if (dabr.enabled) {
-   brk.address = dabr.address;
-   brk.type = (dabr.enabled & HW_BRK_TYPE_DABR) | 
HW_BRK_TYPE_PRIV_ALL;
-   brk.len = DABR_MAX_LEN;
-   __set_breakpoint(0, &brk);
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (dabr[i].enabled) {
+   brk.address = dabr[i].address;
+   brk.type = (dabr[i].enabled & HW_BRK_TYPE_DABR) | 
HW_BRK_TYPE_PRIV_ALL;
+   brk.len = 8;
+   __set_breakpoint(i, &brk);
+   }
}
 
if (iabr)
@@ -1349,6 +1359,35 @@ static long check_bp_loc(unsigned long addr)
return 1;
 }
 
+static int find_free_data_bpt(void)
+{
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (!dabr[i].enabled)
+   return i;
+   }
+   printf("Couldn't find free breakpoint register\n");
+   return -1;
+}
+
+static void print_data_bpts(void)
+{
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (!dabr[i].enabled)
+   continue;
+
+   printf("   data   "REG"  [", dabr[i].address);
+   if (dabr[i].enabled & 1)
+   printf("r");
+   if (dabr[i].enabled & 2)
+   printf("w");
+   printf("]\n");
+   }
+}
+
 static char *breakpoint_help_string =
 "Breakpoint command usage:\n"
 "bshow breakpoints\n"
@@ -1382,10 +1421,9 @@ bpt_cmds(void)
printf("Hardware data breakpoint not supported on this 
cpu\n");
break;
}
-   if (dabr.enabled) {
-   printf("Couldn't find free breakpoint register\n");
+   i = find_free_data_bpt();
+   if (i < 0)
break;
-   }
mode = 7;
cmd = inchar();
if (cmd == 'r')
@@ -1394,15 +1432,15 @@ bpt_cmds(void)
mode = 6;
else
termch = cmd;
-   dabr.address = 0;
-   dabr.enabled = 0;
-   if (scanhex(&dabr.address)) {
-   if (!is_kernel_addr(dabr.address)) {
+   dabr[i].address = 0;
+   dabr[i].enabled = 0;
+   if (scanhex(&dabr[i].address)) {
+   if (!is_kernel_addr(dabr[i].address)) {
printf(badaddr);
break;
}
-   dabr.address &= ~HW_BRK_TYPE_DABR;
-   dabr.enabled = mode | BP_DABR;
+   dabr[i].address &= ~HW_BRK_TYPE_DABR;
+   dabr[i].enabled = mode | BP_DABR;
}
 
force_enable_xmon();
@@ -1441,7 +1479,9 @@ bpt_cmds(void)
for (i = 0; i < NBPTS; ++i)
bpts[i].enabled = 0;
iabr = NULL;
-   dabr.enabled = 0;
+   for (i = 0; i < nr_wp_slots(); i++)
+   dabr[i].enabled = 0;
+
printf("All breakpoints cleared\n");
break;
}
@@ -1475,14 +1515,7 @@ bpt_cmds(void)
if (xmon_is_ro || !scanhex(&a)) {
/* print all breakpoints */
printf("   typeaddress\n");
-   if (dabr.enabled) {
-   printf("   data   "REG"  [", 

[PATCH v5 09/16] powerpc/watchpoint: Convert thread_struct->hw_brk to an array

2020-05-10 Thread Ravi Bangoria
So far powerpc hw supported only one watchpoint. But Power10 is
introducing 2nd DAWR. Convert thread_struct->hw_brk into an array.

Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/include/asm/processor.h  |  2 +-
 arch/powerpc/kernel/process.c | 60 ++-
 arch/powerpc/kernel/ptrace/ptrace-noadv.c | 40 ++-
 arch/powerpc/kernel/ptrace/ptrace32.c |  4 +-
 arch/powerpc/kernel/signal.c  | 13 +++--
 5 files changed, 78 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index a71bdd6bc284..668c02c67b61 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -187,7 +187,7 @@ struct thread_struct {
 */
struct perf_event *last_hit_ubp;
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
-   struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
+   struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint 
info */
unsigned long   trap_nr;/* last trap # on this thread */
u8 load_slb;/* Ages out SLB preload cache entries */
u8 load_fp;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 351fbd8d2c5b..6d1b7cede900 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -711,21 +711,49 @@ void switch_booke_debug_regs(struct debug_reg *new_debug)
 EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
 #else  /* !CONFIG_PPC_ADV_DEBUG_REGS */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
-static void set_breakpoint(struct arch_hw_breakpoint *brk)
+static void set_breakpoint(int i, struct arch_hw_breakpoint *brk)
 {
preempt_disable();
-   __set_breakpoint(0, brk);
+   __set_breakpoint(i, brk);
preempt_enable();
 }
 
 static void set_debug_reg_defaults(struct thread_struct *thread)
 {
-   thread->hw_brk.address = 0;
-   thread->hw_brk.type = 0;
-   thread->hw_brk.len = 0;
-   thread->hw_brk.hw_len = 0;
-   if (ppc_breakpoint_available())
-   set_breakpoint(&thread->hw_brk);
+   int i;
+   struct arch_hw_breakpoint null_brk = {0};
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   thread->hw_brk[i] = null_brk;
+   if (ppc_breakpoint_available())
+   set_breakpoint(i, &thread->hw_brk[i]);
+   }
+}
+
+static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
+   struct arch_hw_breakpoint *b)
+{
+   if (a->address != b->address)
+   return false;
+   if (a->type != b->type)
+   return false;
+   if (a->len != b->len)
+   return false;
+   /* no need to check hw_len. it's calculated from address and len */
+   return true;
+}
+
+static void switch_hw_breakpoint(struct task_struct *new)
+{
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (likely(hw_brk_match(this_cpu_ptr(&current_brk[i]),
+   &new->thread.hw_brk[i])))
+   continue;
+
+   __set_breakpoint(i, &new->thread.hw_brk[i]);
+   }
 }
 #endif /* !CONFIG_HAVE_HW_BREAKPOINT */
 #endif /* CONFIG_PPC_ADV_DEBUG_REGS */
@@ -829,19 +857,6 @@ bool ppc_breakpoint_available(void)
 }
 EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
 
-static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
- struct arch_hw_breakpoint *b)
-{
-   if (a->address != b->address)
-   return false;
-   if (a->type != b->type)
-   return false;
-   if (a->len != b->len)
-   return false;
-   /* no need to check hw_len. it's calculated from address and len */
-   return true;
-}
-
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 
 static inline bool tm_enabled(struct task_struct *tsk)
@@ -1174,8 +1189,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
  * schedule DABR
  */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
-   if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk[0]), &new->thread.hw_brk)))
-   __set_breakpoint(0, &new->thread.hw_brk);
+   switch_hw_breakpoint(new);
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #endif
 
diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c 
b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
index 12962302d6a4..0dbb35392dd2 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -67,11 +67,16 @@ int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
/* We only support one DABR and no IABRS at the moment */
if (addr > 0)
return -EINVAL;
-   dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
-(child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
+   dabr_fake = ((child->thread.hw_brk[0].address & (~HW_BRK_TYPE_DABR)) |
+(child->thread.hw_brk[0].type & HW_BRK_TYPE_DABR));

[PATCH v5 15/16] powerpc/watchpoint/xmon: Don't allow breakpoint overwriting

2020-05-10 Thread Ravi Bangoria
Xmon allows overwriting breakpoints because it supports only one
DAWR. But with multiple DAWRs, overwriting becomes ambiguous or
unnecessarily complicated, so don't allow it.

Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/xmon/xmon.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index d8c0f01e4b24..99e9138661e4 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1382,6 +1382,10 @@ bpt_cmds(void)
printf("Hardware data breakpoint not supported on this 
cpu\n");
break;
}
+   if (dabr.enabled) {
+   printf("Couldn't find free breakpoint register\n");
+   break;
+   }
mode = 7;
cmd = inchar();
if (cmd == 'r')
-- 
2.21.1



[PATCH v5 10/16] powerpc/watchpoint: Use loop for thread_struct->ptrace_bps

2020-05-10 Thread Ravi Bangoria
ptrace_bps is already an array of size HBP_NUM_MAX, but we use a
hardcoded index 0 while fetching/updating it. Convert such code to
loop over the array.

The ptrace interface for using multiple watchpoints remains the same,
e.g. two PPC_PTRACE_SETHWDEBUG calls will create two watchpoints if
the underlying hardware supports it.
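
A minimal userspace sketch of that interface (not part of this patch; it
assumes the tracer is already attached and the tracee is stopped, and
omits error handling). Each PPC_PTRACE_SETHWDEBUG call returns a positive
handle that can later be passed to PPC_PTRACE_DELHWDEBUG:

#include <string.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <asm/ptrace.h>

static long set_watch(pid_t pid, unsigned long addr)
{
	struct ppc_hw_breakpoint bp;

	memset(&bp, 0, sizeof(bp));
	bp.version = PPC_DEBUG_CURRENT_VERSION;
	bp.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
	bp.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
	bp.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
	bp.addr = addr;

	/* Returns a handle (1, 2, ...) identifying the watchpoint. */
	return ptrace(PPC_PTRACE_SETHWDEBUG, pid, 0, &bp);
}

/* With this series, two set_watch() calls on different addresses can
 * both succeed when the hardware provides two DAWRs. */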

Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/kernel/hw_breakpoint.c   |  7 --
 arch/powerpc/kernel/process.c |  6 -
 arch/powerpc/kernel/ptrace/ptrace-noadv.c | 28 +--
 3 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 5826f1f2cab9..772b2c953220 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -419,10 +419,13 @@ NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify);
  */
 void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
 {
+   int i;
struct thread_struct *t = &tsk->thread;
 
-   unregister_hw_breakpoint(t->ptrace_bps[0]);
-   t->ptrace_bps[0] = NULL;
+   for (i = 0; i < nr_wp_slots(); i++) {
+   unregister_hw_breakpoint(t->ptrace_bps[i]);
+   t->ptrace_bps[i] = NULL;
+   }
 }
 
 void hw_breakpoint_pmu_read(struct perf_event *bp)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6d1b7cede900..41a59a37383b 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1604,6 +1604,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
void (*f)(void);
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
struct thread_info *ti = task_thread_info(p);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+   int i;
+#endif
 
klp_init_thread_info(p);
 
@@ -1663,7 +1666,8 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
p->thread.ksp_limit = (unsigned long)end_of_stack(p);
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-   p->thread.ptrace_bps[0] = NULL;
+   for (i = 0; i < nr_wp_slots(); i++)
+   p->thread.ptrace_bps[i] = NULL;
 #endif
 
p->thread.fp_save_area = NULL;
diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c 
b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
index 0dbb35392dd2..08cb8c1b504c 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -168,6 +168,19 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned l
return 0;
 }
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static int find_empty_ptrace_bp(struct thread_struct *thread)
+{
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (!thread->ptrace_bps[i])
+   return i;
+   }
+   return -1;
+}
+#endif
+
 static int find_empty_hw_brk(struct thread_struct *thread)
 {
int i;
@@ -217,8 +230,9 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf
len = 1;
else
return -EINVAL;
-   bp = thread->ptrace_bps[0];
-   if (bp)
+
+   i = find_empty_ptrace_bp(thread);
+   if (i < 0)
return -ENOSPC;
 
/* Create a new breakpoint request if one doesn't exist already */
@@ -228,13 +242,13 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf
arch_bp_generic_fields(brk.type, &attr.bp_type);
 
bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child);
-   thread->ptrace_bps[0] = bp;
+   thread->ptrace_bps[i] = bp;
if (IS_ERR(bp)) {
-   thread->ptrace_bps[0] = NULL;
+   thread->ptrace_bps[i] = NULL;
return PTR_ERR(bp);
}
 
-   return 1;
+   return i + 1;
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
@@ -263,10 +277,10 @@ long ppc_del_hwdebug(struct task_struct *child, long data)
return -EINVAL;
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-   bp = thread->ptrace_bps[0];
+   bp = thread->ptrace_bps[data - 1];
if (bp) {
unregister_hw_breakpoint(bp);
-   thread->ptrace_bps[0] = NULL;
+   thread->ptrace_bps[data - 1] = NULL;
} else {
ret = -ENOENT;
}
-- 
2.21.1



[PATCH v5 13/16] powerpc/watchpoint: Prepare handler to handle more than one watchpoint

2020-05-10 Thread Ravi Bangoria
Currently we assume that we have only one watchpoint supported by hw.
Get rid of that assumption and use a dynamic loop instead. This should
make supporting more watchpoints very easy.

With more than one watchpoint, the exception handler needs to know
which DAWR caused the exception, and hw currently does not provide it.
So we need sw logic for that. To figure out which DAWR caused the
exception, check all the different combinations of user-specified
range, DAWR address range, actual access range and DAWRX constraints.
For example, if the user-specified range and the actual access range
overlap, but DAWRX is configured for a read-only watchpoint and the
instruction is a store, this DAWR must not have caused the exception.
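
A rough sketch of that per-DAWR check (illustrative only; the helper name
and signature below are not the patch's actual code, and it assumes the
HW_BRK_TYPE_READ/WRITE flag bits from asm/hw_breakpoint.h). Given the
access range derived from emulating the instruction and the watchpoint's
configured range/type, a DAWR is ruled out if the ranges don't overlap or
if the access direction doesn't match the DAWRX read/write setting:

static bool dawr_could_have_caused(unsigned long wp_start, unsigned long wp_len,
				   unsigned long ea, int access_len,
				   bool is_store, int wp_type)
{
	unsigned long wp_end = wp_start + wp_len;
	unsigned long access_end = ea + access_len;

	/* No overlap between access range and watched range: not this DAWR. */
	if (ea >= wp_end || access_end <= wp_start)
		return false;

	/* A store can't trigger a read-only watchpoint, and vice versa. */
	if (is_store && !(wp_type & HW_BRK_TYPE_WRITE))
		return false;
	if (!is_store && !(wp_type & HW_BRK_TYPE_READ))
		return false;

	return true;
}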

Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/include/asm/processor.h |   2 +-
 arch/powerpc/include/asm/sstep.h |   2 +
 arch/powerpc/kernel/hw_breakpoint.c  | 400 +--
 arch/powerpc/kernel/process.c|   3 -
 4 files changed, 315 insertions(+), 92 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h 
b/arch/powerpc/include/asm/processor.h
index 668c02c67b61..251f50eec9fa 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -185,7 +185,7 @@ struct thread_struct {
 * Helps identify source of single-step exception and subsequent
 * hw-breakpoint enablement
 */
-   struct perf_event *last_hit_ubp;
+   struct perf_event *last_hit_ubp[HBP_NUM_MAX];
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint info */
unsigned long   trap_nr;/* last trap # on this thread */
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index 769f055509c9..38919b27a6fa 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -48,6 +48,8 @@ enum instruction_type {
 
 #define INSTR_TYPE_MASK0x1f
 
+#define OP_IS_LOAD(type)   ((LOAD <= (type) && (type) <= LOAD_VSX) || (type) == LARX)
+#define OP_IS_STORE(type)  ((STORE <= (type) && (type) <= STORE_VSX) || (type) == STCX)
 #define OP_IS_LOAD_STORE(type) (LOAD <= (type) && (type) <= STCX)
 
 /* Compute flags, ORed in with type */
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index ab0dd22fed5f..28d57d841642 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -30,7 +30,7 @@
  * Stores the breakpoints currently in use on each breakpoint address
  * register for every cpu
  */
-static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM_MAX]);
 
 /*
  * Returns total number of data or instruction breakpoints available.
@@ -42,6 +42,17 @@ int hw_breakpoint_slots(int type)
return 0;   /* no instruction breakpoints available */
 }
 
+static bool single_step_pending(void)
+{
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   if (current->thread.last_hit_ubp[i])
+   return true;
+   }
+   return false;
+}
+
 /*
  * Install a perf counter breakpoint.
  *
@@ -54,16 +65,26 @@ int hw_breakpoint_slots(int type)
 int arch_install_hw_breakpoint(struct perf_event *bp)
 {
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-   struct perf_event **slot = this_cpu_ptr(&bp_per_reg);
+   struct perf_event **slot;
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++) {
+   slot = this_cpu_ptr(&bp_per_reg[i]);
+   if (!*slot) {
+   *slot = bp;
+   break;
+   }
+   }
 
-   *slot = bp;
+   if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
+   return -EBUSY;
 
/*
 * Do not install DABR values if the instruction must be single-stepped.
 * If so, DABR will be populated in single_step_dabr_instruction().
 */
-   if (current->thread.last_hit_ubp != bp)
-   __set_breakpoint(0, info);
+   if (!single_step_pending())
+   __set_breakpoint(i, info);
 
return 0;
 }
@@ -79,15 +100,22 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
  */
 void arch_uninstall_hw_breakpoint(struct perf_event *bp)
 {
-   struct perf_event **slot = this_cpu_ptr(&bp_per_reg);
+   struct arch_hw_breakpoint null_brk = {0};
+   struct perf_event **slot;
+   int i;
 
-   if (*slot != bp) {
-   WARN_ONCE(1, "Can't find the breakpoint");
-   return;
+   for (i = 0; i < nr_wp_slots(); i++) {
+   slot = this_cpu_ptr(&bp_per_reg[i]);
+   if (*slot == bp) {
+   *slot = NULL;
+   break;
+   }
}
 
-   *slot = NULL;
-   hw_breakpoint_disable();
+   if (WARN_ONCE(i == nr_wp_slots(), "Can't find 

[PATCH v5 14/16] powerpc/watchpoint: Don't allow concurrent perf and ptrace events

2020-05-10 Thread Ravi Bangoria
With Book3s DAWR, ptrace and perf watchpoints on powerpc behave
differently. A ptrace watchpoint works in one-shot mode and generates
a signal before the instruction executes; it's the ptrace user's job
to single-step the instruction and re-enable the watchpoint. OTOH, for
a perf watchpoint, the kernel emulates/single-steps the instruction
and then generates the event. If perf and ptrace create two events
with the same or overlapping address ranges, it's ambiguous who should
single-step the instruction. Because of this issue, don't allow a perf
and a ptrace watchpoint at the same time if their address ranges
overlap.
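
The rule can be summarised with a small sketch (illustrative, not the
patch's exact code): a new breakpoint is refused only when it comes from
the other infrastructure (perf vs ptrace) and its byte range overlaps an
existing one:

static bool ranges_overlap(unsigned long a_start, unsigned long a_end,
			   unsigned long b_start, unsigned long b_end)
{
	return a_start < b_end && a_end > b_start;
}

static bool can_register(bool new_is_ptrace, unsigned long new_start, unsigned long new_end,
			 bool old_is_ptrace, unsigned long old_start, unsigned long old_end)
{
	/* Same infrastructure (both perf or both ptrace): always allowed. */
	if (new_is_ptrace == old_is_ptrace)
		return true;
	/* Different infrastructure: allowed only if the ranges don't overlap. */
	return !ranges_overlap(new_start, new_end, old_start, old_end);
}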

Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/include/asm/hw_breakpoint.h |   2 +
 arch/powerpc/kernel/hw_breakpoint.c  | 221 +++
 kernel/events/hw_breakpoint.c|  16 ++
 3 files changed, 239 insertions(+)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index add5aa076919..f42a55eb77d2 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -70,6 +70,8 @@ extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
 int arch_install_hw_breakpoint(struct perf_event *bp);
 void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+int arch_reserve_bp_slot(struct perf_event *bp);
+void arch_release_bp_slot(struct perf_event *bp);
 void arch_unregister_hw_breakpoint(struct perf_event *bp);
 void hw_breakpoint_pmu_read(struct perf_event *bp);
 extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 28d57d841642..c8623708c9c7 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -123,6 +123,227 @@ static bool is_ptrace_bp(struct perf_event *bp)
return bp->overflow_handler == ptrace_triggered;
 }
 
+struct breakpoint {
+   struct list_head list;
+   struct perf_event *bp;
+   bool ptrace_bp;
+};
+
+static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
+static LIST_HEAD(task_bps);
+
+static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
+{
+   struct breakpoint *tmp;
+
+   tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+   if (!tmp)
+   return ERR_PTR(-ENOMEM);
+   tmp->bp = bp;
+   tmp->ptrace_bp = is_ptrace_bp(bp);
+   return tmp;
+}
+
+static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2)
+{
+   __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr;
+
+   bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE);
+   bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE);
+   bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE);
+   bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE);
+
+   return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr);
+}
+
+static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp)
+{
+   return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp;
+}
+
+static bool can_co_exist(struct breakpoint *b, struct perf_event *bp)
+{
+   return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp));
+}
+
+static int task_bps_add(struct perf_event *bp)
+{
+   struct breakpoint *tmp;
+
+   tmp = alloc_breakpoint(bp);
+   if (IS_ERR(tmp))
+   return PTR_ERR(tmp);
+
+   list_add(&tmp->list, &task_bps);
+   return 0;
+}
+
+static void task_bps_remove(struct perf_event *bp)
+{
+   struct list_head *pos, *q;
+
+   list_for_each_safe(pos, q, &task_bps) {
+   struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);
+
+   if (tmp->bp == bp) {
+   list_del(&tmp->list);
+   kfree(tmp);
+   break;
+   }
+   }
+}
+
+/*
+ * If any task has breakpoint from alternate infrastructure,
+ * return true. Otherwise return false.
+ */
+static bool all_task_bps_check(struct perf_event *bp)
+{
+   struct breakpoint *tmp;
+
+   list_for_each_entry(tmp, &task_bps, list) {
+   if (!can_co_exist(tmp, bp))
+   return true;
+   }
+   return false;
+}
+
+/*
+ * If same task has breakpoint from alternate infrastructure,
+ * return true. Otherwise return false.
+ */
+static bool same_task_bps_check(struct perf_event *bp)
+{
+   struct breakpoint *tmp;
+
+   list_for_each_entry(tmp, &task_bps, list) {
+   if (tmp->bp->hw.target == bp->hw.target &&
+   !can_co_exist(tmp, bp))
+   return true;
+   }
+   return false;
+}
+
+static int cpu_bps_add(struct perf_event *bp)
+{
+   struct breakpoint **cpu_bp;
+   struct breakpoint *tmp;
+   int i = 0;
+
+   tmp = 

[PATCH v5 08/16] powerpc/watchpoint: Disable all available watchpoints when !dawr_force_enable

2020-05-10 Thread Ravi Bangoria
Instead of disabling only the first watchpoint, disable all available
watchpoints while clearing dawr_force_enable.

The callback function is used only for disabling watchpoints, so
rename it to disable_dawrs_cb(). And the null_brk parameter is not
really required while disabling watchpoints, so remove it.

Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/kernel/dawr.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index 8114ad3a8574..500f52fa4711 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -50,9 +50,13 @@ int set_dawr(int nr, struct arch_hw_breakpoint *brk)
return 0;
 }
 
-static void set_dawr_cb(void *info)
+static void disable_dawrs_cb(void *info)
 {
-   set_dawr(0, info);
+   struct arch_hw_breakpoint null_brk = {0};
+   int i;
+
+   for (i = 0; i < nr_wp_slots(); i++)
+   set_dawr(i, &null_brk);
 }
 
 static ssize_t dawr_write_file_bool(struct file *file,
@@ -74,7 +78,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
 
/* If we are clearing, make sure all CPUs have the DAWR cleared */
if (!dawr_force_enable)
-   smp_call_function(set_dawr_cb, &null_brk, 0);
+   smp_call_function(disable_dawrs_cb, NULL, 0);
 
return rc;
 }
-- 
2.21.1



[PATCH v5 12/16] powerpc/watchpoint: Use builtin ALIGN*() macros

2020-05-10 Thread Ravi Bangoria
Currently we calculate the hw-aligned start and end addresses
manually. Replace them with the builtin ALIGN_DOWN() and ALIGN()
macros.

So far end_addr was inclusive, but this patch makes it exclusive (by
avoiding the -1) for better readability.
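
A small self-contained demo of the equivalence (userspace C, with the
macros open-coded the way they behave for power-of-two sizes); the only
behavioural change is that end_addr becomes exclusive:

#include <stdio.h>

#define SIZE 8UL
#define ALIGN_DOWN(x, a)  ((x) & ~((a) - 1))
#define ALIGN(x, a)       (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long addr = 0x102, len = 5;

	unsigned long old_start = addr & ~(SIZE - 1);              /* inclusive */
	unsigned long old_end   = (addr + len - 1) | (SIZE - 1);   /* inclusive */
	unsigned long new_start = ALIGN_DOWN(addr, SIZE);          /* inclusive */
	unsigned long new_end   = ALIGN(addr + len, SIZE);         /* exclusive */

	/* new_end == old_end + 1, so both describe the same bytes. */
	printf("old: [%#lx, %#lx]  new: [%#lx, %#lx)\n",
	       old_start, old_end, new_start, new_end);
	return 0;
}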

Suggested-by: Christophe Leroy 
Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/include/asm/hw_breakpoint.h  |  5 +++--
 arch/powerpc/kernel/hw_breakpoint.c   | 12 ++--
 arch/powerpc/kernel/process.c |  8 
 arch/powerpc/kernel/ptrace/ptrace-noadv.c |  2 +-
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index d472b2eb757e..add5aa076919 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -34,10 +34,11 @@ struct arch_hw_breakpoint {
 #define HW_BRK_TYPE_PRIV_ALL   (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
 HW_BRK_TYPE_HYP)
 
+/* Minimum granularity */
 #ifdef CONFIG_PPC_8xx
-#define HW_BREAKPOINT_ALIGN 0x3
+#define HW_BREAKPOINT_SIZE  0x4
 #else
-#define HW_BREAKPOINT_ALIGN 0x7
+#define HW_BREAKPOINT_SIZE  0x8
 #endif
 
 #define DABR_MAX_LEN   8
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 319a761b7412..ab0dd22fed5f 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -145,10 +145,10 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
  *<---8 bytes--->
  *
  * In this case, we should configure hw as:
- *   start_addr = address & ~HW_BREAKPOINT_ALIGN
+ *   start_addr = address & ~(HW_BREAKPOINT_SIZE - 1)
  *   len = 16 bytes
  *
- * @start_addr and @end_addr are inclusive.
+ * @start_addr is inclusive but @end_addr is exclusive.
  */
 static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
 {
@@ -156,14 +156,14 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
u16 hw_len;
unsigned long start_addr, end_addr;
 
-   start_addr = hw->address & ~HW_BREAKPOINT_ALIGN;
-   end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN;
-   hw_len = end_addr - start_addr + 1;
+   start_addr = ALIGN_DOWN(hw->address, HW_BREAKPOINT_SIZE);
+   end_addr = ALIGN(hw->address + hw->len, HW_BREAKPOINT_SIZE);
+   hw_len = end_addr - start_addr;
 
if (dawr_enabled()) {
max_len = DAWR_MAX_LEN;
/* DAWR region can't cross 512 bytes boundary */
-   if ((start_addr >> 9) != (end_addr >> 9))
+   if (ALIGN(start_addr, SZ_512M) != ALIGN(end_addr - 1, SZ_512M))
return -EINVAL;
} else if (IS_ENABLED(CONFIG_PPC_8xx)) {
/* 8xx can setup a range without limitation */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 41a59a37383b..dcf9c5b4ac59 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -800,12 +800,12 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
unsigned long lctrl1 = LCTRL1_CTE_GT | LCTRL1_CTF_LT | LCTRL1_CRWE_RW |
   LCTRL1_CRWF_RW;
unsigned long lctrl2 = LCTRL2_LW0EN | LCTRL2_LW0LADC | LCTRL2_SLW0EN;
-   unsigned long start_addr = brk->address & ~HW_BREAKPOINT_ALIGN;
-   unsigned long end_addr = (brk->address + brk->len - 1) | HW_BREAKPOINT_ALIGN;
+   unsigned long start_addr = ALIGN_DOWN(brk->address, HW_BREAKPOINT_SIZE);
+   unsigned long end_addr = ALIGN(brk->address + brk->len, HW_BREAKPOINT_SIZE);
 
if (start_addr == 0)
lctrl2 |= LCTRL2_LW0LA_F;
-   else if (end_addr == ~0U)
+   else if (end_addr == 0)
lctrl2 |= LCTRL2_LW0LA_E;
else
lctrl2 |= LCTRL2_LW0LA_EandF;
@@ -821,7 +821,7 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
lctrl1 |= LCTRL1_CRWE_WO | LCTRL1_CRWF_WO;
 
mtspr(SPRN_CMPE, start_addr - 1);
-   mtspr(SPRN_CMPF, end_addr + 1);
+   mtspr(SPRN_CMPF, end_addr);
mtspr(SPRN_LCTRL1, lctrl1);
mtspr(SPRN_LCTRL2, lctrl2);
 
diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c 
b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
index 08cb8c1b504c..697c7e4b5877 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -216,7 +216,7 @@ long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_inf
if ((unsigned long)bp_info->addr >= TASK_SIZE)
return -EIO;
 
-   brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN;
+   brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE);
brk.type = HW_BRK_TYPE_TRANSLATE;
brk.len = DABR_MAX_LEN;
if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
-- 
2.21.1



[PATCH v5 07/16] powerpc/watchpoint: Get watchpoint count dynamically while disabling them

2020-05-10 Thread Ravi Bangoria
Instead of disabling only one watchpoint, get the number of available
watchpoints dynamically and disable all of them.

Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/include/asm/hw_breakpoint.h | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 1120c7d9db58..d472b2eb757e 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -78,14 +78,14 @@ extern void ptrace_triggered(struct perf_event *bp,
struct perf_sample_data *data, struct pt_regs *regs);
 static inline void hw_breakpoint_disable(void)
 {
-   struct arch_hw_breakpoint brk;
-
-   brk.address = 0;
-   brk.type = 0;
-   brk.len = 0;
-   brk.hw_len = 0;
-   if (ppc_breakpoint_available())
-   __set_breakpoint(0, &brk);
+   int i;
+   struct arch_hw_breakpoint null_brk = {0};
+
+   if (!ppc_breakpoint_available())
+   return;
+
+   for (i = 0; i < nr_wp_slots(); i++)
+   __set_breakpoint(i, &null_brk);
 }
 extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
 int hw_breakpoint_handler(struct die_args *args);
-- 
2.21.1



[PATCH v5 11/16] powerpc/watchpoint: Introduce is_ptrace_bp() function

2020-05-10 Thread Ravi Bangoria
Introduce an is_ptrace_bp() function and move the check inside it.
It will be used more in later patches of this series.

Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/kernel/hw_breakpoint.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 772b2c953220..319a761b7412 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -90,6 +90,11 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
hw_breakpoint_disable();
 }
 
+static bool is_ptrace_bp(struct perf_event *bp)
+{
+   return bp->overflow_handler == ptrace_triggered;
+}
+
 /*
  * Perform cleanup of arch-specific counters during unregistration
  * of the perf-event
@@ -324,7 +329,7 @@ int hw_breakpoint_handler(struct die_args *args)
 * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
 * generated in do_dabr().
 */
-   if (bp->overflow_handler == ptrace_triggered) {
+   if (is_ptrace_bp(bp)) {
perf_bp_event(bp, regs);
rc = NOTIFY_DONE;
goto out;
-- 
2.21.1



[PATCH v5 05/16] powerpc/watchpoint: Provide DAWR number to set_dawr

2020-05-10 Thread Ravi Bangoria
Introduce a new parameter 'nr' to set_dawr() which indicates which
DAWR should be programmed.

Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/include/asm/hw_breakpoint.h |  4 ++--
 arch/powerpc/kernel/dawr.c   | 15 ++-
 arch/powerpc/kernel/process.c|  2 +-
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 518b41eef924..5b3b02834e0b 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -104,10 +104,10 @@ static inline bool dawr_enabled(void)
 {
return dawr_force_enable;
 }
-int set_dawr(struct arch_hw_breakpoint *brk);
+int set_dawr(int nr, struct arch_hw_breakpoint *brk);
 #else
 static inline bool dawr_enabled(void) { return false; }
-static inline int set_dawr(struct arch_hw_breakpoint *brk) { return -1; }
+static inline int set_dawr(int nr, struct arch_hw_breakpoint *brk) { return -1; }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index e91b613bf137..8114ad3a8574 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -16,7 +16,7 @@
 bool dawr_force_enable;
 EXPORT_SYMBOL_GPL(dawr_force_enable);
 
-int set_dawr(struct arch_hw_breakpoint *brk)
+int set_dawr(int nr, struct arch_hw_breakpoint *brk)
 {
unsigned long dawr, dawrx, mrd;
 
@@ -39,15 +39,20 @@ int set_dawr(struct arch_hw_breakpoint *brk)
if (ppc_md.set_dawr)
return ppc_md.set_dawr(dawr, dawrx);
 
-   mtspr(SPRN_DAWR0, dawr);
-   mtspr(SPRN_DAWRX0, dawrx);
+   if (nr == 0) {
+   mtspr(SPRN_DAWR0, dawr);
+   mtspr(SPRN_DAWRX0, dawrx);
+   } else {
+   mtspr(SPRN_DAWR1, dawr);
+   mtspr(SPRN_DAWRX1, dawrx);
+   }
 
return 0;
 }
 
 static void set_dawr_cb(void *info)
 {
-   set_dawr(info);
+   set_dawr(0, info);
 }
 
 static ssize_t dawr_write_file_bool(struct file *file,
@@ -60,7 +65,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
/* Send error to user if they hypervisor won't allow us to write DAWR */
if (!dawr_force_enable &&
firmware_has_feature(FW_FEATURE_LPAR) &&
-   set_dawr(&null_brk) != H_SUCCESS)
+   set_dawr(0, &null_brk) != H_SUCCESS)
return -ENODEV;
 
rc = debugfs_write_file_bool(file, user_buf, count, ppos);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 8479c762aef2..7488adf4d61c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -806,7 +806,7 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
 
if (dawr_enabled())
// Power8 or later
-   set_dawr(brk);
+   set_dawr(0, brk);
else if (IS_ENABLED(CONFIG_PPC_8xx))
set_breakpoint_8xx(brk);
else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
-- 
2.21.1



[PATCH v5 06/16] powerpc/watchpoint: Provide DAWR number to __set_breakpoint

2020-05-10 Thread Ravi Bangoria
Introduce a new parameter 'nr' to __set_breakpoint() which indicates
which DAWR should be programmed. Also convert the current_brk variable
to an array.

Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/include/asm/debug.h |  2 +-
 arch/powerpc/include/asm/hw_breakpoint.h |  2 +-
 arch/powerpc/kernel/hw_breakpoint.c  |  8 
 arch/powerpc/kernel/process.c| 14 +++---
 arch/powerpc/kernel/signal.c |  2 +-
 arch/powerpc/xmon/xmon.c |  2 +-
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index 7756026b95ca..ec57daf87f40 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -45,7 +45,7 @@ static inline int debugger_break_match(struct pt_regs *regs) { return 0; }
 static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 #endif
 
-void __set_breakpoint(struct arch_hw_breakpoint *brk);
+void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk);
 bool ppc_breakpoint_available(void);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 extern void do_send_trap(struct pt_regs *regs, unsigned long address,
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h 
b/arch/powerpc/include/asm/hw_breakpoint.h
index 5b3b02834e0b..1120c7d9db58 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -85,7 +85,7 @@ static inline void hw_breakpoint_disable(void)
brk.len = 0;
brk.hw_len = 0;
if (ppc_breakpoint_available())
-   __set_breakpoint(&brk);
+   __set_breakpoint(0, &brk);
 }
 extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
 int hw_breakpoint_handler(struct die_args *args);
diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 4120349e2abe..5826f1f2cab9 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -63,7 +63,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
 * If so, DABR will be populated in single_step_dabr_instruction().
 */
if (current->thread.last_hit_ubp != bp)
-   __set_breakpoint(info);
+   __set_breakpoint(0, info);
 
return 0;
 }
@@ -221,7 +221,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
 
info = counter_arch_bp(tsk->thread.last_hit_ubp);
regs->msr &= ~MSR_SE;
-   __set_breakpoint(info);
+   __set_breakpoint(0, info);
tsk->thread.last_hit_ubp = NULL;
 }
 
@@ -346,7 +346,7 @@ int hw_breakpoint_handler(struct die_args *args)
if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
perf_bp_event(bp, regs);
 
-   __set_breakpoint(info);
+   __set_breakpoint(0, info);
 out:
rcu_read_unlock();
return rc;
@@ -379,7 +379,7 @@ static int single_step_dabr_instruction(struct die_args *args)
if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
perf_bp_event(bp, regs);
 
-   __set_breakpoint(info);
+   __set_breakpoint(0, info);
current->thread.last_hit_ubp = NULL;
 
/*
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7488adf4d61c..351fbd8d2c5b 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -637,7 +637,7 @@ void do_break (struct pt_regs *regs, unsigned long address,
 }
 #endif /* CONFIG_PPC_ADV_DEBUG_REGS */
 
-static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk);
+static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk[HBP_NUM_MAX]);
 
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 /*
@@ -714,7 +714,7 @@ EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
 static void set_breakpoint(struct arch_hw_breakpoint *brk)
 {
preempt_disable();
-   __set_breakpoint(brk);
+   __set_breakpoint(0, brk);
preempt_enable();
 }
 
@@ -800,13 +800,13 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
return 0;
 }
 
-void __set_breakpoint(struct arch_hw_breakpoint *brk)
+void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
 {
-   memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
+   memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
 
if (dawr_enabled())
// Power8 or later
-   set_dawr(0, brk);
+   set_dawr(nr, brk);
else if (IS_ENABLED(CONFIG_PPC_8xx))
set_breakpoint_8xx(brk);
else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
@@ -1174,8 +1174,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
  * schedule DABR
  */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
-   if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk), &new->thread.hw_brk)))
-   __set_breakpoint(&new->thread.hw_brk);
+   if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk[0]), &new->thread.hw_brk)))
+   __set_breakpoint(0, &new->thread.hw_brk);
 #endif /* 

[PATCH v5 04/16] powerpc/watchpoint/ptrace: Return actual num of available watchpoints

2020-05-10 Thread Ravi Bangoria
The user can ask for the number of available watchpoints
(dbginfo.num_data_bps) using ptrace(PPC_PTRACE_GETHWDBGINFO). Return
the actual number of available watchpoints on the machine rather than
a hardcoded 1.
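
For reference, a minimal sketch of how a tracer reads that count
(assuming it is already attached to the child; error handling omitted):

#include <stdio.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <asm/ptrace.h>

static void show_wp_count(pid_t pid)
{
	struct ppc_debug_info info;

	if (ptrace(PPC_PTRACE_GETHWDBGINFO, pid, 0, &info) == 0)
		printf("data watchpoints available: %u\n", info.num_data_bps);
}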

Signed-off-by: Ravi Bangoria 
Reviewed-by: Michael Neuling 
---
 arch/powerpc/kernel/ptrace/ptrace-noadv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c 
b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
index f87e7c5c3bf3..12962302d6a4 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-noadv.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -44,7 +44,7 @@ void ppc_gethwdinfo(struct ppc_debug_info *dbginfo)
dbginfo->version = 1;
dbginfo->num_instruction_bps = 0;
if (ppc_breakpoint_available())
-   dbginfo->num_data_bps = 1;
+   dbginfo->num_data_bps = nr_wp_slots();
else
dbginfo->num_data_bps = 0;
dbginfo->num_condition_regs = 0;
-- 
2.21.1


