Re: [PATCH v14 3/9] fw_cfg: fix sparse warnings in fw_cfg_sel_endianness()

2018-02-15 Thread Marc-Andre Lureau
On Wed, Feb 14, 2018 at 9:46 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 14, 2018 at 03:18:44PM +0100, Marc-André Lureau wrote:
>> The function is used for both LE & BE target type, use __force casting.
>>
>> Fixes:
>> $ make C=1 CF=-D__CHECK_ENDIAN__ drivers/firmware/qemu_fw_cfg.o
>>
>> drivers/firmware/qemu_fw_cfg.c:55:33: warning: restricted __be16 degrades to 
>> integer
>> drivers/firmware/qemu_fw_cfg.c:55:52: warning: restricted __le16 degrades to 
>> integer
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index 90f467232777..85e693287d87 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -52,7 +52,9 @@ static DEFINE_MUTEX(fw_cfg_dev_lock);
>>  /* pick appropriate endianness for selector key */
>>  static inline u16 fw_cfg_sel_endianness(u16 key)
>>  {
>> - return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
>> + return fw_cfg_is_mmio ?
>> + (u16 __force)cpu_to_be16(key) :
>> + (u16 __force)cpu_to_le16(key);
>>  }
>>
>>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
>
> Well the caller does cpu_to_le16 on the result ...
> All this makes my head spin.
>
> IMHO what you want is a wrapper that does iowrite and iowritebe
> rather than __force.

iowrite16(key) is the same as iowrite16(cpu_to_le16(key)) ? There is
no iowrite16le()...

Is this equivalent, and not introducing regressions?

static inline u16 fw_cfg_sel_endianness(u16 key)
+static void fw_cfg_sel_endianness(u16 key)
 {
-   return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
+   if (fw_cfg_is_mmio)
+   iowrite16be(key, fw_cfg_reg_ctrl);
+   else
+   iowrite16(key, fw_cfg_reg_ctrl);
 }

 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
@@ -74,7 +77,7 @@ static inline void fw_cfg_read_blob(u16 key,
}

mutex_lock(_cfg_dev_lock);
-   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   fw_cfg_sel_endianness(key);

>
>
>> --
>> 2.16.1.73.g5832b7e9f2


Re: [PATCH v14 3/9] fw_cfg: fix sparse warnings in fw_cfg_sel_endianness()

2018-02-15 Thread Marc-Andre Lureau
On Wed, Feb 14, 2018 at 9:46 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 14, 2018 at 03:18:44PM +0100, Marc-André Lureau wrote:
>> The function is used for both LE & BE target type, use __force casting.
>>
>> Fixes:
>> $ make C=1 CF=-D__CHECK_ENDIAN__ drivers/firmware/qemu_fw_cfg.o
>>
>> drivers/firmware/qemu_fw_cfg.c:55:33: warning: restricted __be16 degrades to 
>> integer
>> drivers/firmware/qemu_fw_cfg.c:55:52: warning: restricted __le16 degrades to 
>> integer
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 4 +++-
>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index 90f467232777..85e693287d87 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -52,7 +52,9 @@ static DEFINE_MUTEX(fw_cfg_dev_lock);
>>  /* pick appropriate endianness for selector key */
>>  static inline u16 fw_cfg_sel_endianness(u16 key)
>>  {
>> - return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
>> + return fw_cfg_is_mmio ?
>> + (u16 __force)cpu_to_be16(key) :
>> + (u16 __force)cpu_to_le16(key);
>>  }
>>
>>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
>
> Well the caller does cpu_to_le16 on the result ...
> All this makes my head spin.
>
> IMHO what you want is a wrapper that does iowrite and iowritebe
> rather than __force.

iowrite16(key) is the same as iowrite16(cpu_to_le16(key)) ? There is
no iowrite16le()...

Is this equivalent, and not introducing regressions?

static inline u16 fw_cfg_sel_endianness(u16 key)
+static void fw_cfg_sel_endianness(u16 key)
 {
-   return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
+   if (fw_cfg_is_mmio)
+   iowrite16be(key, fw_cfg_reg_ctrl);
+   else
+   iowrite16(key, fw_cfg_reg_ctrl);
 }

 /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
@@ -74,7 +77,7 @@ static inline void fw_cfg_read_blob(u16 key,
}

mutex_lock(_cfg_dev_lock);
-   iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
+   fw_cfg_sel_endianness(key);

>
>
>> --
>> 2.16.1.73.g5832b7e9f2


Re: [PATCH v14 2/9] fw_cfg: add a public uapi header

2018-02-15 Thread Marc-Andre Lureau
Hi

On Wed, Feb 14, 2018 at 8:37 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 14, 2018 at 03:18:43PM +0100, Marc-André Lureau wrote:
>> Create a common header file for well-known values and structures to be
>> shared by the Linux kernel with qemu or other projects.
>>
>> Suggested-by: Michael S. Tsirkin 
>> Signed-off-by: Marc-André Lureau 
>>
>> ---
>>
>> The related qemu patch making use of it, to be submitted:
>> https://github.com/elmarco/qemu/commit/4884fc9e9c4c4467a371e5a40f3181239e1b70f5
>> ---
>>  MAINTAINERS|   1 +
>>  drivers/firmware/qemu_fw_cfg.c |  22 +
>>  include/uapi/linux/fw_cfg.h| 102 
>> +
>>  3 files changed, 105 insertions(+), 20 deletions(-)
>>  create mode 100644 include/uapi/linux/fw_cfg.h
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 3bdc260e36b7..a66b65f62811 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -11352,6 +11352,7 @@ M:"Michael S. Tsirkin" 
>>  L:   qemu-de...@nongnu.org
>>  S:   Maintained
>>  F:   drivers/firmware/qemu_fw_cfg.c
>> +F:   include/uapi/linux/fw_cfg.h
>>
>>  QIB DRIVER
>>  M:   Dennis Dalessandro 
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index a41b572eeeb1..90f467232777 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -32,30 +32,12 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>
> You include the header from include/linux/fw_cfg.h ...
>
>>
>>  MODULE_AUTHOR("Gabriel L. Somlo ");
>>  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
>>  MODULE_LICENSE("GPL");
>>
>> -/* selector key values for "well-known" fw_cfg entries */
>> -#define FW_CFG_SIGNATURE  0x00
>> -#define FW_CFG_ID 0x01
>> -#define FW_CFG_FILE_DIR   0x19
>> -
>> -/* size in bytes of fw_cfg signature */
>> -#define FW_CFG_SIG_SIZE 4
>> -
>> -/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
>> -#define FW_CFG_MAX_FILE_PATH 56
>> -
>> -/* fw_cfg file directory entry type */
>> -struct fw_cfg_file {
>> - u32 size;
>> - u16 select;
>> - u16 reserved;
>> - char name[FW_CFG_MAX_FILE_PATH];
>> -};
>> -
>>  /* fw_cfg device i/o register addresses */
>>  static bool fw_cfg_is_mmio;
>>  static phys_addr_t fw_cfg_p_base;
>> @@ -597,7 +579,7 @@ MODULE_DEVICE_TABLE(of, fw_cfg_sysfs_mmio_match);
>>
>>  #ifdef CONFIG_ACPI
>>  static const struct acpi_device_id fw_cfg_sysfs_acpi_match[] = {
>> - { "QEMU0002", },
>> + { FW_CFG_ACPI_DEVICE_ID, },
>>   {},
>>  };
>>  MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_acpi_match);
>> diff --git a/include/uapi/linux/fw_cfg.h b/include/uapi/linux/fw_cfg.h
>> new file mode 100644
>> index ..5b8136ce46ee
>> --- /dev/null
>> +++ b/include/uapi/linux/fw_cfg.h
>
> Yet the header is located in include/uapi/linux/fw_cfg.h
>
> How can this work?
>

$ make drivers/firmware/qemu_fw_cfg.ko V=1

gcc -Wp,-MD,drivers/firmware/.qemu_fw_cfg.o.d  -nostdinc -isystem
/usr/lib/gcc/x86_64-redhat-linux/7/include -I./arch/x86/include
-I./arch/x86/include/generated  -I./include -I./arch/x86/include/uapi
...

>> @@ -0,0 +1,102 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>
> As far as I can see these have all been lifted from a BSD-licensed
> hw/nvram/fw_cfg.c in qemu. So please make it BSD accordingly,
> and include the explanation in the commit log.
>

(see other reply)

>> +#ifndef _LINUX_FW_CFG_H
>> +#define _LINUX_FW_CFG_H
>> +
>> +#include 
>> +
>> +#define FW_CFG_ACPI_DEVICE_ID"QEMU0002"
>> +
>> +/* selector key values for "well-known" fw_cfg entries */
>> +#define FW_CFG_SIGNATURE 0x00
>> +#define FW_CFG_ID0x01
>> +#define FW_CFG_UUID  0x02
>> +#define FW_CFG_RAM_SIZE  0x03
>> +#define FW_CFG_NOGRAPHIC 0x04
>> +#define FW_CFG_NB_CPUS   0x05
>> +#define FW_CFG_MACHINE_ID0x06
>> +#define FW_CFG_KERNEL_ADDR   0x07
>> +#define FW_CFG_KERNEL_SIZE   0x08
>> +#define FW_CFG_KERNEL_CMDLINE0x09
>> +#define FW_CFG_INITRD_ADDR   0x0a
>> +#define FW_CFG_INITRD_SIZE   0x0b
>> +#define FW_CFG_BOOT_DEVICE   0x0c
>> +#define FW_CFG_NUMA  0x0d
>> +#define FW_CFG_BOOT_MENU 0x0e
>> +#define FW_CFG_MAX_CPUS  0x0f
>> +#define FW_CFG_KERNEL_ENTRY  0x10
>> +#define FW_CFG_KERNEL_DATA   0x11
>> +#define FW_CFG_INITRD_DATA   0x12
>> +#define FW_CFG_CMDLINE_ADDR  0x13
>> +#define FW_CFG_CMDLINE_SIZE  0x14
>> +#define FW_CFG_CMDLINE_DATA  0x15
>> +#define FW_CFG_SETUP_ADDR0x16
>> +#define FW_CFG_SETUP_SIZE0x17
>> +#define FW_CFG_SETUP_DATA0x18
>> +#define FW_CFG_FILE_DIR  0x19
>> +
>> +#define FW_CFG_FILE_FIRST0x20
>> +#define FW_CFG_FILE_SLOTS_MIN0x10
>> +
>> +#define FW_CFG_WRITE_CHANNEL 0x4000
>> +#define FW_CFG_ARCH_LOCAL0x8000
>> +#define FW_CFG_ENTRY_MASK

Re: [PATCH v14 2/9] fw_cfg: add a public uapi header

2018-02-15 Thread Marc-Andre Lureau
Hi

On Wed, Feb 14, 2018 at 8:37 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 14, 2018 at 03:18:43PM +0100, Marc-André Lureau wrote:
>> Create a common header file for well-known values and structures to be
>> shared by the Linux kernel with qemu or other projects.
>>
>> Suggested-by: Michael S. Tsirkin 
>> Signed-off-by: Marc-André Lureau 
>>
>> ---
>>
>> The related qemu patch making use of it, to be submitted:
>> https://github.com/elmarco/qemu/commit/4884fc9e9c4c4467a371e5a40f3181239e1b70f5
>> ---
>>  MAINTAINERS|   1 +
>>  drivers/firmware/qemu_fw_cfg.c |  22 +
>>  include/uapi/linux/fw_cfg.h| 102 
>> +
>>  3 files changed, 105 insertions(+), 20 deletions(-)
>>  create mode 100644 include/uapi/linux/fw_cfg.h
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 3bdc260e36b7..a66b65f62811 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -11352,6 +11352,7 @@ M:"Michael S. Tsirkin" 
>>  L:   qemu-de...@nongnu.org
>>  S:   Maintained
>>  F:   drivers/firmware/qemu_fw_cfg.c
>> +F:   include/uapi/linux/fw_cfg.h
>>
>>  QIB DRIVER
>>  M:   Dennis Dalessandro 
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index a41b572eeeb1..90f467232777 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -32,30 +32,12 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>
> You include the header from include/linux/fw_cfg.h ...
>
>>
>>  MODULE_AUTHOR("Gabriel L. Somlo ");
>>  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
>>  MODULE_LICENSE("GPL");
>>
>> -/* selector key values for "well-known" fw_cfg entries */
>> -#define FW_CFG_SIGNATURE  0x00
>> -#define FW_CFG_ID 0x01
>> -#define FW_CFG_FILE_DIR   0x19
>> -
>> -/* size in bytes of fw_cfg signature */
>> -#define FW_CFG_SIG_SIZE 4
>> -
>> -/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
>> -#define FW_CFG_MAX_FILE_PATH 56
>> -
>> -/* fw_cfg file directory entry type */
>> -struct fw_cfg_file {
>> - u32 size;
>> - u16 select;
>> - u16 reserved;
>> - char name[FW_CFG_MAX_FILE_PATH];
>> -};
>> -
>>  /* fw_cfg device i/o register addresses */
>>  static bool fw_cfg_is_mmio;
>>  static phys_addr_t fw_cfg_p_base;
>> @@ -597,7 +579,7 @@ MODULE_DEVICE_TABLE(of, fw_cfg_sysfs_mmio_match);
>>
>>  #ifdef CONFIG_ACPI
>>  static const struct acpi_device_id fw_cfg_sysfs_acpi_match[] = {
>> - { "QEMU0002", },
>> + { FW_CFG_ACPI_DEVICE_ID, },
>>   {},
>>  };
>>  MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_acpi_match);
>> diff --git a/include/uapi/linux/fw_cfg.h b/include/uapi/linux/fw_cfg.h
>> new file mode 100644
>> index ..5b8136ce46ee
>> --- /dev/null
>> +++ b/include/uapi/linux/fw_cfg.h
>
> Yet the header is located in include/uapi/linux/fw_cfg.h
>
> How can this work?
>

$ make drivers/firmware/qemu_fw_cfg.ko V=1

gcc -Wp,-MD,drivers/firmware/.qemu_fw_cfg.o.d  -nostdinc -isystem
/usr/lib/gcc/x86_64-redhat-linux/7/include -I./arch/x86/include
-I./arch/x86/include/generated  -I./include -I./arch/x86/include/uapi
...

>> @@ -0,0 +1,102 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>
> As far as I can see these have all been lifted from a BSD-licensed
> hw/nvram/fw_cfg.c in qemu. So please make it BSD accordingly,
> and include the explanation in the commit log.
>

(see other reply)

>> +#ifndef _LINUX_FW_CFG_H
>> +#define _LINUX_FW_CFG_H
>> +
>> +#include 
>> +
>> +#define FW_CFG_ACPI_DEVICE_ID"QEMU0002"
>> +
>> +/* selector key values for "well-known" fw_cfg entries */
>> +#define FW_CFG_SIGNATURE 0x00
>> +#define FW_CFG_ID0x01
>> +#define FW_CFG_UUID  0x02
>> +#define FW_CFG_RAM_SIZE  0x03
>> +#define FW_CFG_NOGRAPHIC 0x04
>> +#define FW_CFG_NB_CPUS   0x05
>> +#define FW_CFG_MACHINE_ID0x06
>> +#define FW_CFG_KERNEL_ADDR   0x07
>> +#define FW_CFG_KERNEL_SIZE   0x08
>> +#define FW_CFG_KERNEL_CMDLINE0x09
>> +#define FW_CFG_INITRD_ADDR   0x0a
>> +#define FW_CFG_INITRD_SIZE   0x0b
>> +#define FW_CFG_BOOT_DEVICE   0x0c
>> +#define FW_CFG_NUMA  0x0d
>> +#define FW_CFG_BOOT_MENU 0x0e
>> +#define FW_CFG_MAX_CPUS  0x0f
>> +#define FW_CFG_KERNEL_ENTRY  0x10
>> +#define FW_CFG_KERNEL_DATA   0x11
>> +#define FW_CFG_INITRD_DATA   0x12
>> +#define FW_CFG_CMDLINE_ADDR  0x13
>> +#define FW_CFG_CMDLINE_SIZE  0x14
>> +#define FW_CFG_CMDLINE_DATA  0x15
>> +#define FW_CFG_SETUP_ADDR0x16
>> +#define FW_CFG_SETUP_SIZE0x17
>> +#define FW_CFG_SETUP_DATA0x18
>> +#define FW_CFG_FILE_DIR  0x19
>> +
>> +#define FW_CFG_FILE_FIRST0x20
>> +#define FW_CFG_FILE_SLOTS_MIN0x10
>> +
>> +#define FW_CFG_WRITE_CHANNEL 0x4000
>> +#define FW_CFG_ARCH_LOCAL0x8000
>> +#define FW_CFG_ENTRY_MASK(~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL))
>> +
>> +#define FW_CFG_INVALID   0x
>> +
>> +/* width in bytes of fw_cfg 

Re: [PATCH v14 2/9] fw_cfg: add a public uapi header

2018-02-15 Thread Marc-Andre Lureau
Hi

On Wed, Feb 14, 2018 at 9:41 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 14, 2018 at 03:18:43PM +0100, Marc-André Lureau wrote:
>> Create a common header file for well-known values and structures to be
>> shared by the Linux kernel with qemu or other projects.
>>
>> Suggested-by: Michael S. Tsirkin 
>> Signed-off-by: Marc-André Lureau 
>>
>> ---
>>
>> The related qemu patch making use of it, to be submitted:
>> https://github.com/elmarco/qemu/commit/4884fc9e9c4c4467a371e5a40f3181239e1b70f5
>> ---
>>  MAINTAINERS|   1 +
>>  drivers/firmware/qemu_fw_cfg.c |  22 +
>>  include/uapi/linux/fw_cfg.h| 102 
>> +
>>  3 files changed, 105 insertions(+), 20 deletions(-)
>>  create mode 100644 include/uapi/linux/fw_cfg.h
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 3bdc260e36b7..a66b65f62811 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -11352,6 +11352,7 @@ M:"Michael S. Tsirkin" 
>>  L:   qemu-de...@nongnu.org
>>  S:   Maintained
>>  F:   drivers/firmware/qemu_fw_cfg.c
>> +F:   include/uapi/linux/fw_cfg.h
>>
>>  QIB DRIVER
>>  M:   Dennis Dalessandro 
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index a41b572eeeb1..90f467232777 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -32,30 +32,12 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>
>>  MODULE_AUTHOR("Gabriel L. Somlo ");
>>  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
>>  MODULE_LICENSE("GPL");
>>
>> -/* selector key values for "well-known" fw_cfg entries */
>> -#define FW_CFG_SIGNATURE  0x00
>> -#define FW_CFG_ID 0x01
>> -#define FW_CFG_FILE_DIR   0x19
>> -
>> -/* size in bytes of fw_cfg signature */
>> -#define FW_CFG_SIG_SIZE 4
>> -
>> -/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
>> -#define FW_CFG_MAX_FILE_PATH 56
>> -
>> -/* fw_cfg file directory entry type */
>> -struct fw_cfg_file {
>> - u32 size;
>> - u16 select;
>> - u16 reserved;
>> - char name[FW_CFG_MAX_FILE_PATH];
>> -};
>> -
>>  /* fw_cfg device i/o register addresses */
>>  static bool fw_cfg_is_mmio;
>>  static phys_addr_t fw_cfg_p_base;
>> @@ -597,7 +579,7 @@ MODULE_DEVICE_TABLE(of, fw_cfg_sysfs_mmio_match);
>>
>>  #ifdef CONFIG_ACPI
>>  static const struct acpi_device_id fw_cfg_sysfs_acpi_match[] = {
>> - { "QEMU0002", },
>> + { FW_CFG_ACPI_DEVICE_ID, },
>>   {},
>>  };
>>  MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_acpi_match);
>> diff --git a/include/uapi/linux/fw_cfg.h b/include/uapi/linux/fw_cfg.h
>> new file mode 100644
>> index ..5b8136ce46ee
>> --- /dev/null
>> +++ b/include/uapi/linux/fw_cfg.h
>> @@ -0,0 +1,102 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>> +#ifndef _LINUX_FW_CFG_H
>> +#define _LINUX_FW_CFG_H
>> +
>> +#include 
>> +
>> +#define FW_CFG_ACPI_DEVICE_ID"QEMU0002"
>> +
>> +/* selector key values for "well-known" fw_cfg entries */
>> +#define FW_CFG_SIGNATURE 0x00
>> +#define FW_CFG_ID0x01
>> +#define FW_CFG_UUID  0x02
>> +#define FW_CFG_RAM_SIZE  0x03
>> +#define FW_CFG_NOGRAPHIC 0x04
>> +#define FW_CFG_NB_CPUS   0x05
>> +#define FW_CFG_MACHINE_ID0x06
>> +#define FW_CFG_KERNEL_ADDR   0x07
>> +#define FW_CFG_KERNEL_SIZE   0x08
>> +#define FW_CFG_KERNEL_CMDLINE0x09
>> +#define FW_CFG_INITRD_ADDR   0x0a
>> +#define FW_CFG_INITRD_SIZE   0x0b
>> +#define FW_CFG_BOOT_DEVICE   0x0c
>> +#define FW_CFG_NUMA  0x0d
>> +#define FW_CFG_BOOT_MENU 0x0e
>> +#define FW_CFG_MAX_CPUS  0x0f
>> +#define FW_CFG_KERNEL_ENTRY  0x10
>> +#define FW_CFG_KERNEL_DATA   0x11
>> +#define FW_CFG_INITRD_DATA   0x12
>> +#define FW_CFG_CMDLINE_ADDR  0x13
>> +#define FW_CFG_CMDLINE_SIZE  0x14
>> +#define FW_CFG_CMDLINE_DATA  0x15
>> +#define FW_CFG_SETUP_ADDR0x16
>> +#define FW_CFG_SETUP_SIZE0x17
>> +#define FW_CFG_SETUP_DATA0x18
>> +#define FW_CFG_FILE_DIR  0x19
>> +
>> +#define FW_CFG_FILE_FIRST0x20
>> +#define FW_CFG_FILE_SLOTS_MIN0x10
>> +
>> +#define FW_CFG_WRITE_CHANNEL 0x4000
>> +#define FW_CFG_ARCH_LOCAL0x8000
>> +#define FW_CFG_ENTRY_MASK(~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL))
>> +
>> +#define FW_CFG_INVALID   0x
>> +
>> +/* width in bytes of fw_cfg control register */
>> +#define FW_CFG_CTL_SIZE  0x02
>> +
>> +/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
>> +#define FW_CFG_MAX_FILE_PATH 56
>> +
>> +/* size in bytes of fw_cfg signature */
>> +#define FW_CFG_SIG_SIZE 4
>> +
>> +/* FW_CFG_ID bits */
>> +#define FW_CFG_VERSION   0x01
>> +#define FW_CFG_VERSION_DMA   0x02
>> +
>> +/* fw_cfg file directory entry type */
>> +struct fw_cfg_file {
>> + __be32 size;  

Re: [PATCH v14 2/9] fw_cfg: add a public uapi header

2018-02-15 Thread Marc-Andre Lureau
Hi

On Wed, Feb 14, 2018 at 9:41 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 14, 2018 at 03:18:43PM +0100, Marc-André Lureau wrote:
>> Create a common header file for well-known values and structures to be
>> shared by the Linux kernel with qemu or other projects.
>>
>> Suggested-by: Michael S. Tsirkin 
>> Signed-off-by: Marc-André Lureau 
>>
>> ---
>>
>> The related qemu patch making use of it, to be submitted:
>> https://github.com/elmarco/qemu/commit/4884fc9e9c4c4467a371e5a40f3181239e1b70f5
>> ---
>>  MAINTAINERS|   1 +
>>  drivers/firmware/qemu_fw_cfg.c |  22 +
>>  include/uapi/linux/fw_cfg.h| 102 
>> +
>>  3 files changed, 105 insertions(+), 20 deletions(-)
>>  create mode 100644 include/uapi/linux/fw_cfg.h
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index 3bdc260e36b7..a66b65f62811 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -11352,6 +11352,7 @@ M:"Michael S. Tsirkin" 
>>  L:   qemu-de...@nongnu.org
>>  S:   Maintained
>>  F:   drivers/firmware/qemu_fw_cfg.c
>> +F:   include/uapi/linux/fw_cfg.h
>>
>>  QIB DRIVER
>>  M:   Dennis Dalessandro 
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index a41b572eeeb1..90f467232777 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -32,30 +32,12 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>
>>  MODULE_AUTHOR("Gabriel L. Somlo ");
>>  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
>>  MODULE_LICENSE("GPL");
>>
>> -/* selector key values for "well-known" fw_cfg entries */
>> -#define FW_CFG_SIGNATURE  0x00
>> -#define FW_CFG_ID 0x01
>> -#define FW_CFG_FILE_DIR   0x19
>> -
>> -/* size in bytes of fw_cfg signature */
>> -#define FW_CFG_SIG_SIZE 4
>> -
>> -/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
>> -#define FW_CFG_MAX_FILE_PATH 56
>> -
>> -/* fw_cfg file directory entry type */
>> -struct fw_cfg_file {
>> - u32 size;
>> - u16 select;
>> - u16 reserved;
>> - char name[FW_CFG_MAX_FILE_PATH];
>> -};
>> -
>>  /* fw_cfg device i/o register addresses */
>>  static bool fw_cfg_is_mmio;
>>  static phys_addr_t fw_cfg_p_base;
>> @@ -597,7 +579,7 @@ MODULE_DEVICE_TABLE(of, fw_cfg_sysfs_mmio_match);
>>
>>  #ifdef CONFIG_ACPI
>>  static const struct acpi_device_id fw_cfg_sysfs_acpi_match[] = {
>> - { "QEMU0002", },
>> + { FW_CFG_ACPI_DEVICE_ID, },
>>   {},
>>  };
>>  MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_acpi_match);
>> diff --git a/include/uapi/linux/fw_cfg.h b/include/uapi/linux/fw_cfg.h
>> new file mode 100644
>> index ..5b8136ce46ee
>> --- /dev/null
>> +++ b/include/uapi/linux/fw_cfg.h
>> @@ -0,0 +1,102 @@
>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
>> +#ifndef _LINUX_FW_CFG_H
>> +#define _LINUX_FW_CFG_H
>> +
>> +#include 
>> +
>> +#define FW_CFG_ACPI_DEVICE_ID"QEMU0002"
>> +
>> +/* selector key values for "well-known" fw_cfg entries */
>> +#define FW_CFG_SIGNATURE 0x00
>> +#define FW_CFG_ID0x01
>> +#define FW_CFG_UUID  0x02
>> +#define FW_CFG_RAM_SIZE  0x03
>> +#define FW_CFG_NOGRAPHIC 0x04
>> +#define FW_CFG_NB_CPUS   0x05
>> +#define FW_CFG_MACHINE_ID0x06
>> +#define FW_CFG_KERNEL_ADDR   0x07
>> +#define FW_CFG_KERNEL_SIZE   0x08
>> +#define FW_CFG_KERNEL_CMDLINE0x09
>> +#define FW_CFG_INITRD_ADDR   0x0a
>> +#define FW_CFG_INITRD_SIZE   0x0b
>> +#define FW_CFG_BOOT_DEVICE   0x0c
>> +#define FW_CFG_NUMA  0x0d
>> +#define FW_CFG_BOOT_MENU 0x0e
>> +#define FW_CFG_MAX_CPUS  0x0f
>> +#define FW_CFG_KERNEL_ENTRY  0x10
>> +#define FW_CFG_KERNEL_DATA   0x11
>> +#define FW_CFG_INITRD_DATA   0x12
>> +#define FW_CFG_CMDLINE_ADDR  0x13
>> +#define FW_CFG_CMDLINE_SIZE  0x14
>> +#define FW_CFG_CMDLINE_DATA  0x15
>> +#define FW_CFG_SETUP_ADDR0x16
>> +#define FW_CFG_SETUP_SIZE0x17
>> +#define FW_CFG_SETUP_DATA0x18
>> +#define FW_CFG_FILE_DIR  0x19
>> +
>> +#define FW_CFG_FILE_FIRST0x20
>> +#define FW_CFG_FILE_SLOTS_MIN0x10
>> +
>> +#define FW_CFG_WRITE_CHANNEL 0x4000
>> +#define FW_CFG_ARCH_LOCAL0x8000
>> +#define FW_CFG_ENTRY_MASK(~(FW_CFG_WRITE_CHANNEL | FW_CFG_ARCH_LOCAL))
>> +
>> +#define FW_CFG_INVALID   0x
>> +
>> +/* width in bytes of fw_cfg control register */
>> +#define FW_CFG_CTL_SIZE  0x02
>> +
>> +/* fw_cfg "file name" is up to 56 characters (including terminating nul) */
>> +#define FW_CFG_MAX_FILE_PATH 56
>> +
>> +/* size in bytes of fw_cfg signature */
>> +#define FW_CFG_SIG_SIZE 4
>> +
>> +/* FW_CFG_ID bits */
>> +#define FW_CFG_VERSION   0x01
>> +#define FW_CFG_VERSION_DMA   0x02
>> +
>> +/* fw_cfg file directory entry type */
>> +struct fw_cfg_file {
>> + __be32 size;/* file size */
>> + __be16 select;  /* write this to 0x510 to read it */
>> + __u16 

Re: [PATCH v14 9/9] RFC: fw_cfg: do DMA read operation

2018-02-14 Thread Marc-Andre Lureau
Hi

On Wed, Feb 14, 2018 at 5:59 PM, Michael S. Tsirkin <m...@redhat.com> wrote:
> On Wed, Feb 14, 2018 at 05:52:10PM +0100, Marc-Andre Lureau wrote:
>> >> @@ -282,8 +320,9 @@ static int fw_cfg_do_platform_probe(struct 
>> >> platform_device *pdev)
>> >>  #endif
>> >>
>> >>   /* verify fw_cfg device signature */
>> >> - fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
>> >> - if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
>> >> + if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
>> >> + 0, FW_CFG_SIG_SIZE, false) < 0 ||
>> >> + memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
>> >>   fw_cfg_io_cleanup();
>> >>   return -ENODEV;
>> >>   }
>> >
>> > Rather than add dead code, how about a promise not to
>> > fail if dma is disabled? Patch will be smaller then.
>>
>> Even with dma disabled, you could have a locking bug which was
>> silently ignored before and is now taken into account.
>
> I see. I'd start with a patch reporting errors to users then.
> That would be a bugfix and can be merged for this version.

silently is the wrong word for it, there is already a WARN(). However,
the memcmp was done on uninitialzed sig[] (which likely resulted in
returning -ENODEV in the function)

Now it can check if the function failed to read before doing the memcmp.

Hope that clarifies it.

>
>> >
>> >> @@ -466,8 +505,8 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file 
>> >> *filp, struct kobject *kobj,
>> >>   if (count > entry->size - pos)
>> >>   count = entry->size - pos;
>> >>
>> >> - fw_cfg_read_blob(entry->select, buf, pos, count);
>> >> - return count;
>> >> + /* do not use DMA, virt_to_phys(buf) might not be ok */
>> >> + return fw_cfg_read_blob(entry->select, buf, pos, count, false);
>> >>  }
>> >>
>> >>  static struct bin_attribute fw_cfg_sysfs_attr_raw = {
>> >> @@ -632,7 +671,12 @@ static int fw_cfg_register_dir_entries(void)
>> >>   struct fw_cfg_file *dir;
>> >>   size_t dir_size;
>> >>
>> >> - fw_cfg_read_blob(FW_CFG_FILE_DIR, , 0, 
>> >> sizeof(files.count));
>> >> + ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, ,
>> >> + 0, sizeof(files.count), false);
>> >> + if (ret < 0) {
>> >> + return ret;
>> >> + }
>> >> +
>> >>   count = be32_to_cpu(files.count);
>> >>   dir_size = count * sizeof(struct fw_cfg_file);
>> >>
>> >> @@ -640,7 +684,11 @@ static int fw_cfg_register_dir_entries(void)
>> >>   if (!dir)
>> >>   return -ENOMEM;
>> >>
>> >> - fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files.count), 
>> >> dir_size);
>> >> + ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, dir,
>> >> + sizeof(files.count), dir_size, false);
>> >> + if (ret < 0) {
>> >> + goto end;
>> >> + }
>> >>
>> >>   for (i = 0; i < count; i++) {
>> >>   ret = fw_cfg_register_file([i]);
>> >> @@ -648,6 +696,7 @@ static int fw_cfg_register_dir_entries(void)
>> >>   break;
>> >>   }
>> >>
>> >> +end:
>> >>   kfree(dir);
>> >>   return ret;
>> >>  }
>> >> @@ -688,7 +737,10 @@ static int fw_cfg_sysfs_probe(struct platform_device 
>> >> *pdev)
>> >>   goto err_probe;
>> >>
>> >>   /* get revision number, add matching top-level attribute */
>> >> - fw_cfg_read_blob(FW_CFG_ID, , 0, sizeof(rev));
>> >> + err = fw_cfg_read_blob(FW_CFG_ID, , 0, sizeof(rev), false);
>> >> + if (err < 0) {
>> >> + goto err_probe;
>> >> + }
>> >>   fw_cfg_rev = le32_to_cpu(rev);
>> >>   err = sysfs_create_file(fw_cfg_top_ko, _cfg_rev_attr.attr);
>> >>   if (err)
>> >> --
>> >> 2.16.1.73.g5832b7e9f2


Re: [PATCH v14 9/9] RFC: fw_cfg: do DMA read operation

2018-02-14 Thread Marc-Andre Lureau
Hi

On Wed, Feb 14, 2018 at 5:59 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 14, 2018 at 05:52:10PM +0100, Marc-Andre Lureau wrote:
>> >> @@ -282,8 +320,9 @@ static int fw_cfg_do_platform_probe(struct 
>> >> platform_device *pdev)
>> >>  #endif
>> >>
>> >>   /* verify fw_cfg device signature */
>> >> - fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE);
>> >> - if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
>> >> + if (fw_cfg_read_blob(FW_CFG_SIGNATURE, sig,
>> >> + 0, FW_CFG_SIG_SIZE, false) < 0 ||
>> >> + memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) {
>> >>   fw_cfg_io_cleanup();
>> >>   return -ENODEV;
>> >>   }
>> >
>> > Rather than add dead code, how about a promise not to
>> > fail if dma is disabled? Patch will be smaller then.
>>
>> Even with dma disabled, you could have a locking bug which was
>> silently ignored before and is now taken into account.
>
> I see. I'd start with a patch reporting errors to users then.
> That would be a bugfix and can be merged for this version.

silently is the wrong word for it, there is already a WARN(). However,
the memcmp was done on uninitialzed sig[] (which likely resulted in
returning -ENODEV in the function)

Now it can check if the function failed to read before doing the memcmp.

Hope that clarifies it.

>
>> >
>> >> @@ -466,8 +505,8 @@ static ssize_t fw_cfg_sysfs_read_raw(struct file 
>> >> *filp, struct kobject *kobj,
>> >>   if (count > entry->size - pos)
>> >>   count = entry->size - pos;
>> >>
>> >> - fw_cfg_read_blob(entry->select, buf, pos, count);
>> >> - return count;
>> >> + /* do not use DMA, virt_to_phys(buf) might not be ok */
>> >> + return fw_cfg_read_blob(entry->select, buf, pos, count, false);
>> >>  }
>> >>
>> >>  static struct bin_attribute fw_cfg_sysfs_attr_raw = {
>> >> @@ -632,7 +671,12 @@ static int fw_cfg_register_dir_entries(void)
>> >>   struct fw_cfg_file *dir;
>> >>   size_t dir_size;
>> >>
>> >> - fw_cfg_read_blob(FW_CFG_FILE_DIR, , 0, 
>> >> sizeof(files.count));
>> >> + ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, ,
>> >> + 0, sizeof(files.count), false);
>> >> + if (ret < 0) {
>> >> + return ret;
>> >> + }
>> >> +
>> >>   count = be32_to_cpu(files.count);
>> >>   dir_size = count * sizeof(struct fw_cfg_file);
>> >>
>> >> @@ -640,7 +684,11 @@ static int fw_cfg_register_dir_entries(void)
>> >>   if (!dir)
>> >>   return -ENOMEM;
>> >>
>> >> - fw_cfg_read_blob(FW_CFG_FILE_DIR, dir, sizeof(files.count), 
>> >> dir_size);
>> >> + ret = fw_cfg_read_blob(FW_CFG_FILE_DIR, dir,
>> >> + sizeof(files.count), dir_size, false);
>> >> + if (ret < 0) {
>> >> + goto end;
>> >> + }
>> >>
>> >>   for (i = 0; i < count; i++) {
>> >>   ret = fw_cfg_register_file([i]);
>> >> @@ -648,6 +696,7 @@ static int fw_cfg_register_dir_entries(void)
>> >>   break;
>> >>   }
>> >>
>> >> +end:
>> >>   kfree(dir);
>> >>   return ret;
>> >>  }
>> >> @@ -688,7 +737,10 @@ static int fw_cfg_sysfs_probe(struct platform_device 
>> >> *pdev)
>> >>   goto err_probe;
>> >>
>> >>   /* get revision number, add matching top-level attribute */
>> >> - fw_cfg_read_blob(FW_CFG_ID, , 0, sizeof(rev));
>> >> + err = fw_cfg_read_blob(FW_CFG_ID, , 0, sizeof(rev), false);
>> >> + if (err < 0) {
>> >> + goto err_probe;
>> >> + }
>> >>   fw_cfg_rev = le32_to_cpu(rev);
>> >>   err = sysfs_create_file(fw_cfg_top_ko, _cfg_rev_attr.attr);
>> >>   if (err)
>> >> --
>> >> 2.16.1.73.g5832b7e9f2


Re: [PATCH v14 9/9] RFC: fw_cfg: do DMA read operation

2018-02-14 Thread Marc-Andre Lureau
Hi

On Wed, Feb 14, 2018 at 5:48 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 14, 2018 at 03:18:50PM +0100, Marc-André Lureau wrote:
>> Modify fw_cfg_read_blob() to use DMA if the device supports it.
>> Return errors, because the operation may fail.
>>
>> So far, only one call in fw_cfg_register_dir_entries() is using
>> kmalloc'ed buf and is thus clearly eligible to DMA read.
>>
>> Initially, I didn't implement DMA read to speed up boot time, but as a
>> first step before introducing DMA write (since read operations were
>> already presents). Even more, I didn't realize fw-cfg entries were
>> being read by the kernel during boot by default. But actally fw-cfg
>> entries are being populated during module probe. I knew DMA improved a
>> lot bios boot time (the main reason the DMA interface was added
>> afaik). Let see the time it would take to read the whole ACPI
>> tables (128kb allocated)
>>
>>  # time cat /sys/firmware/qemu_fw_cfg/by_name/etc/acpi/tables/raw
>>   - with DMA: sys 0m0.003s
>>   - without DMA (-global fw_cfg.dma_enabled=off): sys 0m7.674s
>>
>> FW_CFG_FILE_DIR (0x19) is the only "file" that is read during kernel
>> boot to populate sysfs qemu_fw_cfg directory, and it is quite
>> small (1-2kb). Since it does not expose itself, in order to measure
>> the time it takes to read such small file, I took a comparable sized
>> file of 2048 bytes and exposed it (-fw_cfg test,file=file with a
>> modified read_raw enabling DMA)
>>
>>  # perf stat -r 100 cat /sys/firmware/qemu_fw_cfg/by_name/test/raw >/dev/null
>>   - with DMA:
>>   0.636037  task-clock (msec) #0.141 CPUs utilized   
>>  ( +-  1.19% )
>>   - without DMA:
>>   6.430128  task-clock (msec) #0.622 CPUs utilized   
>>  ( +-  0.22% )
>>
>> That's a few msec saved during boot by enabling DMA read (the gain
>> would be more substantial if other & bigger fw-cfg entries are read by
>> others from sysfs, unfortunately, it's not clear if we can always
>> enable DMA there)
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 80 
>> ++
>>  1 file changed, 66 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index 69939e2529f2..ba9b907a4399 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -124,12 +124,46 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
>> length, u32 control)
>>   return ret;
>>  }
>>
>> +/* with acpi & dev locks taken */
>> +static ssize_t fw_cfg_read_blob_dma(u16 key,
>> + void *buf, loff_t pos, size_t count)
>> +{
>> + ssize_t ret;
>> +
>> + if (pos == 0) {
>> + ret = fw_cfg_dma_transfer(buf, count, key << 16
>> + | FW_CFG_DMA_CTL_SELECT
>> + | FW_CFG_DMA_CTL_READ);
>> + } else {
>> + iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
>> + ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
>> + if (ret < 0)
>> + return ret;
>> + ret = fw_cfg_dma_transfer(buf, count,
>> + FW_CFG_DMA_CTL_READ);
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +/* with acpi & dev locks taken */
>> +static void fw_cfg_read_blob_io(u16 key,
>> + void *buf, loff_t pos, size_t count)
>> +{
>> + iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
>> + while (pos-- > 0)
>> + ioread8(fw_cfg_reg_data);
>> + ioread8_rep(fw_cfg_reg_data, buf, count);
>> +}
>> +
>>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
>> -static inline void fw_cfg_read_blob(u16 key,
>> - void *buf, loff_t pos, size_t count)
>> +static ssize_t fw_cfg_read_blob(u16 key,
>> + void *buf, loff_t pos, size_t count,
>> + bool dma)
>>  {
>>   u32 glk = -1U;
>>   acpi_status status;
>> + ssize_t ret = count;
>>
>>   /* If we have ACPI, ensure mutual exclusion against any potential
>>* device access by the firmware, e.g. via AML methods:
>> @@ -139,17 +173,21 @@ static inline void fw_cfg_read_blob(u16 key,
>>   /* Should never get here */
>>   WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
>>   memset(buf, 0, count);
>> - return;
>> + return -EINVAL;
>>   }
>>
>>   mutex_lock(_cfg_dev_lock);
>> - iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
>> - while (pos-- > 0)
>> - ioread8(fw_cfg_reg_data);
>> - ioread8_rep(fw_cfg_reg_data, buf, count);
>> + if (dma && fw_cfg_dma_enabled()) {
>> + ret = fw_cfg_read_blob_dma(key, buf, pos, count);
>> +   

Re: [PATCH v14 9/9] RFC: fw_cfg: do DMA read operation

2018-02-14 Thread Marc-Andre Lureau
Hi

On Wed, Feb 14, 2018 at 5:48 PM, Michael S. Tsirkin  wrote:
> On Wed, Feb 14, 2018 at 03:18:50PM +0100, Marc-André Lureau wrote:
>> Modify fw_cfg_read_blob() to use DMA if the device supports it.
>> Return errors, because the operation may fail.
>>
>> So far, only one call in fw_cfg_register_dir_entries() is using
>> kmalloc'ed buf and is thus clearly eligible to DMA read.
>>
>> Initially, I didn't implement DMA read to speed up boot time, but as a
>> first step before introducing DMA write (since read operations were
>> already presents). Even more, I didn't realize fw-cfg entries were
>> being read by the kernel during boot by default. But actally fw-cfg
>> entries are being populated during module probe. I knew DMA improved a
>> lot bios boot time (the main reason the DMA interface was added
>> afaik). Let see the time it would take to read the whole ACPI
>> tables (128kb allocated)
>>
>>  # time cat /sys/firmware/qemu_fw_cfg/by_name/etc/acpi/tables/raw
>>   - with DMA: sys 0m0.003s
>>   - without DMA (-global fw_cfg.dma_enabled=off): sys 0m7.674s
>>
>> FW_CFG_FILE_DIR (0x19) is the only "file" that is read during kernel
>> boot to populate sysfs qemu_fw_cfg directory, and it is quite
>> small (1-2kb). Since it does not expose itself, in order to measure
>> the time it takes to read such small file, I took a comparable sized
>> file of 2048 bytes and exposed it (-fw_cfg test,file=file with a
>> modified read_raw enabling DMA)
>>
>>  # perf stat -r 100 cat /sys/firmware/qemu_fw_cfg/by_name/test/raw >/dev/null
>>   - with DMA:
>>   0.636037  task-clock (msec) #0.141 CPUs utilized   
>>  ( +-  1.19% )
>>   - without DMA:
>>   6.430128  task-clock (msec) #0.622 CPUs utilized   
>>  ( +-  0.22% )
>>
>> That's a few msec saved during boot by enabling DMA read (the gain
>> would be more substantial if other & bigger fw-cfg entries are read by
>> others from sysfs, unfortunately, it's not clear if we can always
>> enable DMA there)
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 80 
>> ++
>>  1 file changed, 66 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index 69939e2529f2..ba9b907a4399 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -124,12 +124,46 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
>> length, u32 control)
>>   return ret;
>>  }
>>
>> +/* with acpi & dev locks taken */
>> +static ssize_t fw_cfg_read_blob_dma(u16 key,
>> + void *buf, loff_t pos, size_t count)
>> +{
>> + ssize_t ret;
>> +
>> + if (pos == 0) {
>> + ret = fw_cfg_dma_transfer(buf, count, key << 16
>> + | FW_CFG_DMA_CTL_SELECT
>> + | FW_CFG_DMA_CTL_READ);
>> + } else {
>> + iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
>> + ret = fw_cfg_dma_transfer(NULL, pos, FW_CFG_DMA_CTL_SKIP);
>> + if (ret < 0)
>> + return ret;
>> + ret = fw_cfg_dma_transfer(buf, count,
>> + FW_CFG_DMA_CTL_READ);
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +/* with acpi & dev locks taken */
>> +static void fw_cfg_read_blob_io(u16 key,
>> + void *buf, loff_t pos, size_t count)
>> +{
>> + iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
>> + while (pos-- > 0)
>> + ioread8(fw_cfg_reg_data);
>> + ioread8_rep(fw_cfg_reg_data, buf, count);
>> +}
>> +
>>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
>> -static inline void fw_cfg_read_blob(u16 key,
>> - void *buf, loff_t pos, size_t count)
>> +static ssize_t fw_cfg_read_blob(u16 key,
>> + void *buf, loff_t pos, size_t count,
>> + bool dma)
>>  {
>>   u32 glk = -1U;
>>   acpi_status status;
>> + ssize_t ret = count;
>>
>>   /* If we have ACPI, ensure mutual exclusion against any potential
>>* device access by the firmware, e.g. via AML methods:
>> @@ -139,17 +173,21 @@ static inline void fw_cfg_read_blob(u16 key,
>>   /* Should never get here */
>>   WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
>>   memset(buf, 0, count);
>> - return;
>> + return -EINVAL;
>>   }
>>
>>   mutex_lock(_cfg_dev_lock);
>> - iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
>> - while (pos-- > 0)
>> - ioread8(fw_cfg_reg_data);
>> - ioread8_rep(fw_cfg_reg_data, buf, count);
>> + if (dma && fw_cfg_dma_enabled()) {
>> + ret = fw_cfg_read_blob_dma(key, buf, pos, count);
>> + } else {
>> + 

Re: [PATCH v13 3/4] fw_cfg: write vmcoreinfo details

2018-02-13 Thread Marc-Andre Lureau
On Tue, Feb 13, 2018 at 4:19 PM, Michael S. Tsirkin <m...@redhat.com> wrote:
> On Tue, Feb 13, 2018 at 04:16:08PM +0100, Marc-Andre Lureau wrote:
>> Hi
>>
>> On Tue, Feb 13, 2018 at 3:27 PM, Michael S. Tsirkin <m...@redhat.com> wrote:
>> > On Tue, Feb 13, 2018 at 03:14:03PM +0100, Marc-Andre Lureau wrote:
>> >> Hi
>> >>
>> >> On Mon, Feb 12, 2018 at 10:00 PM, Michael S. Tsirkin <m...@redhat.com> 
>> >> wrote:
>> >> > On Mon, Feb 12, 2018 at 11:04:49AM +0100, Marc-Andre Lureau wrote:
>> >> >> >> +}
>> >> >> >> +
>> >> >> >> +/* qemu fw_cfg device is sync today, but spec says it may become 
>> >> >> >> async */
>> >> >> >> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
>> >> >> >> +{
>> >> >> >> + do {
>> >> >> >> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
>> >> >> >> +
>> >> >> >> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
>> >> >> >> + return;
>> >> >> >> +
>> >> >> >> + usleep_range(50, 100);
>> >> >> >> + } while (true);
>> >> >> >
>> >> >> > And you need an smp rmb here.
>> >> >
>> >> > I'd just do rmb() in fact.
>> >> >
>> >> >> Could you explain? thanks
>> >> >
>> >> > See Documentation/memory-barriers.txt
>> >> > You know that control is valid, but following read of
>> >> > the structure could be reordered. So you need that barrier there.
>> >> > Same for write: wmb.
>> >>
>> >> Is this ok?
>> >> @@ -103,10 +104,14 @@ static ssize_t fw_cfg_dma_transfer(void
>> >> *address, u32 length, u32 control)
>> >> dma = virt_to_phys(d);
>> >>
>> >> iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
>> >> +   /* force memory to sync before notifying device via MMIO */
>> >> +   wmb();
>> >> iowrite32be(dma, fw_cfg_reg_dma + 4);
>> >>
>> >> fw_cfg_wait_for_control(d);
>> >>
>> >> +   /* do not reorder the read to d->control */
>> >> +   rmb();
>> >> if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
>> >> ret = -EIO;
>> >> }
>> >
>> > I think you need an rmb after the read of d->control.
>> >
>>
>>
>> There are two reads of d->control, one in fw_cfg_wait_for_control() to
>> wait for completion, and the other one here to handle error. Do you
>> mean that for clarity rmb() should be moved at the end of
>> fw_cfg_wait_for_control() instead?
>>
>> thanks
>
>
> IMHO that's a reasonable way to do it, yes.
> OTOH is looking at DMA data the only way to detect DMA is complete?
> Isn't there an IO register for that?

That's the only thing that indicate completion it seems:

https://git.qemu.org/?p=qemu.git;a=blob;f=hw/nvram/fw_cfg.c;hb=HEAD#l389

The spec doesn't describe other ways either:

https://git.qemu.org/?p=qemu.git;a=blob;f=docs/specs/fw_cfg.txt


Re: [PATCH v13 3/4] fw_cfg: write vmcoreinfo details

2018-02-13 Thread Marc-Andre Lureau
On Tue, Feb 13, 2018 at 4:19 PM, Michael S. Tsirkin  wrote:
> On Tue, Feb 13, 2018 at 04:16:08PM +0100, Marc-Andre Lureau wrote:
>> Hi
>>
>> On Tue, Feb 13, 2018 at 3:27 PM, Michael S. Tsirkin  wrote:
>> > On Tue, Feb 13, 2018 at 03:14:03PM +0100, Marc-Andre Lureau wrote:
>> >> Hi
>> >>
>> >> On Mon, Feb 12, 2018 at 10:00 PM, Michael S. Tsirkin  
>> >> wrote:
>> >> > On Mon, Feb 12, 2018 at 11:04:49AM +0100, Marc-Andre Lureau wrote:
>> >> >> >> +}
>> >> >> >> +
>> >> >> >> +/* qemu fw_cfg device is sync today, but spec says it may become 
>> >> >> >> async */
>> >> >> >> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
>> >> >> >> +{
>> >> >> >> + do {
>> >> >> >> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
>> >> >> >> +
>> >> >> >> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
>> >> >> >> + return;
>> >> >> >> +
>> >> >> >> + usleep_range(50, 100);
>> >> >> >> + } while (true);
>> >> >> >
>> >> >> > And you need an smp rmb here.
>> >> >
>> >> > I'd just do rmb() in fact.
>> >> >
>> >> >> Could you explain? thanks
>> >> >
>> >> > See Documentation/memory-barriers.txt
>> >> > You know that control is valid, but following read of
>> >> > the structure could be reordered. So you need that barrier there.
>> >> > Same for write: wmb.
>> >>
>> >> Is this ok?
>> >> @@ -103,10 +104,14 @@ static ssize_t fw_cfg_dma_transfer(void
>> >> *address, u32 length, u32 control)
>> >> dma = virt_to_phys(d);
>> >>
>> >> iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
>> >> +   /* force memory to sync before notifying device via MMIO */
>> >> +   wmb();
>> >> iowrite32be(dma, fw_cfg_reg_dma + 4);
>> >>
>> >> fw_cfg_wait_for_control(d);
>> >>
>> >> +   /* do not reorder the read to d->control */
>> >> +   rmb();
>> >> if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
>> >> ret = -EIO;
>> >> }
>> >
>> > I think you need an rmb after the read of d->control.
>> >
>>
>>
>> There are two reads of d->control, one in fw_cfg_wait_for_control() to
>> wait for completion, and the other one here to handle error. Do you
>> mean that for clarity rmb() should be moved at the end of
>> fw_cfg_wait_for_control() instead?
>>
>> thanks
>
>
> IMHO that's a reasonable way to do it, yes.
> OTOH is looking at DMA data the only way to detect DMA is complete?
> Isn't there an IO register for that?

That's the only thing that indicate completion it seems:

https://git.qemu.org/?p=qemu.git;a=blob;f=hw/nvram/fw_cfg.c;hb=HEAD#l389

The spec doesn't describe other ways either:

https://git.qemu.org/?p=qemu.git;a=blob;f=docs/specs/fw_cfg.txt


Re: [PATCH v13 3/4] fw_cfg: write vmcoreinfo details

2018-02-13 Thread Marc-Andre Lureau
Hi

On Tue, Feb 13, 2018 at 3:27 PM, Michael S. Tsirkin <m...@redhat.com> wrote:
> On Tue, Feb 13, 2018 at 03:14:03PM +0100, Marc-Andre Lureau wrote:
>> Hi
>>
>> On Mon, Feb 12, 2018 at 10:00 PM, Michael S. Tsirkin <m...@redhat.com> wrote:
>> > On Mon, Feb 12, 2018 at 11:04:49AM +0100, Marc-Andre Lureau wrote:
>> >> >> +}
>> >> >> +
>> >> >> +/* qemu fw_cfg device is sync today, but spec says it may become 
>> >> >> async */
>> >> >> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
>> >> >> +{
>> >> >> + do {
>> >> >> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
>> >> >> +
>> >> >> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
>> >> >> + return;
>> >> >> +
>> >> >> + usleep_range(50, 100);
>> >> >> + } while (true);
>> >> >
>> >> > And you need an smp rmb here.
>> >
>> > I'd just do rmb() in fact.
>> >
>> >> Could you explain? thanks
>> >
>> > See Documentation/memory-barriers.txt
>> > You know that control is valid, but following read of
>> > the structure could be reordered. So you need that barrier there.
>> > Same for write: wmb.
>>
>> Is this ok?
>> @@ -103,10 +104,14 @@ static ssize_t fw_cfg_dma_transfer(void
>> *address, u32 length, u32 control)
>> dma = virt_to_phys(d);
>>
>> iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
>> +   /* force memory to sync before notifying device via MMIO */
>> +   wmb();
>> iowrite32be(dma, fw_cfg_reg_dma + 4);
>>
>> fw_cfg_wait_for_control(d);
>>
>> +   /* do not reorder the read to d->control */
>> +   rmb();
>> if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
>> ret = -EIO;
>> }
>
> I think you need an rmb after the read of d->control.
>


There are two reads of d->control, one in fw_cfg_wait_for_control() to
wait for completion, and the other one here to handle error. Do you
mean that for clarity rmb() should be moved at the end of
fw_cfg_wait_for_control() instead?

thanks


Re: [PATCH v13 3/4] fw_cfg: write vmcoreinfo details

2018-02-13 Thread Marc-Andre Lureau
Hi

On Tue, Feb 13, 2018 at 3:27 PM, Michael S. Tsirkin  wrote:
> On Tue, Feb 13, 2018 at 03:14:03PM +0100, Marc-Andre Lureau wrote:
>> Hi
>>
>> On Mon, Feb 12, 2018 at 10:00 PM, Michael S. Tsirkin  wrote:
>> > On Mon, Feb 12, 2018 at 11:04:49AM +0100, Marc-Andre Lureau wrote:
>> >> >> +}
>> >> >> +
>> >> >> +/* qemu fw_cfg device is sync today, but spec says it may become 
>> >> >> async */
>> >> >> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
>> >> >> +{
>> >> >> + do {
>> >> >> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
>> >> >> +
>> >> >> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
>> >> >> + return;
>> >> >> +
>> >> >> + usleep_range(50, 100);
>> >> >> + } while (true);
>> >> >
>> >> > And you need an smp rmb here.
>> >
>> > I'd just do rmb() in fact.
>> >
>> >> Could you explain? thanks
>> >
>> > See Documentation/memory-barriers.txt
>> > You know that control is valid, but following read of
>> > the structure could be reordered. So you need that barrier there.
>> > Same for write: wmb.
>>
>> Is this ok?
>> @@ -103,10 +104,14 @@ static ssize_t fw_cfg_dma_transfer(void
>> *address, u32 length, u32 control)
>> dma = virt_to_phys(d);
>>
>> iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
>> +   /* force memory to sync before notifying device via MMIO */
>> +   wmb();
>> iowrite32be(dma, fw_cfg_reg_dma + 4);
>>
>> fw_cfg_wait_for_control(d);
>>
>> +   /* do not reorder the read to d->control */
>> +   rmb();
>> if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
>> ret = -EIO;
>> }
>
> I think you need an rmb after the read of d->control.
>


There are two reads of d->control, one in fw_cfg_wait_for_control() to
wait for completion, and the other one here to handle error. Do you
mean that for clarity rmb() should be moved at the end of
fw_cfg_wait_for_control() instead?

thanks


Re: [PATCH v13 3/4] fw_cfg: write vmcoreinfo details

2018-02-13 Thread Marc-Andre Lureau
Hi

On Mon, Feb 12, 2018 at 10:00 PM, Michael S. Tsirkin <m...@redhat.com> wrote:
> On Mon, Feb 12, 2018 at 11:04:49AM +0100, Marc-Andre Lureau wrote:
>> >> +}
>> >> +
>> >> +/* qemu fw_cfg device is sync today, but spec says it may become async */
>> >> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
>> >> +{
>> >> + do {
>> >> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
>> >> +
>> >> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
>> >> + return;
>> >> +
>> >> + usleep_range(50, 100);
>> >> + } while (true);
>> >
>> > And you need an smp rmb here.
>
> I'd just do rmb() in fact.
>
>> Could you explain? thanks
>
> See Documentation/memory-barriers.txt
> You know that control is valid, but following read of
> the structure could be reordered. So you need that barrier there.
> Same for write: wmb.

Is this ok?
@@ -103,10 +104,14 @@ static ssize_t fw_cfg_dma_transfer(void
*address, u32 length, u32 control)
dma = virt_to_phys(d);

iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   /* force memory to sync before notifying device via MMIO */
+   wmb();
iowrite32be(dma, fw_cfg_reg_dma + 4);

fw_cfg_wait_for_control(d);

+   /* do not reorder the read to d->control */
+   rmb();
if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
ret = -EIO;
}

>
>
>> >
>> >> +}
>> >> +
>> >> +static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 
>> >> control)
>> >> +{
>> >> + phys_addr_t dma;
>> >> + struct fw_cfg_dma *d = NULL;
>> >> + ssize_t ret = length;
>> >> +
>> >> + d = kmalloc(sizeof(*d), GFP_KERNEL);
>> >> + if (!d) {
>> >> + ret = -ENOMEM;
>> >> + goto end;
>> >> + }
>> >> +
>> >> + *d = (struct fw_cfg_dma) {
>> >> + .address = address ? cpu_to_be64(virt_to_phys(address)) : 0,
>> >> + .length = cpu_to_be32(length),
>> >> + .control = cpu_to_be32(control)
>> >> + };
>> >> +
>> >> + dma = virt_to_phys(d);
>> >
>> > Pls add docs on why this DMA bypasses the DMA API.
>>
>> Peter said in his patch: "fw_cfg device does not need IOMMU
>> protection, so use physical addresses
>> always.  That's how QEMU implements fw_cfg.  Otherwise we'll see call
>> traces during boot."
>>
>> Is that enough justification?
>
> what are the reasons for the traces exactly though?
> some kind of explanation should go into comments, and
> I think it should be a bit more detailed than just "it doesn't
> work otherwise".
>

I can use Peter help here. My understanding is because the qemu fw-cfg
device doesn't go through iommu when doing DMA op. Whether it should
or could, I can't answer.

thanks


Re: [PATCH v13 3/4] fw_cfg: write vmcoreinfo details

2018-02-13 Thread Marc-Andre Lureau
Hi

On Mon, Feb 12, 2018 at 10:00 PM, Michael S. Tsirkin  wrote:
> On Mon, Feb 12, 2018 at 11:04:49AM +0100, Marc-Andre Lureau wrote:
>> >> +}
>> >> +
>> >> +/* qemu fw_cfg device is sync today, but spec says it may become async */
>> >> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
>> >> +{
>> >> + do {
>> >> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
>> >> +
>> >> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
>> >> + return;
>> >> +
>> >> + usleep_range(50, 100);
>> >> + } while (true);
>> >
>> > And you need an smp rmb here.
>
> I'd just do rmb() in fact.
>
>> Could you explain? thanks
>
> See Documentation/memory-barriers.txt
> You know that control is valid, but following read of
> the structure could be reordered. So you need that barrier there.
> Same for write: wmb.

Is this ok?
@@ -103,10 +104,14 @@ static ssize_t fw_cfg_dma_transfer(void
*address, u32 length, u32 control)
dma = virt_to_phys(d);

iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
+   /* force memory to sync before notifying device via MMIO */
+   wmb();
iowrite32be(dma, fw_cfg_reg_dma + 4);

fw_cfg_wait_for_control(d);

+   /* do not reorder the read to d->control */
+   rmb();
if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
ret = -EIO;
}

>
>
>> >
>> >> +}
>> >> +
>> >> +static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 
>> >> control)
>> >> +{
>> >> + phys_addr_t dma;
>> >> + struct fw_cfg_dma *d = NULL;
>> >> + ssize_t ret = length;
>> >> +
>> >> + d = kmalloc(sizeof(*d), GFP_KERNEL);
>> >> + if (!d) {
>> >> + ret = -ENOMEM;
>> >> + goto end;
>> >> + }
>> >> +
>> >> + *d = (struct fw_cfg_dma) {
>> >> + .address = address ? cpu_to_be64(virt_to_phys(address)) : 0,
>> >> + .length = cpu_to_be32(length),
>> >> + .control = cpu_to_be32(control)
>> >> + };
>> >> +
>> >> + dma = virt_to_phys(d);
>> >
>> > Pls add docs on why this DMA bypasses the DMA API.
>>
>> Peter said in his patch: "fw_cfg device does not need IOMMU
>> protection, so use physical addresses
>> always.  That's how QEMU implements fw_cfg.  Otherwise we'll see call
>> traces during boot."
>>
>> Is that enough justification?
>
> what are the reasons for the traces exactly though?
> some kind of explanation should go into comments, and
> I think it should be a bit more detailed than just "it doesn't
> work otherwise".
>

I can use Peter help here. My understanding is because the qemu fw-cfg
device doesn't go through iommu when doing DMA op. Whether it should
or could, I can't answer.

thanks


Re: [PATCH v13 4/4] RFC: fw_cfg: do DMA read operation

2018-02-12 Thread Marc-Andre Lureau
Hi

On Mon, Feb 12, 2018 at 4:30 AM, Michael S. Tsirkin  wrote:
> On Wed, Feb 07, 2018 at 02:35:25AM +0100, Marc-André Lureau wrote:
>> Modify fw_cfg_read_blob() to use DMA if the device supports it.
>> Return errors, because the operation may fail.
>>
>> So far, only one call in fw_cfg_register_dir_entries() is using
>> kmalloc'ed buf and is thus clearly eligible to DMA read.
>>
>> Initially, I didn't implement DMA read to speed up boot time, but as a
>> first step before introducing DMA write (since read operations were
>> already presents). Even more, I didn't realize fw-cfg entries were
>> being read by the kernel during boot by default. But actally fw-cfg
>> entries are being populated during module probe. I knew DMA improved a
>> lot bios boot time (the main reason the DMA interface was added
>> afaik). Let see the time it would take to read the whole ACPI
>> tables (128kb allocated)
>>
>>  # time cat /sys/firmware/qemu_fw_cfg/by_name/etc/acpi/tables/raw
>>   - with DMA: sys 0m0.003s
>>   - without DMA (-global fw_cfg.dma_enabled=off): sys 0m7.674s
>>
>> FW_CFG_FILE_DIR (0x19) is the only "file" that is read during kernel
>> boot to populate sysfs qemu_fw_cfg directory, and it is quite
>> small (1-2kb). Since it does not expose itself, in order to measure
>> the time it takes to read such small file, I took a comparable sized
>> file of 2048 bytes and exposed it (-fw_cfg test,file=file with a
>> modified read_raw enabling DMA)
>>
>>  # perf stat -r 100 cat /sys/firmware/qemu_fw_cfg/by_name/test/raw >/dev/null
>>   - with DMA:
>>   0.636037  task-clock (msec) #0.141 CPUs utilized   
>>  ( +-  1.19% )
>>   - without DMA:
>>   6.430128  task-clock (msec) #0.622 CPUs utilized   
>>  ( +-  0.22% )
>>
>> That's a few msec saved during boot by enabling DMA read (the gain
>> would be more substantial if other & bigger fw-cfg entries are read by
>> others from sysfs, unfortunately, it's not clear if we can always
>> enable DMA there)
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 47 
>> ++
>>  1 file changed, 34 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index fd576ba7b337..3721dc868a2b 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -150,11 +150,13 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
>> length, u32 control)
>>  }
>>
>>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
>> -static inline void fw_cfg_read_blob(u16 key,
>> - void *buf, loff_t pos, size_t count)
>> +static ssize_t fw_cfg_read_blob(u16 key,
>> + void *buf, loff_t pos, size_t count,
>> + bool dma)
>>  {
>>   u32 glk = -1U;
>>   acpi_status status;
>> + ssize_t ret = count;
>>
>>   /* If we have ACPI, ensure mutual exclusion against any potential
>>* device access by the firmware, e.g. via AML methods:
>> @@ -164,17 +166,36 @@ static inline void fw_cfg_read_blob(u16 key,
>>   /* Should never get here */
>>   WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
>>   memset(buf, 0, count);
>> - return;
>> + return -EINVAL;
>>   }
>>
>>   mutex_lock(_cfg_dev_lock);
>> - iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
>> - while (pos-- > 0)
>> - ioread8(fw_cfg_reg_data);
>> - ioread8_rep(fw_cfg_reg_data, buf, count);
>> + if (dma && fw_cfg_dma_enabled()) {
>> + if (pos == 0) {
>> + ret = fw_cfg_dma_transfer(buf, count, key << 16
>> +   | FW_CFG_DMA_CTL_SELECT
>> +   | FW_CFG_DMA_CTL_READ);
>> + } else {
>> + iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
>> + ret = fw_cfg_dma_transfer(NULL, pos, 
>> FW_CFG_DMA_CTL_SKIP);
>> + if (ret < 0)
>> + goto end;
>> + ret = fw_cfg_dma_transfer(buf, count,
>> +   FW_CFG_DMA_CTL_READ);
>> + }
>> + } else {
>> + iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
>> + while (pos-- > 0)
>> + ioread8(fw_cfg_reg_data);
>> + ioread8_rep(fw_cfg_reg_data, buf, count);
>> + }
>> +
>> +end:
>>   mutex_unlock(_cfg_dev_lock);
>>
>>   acpi_release_global_lock(glk);
>> +
>> + return ret;
>>  }
>>
>
> These two functions share no common code at all.
> Pls name the dma one fw_cfg_dma_read or something like this,
> cleaner than a flag.

They share arguments, ACPI locking, error handling, 

Re: [PATCH v13 4/4] RFC: fw_cfg: do DMA read operation

2018-02-12 Thread Marc-Andre Lureau
Hi

On Mon, Feb 12, 2018 at 4:30 AM, Michael S. Tsirkin  wrote:
> On Wed, Feb 07, 2018 at 02:35:25AM +0100, Marc-André Lureau wrote:
>> Modify fw_cfg_read_blob() to use DMA if the device supports it.
>> Return errors, because the operation may fail.
>>
>> So far, only one call in fw_cfg_register_dir_entries() is using
>> kmalloc'ed buf and is thus clearly eligible to DMA read.
>>
>> Initially, I didn't implement DMA read to speed up boot time, but as a
>> first step before introducing DMA write (since read operations were
>> already presents). Even more, I didn't realize fw-cfg entries were
>> being read by the kernel during boot by default. But actally fw-cfg
>> entries are being populated during module probe. I knew DMA improved a
>> lot bios boot time (the main reason the DMA interface was added
>> afaik). Let see the time it would take to read the whole ACPI
>> tables (128kb allocated)
>>
>>  # time cat /sys/firmware/qemu_fw_cfg/by_name/etc/acpi/tables/raw
>>   - with DMA: sys 0m0.003s
>>   - without DMA (-global fw_cfg.dma_enabled=off): sys 0m7.674s
>>
>> FW_CFG_FILE_DIR (0x19) is the only "file" that is read during kernel
>> boot to populate sysfs qemu_fw_cfg directory, and it is quite
>> small (1-2kb). Since it does not expose itself, in order to measure
>> the time it takes to read such small file, I took a comparable sized
>> file of 2048 bytes and exposed it (-fw_cfg test,file=file with a
>> modified read_raw enabling DMA)
>>
>>  # perf stat -r 100 cat /sys/firmware/qemu_fw_cfg/by_name/test/raw >/dev/null
>>   - with DMA:
>>   0.636037  task-clock (msec) #0.141 CPUs utilized   
>>  ( +-  1.19% )
>>   - without DMA:
>>   6.430128  task-clock (msec) #0.622 CPUs utilized   
>>  ( +-  0.22% )
>>
>> That's a few msec saved during boot by enabling DMA read (the gain
>> would be more substantial if other & bigger fw-cfg entries are read by
>> others from sysfs, unfortunately, it's not clear if we can always
>> enable DMA there)
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 47 
>> ++
>>  1 file changed, 34 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index fd576ba7b337..3721dc868a2b 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -150,11 +150,13 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
>> length, u32 control)
>>  }
>>
>>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
>> -static inline void fw_cfg_read_blob(u16 key,
>> - void *buf, loff_t pos, size_t count)
>> +static ssize_t fw_cfg_read_blob(u16 key,
>> + void *buf, loff_t pos, size_t count,
>> + bool dma)
>>  {
>>   u32 glk = -1U;
>>   acpi_status status;
>> + ssize_t ret = count;
>>
>>   /* If we have ACPI, ensure mutual exclusion against any potential
>>* device access by the firmware, e.g. via AML methods:
>> @@ -164,17 +166,36 @@ static inline void fw_cfg_read_blob(u16 key,
>>   /* Should never get here */
>>   WARN(1, "fw_cfg_read_blob: Failed to lock ACPI!\n");
>>   memset(buf, 0, count);
>> - return;
>> + return -EINVAL;
>>   }
>>
>>   mutex_lock(_cfg_dev_lock);
>> - iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
>> - while (pos-- > 0)
>> - ioread8(fw_cfg_reg_data);
>> - ioread8_rep(fw_cfg_reg_data, buf, count);
>> + if (dma && fw_cfg_dma_enabled()) {
>> + if (pos == 0) {
>> + ret = fw_cfg_dma_transfer(buf, count, key << 16
>> +   | FW_CFG_DMA_CTL_SELECT
>> +   | FW_CFG_DMA_CTL_READ);
>> + } else {
>> + iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
>> + ret = fw_cfg_dma_transfer(NULL, pos, 
>> FW_CFG_DMA_CTL_SKIP);
>> + if (ret < 0)
>> + goto end;
>> + ret = fw_cfg_dma_transfer(buf, count,
>> +   FW_CFG_DMA_CTL_READ);
>> + }
>> + } else {
>> + iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
>> + while (pos-- > 0)
>> + ioread8(fw_cfg_reg_data);
>> + ioread8_rep(fw_cfg_reg_data, buf, count);
>> + }
>> +
>> +end:
>>   mutex_unlock(_cfg_dev_lock);
>>
>>   acpi_release_global_lock(glk);
>> +
>> + return ret;
>>  }
>>
>
> These two functions share no common code at all.
> Pls name the dma one fw_cfg_dma_read or something like this,
> cleaner than a flag.

They share arguments, ACPI locking, error handling, cleanup. But they
also allow to abstract read over 

Re: [PATCH v13 3/4] fw_cfg: write vmcoreinfo details

2018-02-12 Thread Marc-Andre Lureau
Hi

On Mon, Feb 12, 2018 at 4:43 AM, Michael S. Tsirkin  wrote:
> On Wed, Feb 07, 2018 at 02:35:24AM +0100, Marc-André Lureau wrote:
>> If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
>> the kdump kernel, write the addr/size of the vmcoreinfo ELF note.
>>
>> The DMA operation is expected to run synchronously with today qemu,
>> but the specification states that it may become async, so we run
>> "control" field check in a loop for eventual changes.
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 157 
>> -
>>  1 file changed, 154 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index 740df0df2260..fd576ba7b337 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -33,6 +33,9 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>> +#include 
>> +#include 
>>
>>  MODULE_AUTHOR("Gabriel L. Somlo ");
>>  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
>> @@ -43,12 +46,24 @@ MODULE_LICENSE("GPL");
>>  #define FW_CFG_ID 0x01
>>  #define FW_CFG_FILE_DIR   0x19
>>
>> +#define FW_CFG_VERSION_DMA 0x02
>> +#define FW_CFG_DMA_CTL_ERROR   0x01
>> +#define FW_CFG_DMA_CTL_READ0x02
>> +#define FW_CFG_DMA_CTL_SKIP0x04
>> +#define FW_CFG_DMA_CTL_SELECT  0x08
>> +#define FW_CFG_DMA_CTL_WRITE   0x10
>> +
>>  /* size in bytes of fw_cfg signature */
>>  #define FW_CFG_SIG_SIZE 4
>>
>>  /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
>>  #define FW_CFG_MAX_FILE_PATH 56
>>
>> +#define VMCOREINFO_FORMAT_ELF 0x1
>
> How about exporting interface parts in include/uapi/linux/ ?
> QEMU can import it from there then.
> This is what virtio does.

Good idea, we didn't have it yet. So this is an additional change.
I'll work on it. Though, if this should delay more this series, I
think we should drop it.

>
>> +
>> +/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
>> +static u32 fw_cfg_rev;
>> +
>>  /* fw_cfg file directory entry type */
>>  struct fw_cfg_file {
>>   u32 size;
>> @@ -57,6 +72,12 @@ struct fw_cfg_file {
>>   char name[FW_CFG_MAX_FILE_PATH];
>>  };
>>
>> +struct fw_cfg_dma {
>> + u32 control;
>> + u32 length;
>> + u64 address;
>> +} __packed;
>> +
>
> you can drop __packed here - it's always aligned properly.

Isn't it preferable to make that explicit? Fwiw, qemu also declares
the struct packed in its headers.

>
>>  /* fw_cfg device i/o register addresses */
>>  static bool fw_cfg_is_mmio;
>>  static phys_addr_t fw_cfg_p_base;
>> @@ -75,6 +96,59 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
>>   return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
>>  }
>>
>> +static inline bool fw_cfg_dma_enabled(void)
>> +{
>> + return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
>
> Why do you use () with == below but not with && here?
>

Let's add them.

>> +}
>> +
>> +/* qemu fw_cfg device is sync today, but spec says it may become async */
>> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
>> +{
>> + do {
>> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
>> +
>> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
>> + return;
>> +
>> + usleep_range(50, 100);
>> + } while (true);
>
> And you need an smp rmb here.

Could you explain? thanks

>
>> +}
>> +
>> +static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
>> +{
>> + phys_addr_t dma;
>> + struct fw_cfg_dma *d = NULL;
>> + ssize_t ret = length;
>> +
>> + d = kmalloc(sizeof(*d), GFP_KERNEL);
>> + if (!d) {
>> + ret = -ENOMEM;
>> + goto end;
>> + }
>> +
>> + *d = (struct fw_cfg_dma) {
>> + .address = address ? cpu_to_be64(virt_to_phys(address)) : 0,
>> + .length = cpu_to_be32(length),
>> + .control = cpu_to_be32(control)
>> + };
>> +
>> + dma = virt_to_phys(d);
>
> Pls add docs on why this DMA bypasses the DMA API.

Peter said in his patch: "fw_cfg device does not need IOMMU
protection, so use physical addresses
always.  That's how QEMU implements fw_cfg.  Otherwise we'll see call
traces during boot."

Is that enough justification?

>
>> +
>> + iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
>> + iowrite32be(dma, fw_cfg_reg_dma + 4);
>> +
>> + fw_cfg_wait_for_control(d);
>> +
>> + if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
>> + ret = -EIO;
>> + }
>> +
>> +end:
>> + kfree(d);
>> +
>> + return ret;
>> +}
>> +
>>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
>>  static inline void fw_cfg_read_blob(u16 key,
>>   void *buf, loff_t pos, size_t count)
>> @@ -103,6 +177,47 @@ static inline void 

Re: [PATCH v13 3/4] fw_cfg: write vmcoreinfo details

2018-02-12 Thread Marc-Andre Lureau
Hi

On Mon, Feb 12, 2018 at 4:43 AM, Michael S. Tsirkin  wrote:
> On Wed, Feb 07, 2018 at 02:35:24AM +0100, Marc-André Lureau wrote:
>> If the "etc/vmcoreinfo" fw_cfg file is present and we are not running
>> the kdump kernel, write the addr/size of the vmcoreinfo ELF note.
>>
>> The DMA operation is expected to run synchronously with today qemu,
>> but the specification states that it may become async, so we run
>> "control" field check in a loop for eventual changes.
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 157 
>> -
>>  1 file changed, 154 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index 740df0df2260..fd576ba7b337 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -33,6 +33,9 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>> +#include 
>> +#include 
>>
>>  MODULE_AUTHOR("Gabriel L. Somlo ");
>>  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
>> @@ -43,12 +46,24 @@ MODULE_LICENSE("GPL");
>>  #define FW_CFG_ID 0x01
>>  #define FW_CFG_FILE_DIR   0x19
>>
>> +#define FW_CFG_VERSION_DMA 0x02
>> +#define FW_CFG_DMA_CTL_ERROR   0x01
>> +#define FW_CFG_DMA_CTL_READ0x02
>> +#define FW_CFG_DMA_CTL_SKIP0x04
>> +#define FW_CFG_DMA_CTL_SELECT  0x08
>> +#define FW_CFG_DMA_CTL_WRITE   0x10
>> +
>>  /* size in bytes of fw_cfg signature */
>>  #define FW_CFG_SIG_SIZE 4
>>
>>  /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
>>  #define FW_CFG_MAX_FILE_PATH 56
>>
>> +#define VMCOREINFO_FORMAT_ELF 0x1
>
> How about exporting interface parts in include/uapi/linux/ ?
> QEMU can import it from there then.
> This is what virtio does.

Good idea, we didn't have it yet. So this is an additional change.
I'll work on it. Though, if this should delay more this series, I
think we should drop it.

>
>> +
>> +/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
>> +static u32 fw_cfg_rev;
>> +
>>  /* fw_cfg file directory entry type */
>>  struct fw_cfg_file {
>>   u32 size;
>> @@ -57,6 +72,12 @@ struct fw_cfg_file {
>>   char name[FW_CFG_MAX_FILE_PATH];
>>  };
>>
>> +struct fw_cfg_dma {
>> + u32 control;
>> + u32 length;
>> + u64 address;
>> +} __packed;
>> +
>
> you can drop __packed here - it's always aligned properly.

Isn't it preferable to make that explicit? Fwiw, qemu also declares
the struct packed in its headers.

>
>>  /* fw_cfg device i/o register addresses */
>>  static bool fw_cfg_is_mmio;
>>  static phys_addr_t fw_cfg_p_base;
>> @@ -75,6 +96,59 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
>>   return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
>>  }
>>
>> +static inline bool fw_cfg_dma_enabled(void)
>> +{
>> + return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
>
> Why do you use () with == below but not with && here?
>

Let's add them.

>> +}
>> +
>> +/* qemu fw_cfg device is sync today, but spec says it may become async */
>> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
>> +{
>> + do {
>> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
>> +
>> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
>> + return;
>> +
>> + usleep_range(50, 100);
>> + } while (true);
>
> And you need an smp rmb here.

Could you explain? thanks

>
>> +}
>> +
>> +static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
>> +{
>> + phys_addr_t dma;
>> + struct fw_cfg_dma *d = NULL;
>> + ssize_t ret = length;
>> +
>> + d = kmalloc(sizeof(*d), GFP_KERNEL);
>> + if (!d) {
>> + ret = -ENOMEM;
>> + goto end;
>> + }
>> +
>> + *d = (struct fw_cfg_dma) {
>> + .address = address ? cpu_to_be64(virt_to_phys(address)) : 0,
>> + .length = cpu_to_be32(length),
>> + .control = cpu_to_be32(control)
>> + };
>> +
>> + dma = virt_to_phys(d);
>
> Pls add docs on why this DMA bypasses the DMA API.

Peter said in his patch: "fw_cfg device does not need IOMMU
protection, so use physical addresses
always.  That's how QEMU implements fw_cfg.  Otherwise we'll see call
traces during boot."

Is that enough justification?

>
>> +
>> + iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
>> + iowrite32be(dma, fw_cfg_reg_dma + 4);
>> +
>> + fw_cfg_wait_for_control(d);
>> +
>> + if (be32_to_cpu(READ_ONCE(d->control)) & FW_CFG_DMA_CTL_ERROR) {
>> + ret = -EIO;
>> + }
>> +
>> +end:
>> + kfree(d);
>> +
>> + return ret;
>> +}
>> +
>>  /* read chunk of given fw_cfg blob (caller responsible for sanity-check) */
>>  static inline void fw_cfg_read_blob(u16 key,
>>   void *buf, loff_t pos, size_t count)
>> @@ -103,6 +177,47 @@ static inline void fw_cfg_read_blob(u16 key,
>>   acpi_release_global_lock(glk);
>>  

Re: [PATCH v11 4/4] fw_cfg: write vmcoreinfo details

2018-02-02 Thread Marc-Andre Lureau
Hi

On Fri, Feb 2, 2018 at 3:44 AM, Michael S. Tsirkin  wrote:
> On Thu, Feb 01, 2018 at 02:03:00PM +0100, Marc-André Lureau wrote:
>> @@ -314,6 +359,37 @@ struct fw_cfg_sysfs_entry {
>>   struct device *dev;
>>  };
>>
>> +#ifdef CONFIG_CRASH_CORE
>> +static ssize_t write_vmcoreinfo(struct device *dev, const struct 
>> fw_cfg_file *f)
>> +{
>> + struct vmci {
>> + __le16 host_format;
>> + __le16 guest_format;
>> + __le32 size;
>> + __le64 paddr;
>> + } __packed;
>> + static struct vmci *data;
>> + ssize_t ret;
>> +
>> + data = kmalloc(sizeof(struct vmci), GFP_KERNEL);
>> + if (!data)
>> + return -ENOMEM;
>> +
>> + *data = (struct vmci) {
>> + .guest_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF),
>> + .size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
>> + .paddr = cpu_to_le64(paddr_vmcoreinfo_note())
>> + };
>> + /* spare ourself reading host format support for now since we
>> +  * don't know what else to format - host may ignore ours
>> +  */
>> + ret = fw_cfg_write_blob(dev, f->select, data, 0, sizeof(struct vmci));
>> +
>> + kfree(data);
>> + return ret;
>> +}
>> +#endif /* CONFIG_CRASH_CORE */
>> +
>>  /* get fw_cfg_sysfs_entry from kobject member */
>>  static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
>>  {
>
> kmalloc during crash is I think a bad idea.
> How about preallocating on probe?

It doesn't kmalloc during crash, it kmalloc during boot. As such, I
don't see a reason to change it.

>
> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
> index 33e0256..ffd81d1 100644
> --- a/drivers/firmware/qemu_fw_cfg.c
> +++ b/drivers/firmware/qemu_fw_cfg.c
> @@ -363,22 +363,33 @@ struct fw_cfg_sysfs_entry {
>  };
>
>  #ifdef CONFIG_CRASH_CORE
> +
> +struct vmci {
> +   __le16 host_format;
> +   __le16 guest_format;
> +   __le32 size;
> +   __le64 paddr;
> +} *fw_cfg_vmcore_data;
> +
> +static int fw_cfg_vmcore_init(void)
> +{
> +   fw_cfg_vmcore_data = kmalloc(sizeof(*fw_cfg_vmcore_data), GFP_KERNEL);
> +   if (!fw_cfg_vmcore_data)
> +   return -ENOMEM;
> +   return 0;
> +}
> +
> +static int fw_cfg_vmcore_cleanup(void)
> +{
> +   kfree(fw_cfg_vmcore_data);
> +}
> +
>  static ssize_t write_vmcoreinfo(struct device *dev, const struct fw_cfg_file 
> *f)
>  {
> -   struct vmci {
> -   __le16 host_format;
> -   __le16 guest_format;
> -   __le32 size;
> -   __le64 paddr;
> -   } __packed;
> static struct vmci *data;
> ssize_t ret;
>
> -   data = kmalloc(sizeof(struct vmci), GFP_KERNEL);
> -   if (!data)
> -   return -ENOMEM;
> -
> -   *data = (struct vmci) {
> +   *fw_cfg_vmcore_data = (struct vmci) {
> .guest_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF),
> .size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
> .paddr = cpu_to_le64(paddr_vmcoreinfo_note())
> @@ -386,11 +397,23 @@ static ssize_t write_vmcoreinfo(struct device *dev, 
> const struct fw_cfg_file *f)
> /* spare ourself reading host format support for now since we
>  * don't know what else to format - host may ignore ours
>  */
> -   ret = fw_cfg_write_blob(dev, f->select, data, 0, sizeof(struct vmci));
> +   ret = fw_cfg_write_blob(dev, f->select, fw_cfg_vmcore_data, 0,
> +   sizeof(*fw_cfg_vmcore_data));
>
> -   kfree(data);
> return ret;
>  }
> +
> +#else
> +
> +static int fw_cfg_vmcore_init(void)
> +{
> +   return 0;
> +}
> +
> +static int fw_cfg_vmcore_cleanup(void)
> +{
> +}
> +
>  #endif /* CONFIG_CRASH_CORE */
>
>  /* get fw_cfg_sysfs_entry from kobject member */
> @@ -725,11 +748,15 @@ static int fw_cfg_sysfs_probe(struct platform_device 
> *pdev)
> if (fw_cfg_sel_ko)
> return -EBUSY;
>
> +   err = fw_cfg_vmcore_init();
> +   if (err)
> +   goto err_sel;
> +
> /* create by_key and by_name subdirs of /sys/firmware/qemu_fw_cfg/ */
> err = -ENOMEM;
> fw_cfg_sel_ko = kobject_create_and_add("by_key", fw_cfg_top_ko);
> if (!fw_cfg_sel_ko)
> -   goto err_sel;
> +   goto err_vmcore;
> fw_cfg_fname_kset = kset_create_and_add("by_name", NULL, 
> fw_cfg_top_ko);
> if (!fw_cfg_fname_kset)
> goto err_name;
> @@ -764,6 +791,8 @@ static int fw_cfg_sysfs_probe(struct platform_device 
> *pdev)
> fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
>  err_name:
> fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
> +err_vmcore:
> +   fw_cfg_vmcore_cleanup();
>  err_sel:
> return err;
>  }
> @@ -776,6 +805,7 @@ static int fw_cfg_sysfs_remove(struct platform_device 
> *pdev)
> fw_cfg_io_cleanup();
> 

Re: [PATCH v11 4/4] fw_cfg: write vmcoreinfo details

2018-02-02 Thread Marc-Andre Lureau
Hi

On Fri, Feb 2, 2018 at 3:44 AM, Michael S. Tsirkin  wrote:
> On Thu, Feb 01, 2018 at 02:03:00PM +0100, Marc-André Lureau wrote:
>> @@ -314,6 +359,37 @@ struct fw_cfg_sysfs_entry {
>>   struct device *dev;
>>  };
>>
>> +#ifdef CONFIG_CRASH_CORE
>> +static ssize_t write_vmcoreinfo(struct device *dev, const struct 
>> fw_cfg_file *f)
>> +{
>> + struct vmci {
>> + __le16 host_format;
>> + __le16 guest_format;
>> + __le32 size;
>> + __le64 paddr;
>> + } __packed;
>> + static struct vmci *data;
>> + ssize_t ret;
>> +
>> + data = kmalloc(sizeof(struct vmci), GFP_KERNEL);
>> + if (!data)
>> + return -ENOMEM;
>> +
>> + *data = (struct vmci) {
>> + .guest_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF),
>> + .size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
>> + .paddr = cpu_to_le64(paddr_vmcoreinfo_note())
>> + };
>> + /* spare ourself reading host format support for now since we
>> +  * don't know what else to format - host may ignore ours
>> +  */
>> + ret = fw_cfg_write_blob(dev, f->select, data, 0, sizeof(struct vmci));
>> +
>> + kfree(data);
>> + return ret;
>> +}
>> +#endif /* CONFIG_CRASH_CORE */
>> +
>>  /* get fw_cfg_sysfs_entry from kobject member */
>>  static inline struct fw_cfg_sysfs_entry *to_entry(struct kobject *kobj)
>>  {
>
> kmalloc during crash is I think a bad idea.
> How about preallocating on probe?

It doesn't kmalloc during crash, it kmalloc during boot. As such, I
don't see a reason to change it.

>
> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
> index 33e0256..ffd81d1 100644
> --- a/drivers/firmware/qemu_fw_cfg.c
> +++ b/drivers/firmware/qemu_fw_cfg.c
> @@ -363,22 +363,33 @@ struct fw_cfg_sysfs_entry {
>  };
>
>  #ifdef CONFIG_CRASH_CORE
> +
> +struct vmci {
> +   __le16 host_format;
> +   __le16 guest_format;
> +   __le32 size;
> +   __le64 paddr;
> +} *fw_cfg_vmcore_data;
> +
> +static int fw_cfg_vmcore_init(void)
> +{
> +   fw_cfg_vmcore_data = kmalloc(sizeof(*fw_cfg_vmcore_data), GFP_KERNEL);
> +   if (!fw_cfg_vmcore_data)
> +   return -ENOMEM;
> +   return 0;
> +}
> +
> +static int fw_cfg_vmcore_cleanup(void)
> +{
> +   kfree(fw_cfg_vmcore_data);
> +}
> +
>  static ssize_t write_vmcoreinfo(struct device *dev, const struct fw_cfg_file 
> *f)
>  {
> -   struct vmci {
> -   __le16 host_format;
> -   __le16 guest_format;
> -   __le32 size;
> -   __le64 paddr;
> -   } __packed;
> static struct vmci *data;
> ssize_t ret;
>
> -   data = kmalloc(sizeof(struct vmci), GFP_KERNEL);
> -   if (!data)
> -   return -ENOMEM;
> -
> -   *data = (struct vmci) {
> +   *fw_cfg_vmcore_data = (struct vmci) {
> .guest_format = cpu_to_le16(VMCOREINFO_FORMAT_ELF),
> .size = cpu_to_le32(VMCOREINFO_NOTE_SIZE),
> .paddr = cpu_to_le64(paddr_vmcoreinfo_note())
> @@ -386,11 +397,23 @@ static ssize_t write_vmcoreinfo(struct device *dev, 
> const struct fw_cfg_file *f)
> /* spare ourself reading host format support for now since we
>  * don't know what else to format - host may ignore ours
>  */
> -   ret = fw_cfg_write_blob(dev, f->select, data, 0, sizeof(struct vmci));
> +   ret = fw_cfg_write_blob(dev, f->select, fw_cfg_vmcore_data, 0,
> +   sizeof(*fw_cfg_vmcore_data));
>
> -   kfree(data);
> return ret;
>  }
> +
> +#else
> +
> +static int fw_cfg_vmcore_init(void)
> +{
> +   return 0;
> +}
> +
> +static int fw_cfg_vmcore_cleanup(void)
> +{
> +}
> +
>  #endif /* CONFIG_CRASH_CORE */
>
>  /* get fw_cfg_sysfs_entry from kobject member */
> @@ -725,11 +748,15 @@ static int fw_cfg_sysfs_probe(struct platform_device 
> *pdev)
> if (fw_cfg_sel_ko)
> return -EBUSY;
>
> +   err = fw_cfg_vmcore_init();
> +   if (err)
> +   goto err_sel;
> +
> /* create by_key and by_name subdirs of /sys/firmware/qemu_fw_cfg/ */
> err = -ENOMEM;
> fw_cfg_sel_ko = kobject_create_and_add("by_key", fw_cfg_top_ko);
> if (!fw_cfg_sel_ko)
> -   goto err_sel;
> +   goto err_vmcore;
> fw_cfg_fname_kset = kset_create_and_add("by_name", NULL, 
> fw_cfg_top_ko);
> if (!fw_cfg_fname_kset)
> goto err_name;
> @@ -764,6 +791,8 @@ static int fw_cfg_sysfs_probe(struct platform_device 
> *pdev)
> fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
>  err_name:
> fw_cfg_kobj_cleanup(fw_cfg_sel_ko);
> +err_vmcore:
> +   fw_cfg_vmcore_cleanup();
>  err_sel:
> return err;
>  }
> @@ -776,6 +805,7 @@ static int fw_cfg_sysfs_remove(struct platform_device 
> *pdev)
> fw_cfg_io_cleanup();
> fw_cfg_kset_unregister_recursive(fw_cfg_fname_kset);
>   

Re: [PATCH v10 2/4] fw_cfg: do DMA read operation

2018-01-24 Thread Marc-Andre Lureau
Hi

On Wed, Jan 24, 2018 at 4:25 AM, Peter Xu  wrote:
> On Tue, Jan 23, 2018 at 05:40:39PM +0100, Marc-André Lureau wrote:
>> Modify fw_cfg_read_blob() to use DMA if the device supports it.
>> Return errors, because the operation may fail.
>>
>> The DMA operation is expected to run synchronously with today qemu,
>> but the specification states that it may become async, so we run
>> "control" field check in a loop for eventual changes.
>>
>> We may want to switch all the *buf addresses to use only kmalloc'ed
>> buffers (instead of using stack/image addresses with dma=false).
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 131 
>> ++---
>>  1 file changed, 111 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index 740df0df2260..686f0e839858 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -33,6 +33,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>
>>  MODULE_AUTHOR("Gabriel L. Somlo ");
>>  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
>> @@ -43,12 +44,22 @@ MODULE_LICENSE("GPL");
>>  #define FW_CFG_ID 0x01
>>  #define FW_CFG_FILE_DIR   0x19
>>
>> +#define FW_CFG_VERSION_DMA 0x02
>> +#define FW_CFG_DMA_CTL_ERROR   0x01
>> +#define FW_CFG_DMA_CTL_READ0x02
>> +#define FW_CFG_DMA_CTL_SKIP0x04
>> +#define FW_CFG_DMA_CTL_SELECT  0x08
>> +#define FW_CFG_DMA_CTL_WRITE   0x10
>> +
>>  /* size in bytes of fw_cfg signature */
>>  #define FW_CFG_SIG_SIZE 4
>>
>>  /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
>>  #define FW_CFG_MAX_FILE_PATH 56
>>
>> +/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
>> +static u32 fw_cfg_rev;
>> +
>>  /* fw_cfg file directory entry type */
>>  struct fw_cfg_file {
>>   u32 size;
>> @@ -57,6 +68,12 @@ struct fw_cfg_file {
>>   char name[FW_CFG_MAX_FILE_PATH];
>>  };
>>
>> +struct fw_cfg_dma {
>> + u32 control;
>> + u32 length;
>> + u64 address;
>> +} __packed;
>> +
>>  /* fw_cfg device i/o register addresses */
>>  static bool fw_cfg_is_mmio;
>>  static phys_addr_t fw_cfg_p_base;
>> @@ -75,12 +92,68 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
>>   return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
>>  }
>>
>> +static inline bool fw_cfg_dma_enabled(void)
>> +{
>> + return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
>> +}
>> +
>> +/* qemu fw_cfg device is sync today, but spec says it may become async */
>> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
>> +{
>> + do {
>> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
>> +
>> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
>> + return;
>> +
>> + usleep_range(50, 100);
>> + } while (true);
>> +}
>> +
>> +static ssize_t fw_cfg_dma_transfer(struct device *dev,
>> + void *address, u32 length, u32 control)
>> +{
>> + phys_addr_t dma;
>> + struct fw_cfg_dma *d = NULL;
>> + ssize_t ret = length;
>> +
>> + d = kmalloc(sizeof(*d), GFP_KERNEL);
>> + if (!d) {
>> + ret = -ENOMEM;
>> + goto end;
>> + }
>> +
>> + *d = (struct fw_cfg_dma) {
>> + .address = cpu_to_be64(virt_to_phys(address)),
>> + .length = cpu_to_be32(length),
>> + .control = cpu_to_be32(control)
>> + };
>> +
>> + dma = virt_to_phys(d);
>> +
>> + iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
>> + iowrite32be(dma, fw_cfg_reg_dma + 4);
>
> We can do it with iowrite64be(virt_to_phys(d)) too?  In all cases I
> think it's good enough and no worth for a repost.

That would not build on 32 bit, but we could have a #ifdef
CONFIG_64BIT (untested).

>
> For the DMA transfer part:
>
> Acked-by: Peter Xu 

thanks


Re: [PATCH v10 2/4] fw_cfg: do DMA read operation

2018-01-24 Thread Marc-Andre Lureau
Hi

On Wed, Jan 24, 2018 at 4:25 AM, Peter Xu  wrote:
> On Tue, Jan 23, 2018 at 05:40:39PM +0100, Marc-André Lureau wrote:
>> Modify fw_cfg_read_blob() to use DMA if the device supports it.
>> Return errors, because the operation may fail.
>>
>> The DMA operation is expected to run synchronously with today qemu,
>> but the specification states that it may become async, so we run
>> "control" field check in a loop for eventual changes.
>>
>> We may want to switch all the *buf addresses to use only kmalloc'ed
>> buffers (instead of using stack/image addresses with dma=false).
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>  drivers/firmware/qemu_fw_cfg.c | 131 
>> ++---
>>  1 file changed, 111 insertions(+), 20 deletions(-)
>>
>> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
>> index 740df0df2260..686f0e839858 100644
>> --- a/drivers/firmware/qemu_fw_cfg.c
>> +++ b/drivers/firmware/qemu_fw_cfg.c
>> @@ -33,6 +33,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>
>>  MODULE_AUTHOR("Gabriel L. Somlo ");
>>  MODULE_DESCRIPTION("QEMU fw_cfg sysfs support");
>> @@ -43,12 +44,22 @@ MODULE_LICENSE("GPL");
>>  #define FW_CFG_ID 0x01
>>  #define FW_CFG_FILE_DIR   0x19
>>
>> +#define FW_CFG_VERSION_DMA 0x02
>> +#define FW_CFG_DMA_CTL_ERROR   0x01
>> +#define FW_CFG_DMA_CTL_READ0x02
>> +#define FW_CFG_DMA_CTL_SKIP0x04
>> +#define FW_CFG_DMA_CTL_SELECT  0x08
>> +#define FW_CFG_DMA_CTL_WRITE   0x10
>> +
>>  /* size in bytes of fw_cfg signature */
>>  #define FW_CFG_SIG_SIZE 4
>>
>>  /* fw_cfg "file name" is up to 56 characters (including terminating nul) */
>>  #define FW_CFG_MAX_FILE_PATH 56
>>
>> +/* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */
>> +static u32 fw_cfg_rev;
>> +
>>  /* fw_cfg file directory entry type */
>>  struct fw_cfg_file {
>>   u32 size;
>> @@ -57,6 +68,12 @@ struct fw_cfg_file {
>>   char name[FW_CFG_MAX_FILE_PATH];
>>  };
>>
>> +struct fw_cfg_dma {
>> + u32 control;
>> + u32 length;
>> + u64 address;
>> +} __packed;
>> +
>>  /* fw_cfg device i/o register addresses */
>>  static bool fw_cfg_is_mmio;
>>  static phys_addr_t fw_cfg_p_base;
>> @@ -75,12 +92,68 @@ static inline u16 fw_cfg_sel_endianness(u16 key)
>>   return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
>>  }
>>
>> +static inline bool fw_cfg_dma_enabled(void)
>> +{
>> + return fw_cfg_rev & FW_CFG_VERSION_DMA && fw_cfg_reg_dma;
>> +}
>> +
>> +/* qemu fw_cfg device is sync today, but spec says it may become async */
>> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
>> +{
>> + do {
>> + u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
>> +
>> + if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
>> + return;
>> +
>> + usleep_range(50, 100);
>> + } while (true);
>> +}
>> +
>> +static ssize_t fw_cfg_dma_transfer(struct device *dev,
>> + void *address, u32 length, u32 control)
>> +{
>> + phys_addr_t dma;
>> + struct fw_cfg_dma *d = NULL;
>> + ssize_t ret = length;
>> +
>> + d = kmalloc(sizeof(*d), GFP_KERNEL);
>> + if (!d) {
>> + ret = -ENOMEM;
>> + goto end;
>> + }
>> +
>> + *d = (struct fw_cfg_dma) {
>> + .address = cpu_to_be64(virt_to_phys(address)),
>> + .length = cpu_to_be32(length),
>> + .control = cpu_to_be32(control)
>> + };
>> +
>> + dma = virt_to_phys(d);
>> +
>> + iowrite32be((u64)dma >> 32, fw_cfg_reg_dma);
>> + iowrite32be(dma, fw_cfg_reg_dma + 4);
>
> We can do it with iowrite64be(virt_to_phys(d)) too?  In all cases I
> think it's good enough and no worth for a repost.

That would not build on 32 bit, but we could have a #ifdef
CONFIG_64BIT (untested).

>
> For the DMA transfer part:
>
> Acked-by: Peter Xu 

thanks


Re: [PATCH] fw_cfg: don't use DMA mapping for fw_cfg device

2018-01-15 Thread Marc-Andre Lureau
Hi

On Mon, Jan 15, 2018 at 9:55 AM, Peter Xu  wrote:
> fw_cfg device does not need IOMMU protection, so use physical addresses
> always.  That's how QEMU implements fw_cfg.  Otherwise we'll see call
> traces during boot when vIOMMU is enabled in guest:
>
> [1.018306] [ cut here ]
> [1.018314] WARNING: CPU: 1 PID: 1 at drivers/firmware/qemu_fw_cfg.c:152 
> fw_cfg_dma_transfer+0x399/0x500
> [1.018315] fw_cfg_dma_transfer: failed to map fw_cfg_dma
> [1.018316] Modules linked in:
> [1.018320] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 
> 3.10.0-827.el7.x86_64 #1
> [1.018321] Hardware name: Red Hat KVM, BIOS 1.11.0-1.el7 04/01/2014
> [1.018322] Call Trace:
> [1.018330]  [] dump_stack+0x19/0x1b
> [1.018334]  [] __warn+0xd8/0x100
> [1.018336]  [] warn_slowpath_fmt+0x5f/0x80
> [1.018338]  [] fw_cfg_dma_transfer+0x399/0x500
> [1.018340]  [] fw_cfg_read_blob+0xac/0x1c0
> [1.018342]  [] fw_cfg_register_dir_entries+0x80/0x450
> [1.018344]  [] fw_cfg_sysfs_probe+0x212/0x3f0
> [1.018347]  [] platform_drv_probe+0x42/0x110
> [1.018350]  [] driver_probe_device+0xc2/0x3e0
> [1.018352]  [] __driver_attach+0x93/0xa0
> [1.018354]  [] ? __device_attach+0x40/0x40
> [1.018359]  [] bus_for_each_dev+0x73/0xc0
> [1.018362]  [] driver_attach+0x1e/0x20
> [1.018364]  [] bus_add_driver+0x200/0x2d0
> [1.018366]  [] ? firmware_map_add_early+0x58/0x58
> [1.018368]  [] driver_register+0x64/0xf0
> [1.018370]  [] __platform_driver_register+0x4a/0x50
> [1.018372]  [] fw_cfg_sysfs_init+0x34/0x61
> [1.018376]  [] do_one_initcall+0xb8/0x230
> [1.018379]  [] kernel_init_freeable+0x17a/0x219
> [1.018381]  [] ? initcall_blacklist+0xb0/0xb0
> [1.018383]  [] ? rest_init+0x80/0x80
> [1.018385]  [] kernel_init+0xe/0xf0
> [1.018388]  [] ret_from_fork+0x58/0x90
> [1.018390]  [] ? rest_init+0x80/0x80
> [1.018392] ---[ end trace d00a5b71608a8f59 ]---
>
> Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1533367
> Fixes: e90cb816599b ("fw_cfg: do DMA read operation", 2017-11-28)
> CC: Marc-André Lureau 
> CC: Michael S. Tsirkin 
> Signed-off-by: Peter Xu 
> --
>
> This is based on tree:
>   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git/log/?h=vhost
>
> Please review, thanks.
>
> Signed-off-by: Peter Xu 

The DMA business is confusing, sadly I didn't get much clue what I was
supposed to do. What I can say:

Tested-by: Marc-André Lureau 

Should the series be removed from Michael tree and I squash your fix &
send a v10?

Fwiw, "fw_cfg: write vmcoreinfo details" should also be fixed to
allocate memory (unless your approach fixes that?)

thanks

> ---
>  drivers/firmware/qemu_fw_cfg.c | 37 -
>  1 file changed, 8 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
> index 6eb5d8f43c3e..62a44bc81a88 100644
> --- a/drivers/firmware/qemu_fw_cfg.c
> +++ b/drivers/firmware/qemu_fw_cfg.c
> @@ -106,11 +106,12 @@ static inline bool fw_cfg_dma_enabled(void)
>  }
>
>  /* qemu fw_cfg device is sync today, but spec says it may become async */
> -static void fw_cfg_wait_for_control(struct fw_cfg_dma *d, dma_addr_t dma)
> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
>  {
> do {
> -   dma_sync_single_for_cpu(dev, dma, sizeof(*d), 
> DMA_FROM_DEVICE);
> -   if ((be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR) == 0)
> +   u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
> +
> +   if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
> return;
>
> usleep_range(50, 100);
> @@ -119,21 +120,9 @@ static void fw_cfg_wait_for_control(struct fw_cfg_dma 
> *d, dma_addr_t dma)
>
>  static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
>  {
> -   dma_addr_t dma_addr = 0;
> struct fw_cfg_dma *d = NULL;
> -   dma_addr_t dma;
> ssize_t ret = length;
> -   enum dma_data_direction dir =
> -   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0) |
> -   (control & FW_CFG_DMA_CTL_WRITE ? DMA_TO_DEVICE : 0);
> -
> -   if (address && length) {
> -   dma_addr = dma_map_single(dev, address, length, dir);
> -   if (dma_mapping_error(NULL, dma_addr)) {
> -   WARN(1, "%s: failed to map address\n", __func__);
> -   return -EFAULT;
> -   }
> -   }
> +   phys_addr_t dma;
>
> d = kmalloc(sizeof(*d), GFP_KERNEL);
> if (!d) {
> @@ -142,34 +131,24 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
> length, u32 control)
> }
>
> *d = (struct fw_cfg_dma) {
> -   .address = cpu_to_be64(dma_addr),
> +   

Re: [PATCH] fw_cfg: don't use DMA mapping for fw_cfg device

2018-01-15 Thread Marc-Andre Lureau
Hi

On Mon, Jan 15, 2018 at 9:55 AM, Peter Xu  wrote:
> fw_cfg device does not need IOMMU protection, so use physical addresses
> always.  That's how QEMU implements fw_cfg.  Otherwise we'll see call
> traces during boot when vIOMMU is enabled in guest:
>
> [1.018306] [ cut here ]
> [1.018314] WARNING: CPU: 1 PID: 1 at drivers/firmware/qemu_fw_cfg.c:152 
> fw_cfg_dma_transfer+0x399/0x500
> [1.018315] fw_cfg_dma_transfer: failed to map fw_cfg_dma
> [1.018316] Modules linked in:
> [1.018320] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 
> 3.10.0-827.el7.x86_64 #1
> [1.018321] Hardware name: Red Hat KVM, BIOS 1.11.0-1.el7 04/01/2014
> [1.018322] Call Trace:
> [1.018330]  [] dump_stack+0x19/0x1b
> [1.018334]  [] __warn+0xd8/0x100
> [1.018336]  [] warn_slowpath_fmt+0x5f/0x80
> [1.018338]  [] fw_cfg_dma_transfer+0x399/0x500
> [1.018340]  [] fw_cfg_read_blob+0xac/0x1c0
> [1.018342]  [] fw_cfg_register_dir_entries+0x80/0x450
> [1.018344]  [] fw_cfg_sysfs_probe+0x212/0x3f0
> [1.018347]  [] platform_drv_probe+0x42/0x110
> [1.018350]  [] driver_probe_device+0xc2/0x3e0
> [1.018352]  [] __driver_attach+0x93/0xa0
> [1.018354]  [] ? __device_attach+0x40/0x40
> [1.018359]  [] bus_for_each_dev+0x73/0xc0
> [1.018362]  [] driver_attach+0x1e/0x20
> [1.018364]  [] bus_add_driver+0x200/0x2d0
> [1.018366]  [] ? firmware_map_add_early+0x58/0x58
> [1.018368]  [] driver_register+0x64/0xf0
> [1.018370]  [] __platform_driver_register+0x4a/0x50
> [1.018372]  [] fw_cfg_sysfs_init+0x34/0x61
> [1.018376]  [] do_one_initcall+0xb8/0x230
> [1.018379]  [] kernel_init_freeable+0x17a/0x219
> [1.018381]  [] ? initcall_blacklist+0xb0/0xb0
> [1.018383]  [] ? rest_init+0x80/0x80
> [1.018385]  [] kernel_init+0xe/0xf0
> [1.018388]  [] ret_from_fork+0x58/0x90
> [1.018390]  [] ? rest_init+0x80/0x80
> [1.018392] ---[ end trace d00a5b71608a8f59 ]---
>
> Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1533367
> Fixes: e90cb816599b ("fw_cfg: do DMA read operation", 2017-11-28)
> CC: Marc-André Lureau 
> CC: Michael S. Tsirkin 
> Signed-off-by: Peter Xu 
> --
>
> This is based on tree:
>   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git/log/?h=vhost
>
> Please review, thanks.
>
> Signed-off-by: Peter Xu 

The DMA business is confusing, sadly I didn't get much clue what I was
supposed to do. What I can say:

Tested-by: Marc-André Lureau 

Should the series be removed from Michael tree and I squash your fix &
send a v10?

Fwiw, "fw_cfg: write vmcoreinfo details" should also be fixed to
allocate memory (unless your approach fixes that?)

thanks

> ---
>  drivers/firmware/qemu_fw_cfg.c | 37 -
>  1 file changed, 8 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c
> index 6eb5d8f43c3e..62a44bc81a88 100644
> --- a/drivers/firmware/qemu_fw_cfg.c
> +++ b/drivers/firmware/qemu_fw_cfg.c
> @@ -106,11 +106,12 @@ static inline bool fw_cfg_dma_enabled(void)
>  }
>
>  /* qemu fw_cfg device is sync today, but spec says it may become async */
> -static void fw_cfg_wait_for_control(struct fw_cfg_dma *d, dma_addr_t dma)
> +static void fw_cfg_wait_for_control(struct fw_cfg_dma *d)
>  {
> do {
> -   dma_sync_single_for_cpu(dev, dma, sizeof(*d), 
> DMA_FROM_DEVICE);
> -   if ((be32_to_cpu(d->control) & ~FW_CFG_DMA_CTL_ERROR) == 0)
> +   u32 ctrl = be32_to_cpu(READ_ONCE(d->control));
> +
> +   if ((ctrl & ~FW_CFG_DMA_CTL_ERROR) == 0)
> return;
>
> usleep_range(50, 100);
> @@ -119,21 +120,9 @@ static void fw_cfg_wait_for_control(struct fw_cfg_dma 
> *d, dma_addr_t dma)
>
>  static ssize_t fw_cfg_dma_transfer(void *address, u32 length, u32 control)
>  {
> -   dma_addr_t dma_addr = 0;
> struct fw_cfg_dma *d = NULL;
> -   dma_addr_t dma;
> ssize_t ret = length;
> -   enum dma_data_direction dir =
> -   (control & FW_CFG_DMA_CTL_READ ? DMA_FROM_DEVICE : 0) |
> -   (control & FW_CFG_DMA_CTL_WRITE ? DMA_TO_DEVICE : 0);
> -
> -   if (address && length) {
> -   dma_addr = dma_map_single(dev, address, length, dir);
> -   if (dma_mapping_error(NULL, dma_addr)) {
> -   WARN(1, "%s: failed to map address\n", __func__);
> -   return -EFAULT;
> -   }
> -   }
> +   phys_addr_t dma;
>
> d = kmalloc(sizeof(*d), GFP_KERNEL);
> if (!d) {
> @@ -142,34 +131,24 @@ static ssize_t fw_cfg_dma_transfer(void *address, u32 
> length, u32 control)
> }
>
> *d = (struct fw_cfg_dma) {
> -   .address = cpu_to_be64(dma_addr),
> +   .address = cpu_to_be64(virt_to_phys(address)),
> .length = cpu_to_be32(length),
> .control =