date:20210414

Re: [PATCH v2] iommu/vt-d: Force to flush iotlb before creating superpage

2021-04-14 Thread Lu Baolu


Hi Longpeng,

On 4/15/21 8:46 AM, Longpeng(Mike) wrote:

The translation caches may preserve obsolete data when the
mapping size is changed. Consider the following sequence, which
can reveal the problem with high probability.

1.mmap(4GB,MAP_HUGETLB)
2.
   while (1) {
(a)DMA MAP   0,0xa
(b)DMA UNMAP 0,0xa
(c)DMA MAP   0,0xc000
  * DMA read of IOVA 0 may fail here (Not present)
  * if the problem occurs.
(d)DMA UNMAP 0,0xc000
   }

The page table(only focus on IOVA 0) after (a) is:
  PML4: 0x19db5c1003   entry:0x899bdcd2f000
   PDPE: 0x1a1cacb003  entry:0x89b35b5c1000
PDE: 0x1a30a72003  entry:0x89b39cacb000
 PTE: 0x21d200803  entry:0x89b3b0a72000

The page table after (b) is:
  PML4: 0x19db5c1003   entry:0x899bdcd2f000
   PDPE: 0x1a1cacb003  entry:0x89b35b5c1000
PDE: 0x1a30a72003  entry:0x89b39cacb000
 PTE: 0x0  entry:0x89b3b0a72000

The page table after (c) is:
  PML4: 0x19db5c1003   entry:0x899bdcd2f000
   PDPE: 0x1a1cacb003  entry:0x89b35b5c1000
PDE: 0x21d200883   entry:0x89b39cacb000 (*)

Because the PDE entry after (b) is present, it won't be
flushed even if the iommu driver flushes the cache on unmap,
so the obsolete data may be preserved in the cache, which
would cause a wrong translation in the end.

However, we can see the PDE entry is finally switched to a
2M-superpage mapping, but it does not transform
to 0x21d200883 directly:

1. PDE: 0x1a30a72003
2. __domain_mapping
  dma_pte_free_pagetable
Set the PDE entry to ZERO
  Set the PDE entry to 0x21d200883

So we must flush the cache after the entry is switched to ZERO
to avoid the obsolete info being preserved.

Cc: David Woodhouse 
Cc: Lu Baolu 
Cc: Nadav Amit 
Cc: Alex Williamson 
Cc: Joerg Roedel 
Cc: Kevin Tian 
Cc: Gonglei (Arei) 

Fixes: 6491d4d02893 ("intel-iommu: Free old page tables before creating 
superpage")
Cc:  # v3.0+
Link: 
https://lore.kernel.org/linux-iommu/670baaf8-4ff8-4e84-4be3-030b95ab5...@huawei.com/
Suggested-by: Lu Baolu 
Signed-off-by: Longpeng(Mike) 
---
v1 -> v2:
   - add Joerg
   - reconstruct the solution base on the Baolu's suggestion
---
  drivers/iommu/intel/iommu.c | 52 +
  1 file changed, 38 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index ee09323..881c9f2 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2289,6 +2289,41 @@ static inline int hardware_largepage_caps(struct 
dmar_domain *domain,
return level;
  }
  
+/*

+ * Ensure that old small page tables are removed to make room for superpage(s).
+ * We're going to add new large pages, so make sure we don't remove their 
parent
+ * tables. The IOTLB/devTLBs should be flushed if any PDE/PTEs are cleared.
+ */
+static void switch_to_super_page(struct dmar_domain *domain,
+unsigned long start_pfn,
+unsigned long end_pfn, int level)
+{
+   unsigned long lvl_pages = lvl_to_nr_pages(level);
+   struct dma_pte *pte = NULL;
+   int i;
+
+   while (start_pfn <= end_pfn) {
+   if (!pte)
+   pte = pfn_to_dma_pte(domain, start_pfn, &level);
+
+   if (dma_pte_present(pte)) {
+   dma_pte_free_pagetable(domain, start_pfn,
+  start_pfn + lvl_pages - 1,
+  level + 1);
+
+   for_each_domain_iommu(i, domain)
+   iommu_flush_iotlb_psi(g_iommus[i], domain,
+ start_pfn, lvl_pages,
+ 0, 0);
+   }
+
+   pte++;
+   start_pfn += lvl_pages;
+   if (first_pte_in_page(pte))
+   pte = NULL;
+   }
+}
+
  static int
  __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 unsigned long phys_pfn, unsigned long nr_pages, int prot)
@@ -2329,22 +2364,11 @@ static inline int hardware_largepage_caps(struct 
dmar_domain *domain,
return -ENOMEM;
/* It is large page*/
if (largepage_lvl > 1) {
-   unsigned long nr_superpages, end_pfn;
+   unsigned long end_pfn;
  
  pteval |= DMA_PTE_LARGE_PAGE;

-   lvl_pages = lvl_to_nr_pages(largepage_lvl);
-
-   nr_superpages = nr_pages / lvl_pages;
-   end_pfn = iov_pfn + nr_superpages * lvl_pages - 
1;
-
-   /*
-* Ensure that old small page tables are
-* removed to make room for superpage(s).
-

RE: [PATCH 1/2] fpga: mgr: Adds secure BitStream loading support

2021-04-14 Thread Nava kishore Manne

Hi Moritz,

Thanks for providing the review comments.
Please find my response inline.

> -Original Message-
> From: Moritz Fischer 
> Sent: Thursday, March 4, 2021 4:42 AM
> To: Moritz Fischer 
> Cc: Nava kishore Manne ; t...@redhat.com;
> robh...@kernel.org; Michal Simek ; linux-
> f...@vger.kernel.org; devicet...@vger.kernel.org; linux-arm-
> ker...@lists.infradead.org; linux-kernel@vger.kernel.org; git
> ; chinnikishore...@gmail.com
> Subject: Re: [PATCH 1/2] fpga: mgr: Adds secure BitStream loading support
> 
> Hi Nava,
> 
> On Thu, Jan 21, 2021 at 09:17:10PM -0800, Moritz Fischer wrote:
> > On Mon, Jan 18, 2021 at 08:20:57AM +0530, Nava kishore Manne wrote:
> > > This commit adds secure flags to the framework to support secure
> > > BitStream Loading.
> > >
> > > Signed-off-by: Nava kishore Manne 
> > > ---
> > >  drivers/fpga/of-fpga-region.c | 10 ++
> > > include/linux/fpga/fpga-mgr.h | 12 
> > >  2 files changed, 22 insertions(+)
> > >
> > > diff --git a/drivers/fpga/of-fpga-region.c
> > > b/drivers/fpga/of-fpga-region.c index e405309baadc..3a5eb480
> > > 100644
> > > --- a/drivers/fpga/of-fpga-region.c
> > > +++ b/drivers/fpga/of-fpga-region.c
> > > @@ -228,6 +228,16 @@ static struct fpga_image_info
> *of_fpga_region_parse_ov(
> > >   if (of_property_read_bool(overlay, "encrypted-fpga-config"))
> > >   info->flags |= FPGA_MGR_ENCRYPTED_BITSTREAM;
> > >
> > > + if (of_property_read_bool(overlay, "userkey-encrypted-fpga-
> config"))
> > > + info->flags |=
> FPGA_MGR_USERKEY_ENCRYPTED_BITSTREAM;
> >
> > Can this just be encrypted-fpga-config/FPGA_MGR_ENCRYPTED?
> > > +
> > > + if (of_property_read_bool(overlay, "ddrmem-authenticated-fpga-
> config"))
> > > + info->flags |= FPGA_MGR_DDR_MEM_AUTH_BITSTREAM;
> > > +
> > > + if (of_property_read_bool(overlay,
> > > +   "securemem-authenticated-fpga-config"))
> > > + info->flags |= FPGA_MGR_SECURE_MEM_AUTH_BITSTREAM;
> Shouldn't all these get binding docs? I remember Richard adding
> authentication support for an Intel platform, too and I'd like to avoid adding
> random bindings per vendor.
> 
> Would it be possible to have an 'authentication method / type' that is more
> extensible with different methods maybe?
> 
Yes, I agree these flags should not be vendor specific; we should make them 
generic.
Can we have flags something like the ones below?
1) authenticated-fpga-config --> By default it uses kernel memory(DDR) to 
authenticate the image(FPGA_MGR_MEM_AUTH_BITSTREAM)
2) authenticated-onchip-fpga-config --> It uses the On chip memory to 
authenticate the image ( FPGA_MGR_AUTH_ON_CHIP_BITSTREAM)

If you have any other thoughts, let me know and I will try to adopt them.


Regards,
Navakishore.

Re: [PATCH v2 0/2] Fix binfmt_flat loader for RISC-V

2021-04-14 Thread Christoph Hellwig

binfmt_flat tends to go through Greg's uclinux tree, adding him and
the list.

On Wed, Apr 14, 2021 at 10:46:36PM -0700, Palmer Dabbelt wrote:
> On Wed, 14 Apr 2021 17:32:10 PDT (-0700), Damien Le Moal wrote:
>>> On 2021/04/08 0:49, Damien Le Moal wrote:
>>> RISC-V NOMMU flat binaries cannot tolerate a gap between the text and
>>> data section as the toolchain fully resolves at compile time the PC
>>> relative global pointer (__global_pointer$ value loaded in gp register).
>>> Without a relocation entry provided, the flat bin loader cannot fix the
>>> value if a gap is introduced and executables fail to run.
>>>
>>> This series fixes this problem by allowing an architecture to request
>>> the flat loader to suppress the gap between the text and data sections.
>>> The first patch fixes binfmt_flat flat_load_file() using the new
>>> configuration option CONFIG_BINFMT_FLAT_NO_TEXT_DATA_GAP. The second
>>> patch enables this option for RISCV NOMMU builds.
>>>
>>> These patches do not change the binfmt_flat loader behavior for other
>>> architectures.
>>>
>>> Changes from v1:
>>> * Replace FLAT_TEXT_DATA_NO_GAP macro with
>>>   CONFIG_BINFMT_FLAT_NO_TEXT_DATA_GAP config option (patch 1).
>>> * Remove the addition of riscv/include/asm/flat.h and set
>>>   CONFIG_BINFMT_FLAT_NO_TEXT_DATA_GAP for RISCV and !MMU
>>>
>>> Damien Le Moal (2):
>>>   binfmt_flat: allow not offsetting data start
>>>   riscv: Disable text-data gap in flat binaries
>>>
>>>  arch/riscv/Kconfig |  1 +
>>>  fs/Kconfig.binfmt  |  3 +++
>>>  fs/binfmt_flat.c   | 21 +++--
>>>  3 files changed, 19 insertions(+), 6 deletions(-)
>>>
>>
>> Ping ?
>>
>> Any comment on these patches ?
>>
>> Without them, RISC-V NOMMU user space does not run... I would really like to 
>> get
>> these in this cycle if possible.
>
> This LGTM, but it's pretty far out of my area of expertise.  I'm happy to 
> take them via my tree, but I'd prefer to get an Ack from someone.
>
> Al, get_maintainer suggests you?
>
> Acked-by: Palmer Dabbelt 
---end quoted text---

Re: [PATCH 2/3] vDPA/ifcvf: enable Intel C5000X-PL virtio-block for vDPA

2021-04-14 Thread Zhu Lingshan





On 4/15/2021 11:34 AM, Jason Wang wrote:


在 2021/4/14 下午5:18, Zhu Lingshan 写道:

This commit enabled Intel FPGA SmartNIC C5000X-PL virtio-block
for vDPA.

Signed-off-by: Zhu Lingshan 
---
  drivers/vdpa/ifcvf/ifcvf_base.h | 17 -
  drivers/vdpa/ifcvf/ifcvf_main.c | 10 +-
  2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h 
b/drivers/vdpa/ifcvf/ifcvf_base.h

index 1c04cd256fa7..8b403522bf06 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -15,6 +15,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  @@ -28,7 +29,12 @@
  #define C5000X_PL_SUBSYS_VENDOR_ID    0x8086
  #define C5000X_PL_SUBSYS_DEVICE_ID    0x0001
  -#define IFCVF_SUPPORTED_FEATURES \
+#define C5000X_PL_BLK_VENDOR_ID    0x1AF4
+#define C5000X_PL_BLK_DEVICE_ID    0x1001
+#define C5000X_PL_BLK_SUBSYS_VENDOR_ID    0x8086
+#define C5000X_PL_BLK_SUBSYS_DEVICE_ID    0x0002
+
+#define IFCVF_NET_SUPPORTED_FEATURES \
  ((1ULL << VIRTIO_NET_F_MAC)    | \
   (1ULL << VIRTIO_F_ANY_LAYOUT)    | \
   (1ULL << VIRTIO_F_VERSION_1)    | \
@@ -37,6 +43,15 @@
   (1ULL << VIRTIO_F_ACCESS_PLATFORM)    | \
   (1ULL << VIRTIO_NET_F_MRG_RXBUF))
  +#define IFCVF_BLK_SUPPORTED_FEATURES \
+    ((1ULL << VIRTIO_BLK_F_SIZE_MAX)    | \
+ (1ULL << VIRTIO_BLK_F_SEG_MAX)    | \
+ (1ULL << VIRTIO_BLK_F_BLK_SIZE)    | \
+ (1ULL << VIRTIO_BLK_F_TOPOLOGY)    | \
+ (1ULL << VIRTIO_BLK_F_MQ)    | \
+ (1ULL << VIRTIO_F_VERSION_1)    | \
+ (1ULL << VIRTIO_F_ACCESS_PLATFORM))



I think we've discussed this sometime in the past but what's the 
reason for such a whitelist, considering there's already a get_features() 
implementation?


E.g Any reason to block VIRTIO_BLK_F_WRITE_ZEROS or VIRTIO_F_RING_PACKED?

Thanks
The reason is some feature bits are supported in the device but not 
supported by the driver, e.g., for virtio-net, the mq & cq implementation is 
not ready in the driver.


Thanks!





+
  /* Only one queue pair for now. */
  #define IFCVF_MAX_QUEUE_PAIRS    1
  diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c 
b/drivers/vdpa/ifcvf/ifcvf_main.c

index 99b0a6b4c227..9b6a38b798fa 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -171,7 +171,11 @@ static u64 ifcvf_vdpa_get_features(struct 
vdpa_device *vdpa_dev)

  struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
  u64 features;
  -    features = ifcvf_get_features(vf) & IFCVF_SUPPORTED_FEATURES;
+    if (vf->dev_type == VIRTIO_ID_NET)
+    features = ifcvf_get_features(vf) & 
IFCVF_NET_SUPPORTED_FEATURES;

+
+    if (vf->dev_type == VIRTIO_ID_BLOCK)
+    features = ifcvf_get_features(vf) & 
IFCVF_BLK_SUPPORTED_FEATURES;

    return features;
  }
@@ -509,6 +513,10 @@ static struct pci_device_id ifcvf_pci_ids[] = {
   C5000X_PL_DEVICE_ID,
   C5000X_PL_SUBSYS_VENDOR_ID,
   C5000X_PL_SUBSYS_DEVICE_ID) },
+    { PCI_DEVICE_SUB(C5000X_PL_BLK_VENDOR_ID,
+ C5000X_PL_BLK_DEVICE_ID,
+ C5000X_PL_BLK_SUBSYS_VENDOR_ID,
+ C5000X_PL_BLK_SUBSYS_DEVICE_ID) },
    { 0 },
  };

Re: [PATCH 37/57] staging: rtl8188eu: os_dep: ioctl_linux: Move 2 large data buffers into the heap

2021-04-14 Thread Dan Carpenter

I screwed up my last email and dropped Lee and Arnd from the To: headers.
Resending.

On Thu, Apr 15, 2021 at 08:20:16AM +0300, Dan Carpenter wrote:
> On Wed, Apr 14, 2021 at 07:11:09PM +0100, Lee Jones wrote:
> > ---
> >  drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 12 +++-
> >  1 file changed, 11 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c 
> > b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
> > index c95ae4d6a3b6b..cc14f00947781 100644
> > --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
> > +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
> > @@ -224,7 +224,7 @@ static char *translate_scan(struct adapter *padapter,
> > /* parsing WPA/WPA2 IE */
> > {
> > u8 *buf;
> > -   u8 wpa_ie[255], rsn_ie[255];
> > +   u8 *wpa_ie, *rsn_ie;
> > u16 wpa_len = 0, rsn_len = 0;
> > u8 *p;
> >  
> > @@ -232,6 +232,14 @@ static char *translate_scan(struct adapter *padapter,
> > if (!buf)
> > return start;

Arnd added this return.  I think it should be -ENOMEM, though?

> >  
> > +   wpa_ie = kzalloc(255, GFP_ATOMIC);
> > +   if (!wpa_ie)
> > +   return start;
> 
> kfree(buf);
> 
> > +
> > +   rsn_ie = kzalloc(255, GFP_ATOMIC);
> > +   if (!rsn_ie)
> > +   return start;
> 

regards,
dan carpenter

[PATCH v2 7/7] arm64: dts: mt8192: Add APU power domain node

2021-04-14 Thread Flora Fu

Add APU power domain node to MT8192.

Signed-off-by: Flora Fu 
---
Note:
This patch depends on MT8192 clock[1] and PMIC[2] patches which haven't yet 
been accepted.
[1] 
https://patchwork.kernel.org/project/linux-mediatek/patch/20210324104110.13383-7-chun-jie.c...@mediatek.com/
[2] 
https://patchwork.kernel.org/project/linux-mediatek/patch/1617188527-3392-9-git-send-email-hsin-hsiung.w...@mediatek.com/
---
 arch/arm64/boot/dts/mediatek/mt8192-evb.dts |  7 ++
 arch/arm64/boot/dts/mediatek/mt8192.dtsi| 28 +
 2 files changed, 35 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/mt8192-evb.dts 
b/arch/arm64/boot/dts/mediatek/mt8192-evb.dts
index 1769f3a9b510..688c97c46d44 100644
--- a/arch/arm64/boot/dts/mediatek/mt8192-evb.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8192-evb.dts
@@ -65,3 +65,10 @@
 _vrf12_ldo_reg {
regulator-always-on;
 };
+
+ {
+   vsram-supply = <_vsram_md_ldo_reg>;
+   apu_top: power-domain@0 {
+   domain-supply = <_vproc1_buck_reg>;
+   };
+};
diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi 
b/arch/arm64/boot/dts/mediatek/mt8192.dtsi
index 561025d2ebab..90436757386e 100644
--- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi
@@ -1033,6 +1033,34 @@
#clock-cells = <1>;
};
 
+   apuspm: power-domain@190f {
+   compatible = "mediatek,mt8192-apu-pm", "syscon";
+   reg = <0 0x190f 0 0x1000>;
+   #address-cells = <1>;
+   #size-cells = <0>;
+   #power-domain-cells = <1>;
+   mediatek,scpsys = <>;
+   mediatek,apu-conn = <_conn>;
+   mediatek,apu-vcore = <_vcore>;
+
+   apu_top: power-domain@0 {
+   reg = <0>;
+   #power-domain-cells = <0>;
+   clocks = < CLK_TOP_DSP_SEL>,
+< CLK_TOP_IPU_IF_SEL>,
+<>,
+< CLK_TOP_UNIVPLL_D6_D2>;
+   clock-names = "clk_top_conn",
+ "clk_top_ipu_if",
+ "clk_off",
+ "clk_on_default";
+   assigned-clocks = < CLK_TOP_DSP_SEL>,
+ < 
CLK_TOP_IPU_IF_SEL>;
+   assigned-clock-parents = < 
CLK_TOP_UNIVPLL_D6_D2>,
+< 
CLK_TOP_UNIVPLL_D6_D2>;
+   };
+   };
+
larb13: larb@1a001000 {
compatible = "mediatek,mt8192-smi-larb";
reg = <0 0x1a001000 0 0x1000>;
-- 
2.18.0

[PATCH v2 5/7] soc: mediatek: apu: Add apusys and add apu power domain driver

2021-04-14 Thread Flora Fu

Add the apusys in soc.
Add driver for apu power domains.

Signed-off-by: Flora Fu 
---
 drivers/soc/mediatek/Kconfig |  10 +
 drivers/soc/mediatek/Makefile|   1 +
 drivers/soc/mediatek/apusys/Makefile |   2 +
 drivers/soc/mediatek/apusys/mtk-apu-pm.c | 612 +++
 4 files changed, 625 insertions(+)
 create mode 100644 drivers/soc/mediatek/apusys/Makefile
 create mode 100644 drivers/soc/mediatek/apusys/mtk-apu-pm.c

diff --git a/drivers/soc/mediatek/Kconfig b/drivers/soc/mediatek/Kconfig
index fdd8bc08569e..76ee7e354b27 100644
--- a/drivers/soc/mediatek/Kconfig
+++ b/drivers/soc/mediatek/Kconfig
@@ -5,6 +5,16 @@
 menu "MediaTek SoC drivers"
depends on ARCH_MEDIATEK || COMPILE_TEST
 
+config MTK_APUSYS
+   bool "MediaTek APUSYS Support"
+   select REGMAP
+   select PM_GENERIC_DOMAINS if PM
+   help
+ Say yes here to add support for the MediaTek AI Processing Unit
+ Subsystem(APUSYS).
+ The APUSYS is a proprietary hardware in SoC to support AI
+ operations.
+
 config MTK_CMDQ
tristate "MediaTek CMDQ Support"
depends on ARCH_MEDIATEK || COMPILE_TEST
diff --git a/drivers/soc/mediatek/Makefile b/drivers/soc/mediatek/Makefile
index c916b6799baa..4ca2d59b75cb 100644
--- a/drivers/soc/mediatek/Makefile
+++ b/drivers/soc/mediatek/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_MTK_APUSYS) += apusys/
 obj-$(CONFIG_MTK_CMDQ) += mtk-cmdq-helper.o
 obj-$(CONFIG_MTK_DEVAPC) += mtk-devapc.o
 obj-$(CONFIG_MTK_INFRACFG) += mtk-infracfg.o
diff --git a/drivers/soc/mediatek/apusys/Makefile 
b/drivers/soc/mediatek/apusys/Makefile
new file mode 100644
index ..01c339e35b80
--- /dev/null
+++ b/drivers/soc/mediatek/apusys/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_MTK_APUSYS) += mtk-apu-pm.o
diff --git a/drivers/soc/mediatek/apusys/mtk-apu-pm.c 
b/drivers/soc/mediatek/apusys/mtk-apu-pm.c
new file mode 100644
index ..c21f8b4085fd
--- /dev/null
+++ b/drivers/soc/mediatek/apusys/mtk-apu-pm.c
@@ -0,0 +1,612 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021 MediaTek Inc.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define APU_PD_IPUIF_HW_CG BIT(0)
+#define APU_PD_RPC_AUTO_BUCK   BIT(1)
+#define APU_PD_CAPS(_pd, _x)   ((_pd)->data->caps & (_x))
+
+#define MTK_POLL_DELAY_US   10
+#define MTK_POLL_TIMEOUTUSEC_PER_SEC
+
+/*spm_cross_wake_m01*/
+#define WAKEUP_APU (0x1 << 0)
+
+/* spm_other_pwr_status*/
+#define PWR_STATUS (0x1 << 5)
+
+/* rpc_intf_pwr_rdy */
+#define PWR0_RDY   (0x1 << 0)
+
+/* rpc_top_con*/
+#define SLEEP_REQ  BIT(0)
+#define APU_BUCK_ELS_ENBIT(3)
+
+/*conn_clr, conn1_clr, vcore_clr */
+#define CG_CLR (0x)
+
+/* mt8192 rpc_sw_type */
+#define MT8192_RPC_SW_TYPE0(0x200)
+#define MT8192_RPC_SW_TYPE1(0x210)
+#define MT8192_RPC_SW_TYPE2(0x220)
+#define MT8192_RPC_SW_TYPE3(0x230)
+#define MT8192_RPC_SW_TYPE4(0x240)
+#define MT8192_RPC_SW_TYPE6(0x260)
+#define MT8192_RPC_SW_TYPE7(0x270)
+
+/* rpc_sw_type*/
+static const struct reg_sequence mt8192_rpc_sw_type[] = {
+   { MT8192_RPC_SW_TYPE0, 0xFF },
+   { MT8192_RPC_SW_TYPE2, 0x7 },
+   { MT8192_RPC_SW_TYPE3, 0x7 },
+   { MT8192_RPC_SW_TYPE6, 0x3 },
+};
+
+struct apu_top_domain {
+   u32 spm_ext_buck_iso;
+   u32 spm_ext_buck_iso_mask;
+   u32 spm_cross_wake_m01;
+   u32 spm_other_pwr;
+   u32 conn_clr;
+   u32 conn1_clr;
+   u32 vcore_clr;
+   u32 rpc_top_con;
+   u32 rpc_top_con_init_mask;
+   u32 rpc_top_sel;
+   u32 rpc_top_intf_pwr_rdy;
+   const struct reg_sequence *rpc_sw_type;
+   int rpc_sw_ntype;
+};
+
+static struct apu_top_domain mt8192_top_reg = {
+   .spm_ext_buck_iso = 0x39C,
+   .spm_ext_buck_iso_mask = 0x21,
+   .spm_cross_wake_m01 = 0x670,
+   .spm_other_pwr = 0x178,
+   .conn_clr = 0x008,
+   .vcore_clr = 0x008,
+   .rpc_top_con = 0x000,
+   .rpc_top_con_init_mask = 0x49E,
+   .rpc_top_sel = 0x004,
+   .rpc_top_intf_pwr_rdy = 0x044,
+   .rpc_sw_type = mt8192_rpc_sw_type,
+   .rpc_sw_ntype = ARRAY_SIZE(mt8192_rpc_sw_type),
+};
+
+struct apusys {
+   struct device *dev;
+   struct regmap *scpsys;
+   struct regmap *conn;
+   struct regmap *conn1;
+   struct regmap *vcore;
+   struct regmap *rpc;
+   struct regulator *vsram_supply;
+   const struct apu_pm_data *data;
+   struct genpd_onecell_data pd_data;
+   struct generic_pm_domain *domains[];
+};
+
+struct apu_domain {
+   struct generic_pm_domain genpd;
+   const struct apu_domain_data *data;
+   struct apusys *apusys;
+   struct regulator

[PATCH v2 6/7] arm64: dts: mt8192: Add APU node

2021-04-14 Thread Flora Fu

Add APU node to MT8192.

Signed-off-by: Flora Fu 
---
 arch/arm64/boot/dts/mediatek/mt8192.dtsi | 17 +
 1 file changed, 17 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi 
b/arch/arm64/boot/dts/mediatek/mt8192.dtsi
index eb17274c3719..561025d2ebab 100644
--- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi
@@ -1016,6 +1016,23 @@
#clock-cells = <1>;
};
 
+   apu_mbox: apu_mbox@1900 {
+   compatible = "mediatek,mt8192-apu-mbox", "syscon";
+   reg = <0 0x1900 0 0x1000>;
+   };
+
+   apu_conn: apu_conn@1902 {
+   compatible = "mediatek,mt8192-apu-conn", "syscon";
+   reg = <0 0x1902 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   apu_vcore: apu_vcore@19029000 {
+   compatible = "mediatek,mt8192-apu-vcore", "syscon";
+   reg = <0 0x19029000 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
larb13: larb@1a001000 {
compatible = "mediatek,mt8192-smi-larb";
reg = <0 0x1a001000 0 0x1000>;
-- 
2.18.0

[PATCH v2 4/7] dt-bindings: soc: mediatek: apusys: Add new document for APU power domain

2021-04-14 Thread Flora Fu

Document the bindings for APU power domain on MediaTek SoC.

Signed-off-by: Flora Fu 
---
Note:
This patch depends on MT8192 clock[1] patches which haven't yet been accepted.
[1] 
https://patchwork.kernel.org/project/linux-mediatek/patch/20210324104110.13383-7-chun-jie.c...@mediatek.com/
---
 .../soc/mediatek/mediatek,apu-pm.yaml | 145 ++
 1 file changed, 145 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/soc/mediatek/mediatek,apu-pm.yaml

diff --git 
a/Documentation/devicetree/bindings/soc/mediatek/mediatek,apu-pm.yaml 
b/Documentation/devicetree/bindings/soc/mediatek/mediatek,apu-pm.yaml
new file mode 100644
index ..6ff966920917
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mediatek/mediatek,apu-pm.yaml
@@ -0,0 +1,145 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/soc/mediatek/mediatek,apu-pm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Mediatek APU Power Domains
+
+maintainers:
+  - Flora Fu 
+
+description: |
+  Mediatek AI Process Unit (APU) include support for power domains which can be
+  powered up/down by software.
+  APU subsys belonging to a power domain should contain a 'power-domains'
+  property that is a phandle for apuspm node representing the domain.
+
+properties:
+  compatible:
+items:
+  - enum:
+  - mediatek,mt8192-apu-pm
+  - const: syscon
+
+  reg:
+description: Address range of the APU power domain controller.
+maxItems: 1
+
+  '#address-cells':
+const: 1
+
+  '#size-cells':
+const: 0
+
+  '#power-domain-cells':
+const: 1
+
+  vsram-supply:
+description: apu sram regulator supply.
+
+  mediatek,scpsys:
+$ref: /schemas/types.yaml#/definitions/phandle
+description: |
+  phandle to the device containing the scpsys register range.
+
+  mediatek,apu-conn:
+$ref: /schemas/types.yaml#/definitions/phandle
+description: |
+  phandle to the device containing the scpsys apu conn register range.
+
+  mediatek,apu-conn1:
+$ref: /schemas/types.yaml#/definitions/phandle
+description: |
+  phandle to the device containing the scpsys apu conn1 register range.
+
+  mediatek,apu-vcore:
+$ref: /schemas/types.yaml#/definitions/phandle
+description: |
+  phandle to the device containing the scpsys apu vcore register range.
+
+patternProperties:
+  "^power-domain@[0-9a-f]+$":
+type: object
+description: |
+  Represents the power domains within the power controller node as
+  documented in Documentation/devicetree/bindings/power/power-domain.yaml.
+
+properties:
+  reg:
+description: |
+  Power domain index. Valid values are defined in:
+  "include/dt-bindings/power/mt8182-apu-power.h"
+maxItems: 1
+
+  '#power-domain-cells':
+description: |
+  Must be 0 for nodes representing a single PM domain and 1 for nodes
+  providing multiple PM.
+
+  clocks:
+description: |
+  List of phandles of clocks list. Specify by order according to
+  power-up sequence.
+
+  clock-names:
+description: |
+  List of names of clocks. Specify by order according to power-up
+  sequence.
+
+  assigned-clocks:
+maxItems: 2
+
+  assigned-clock-parents:
+maxItems: 2
+
+  domain-supply:
+description: domain regulator supply.
+
+required:
+  - reg
+  - '#power-domain-cells'
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - '#power-domain-cells'
+  - vsram-supply
+  - mediatek,scpsys
+
+additionalProperties: false
+
+examples:
+  - |
+#include 
+apuspm: power-domain@190f {
+compatible = "mediatek,mt8192-apu-pm", "syscon";
+reg = <0x190f 0x1000>;
+#address-cells = <1>;
+#size-cells = <0>;
+#power-domain-cells = <1>;
+vsram-supply = <_vsram_md_ldo_reg>;
+mediatek,scpsys = <>;
+mediatek,apu-conn = <_conn>;
+mediatek,apu-vcore = <_vcore>;
+
+apu_top: power-domain@0 {
+reg = <0>;
+#power-domain-cells = <0>;
+clocks = < CLK_TOP_DSP_SEL>,
+ < CLK_TOP_IPU_IF_SEL>,
+ <>,
+ < CLK_TOP_UNIVPLL_D6_D2>;
+clock-names = "clk_top_conn",
+  "clk_top_ipu_if",
+  "clk_off",
+  "clk_on_default";
+assigned-clocks = < CLK_TOP_DSP_SEL>,
+  < CLK_TOP_IPU_IF_SEL>;
+assigned-clock-parents = < CLK_TOP_UNIVPLL_D6_D2>,
+ < CLK_TOP_UNIVPLL_D6_D2>;
+domain-supply = <_vproc1_buck_reg>;
+};
+};
-- 
2.18.0

[PATCH v2 3/7] dt-bindings: arm: mediatek: Add new document bindings for APU

2021-04-14 Thread Flora Fu

Document the apusys bindings.

Signed-off-by: Flora Fu 
---
 .../arm/mediatek/mediatek,apusys.yaml | 56 +++
 1 file changed, 56 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/arm/mediatek/mediatek,apusys.yaml

diff --git 
a/Documentation/devicetree/bindings/arm/mediatek/mediatek,apusys.yaml 
b/Documentation/devicetree/bindings/arm/mediatek/mediatek,apusys.yaml
new file mode 100644
index ..d46290548b34
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,apusys.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/arm/mediatek/mediatek,apusys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek APUSYS Controller
+
+maintainers:
+  - Flora Fu 
+
+description:
+  The Mediatek apusys controller provides functional configurations and clocks
+  to the system.
+
+properties:
+  compatible:
+items:
+  - enum:
+  - mediatek,mt8192-apu-mbox
+  - mediatek,mt8192-apu-conn
+  - mediatek,mt8192-apu-vcore
+  - const: syscon
+
+  reg:
+maxItems: 1
+
+  '#clock-cells':
+const: 1
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+apu_mbox: apu_mbox@1900 {
+compatible = "mediatek,mt8192-apu-mbox", "syscon";
+reg = <0x1900 0x1000>;
+};
+
+  - |
+apu_conn: apu_conn@1902 {
+compatible = "mediatek,mt8192-apu-conn", "syscon";
+reg = <0x1902 0x1000>;
+#clock-cells = <1>;
+};
+
+  - |
+apu_vcore: apu_vcore@19029000 {
+compatible = "mediatek,mt8192-apu-vcore", "syscon";
+reg = <0x19029000 0x1000>;
+#clock-cells = <1>;
+};
-- 
2.18.0

[PATCH v2 2/7] clk: mediatek: mt8192: Add APU clocks support

2021-04-14 Thread Flora Fu

Add APU clocks support on MT8192.

Signed-off-by: Flora Fu 
---
 drivers/clk/mediatek/clk-mt8192.c | 91 +++
 1 file changed, 91 insertions(+)

diff --git a/drivers/clk/mediatek/clk-mt8192.c 
b/drivers/clk/mediatek/clk-mt8192.c
index bf6a2084a348..4eb61f006306 100644
--- a/drivers/clk/mediatek/clk-mt8192.c
+++ b/drivers/clk/mediatek/clk-mt8192.c
@@ -244,6 +244,65 @@ static const char * const ccu_parents[] = {
"univpll_d6_d2"
 };
 
+static const char * const dsp_parents[] = {
+   "clk26m",
+   "univpll_d6_d2",
+   "univpll_d4_d2",
+   "univpll_d5",
+   "univpll_d4",
+   "mmpll_d4",
+   "mainpll_d3",
+   "univpll_d3"
+};
+
+static const char * const dsp1_parents[] = {
+   "clk26m",
+   "npupll_ck",
+   "mainpll_d4_d2",
+   "univpll_d5",
+   "univpll_d4",
+   "mainpll_d3",
+   "univpll_d3",
+   "apupll_ck"
+};
+
+static const char * const dsp1_npupll_parents[] = {
+   "dsp1_sel",
+   "npupll_ck"
+};
+
+static const char * const dsp2_parents[] = {
+   "clk26m",
+   "npupll_ck",
+   "mainpll_d4_d2",
+   "univpll_d5",
+   "univpll_d4",
+   "mainpll_d3",
+   "univpll_d3",
+   "apupll_ck"
+};
+
+static const char * const dsp2_npupll_parents[] = {
+   "dsp2_sel",
+   "npupll_ck"
+};
+
+static const char * const dsp5_parents[] = {
+   "clk26m",
+   "apupll_ck",
+   "univpll_d4_d2",
+   "mainpll_d4",
+   "univpll_d4",
+   "mmpll_d4",
+   "mainpll_d3",
+   "univpll_d3"
+};
+
+static const char * const dsp5_apupll_parents[] = {
+   "dsp5_sel",
+   "apupll_ck"
+};
+
 static const char * const dsp7_parents[] = {
"clk26m",
"mainpll_d4_d2",
@@ -255,6 +314,17 @@ static const char * const dsp7_parents[] = {
"mmpll_d4"
 };
 
+static const char * const ipu_if_parents[] = {
+   "clk26m",
+   "univpll_d6_d2",
+   "mainpll_d4_d2",
+   "univpll_d4_d2",
+   "univpll_d5",
+   "mainpll_d4",
+   "tvdpll_ck",
+   "univpll_d4"
+};
+
 static const char * const mfg_ref_parents[] = {
"clk26m",
"clk26m",
@@ -734,9 +804,26 @@ static const struct mtk_mux top_mtk_muxes[] = {
cam_parents, 0x030, 0x034, 0x038, 16, 4, 23, 0x004, 10),
MUX_GATE_CLR_SET_UPD(CLK_TOP_CCU_SEL, "ccu_sel",
ccu_parents, 0x030, 0x034, 0x038, 24, 4, 31, 0x004, 11),
+   /* CLK_CFG_3 */
+   MUX_GATE_CLR_SET_UPD(CLK_TOP_DSP_SEL, "dsp_sel",
+   dsp_parents, 0x040, 0x044, 0x048, 0, 3, 7, 0x004, 12),
+   MUX_GATE_CLR_SET_UPD(CLK_TOP_DSP1_SEL, "dsp1_sel",
+   dsp1_parents, 0x040, 0x044, 0x048, 8, 3, 15, 0x004, 13),
+   MUX_CLR_SET_UPD(CLK_TOP_DSP1_NPUPLL_SEL, "dsp1_npupll_sel",
+   dsp1_npupll_parents, 0x040, 0x044, 0x048, 11, 1, -1, -1),
+   MUX_GATE_CLR_SET_UPD(CLK_TOP_DSP2_SEL, "dsp2_sel",
+   dsp2_parents, 0x040, 0x044, 0x048, 16, 3, 23, 0x004, 14),
+   MUX_CLR_SET_UPD(CLK_TOP_DSP2_NPUPLL_SEL, "dsp2_npupll_sel",
+   dsp2_npupll_parents, 0x040, 0x044, 0x048, 19, 1, -1, -1),
+   MUX_GATE_CLR_SET_UPD(CLK_TOP_DSP5_SEL, "dsp5_sel",
+   dsp5_parents, 0x040, 0x044, 0x048, 24, 3, 31, 0x004, 15),
+   MUX_CLR_SET_UPD(CLK_TOP_DSP5_APUPLL_SEL, "dsp5_apupll_sel",
+   dsp5_apupll_parents, 0x040, 0x044, 0x048, 27, 1, -1, -1),
/* CLK_CFG_4 */
MUX_GATE_CLR_SET_UPD(CLK_TOP_DSP7_SEL, "dsp7_sel",
dsp7_parents, 0x050, 0x054, 0x058, 0, 3, 7, 0x004, 16),
+   MUX_GATE_CLR_SET_UPD(CLK_TOP_IPU_IF_SEL, "ipu_if_sel",
+   ipu_if_parents, 0x050, 0x054, 0x058, 8, 3, 15, 0x004, 17),
MUX_GATE_CLR_SET_UPD(CLK_TOP_MFG_REF_SEL, "mfg_ref_sel",
mfg_ref_parents, 0x050, 0x054, 0x058, 16, 2, 23, 0x004, 18),
MUX_CLR_SET_UPD(CLK_TOP_MFG_PLL_SEL, "mfg_pll_sel",
@@ -1175,6 +1262,10 @@ static const struct mtk_pll_data plls[] = {
0, 0, 32, 0x031c, 24, 0x0040, 0x000c, 0, 0x0320, 0),
PLL_B(CLK_APMIXED_APLL2, "apll2", 0x032c, 0x033c, 0x,
0, 0, 32, 0x0330, 24, 0, 0, 0, 0x0334, 0),
+   PLL_B(CLK_APMIXED_APUPLL, "apupll", 0x03a0, 0x03ac, 0xff01,
+   HAVE_RST_BAR, BIT(23), 22, 0x03a4, 24, 0, 0, 0, 0x03a4, 0),
+   PLL_B(CLK_APMIXED_NPUPLL, "npupll", 0x03b4, 0x03c0, 0x0001,
+   0, 0, 22, 0x03b8, 24, 0, 0, 0, 0x03b8, 0),
 };
 
 static struct clk_onecell_data *top_clk_data;
-- 
2.18.0

[PATCH v2 1/7] dt-bindings: clock: Add MT8192 APU clock bindings

2021-04-14 Thread Flora Fu

Add clock bindings for APU on MT8192.

Signed-off-by: Flora Fu 
Acked-by: Rob Herring 
---
 include/dt-bindings/clock/mt8192-clk.h | 14 --
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/include/dt-bindings/clock/mt8192-clk.h 
b/include/dt-bindings/clock/mt8192-clk.h
index ece5b4c0356c..71e07858f5dc 100644
--- a/include/dt-bindings/clock/mt8192-clk.h
+++ b/include/dt-bindings/clock/mt8192-clk.h
@@ -164,7 +164,15 @@
 #define CLK_TOP_APLL12_DIV9152
 #define CLK_TOP_SSUSB_TOP_REF  153
 #define CLK_TOP_SSUSB_PHY_REF  154
-#define CLK_TOP_NR_CLK 155
+#define CLK_TOP_DSP_SEL155
+#define CLK_TOP_DSP1_SEL   156
+#define CLK_TOP_DSP1_NPUPLL_SEL157
+#define CLK_TOP_DSP2_SEL   158
+#define CLK_TOP_DSP2_NPUPLL_SEL159
+#define CLK_TOP_DSP5_SEL   160
+#define CLK_TOP_DSP5_APUPLL_SEL161
+#define CLK_TOP_IPU_IF_SEL 162
+#define CLK_TOP_NR_CLK 163
 
 /* INFRACFG */
 
@@ -309,7 +317,9 @@
 #define CLK_APMIXED_APLL1  8
 #define CLK_APMIXED_APLL2  9
 #define CLK_APMIXED_MIPID26M   10
-#define CLK_APMIXED_NR_CLK 11
+#define CLK_APMIXED_APUPLL 11
+#define CLK_APMIXED_NPUPLL 12
+#define CLK_APMIXED_NR_CLK 13
 
 /* SCP_ADSP */
 
-- 
2.18.0

[PATCH v2 0/7] Add Support for MediaTek MT8192 APU Power

2021-04-14 Thread Flora Fu

The MediaTek AI Processing Unit (APU) is a proprietary hardware
in the SoC to support AI operations.
The series is to create apusys in the SoC folder for developing
the related drivers. Add the apu clocks, basic apu nodes and the
power domain to provide the power controller of APU subsystem.

This series is based on MT8192 clock[1] and PMIC[2] patches.
[1] https://patchwork.kernel.org/project/linux-mediatek/list/?series=454523
[2] https://patchwork.kernel.org/project/linux-mediatek/list/?series=458733

Change notes:
v1->v2:
  1) update expression "s/_/-/" in dt-bindings documents.
  2) drop apu power domain header file for mt8192.

v1: https://patchwork.kernel.org/project/linux-mediatek/list/?series=461999

Flora Fu (7):
  dt-bindings: clock: Add MT8192 APU clock bindings
  clk: mediatek: mt8192: Add APU clocks support
  dt-bindings: arm: mediatek: Add new document bindings for APU
  dt-bindings: soc: mediatek: apusys: Add new document for APU power
domain
  soc: mediatek: apu: Add apusys and add apu power domain driver
  arm64: dts: mt8192: Add APU node
  arm64: dts: mt8192: Add APU power domain node

 .../arm/mediatek/mediatek,apusys.yaml |  56 ++
 .../soc/mediatek/mediatek,apu-pm.yaml | 145 +
 arch/arm64/boot/dts/mediatek/mt8192-evb.dts   |   7 +
 arch/arm64/boot/dts/mediatek/mt8192.dtsi  |  45 ++
 drivers/clk/mediatek/clk-mt8192.c |  91 +++
 drivers/soc/mediatek/Kconfig  |  10 +
 drivers/soc/mediatek/Makefile |   1 +
 drivers/soc/mediatek/apusys/Makefile  |   2 +
 drivers/soc/mediatek/apusys/mtk-apu-pm.c  | 612 ++
 include/dt-bindings/clock/mt8192-clk.h|  14 +-
 10 files changed, 981 insertions(+), 2 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/arm/mediatek/mediatek,apusys.yaml
 create mode 100644 
Documentation/devicetree/bindings/soc/mediatek/mediatek,apu-pm.yaml
 create mode 100644 drivers/soc/mediatek/apusys/Makefile
 create mode 100644 drivers/soc/mediatek/apusys/mtk-apu-pm.c

-- 
2.18.0

Re: [PATCH 1/3] vDPA/ifcvf: deduce VIRTIO device ID when probe

2021-04-14 Thread Zhu Lingshan





On 4/15/2021 11:30 AM, Jason Wang wrote:


在 2021/4/14 下午5:18, Zhu Lingshan 写道:

This commit deduces VIRTIO device ID as device type when probe,
then ifcvf_vdpa_get_device_id() can simply return the ID.
ifcvf_vdpa_get_features() and ifcvf_vdpa_get_config_size()
can work properly based on the device ID.

Signed-off-by: Zhu Lingshan 
---
  drivers/vdpa/ifcvf/ifcvf_base.h |  1 +
  drivers/vdpa/ifcvf/ifcvf_main.c | 22 ++
  2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h 
b/drivers/vdpa/ifcvf/ifcvf_base.h

index b2eeb16b9c2c..1c04cd256fa7 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -84,6 +84,7 @@ struct ifcvf_hw {
  u32 notify_off_multiplier;
  u64 req_features;
  u64 hw_features;
+    u32 dev_type;
  struct virtio_pci_common_cfg __iomem *common_cfg;
  void __iomem *net_cfg;
  struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2];
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c 
b/drivers/vdpa/ifcvf/ifcvf_main.c

index 44d7586019da..99b0a6b4c227 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -323,19 +323,9 @@ static u32 ifcvf_vdpa_get_generation(struct 
vdpa_device *vdpa_dev)

    static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev)
  {
-    struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
-    struct pci_dev *pdev = adapter->pdev;
-    u32 ret = -ENODEV;
-
-    if (pdev->device < 0x1000 || pdev->device > 0x107f)
-    return ret;
-
-    if (pdev->device < 0x1040)
-    ret =  pdev->subsystem_device;
-    else
-    ret =  pdev->device - 0x1040;
+    struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
  -    return ret;
+    return vf->dev_type;
  }
    static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev)
@@ -466,6 +456,14 @@ static int ifcvf_probe(struct pci_dev *pdev, 
const struct pci_device_id *id)

  pci_set_drvdata(pdev, adapter);
    vf = >vf;
+    if (pdev->device < 0x1000 || pdev->device > 0x107f)
+    return -EOPNOTSUPP;
+
+    if (pdev->device < 0x1040)
+    vf->dev_type =  pdev->subsystem_device;
+    else
+    vf->dev_type =  pdev->device - 0x1040;



So a question here, is the device a transitional device or modern one?

If it's a transitional one, can it switch endianness automatically or not?

Thanks

Hi Jason,

This driver should drive both modern and transitional devices as we 
discussed before.
If it's a transitional one, it will act as a modern device by default, 
legacy mode is a fail-over path.
For vDPA, it has to support VIRTIO_1 and ACCESS_PLATFORM, so it must be in
modern mode.

I think we don't need to worry about endianness for legacy mode.

Thanks
Zhu Lingshan




+
  vf->base = pcim_iomap_table(pdev);
    adapter->pdev = pdev;

Re: [PATCH v2] staging: media: atomisp: pci: Format comments according to coding-style in file atomisp_cmd.c

2021-04-14 Thread Dan Carpenter

On Wed, Apr 14, 2021 at 05:42:44PM -0300, Aline Santana Cordeiro wrote:
> @@ -90,18 +92,14 @@ struct camera_mipi_info 
> *atomisp_to_sensor_mipi_info(struct v4l2_subdev *sd)
>   return (struct camera_mipi_info *)v4l2_get_subdev_hostdata(sd);
>  }
>  
> -/*
> - * get struct atomisp_video_pipe from v4l2 video_device
> - */
> +/* get struct atomisp_video_pipe from v4l2 video_device */

This code is obvious and the comment doesn't add anything except noise.
Just delete it.  Same for a lot of the other one line comments
describing functions in this patch.


>  struct atomisp_video_pipe *atomisp_to_video_pipe(struct video_device *dev)
>  {
>   return (struct atomisp_video_pipe *)
>  container_of(dev, struct atomisp_video_pipe, vdev);
>  }
>  
> -/*
> - * get struct atomisp_acc_pipe from v4l2 video_device
> - */
> +/* get struct atomisp_acc_pipe from v4l2 video_device */
>  struct atomisp_acc_pipe *atomisp_to_acc_pipe(struct video_device *dev)
>  {
>   return (struct atomisp_acc_pipe *)
> @@ -269,7 +267,7 @@ int atomisp_freq_scaling(struct atomisp_device *isp,
>   ATOMISP_RUN_MODE_CONTINUOUS_CAPTURE;
>   }
>  
> - /* search for the target frequency by looping freq rules*/
> + /* search for the target frequency by looping freq rules */
>   for (i = 0; i < dfs->dfs_table_size; i++) {
>   if (curr_rules.width != dfs->dfs_table[i].width &&
>   dfs->dfs_table[i].width != ISP_FREQ_RULE_ANY)
> @@ -307,9 +305,7 @@ int atomisp_freq_scaling(struct atomisp_device *isp,
>   return ret;
>  }
>  
> -/*
> - * reset and restore ISP
> - */
> +/* reset and restore ISP */

Obvious

>  int atomisp_reset(struct atomisp_device *isp)
>  {
>   /* Reset ISP by power-cycling it */
> @@ -338,9 +334,7 @@ int atomisp_reset(struct atomisp_device *isp)
>   return ret;
>  }
>  
> -/*
> - * interrupt disable functions
> - */
> +/* interrupt disable functions */

Obvious

>  static void disable_isp_irq(enum hrt_isp_css_irq irq)
>  {
>   irq_disable_channel(IRQ0_ID, irq);
> @@ -351,9 +345,7 @@ static void disable_isp_irq(enum hrt_isp_css_irq irq)
>   cnd_sp_irq_enable(SP0_ID, false);
>  }
>  
> -/*
> - * interrupt clean function
> - */
> +/* interrupt clean function */

Obvious

>  static void clear_isp_irq(enum hrt_isp_css_irq irq)
>  {
>   irq_clear_all(IRQ0_ID);

[ snip ]

> @@ -1918,10 +1914,7 @@ irqreturn_t atomisp_isr_thread(int irq, void *isp_ptr)
>   return IRQ_HANDLED;
>  }
>  
> -/*
> - * utils for buffer allocation/free
> - */
> -
> +/* utils for buffer allocation/free */

What?  This one seems actively wrong.

>  int atomisp_get_frame_pgnr(struct atomisp_device *isp,
>  const struct ia_css_frame *frame, u32 *p_pgnr)
>  {

etc.

regards,
dan carpenter

Re: [PATCH 2/2] ptrace: is_syscall_success: Add syscall return code handling for compat task

2021-04-14 Thread Oleg Nesterov

On 04/15, He Zhe wrote:
>
>
> On 4/15/21 12:55 AM, Oleg Nesterov wrote:
> > I think in_compat_syscall() should be used instead.
> >
> > But this doesn't matter, I still can't understand the problem.
>
> Sorry for not enough clarification.
>
> This was found on an arm64 kernel running with 32-bit user-space application.

OK, but then I think you should add the arm64 version of is_syscall_success()
into arch/arm64/include/asm/ptrace.h and do not touch the generic version ?

Something like arch/arm64/include/asm/syscall.h:syscall_get_error() which uses
is_compat_thread(). Perhaps you can even do

#define is_syscall_success(regs)\
(syscall_get_error(current, regs) == 0)

Oleg.

[PATCH v2 0/7] Add Support for MediaTek MT8192 APU Power

2021-04-14 Thread Flora Fu

The MediaTek AI Processing Unit (APU) is a proprietary hardware
in the SoC to support AI operations.
The series is to create apusys in the SoC folder for developing
the related drivers. Add the apu clocks, basic apu nodes and the
power domain to provide the power controller of APU subsystem.

This series is based on MT8192 clock[1] and PMIC[2] patches.
[1] https://patchwork.kernel.org/project/linux-mediatek/list/?series=454523
[2] https://patchwork.kernel.org/project/linux-mediatek/list/?series=458733

Change notes:
v1->v2:
  1) update expression "s/_/-/" in dt-bindings documents.
  2) drop apu power domain header file for mt8192.

v1: https://patchwork.kernel.org/project/linux-mediatek/list/?series=461999

Flora Fu (7):
  dt-bindings: clock: Add MT8192 APU clock bindings
  clk: mediatek: mt8192: Add APU clocks support
  dt-bindings: arm: mediatek: Add new document bindings for APU
  dt-bindings: soc: mediatek: apusys: Add new document for APU power
domain
  soc: mediatek: apu: Add apusys and add apu power domain driver
  arm64: dts: mt8192: Add APU node
  arm64: dts: mt8192: Add APU power domain node

 .../arm/mediatek/mediatek,apusys.yaml |  56 ++
 .../soc/mediatek/mediatek,apu-pm.yaml | 145 +
 arch/arm64/boot/dts/mediatek/mt8192-evb.dts   |   7 +
 arch/arm64/boot/dts/mediatek/mt8192.dtsi  |  45 ++
 drivers/clk/mediatek/clk-mt8192.c |  91 +++
 drivers/soc/mediatek/Kconfig  |  10 +
 drivers/soc/mediatek/Makefile |   1 +
 drivers/soc/mediatek/apusys/Makefile  |   2 +
 drivers/soc/mediatek/apusys/mtk-apu-pm.c  | 612 ++
 include/dt-bindings/clock/mt8192-clk.h|  14 +-
 10 files changed, 981 insertions(+), 2 deletions(-)
 create mode 100644 
Documentation/devicetree/bindings/arm/mediatek/mediatek,apusys.yaml
 create mode 100644 
Documentation/devicetree/bindings/soc/mediatek/mediatek,apu-pm.yaml
 create mode 100644 drivers/soc/mediatek/apusys/Makefile
 create mode 100644 drivers/soc/mediatek/apusys/mtk-apu-pm.c

-- 
2.18.0

Re: Candidate Linux ABI for Intel AMX and hypothetical new related features

2021-04-14 Thread Borislav Petkov

On Thu, Apr 15, 2021 at 07:29:38AM +0200, Willy Tarreau wrote:
> What Len is saying is that not being interested in a feature is not an
> argument for rejecting its adoption,

Oh, I'm not rejecting its adoption - no, don't mean that.

> which I'm perfectly fine with. But conversely not being interested in
> a feature is also an argument for insisting that its adoption doesn't
> harm other use cases (generally speaking, not this specific case
> here).

Pretty much.

What I'd like to see is 0-overhead for current use cases and only
overhead for those who want to use it. If that can't be done
automagically, then users should request it explicitly. So basically you
blow up the xsave buffer only for processes which want to do AMX.

And this brings the question about libraries which, if they start using
AMX by default - which doesn't sound like they will want to because AMX
reportedly will have only a limited? set of users - if libraries start
using it by default, then it better be worth the handling of the 8kb
buffer per process.

If not, this should also be requestable per process so that a simple
pipe in Linux:

 | grep | awk | sed ...

and so on is not penalized to allocate and handle by default 8kb for
*each* process' buffer in that pipe just because each is linking against
glibc which has detected AMX support in CPUID and is using it too for
some weird reason like some microbenchmark saying so.

All AFAIU, ofc.

But my initial question was on the "establishing" part and was asking
where we have established anything wrt AMX.

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

Re: [PATCH v2 0/2] Fix binfmt_flat loader for RISC-V

2021-04-14 Thread Palmer Dabbelt


On Wed, 14 Apr 2021 17:32:10 PDT (-0700), Damien Le Moal wrote:

On 2021/04/08 0:49, Damien Le Moal wrote:
RISC-V NOMMU flat binaries cannot tolerate a gap between the text and
data section as the toolchain fully resolves at compile time the PC
relative global pointer (__global_pointer$ value loaded in gp register).
Without a relocation entry provided, the flat bin loader cannot fix the
value if a gap is introduced and executables fail to run.

This series fixes this problem by allowing an architecture to request
the flat loader to suppress the gap between the text and data sections.
The first patch fixes binfmt_flat flat_load_file() using the new
configuration option CONFIG_BINFMT_FLAT_NO_TEXT_DATA_GAP. The second
patch enables this option for RISCV NOMMU builds.

These patches do not change the binfmt_flat loader behavior for other
architectures.

Changes from v1:
* Replace FLAT_TEXT_DATA_NO_GAP macro with
  CONFIG_BINFMT_FLAT_NO_TEXT_DATA_GAP config option (patch 1).
* Remove the addition of riscv/include/asm/flat.h and set
  CONFIG_BINFMT_FLAT_NO_TEXT_DATA_GAP for RISCV and !MMU

Damien Le Moal (2):
  binfmt_flat: allow not offsetting data start
  riscv: Disable text-data gap in flat binaries

 arch/riscv/Kconfig |  1 +
 fs/Kconfig.binfmt  |  3 +++
 fs/binfmt_flat.c   | 21 +++--
 3 files changed, 19 insertions(+), 6 deletions(-)



Ping ?

Any comment on these patches ?

Without them, RISC-V NOMMU user space does not run... I would really like to get
these in this cycle if possible.


This LGTM, but it's pretty far out of my area of expertise.  I'm happy 
to take them via my tree, but I'd prefer to get an Ack from someone.


Al, get_maintainer suggests you?

Acked-by: Palmer Dabbelt

[PATCH] greybus: es2: fix kernel-doc warnings

2021-04-14 Thread Randy Dunlap

Fix these kernel-doc complaints:

../drivers/greybus/es2.c:79: warning: bad line: 
../drivers/greybus/es2.c:100: warning: cannot understand function prototype: 
'struct es2_ap_dev '
es2.c:126: warning: Function parameter or member 'cdsi1_in_use' not described 
in 'es2_ap_dev'

Signed-off-by: Randy Dunlap 
Cc: Johan Hovold 
Cc: Alex Elder 
Cc: Greg Kroah-Hartman 
Cc: greybus-...@lists.linaro.org (moderated for non-subscribers)
---
 drivers/greybus/es2.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

--- linux-next-20210414.orig/drivers/greybus/es2.c
+++ linux-next-20210414/drivers/greybus/es2.c
@@ -72,11 +72,11 @@ struct es2_cport_in {
 };
 
 /**
- * es2_ap_dev - ES2 USB Bridge to AP structure
+ * struct es2_ap_dev - ES2 USB Bridge to AP structure
  * @usb_dev: pointer to the USB device we are.
  * @usb_intf: pointer to the USB interface we are bound to.
  * @hd: pointer to our gb_host_device structure
-
+ *
  * @cport_in: endpoint, urbs and buffer for cport in messages
  * @cport_out_endpoint: endpoint for for cport out messages
  * @cport_out_urb: array of urbs for the CPort out messages
@@ -85,7 +85,7 @@ struct es2_cport_in {
  * @cport_out_urb_cancelled: array of flags indicating whether the
  * corresponding @cport_out_urb is being cancelled
  * @cport_out_urb_lock: locks the @cport_out_urb_busy "list"
- *
+ * @cdsi1_in_use: true if cport CDSI1 is in use
  * @apb_log_task: task pointer for logging thread
  * @apb_log_dentry: file system entry for the log file interface
  * @apb_log_enable_dentry: file system entry for enabling logging

[PATCH v6] docs/zh_CN: add translations in zh_CN/dev-tools/gcov

2021-04-14 Thread Wu XiangCheng

From: Bernard Zhao 

Add new zh translations
* zh_CN/dev-tools/gcov.rst
* zh_CN/dev-tools/index.rst
and link them to zh_CN/index.rst

Signed-off-by: Bernard Zhao 
Reviewed-by: Wu XiangCheng 
Reviewed-by: Alex Shi 
Reviewed-by: Fangrui Song 
Signed-off-by: Wu XiangCheng 
---
base: linux-next
commit 269dd42f4776 ("docs/zh_CN: add riscv to zh_CN index")

Changes since V5:
* modified some words under Fangrui Song's advices
* rewrite beginning paragraph since Bernard and Fangrui's advice

Changes since V4:
* modified some words under Alex Shi's advices

Changes since V3:
* update to newest linux-next
* fix ``
* fix tags
* fix list indent

Changes since V2:
* fix some inaccurate translation

Changes since V1:
* add index.rst in dev-tools and link to zh_CN/index.rst
* fix some inaccurate translation

Thanks!

 .../translations/zh_CN/dev-tools/gcov.rst | 265 ++
 .../translations/zh_CN/dev-tools/index.rst|  35 +++
 Documentation/translations/zh_CN/index.rst|   1 +
 3 files changed, 301 insertions(+)
 create mode 100644 Documentation/translations/zh_CN/dev-tools/gcov.rst
 create mode 100644 Documentation/translations/zh_CN/dev-tools/index.rst

diff --git a/Documentation/translations/zh_CN/dev-tools/gcov.rst 
b/Documentation/translations/zh_CN/dev-tools/gcov.rst
new file mode 100644
index ..7515b488bc4e
--- /dev/null
+++ b/Documentation/translations/zh_CN/dev-tools/gcov.rst
@@ -0,0 +1,264 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/dev-tools/gcov.rst
+:Translator: 赵军奎 Bernard Zhao 
+
+在Linux内核里使用gcov做代码覆盖率检查
+=
+
+gcov分析核心支持在Linux内核中启用GCC的覆盖率测试工具 gcov_ ，Linux内核
+运行时的代码覆盖率数据会以gcov兼容的格式导出到“gcov”debugfs目录中，可
+以通过gcov的 ``-o`` 选项（如下示例）获得指定文件的代码运行覆盖率统计数据
+（需要跳转到内核编译路径下并且要有root权限）::
+
+# cd /tmp/linux-out
+# gcov -o /sys/kernel/debug/gcov/tmp/linux-out/kernel spinlock.c
+
+这将在当前目录中创建带有执行计数注释的源代码文件。
+在获得这些统计文件后，可以使用图形化的gcov前端工具（比如 lcov_ ），来实现
+自动化处理Linux内核的覆盖率运行数据，同时生成易于阅读的HTML格式文件。
+
+可能的用途:
+
+* 调试（用来判断每一行的代码是否已经运行过）
+* 测试改进（如何修改测试代码，尽可能地覆盖到没有运行过的代码）
+* 内核最小化配置（对于某一个选项配置，如果关联的代码从来没有运行过，
+  是否还需要这个配置）
+
+.. _gcov: https://gcc.gnu.org/onlinedocs/gcc/Gcov.html
+.. _lcov: http://ltp.sourceforge.net/coverage/lcov.php
+
+
+准备
+
+
+内核打开如下配置::
+
+CONFIG_DEBUG_FS=y
+CONFIG_GCOV_KERNEL=y
+
+获取整个内核的覆盖率数据，还需要打开::
+
+CONFIG_GCOV_PROFILE_ALL=y
+
+需要注意的是，整个内核开启覆盖率统计会造成内核镜像文件尺寸的增大，
+同时内核运行也会变慢一些。
+另外，并不是所有的架构都支持整个内核开启覆盖率统计。
+
+代码运行覆盖率数据只在debugfs挂载完成后才可以访问::
+
+mount -t debugfs none /sys/kernel/debug
+
+
+定制化
+--
+
+如果要单独针对某一个路径或者文件进行代码覆盖率统计，可以在内核相应路
+径的Makefile中增加如下的配置:
+
+- 单独统计单个文件（例如main.o）::
+
+GCOV_PROFILE_main.o := y
+
+- 单独统计某一个路径::
+
+GCOV_PROFILE := y
+
+如果要在整个内核的覆盖率统计（开启CONFIG_GCOV_PROFILE_ALL）中单独排除
+某一个文件或者路径，可以使用如下的方法::
+
+GCOV_PROFILE_main.o := n
+
+和::
+
+GCOV_PROFILE := n
+
+此机制仅支持链接到内核镜像或编译为内核模块的文件。
+
+
+相关文件
+
+
+gcov功能需要在debugfs中创建如下文件:
+
+``/sys/kernel/debug/gcov``
+gcov相关功能的根路径
+
+``/sys/kernel/debug/gcov/reset``
+全局复位文件:向该文件写入数据后会将所有的gcov统计数据清0
+
+``/sys/kernel/debug/gcov/path/to/compile/dir/file.gcda``
+gcov工具可以识别的覆盖率统计数据文件，向该文件写入数据后
+ 会将本文件的gcov统计数据清0
+
+``/sys/kernel/debug/gcov/path/to/compile/dir/file.gcno``
+gcov工具需要的软连接文件（指向编译时生成的信息统计文件），这个文件是
+在gcc编译时如果配置了选项 ``-ftest-coverage`` 时生成的。
+
+
+针对模块的统计
+--
+
+内核中的模块会动态的加载和卸载，模块卸载时对应的数据会被清除掉。
+gcov提供了一种机制，通过保留相关数据的副本来收集这部分卸载模块的覆盖率数据。
+模块卸载后这些备份数据在debugfs中会继续存在。
+一旦这个模块重新加载，模块关联的运行统计会被初始化成debugfs中备份的数据。
+
+可以通过对内核参数gcov_persist的修改来停用gcov对模块的备份机制::
+
+gcov_persist = 0
+
+在运行时，用户还可以通过写入模块的数据文件或者写入gcov复位文件来丢弃已卸
+载模块的数据。
+
+
+编译机和测试机分离
+--
+
+gcov的内核分析插桩支持内核的编译和运行是在同一台机器上，也可以编译和运
+行是在不同的机器上。
+如果内核编译和运行是不同的机器，那么需要额外的准备工作，这取决于gcov工具
+是在哪里使用的:
+
+.. _gcov-test_zh:
+
+a) 若gcov运行在测试机上
+
+测试机上面gcov工具的版本必须要跟内核编译机器使用的gcc版本相兼容，
+同时下面的文件要从编译机拷贝到测试机上:
+
+从源代码中:
+  - 所有的C文件和头文件
+
+从编译目录中:
+  - 所有的C文件和头文件
+  - 所有的.gcda文件和.gcno文件
+  - 所有目录的链接
+
+特别需要注意，测试机器上面的目录结构跟编译机器上面的目录机构必须
+完全一致。
+如果文件是软链接，需要替换成真正的目录文件（这是由make的当前工作
+目录变量CURDIR引起的）。
+
+.. _gcov-build_zh:
+
+b) 若gcov运行在编译机上
+
+测试用例运行结束后，如下的文件需要从测试机中拷贝到编译机上:
+
+从sysfs中的gcov目录中:
+  - 所有的.gcda文件
+  - 所有的.gcno文件软链接
+
+这些文件可以拷贝到编译机的任意目录下，gcov使用-o选项指定拷贝的
+目录。
+
+比如一个是示例的目录结构如下::
+
+  /tmp/linux:内核源码目录
+  /tmp/out:  内核编译文件路径（make O=指定）
+  /tmp/coverage: 从测试机器上面拷贝的数据文件路径
+
+  [user@build] cd /tmp/out
+  [user@build] gcov -o /tmp/coverage/tmp/out/init main.c
+
+
+关于编译器的注意事项
+
+
+GCC和LLVM gcov工具不一定兼容。
+如果编译器是GCC，使用 gcov_ 来处理.gcno和.gcda文件，如果是Clang编译器，
+则使用 llvm-cov_ 。
+
+.. _gcov: https://gcc.gnu.org/onlinedocs/gcc/Gcov.html
+.. _llvm-cov: https://llvm.org/docs/CommandGuide/llvm-cov.html
+
+GCC和Clang gcov之间的版本差异由Kconfig处理的。
+kconfig会根据编译工具链的检查自动选择合适的gcov格式。
+
+问题定位
+
+
+可能出现的问题1
+编译到链接阶段报错终止
+
+问题原因
+分析标志指定在了源文件但是没有链接到主内核，或者客制化了链接程序
+
+解决方法
+通过在相应的Makefile中使用

Re: [PATCH 2/2] iommu/sva: Remove mm parameter from SVA bind API

2021-04-14 Thread Lu Baolu


Hi Jason,

On 4/14/21 7:26 PM, Jason Gunthorpe wrote:

On Wed, Apr 14, 2021 at 02:22:09PM +0800, Lu Baolu wrote:


I still worry about supervisor pasid allocation.

If we use iommu_sva_alloc_pasid() to allocate a supervisor pasid, which
mm should the pasid be set? I've ever thought about passing &init_mm to
iommu_sva_alloc_pasid(). But if you add "mm != current->mm", this seems
not to work. Or do you prefer a separated interface for supervisor pasid
allocation/free?


Without a mm_struct it is not SVA, so don't use SVA APIs for whatever
a 'supervisor pasid' is


The supervisor PASID has its mm_struct. The only difference is that the
device will set priv=1 in its DMA transactions with the PASID.

Best regards,
baolu

[PATCH 15/15] usb: dwc2: Get rid of useless error checks in suspend interrupt

2021-04-14 Thread Artur Petrosyan

Squashed from Douglas Anderson's suggested commit
"usb: dwc2: Get rid of useless error checks for
hibernation/partial power down"

 - After this commit there should never be any
case where dwc2_enter_partial_power_down() and
dwc2_enter_hibernation() are called when
'params.power_down' is not correct.  Get rid of
the pile of error checking.

- As part of this cleanup, change some of the error messages
not to have __func__ in them.  That's not needed
for dev_err() calls since they already have the
device name as part of the message.

Signed-off-by: Artur Petrosyan 
Signed-off-by: Douglas Anderson 
---
 drivers/usb/dwc2/core.c  |  3 ---
 drivers/usb/dwc2/core_intr.c | 18 +++---
 2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/drivers/usb/dwc2/core.c b/drivers/usb/dwc2/core.c
index 576c262dba55..6f70ab9577b4 100644
--- a/drivers/usb/dwc2/core.c
+++ b/drivers/usb/dwc2/core.c
@@ -391,9 +391,6 @@ static bool dwc2_iddig_filter_enabled(struct dwc2_hsotg 
*hsotg)
  */
 int dwc2_enter_hibernation(struct dwc2_hsotg *hsotg, int is_host)
 {
-   if (hsotg->params.power_down != DWC2_POWER_DOWN_PARAM_HIBERNATION)
-   return -ENOTSUPP;
-
if (is_host)
return dwc2_host_enter_hibernation(hsotg);
else
diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c
index 470458ac664b..a5ab03808da6 100644
--- a/drivers/usb/dwc2/core_intr.c
+++ b/drivers/usb/dwc2/core_intr.c
@@ -535,13 +535,10 @@ static void dwc2_handle_usb_suspend_intr(struct 
dwc2_hsotg *hsotg)
switch (hsotg->params.power_down) {
case DWC2_POWER_DOWN_PARAM_PARTIAL:
ret = dwc2_enter_partial_power_down(hsotg);
-   if (ret) {
-   if (ret != -ENOTSUPP)
-   dev_err(hsotg->dev,
-   "%s: enter 
partial_power_down failed\n",
-   __func__);
-   goto skip_power_saving;
-   }
+   if (ret)
+   dev_err(hsotg->dev,
+   "enter partial_power_down 
failed\n");
+
udelay(100);
 
/* Ask phy to be suspended */
@@ -550,10 +547,9 @@ static void dwc2_handle_usb_suspend_intr(struct dwc2_hsotg 
*hsotg)
break;
case DWC2_POWER_DOWN_PARAM_HIBERNATION:
ret = dwc2_enter_hibernation(hsotg, 0);
-   if (ret && ret != -ENOTSUPP)
+   if (ret)
dev_err(hsotg->dev,
-   "%s: enter hibernation 
failed\n",
-   __func__);
+   "enter hibernation failed\n");
break;
case DWC2_POWER_DOWN_PARAM_NONE:
/*
@@ -562,7 +558,7 @@ static void dwc2_handle_usb_suspend_intr(struct dwc2_hsotg 
*hsotg)
 */
dwc2_gadget_enter_clock_gating(hsotg);
}
-skip_power_saving:
+
/*
 * Change to L2 (suspend) state before releasing
 * spinlock
-- 
2.25.1

[PATCH 14/15] usb: dwc2: Update dwc2_handle_usb_suspend_intr function.

2021-04-14 Thread Artur Petrosyan

To avoid working in two modes (partial power down
and hibernation) changed conditions for entering
partial power down or hibernation.

Instead of checking hw_params.power_optimized and
hw_params.hibernation, now check the power_down
param, which is already set to one of the options
(Hibernation or Partial Power Down) based on
OTG_EN_PWROPT.

Signed-off-by: Artur Petrosyan 
Signed-off-by: Minas Harutyunyan 
Acked-by: Minas Harutyunyan 
---
 drivers/usb/dwc2/core_intr.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c
index f8963c0cf6af..470458ac664b 100644
--- a/drivers/usb/dwc2/core_intr.c
+++ b/drivers/usb/dwc2/core_intr.c
@@ -532,7 +532,8 @@ static void dwc2_handle_usb_suspend_intr(struct dwc2_hsotg 
*hsotg)
return;
}
if (dsts & DSTS_SUSPSTS) {
-   if (hsotg->hw_params.power_optimized) {
+   switch (hsotg->params.power_down) {
+   case DWC2_POWER_DOWN_PARAM_PARTIAL:
ret = dwc2_enter_partial_power_down(hsotg);
if (ret) {
if (ret != -ENOTSUPP)
@@ -541,21 +542,22 @@ static void dwc2_handle_usb_suspend_intr(struct 
dwc2_hsotg *hsotg)
__func__);
goto skip_power_saving;
}
-
udelay(100);
 
/* Ask phy to be suspended */
if (!IS_ERR_OR_NULL(hsotg->uphy))
usb_phy_set_suspend(hsotg->uphy, true);
-   } else if (hsotg->hw_params.hibernation) {
+   break;
+   case DWC2_POWER_DOWN_PARAM_HIBERNATION:
ret = dwc2_enter_hibernation(hsotg, 0);
if (ret && ret != -ENOTSUPP)
dev_err(hsotg->dev,
"%s: enter hibernation 
failed\n",
__func__);
-   } else {
+   break;
+   case DWC2_POWER_DOWN_PARAM_NONE:
/*
-* If not hibernation nor partial power down 
are supported,
+* If neither hibernation nor partial power 
down are supported,
 * clock gating is used to save power.
 */
dwc2_gadget_enter_clock_gating(hsotg);
-- 
2.25.1

[PATCH 13/15] usb: dwc2: Add exit hibernation mode before removing drive

2021-04-14 Thread Artur Petrosyan

When the dwc2 core is in hibernation mode, loading
the driver again causes the driver to fail, because
in that mode registers are not accessible.

In order to exit from hibernation checking
dwc2 core power saving state in "dwc2_driver_remove()"
function. If core is in hibernation, then checking the
operational mode of the driver. To check whether dwc2 core
is operating in host mode or device mode there is one way
which is retrieving the backup value of "gotgctl" and compare
the "CurMod" value. If previously core entered hibernation
in host mode then the exit is performed for host if not then
exit is performed for device mode. The introduced checking
is because in hibernation state all registers are not
accessible.

Signed-off-by: Artur Petrosyan 
---
 drivers/usb/dwc2/platform.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index f8b819cfa80e..2ae4748ed5ec 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -316,8 +316,24 @@ static int dwc2_lowlevel_hw_init(struct dwc2_hsotg *hsotg)
 static int dwc2_driver_remove(struct platform_device *dev)
 {
struct dwc2_hsotg *hsotg = platform_get_drvdata(dev);
+   struct dwc2_gregs_backup *gr;
int ret = 0;
 
+   /* Exit Hibernation when driver is removed. */
+   if (hsotg->hibernated) {
+   if (gr->gotgctl & GOTGCTL_CURMODE_HOST) {
+   ret = dwc2_exit_hibernation(hsotg, 0, 0, 1);
+   if (ret)
+   dev_err(hsotg->dev,
+   "exit hibernation failed.\n");
+   } else {
+   ret = dwc2_exit_hibernation(hsotg, 0, 0, 0);
+   if (ret)
+   dev_err(hsotg->dev,
+   "exit hibernation failed.\n");
+   }
+   }
+
/* Exit Partial Power Down when driver is removed. */
if (hsotg->in_ppd) {
ret = dwc2_exit_partial_power_down(hsotg, 0, true);
-- 
2.25.1

[PATCH 12/15] usb: dwc2: Add hibernation exiting flow by system resume

2021-04-14 Thread Artur Petrosyan

Adds a new flow of exiting hibernation when PC is resumed
from suspend state.

Signed-off-by: Artur Petrosyan 
---
 drivers/usb/dwc2/hcd.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index db8eb1940d17..c92307775863 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -4470,6 +4470,16 @@ static int _dwc2_hcd_resume(struct usb_hcd *hcd)
set_bit(HCD_FLAG_HW_ACCESSIBLE, >flags);
break;
case DWC2_POWER_DOWN_PARAM_HIBERNATION:
+   ret = dwc2_exit_hibernation(hsotg, 0, 0, 1);
+   if (ret)
+   dev_err(hsotg->dev, "exit hibernation failed.\n");
+
+   /*
+* Set HW accessible bit before powering on the controller
+* since an interrupt may rise.
+*/
+   set_bit(HCD_FLAG_HW_ACCESSIBLE, >flags);
+   break;
case DWC2_POWER_DOWN_PARAM_NONE:
/*
 * If not hibernation nor partial power down are supported,
-- 
2.25.1

[PATCH 11/15] usb: dwc2: Add hibernation entering flow by system suspend

2021-04-14 Thread Artur Petrosyan

Adds a new flow of entering hibernation when PC is
hibernated or suspended.

Signed-off-by: Artur Petrosyan 
---
 drivers/usb/dwc2/hcd.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 3b03b2d73aaa..db8eb1940d17 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -4387,6 +4387,16 @@ static int _dwc2_hcd_suspend(struct usb_hcd *hcd)
clear_bit(HCD_FLAG_HW_ACCESSIBLE, >flags);
break;
case DWC2_POWER_DOWN_PARAM_HIBERNATION:
+   /* Enter hibernation */
+   spin_unlock_irqrestore(>lock, flags);
+   ret = dwc2_enter_hibernation(hsotg, 1);
+   if (ret)
+   dev_err(hsotg->dev, "enter hibernation failed\n");
+   spin_lock_irqsave(>lock, flags);
+
+   /* After entering suspend, hardware is not accessible */
+   clear_bit(HCD_FLAG_HW_ACCESSIBLE, >flags);
+   break;
case DWC2_POWER_DOWN_PARAM_NONE:
/*
 * If not hibernation nor partial power down are supported,
-- 
2.25.1

[PATCH v3] Documentation: dev-tools: Add Testing Overview

2021-04-14 Thread David Gow

The kernel now has a number of testing and debugging tools, and we've
seen a bit of confusion about what the differences between them are.

Add a basic documentation outlining the testing tools, when to use each,
and how they interact.

This is a pretty quick overview rather than the idealised "kernel
testing guide" that'd probably be optimal, but given the number of times
questions like "When do you use KUnit and when do you use Kselftest?"
are being asked, it seemed worth at least having something. Hopefully
this can form the basis for more detailed documentation later.

Signed-off-by: David Gow 
Reviewed-by: Marco Elver 
Reviewed-by: Daniel Latypov 
---

Thanks again. Assuming no-one has any objections, I think this is good
to go.

-- David

Changes since v2:
https://lore.kernel.org/linux-kselftest/20210414081428.337494-1-david...@google.com/
- A few typo fixes (Thanks Daniel)
- Reworded description of dynamic analysis tools.
- Updated dev-tools index page to not use ':doc:' syntax, but to provide
  a path instead.
- Added Marco and Daniel's Reviewed-by tags.

Changes since v1:
https://lore.kernel.org/linux-kselftest/20210410070529.4113432-1-david...@google.com/
- Note KUnit's speed and that one should provide selftests for syscalls
- Mention lockdep as a Dynamic Analysis Tool
- Refer to "Dynamic Analysis Tools" instead of "Sanitizers"
- A number of minor formatting tweaks and rewordings for clarity

 Documentation/dev-tools/index.rst|   4 +
 Documentation/dev-tools/testing-overview.rst | 117 +++
 2 files changed, 121 insertions(+)
 create mode 100644 Documentation/dev-tools/testing-overview.rst

diff --git a/Documentation/dev-tools/index.rst 
b/Documentation/dev-tools/index.rst
index 1b1cf4f5c9d9..929d916ffd4c 100644
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@@ -7,6 +7,9 @@ be used to work on the kernel. For now, the documents have been 
pulled
 together without any significant effort to integrate them into a coherent
 whole; patches welcome!
 
+A brief overview of testing-specific tools can be found in
+Documentation/dev-tools/testing-overview.rst
+
 .. class:: toc-title
 
   Table of contents
@@ -14,6 +17,7 @@ whole; patches welcome!
 .. toctree::
:maxdepth: 2
 
+   testing-overview
coccinelle
sparse
kcov
diff --git a/Documentation/dev-tools/testing-overview.rst 
b/Documentation/dev-tools/testing-overview.rst
new file mode 100644
index 000000000000..b5b46709969c
--- /dev/null
+++ b/Documentation/dev-tools/testing-overview.rst
@@ -0,0 +1,117 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Kernel Testing Guide
+====================
+
+
+There are a number of different tools for testing the Linux kernel, so knowing
+when to use each of them can be a challenge. This document provides a rough
+overview of their differences, and how they fit together.
+
+
+Writing and Running Tests
+=========================
+
+The bulk of kernel tests are written using either the kselftest or KUnit
+frameworks. These both provide infrastructure to help make running tests and
+groups of tests easier, as well as providing helpers to aid in writing new
+tests.
+
+If you're looking to verify the behaviour of the Kernel — particularly specific
+parts of the kernel — then you'll want to use KUnit or kselftest.
+
+
+The Difference Between KUnit and kselftest
+------------------------------------------
+
+KUnit (Documentation/dev-tools/kunit/index.rst) is an entirely in-kernel system
+for "white box" testing: because test code is part of the kernel, it can access
+internal structures and functions which aren't exposed to userspace.
+
+KUnit tests therefore are best written against small, self-contained parts
+of the kernel, which can be tested in isolation. This aligns well with the
+concept of 'unit' testing.
+
+For example, a KUnit test might test an individual kernel function (or even a
+single codepath through a function, such as an error handling case), rather
+than a feature as a whole.
+
+This also makes KUnit tests very fast to build and run, allowing them to be
+run frequently as part of the development process.
+
+There is a KUnit test style guide which may give further pointers in
+Documentation/dev-tools/kunit/style.rst
+
+
+kselftest (Documentation/dev-tools/kselftest.rst), on the other hand, is
+largely implemented in userspace, and tests are normal userspace scripts or
+programs.
+
+This makes it easier to write more complicated tests, or tests which need to
+manipulate the overall system state more (e.g., spawning processes, etc.).
+However, it's not possible to call kernel functions directly from kselftest.
+This means that only kernel functionality which is exposed to userspace somehow
+(e.g. by a syscall, device, filesystem, etc.) can be tested with kselftest.  To
+work around this, some tests include a companion kernel module which exposes
+more information or functionality. If a test runs mostly or

[PATCH 10/15] usb: dwc2: Allow exit hibernation in urb enqueue

2021-04-14 Thread Artur Petrosyan

When the core is in hibernation state and an external
hub is connected, the upper layer sends a URB enqueue request,
which results in a port reset issue.

- Added exit from hibernation state to avoid the port
reset issue and process the upper layer request properly.

Signed-off-by: Artur Petrosyan 
---
 drivers/usb/dwc2/hcd.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index cc9ad6cf02d9..3b03b2d73aaa 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -4631,12 +4631,29 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, 
struct urb *urb,
struct dwc2_qh *qh;
bool qh_allocated = false;
struct dwc2_qtd *qtd;
+   struct dwc2_gregs_backup *gr;
+
+   gr = &hsotg->gr_backup;
 
if (dbg_urb(urb)) {
dev_vdbg(hsotg->dev, "DWC OTG HCD URB Enqueue\n");
dwc2_dump_urb_info(hcd, urb, "urb_enqueue");
}
 
+   if (hsotg->hibernated) {
+   if (gr->gotgctl & GOTGCTL_CURMODE_HOST) {
+   retval = dwc2_exit_hibernation(hsotg, 0, 0, 1);
+   if (retval)
+   dev_err(hsotg->dev,
+   "exit hibernation failed.\n");
+   } else {
+   retval = dwc2_exit_hibernation(hsotg, 0, 0, 0);
+   if (retval)
+   dev_err(hsotg->dev,
+   "exit hibernation failed.\n");
+   }
+   }
+
if (hsotg->in_ppd) {
retval = dwc2_exit_partial_power_down(hsotg, 0, true);
if (retval)
-- 
2.25.1

[PATCH 09/15] usb: dwc2: Move exit hibernation to dwc2_port_resume() function

2021-04-14 Thread Artur Petrosyan

This move is done to call hibernation exit handler in
"dwc2_port_resume()" function when core receives port resume.
Otherwise it could be confusing to exit hibernation in
"dwc2_hcd_hub_control()" function but other power saving modes
in "dwc2_port_resume()" function.

Signed-off-by: Artur Petrosyan 
---
 drivers/usb/dwc2/hcd.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 43a2298b7d42..cc9ad6cf02d9 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -3383,6 +3383,11 @@ int dwc2_port_resume(struct dwc2_hsotg *hsotg)
"exit partial_power_down failed.\n");
break;
case DWC2_POWER_DOWN_PARAM_HIBERNATION:
+   /* Exit host hibernation. */
+   ret = dwc2_exit_hibernation(hsotg, 0, 0, 1);
+   if (ret)
+   dev_err(hsotg->dev, "exit hibernation failed.\n");
+   break;
case DWC2_POWER_DOWN_PARAM_NONE:
/*
 * If not hibernation nor partial power down are supported,
@@ -3446,12 +3451,8 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg 
*hsotg, u16 typereq,
dev_dbg(hsotg->dev,
"ClearPortFeature USB_PORT_FEAT_SUSPEND\n");
 
-   if (hsotg->bus_suspended) {
-   if (hsotg->hibernated)
-   dwc2_exit_hibernation(hsotg, 0, 0, 1);
-   else
-   dwc2_port_resume(hsotg);
-   }
+   if (hsotg->bus_suspended)
+   retval = dwc2_port_resume(hsotg);
break;
 
case USB_PORT_FEAT_POWER:
-- 
2.25.1

[PATCH 08/15] usb: dwc2: Move enter hibernation to dwc2_port_suspend() function

2021-04-14 Thread Artur Petrosyan

This move is done to call enter hibernation handler in
"dwc2_port_suspend()" function when core receives port suspend.
Otherwise it could be confusing to enter to hibernation in
"dwc2_hcd_hub_control()" function but other power saving modes
in "dwc2_port_suspend()" function.

Signed-off-by: Artur Petrosyan 
---
 drivers/usb/dwc2/hcd.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index ff945c40ef8a..43a2298b7d42 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -3321,6 +3321,18 @@ int dwc2_port_suspend(struct dwc2_hsotg *hsotg, u16 
windex)
"enter partial_power_down failed.\n");
break;
case DWC2_POWER_DOWN_PARAM_HIBERNATION:
+   /*
+* Perform spin unlock and lock because in
+* "dwc2_host_enter_hibernation()" function there is a spinlock
+* logic which prevents servicing of any IRQ during entering
+* hibernation.
+*/
+   spin_unlock_irqrestore(&hsotg->lock, flags);
+   ret = dwc2_enter_hibernation(hsotg, 1);
+   if (ret)
+   dev_err(hsotg->dev, "enter hibernation failed.\n");
+   spin_lock_irqsave(&hsotg->lock, flags);
+   break;
case DWC2_POWER_DOWN_PARAM_NONE:
/*
 * If not hibernation nor partial power down are supported,
@@ -3650,10 +3662,8 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg 
*hsotg, u16 typereq,
"SetPortFeature - USB_PORT_FEAT_SUSPEND\n");
if (windex != hsotg->otg_port)
goto error;
-   if (hsotg->params.power_down == 
DWC2_POWER_DOWN_PARAM_HIBERNATION)
-   dwc2_enter_hibernation(hsotg, 1);
-   else
-   dwc2_port_suspend(hsotg, windex);
+   if (!hsotg->bus_suspended)
+   retval = dwc2_port_suspend(hsotg, windex);
break;
 
case USB_PORT_FEAT_POWER:
-- 
2.25.1

Re: 答复: [PATCH v5] docs/zh_CN: add translations in zh_CN/dev-tools/gcov

2021-04-14 Thread Wu X.C.

On Thu, Apr 15, 2021 at 04:01:55AM +, Zengtao (B) wrote:
> > -邮件原件-
> > 发件人: Wu XiangCheng [mailto:bob...@email.cn]
> > 发送时间: 2021年4月14日 21:21
> > 收件人: Alex Shi ; Bernard Zhao 
> > 抄送: Jonathan Corbet ; YanTeng Si
> > ; Nathan Chancellor ; Nick
> > Desaulniers ; linux-...@vger.kernel.org;
> > linux-kernel@vger.kernel.org; clang-built-li...@googlegroups.com
> > 主题: [PATCH v5] docs/zh_CN: add translations in zh_CN/dev-tools/gcov
> > 
> > From: Bernard Zhao 
> > 
> > Add new zh translations
> > * zh_CN/dev-tools/gcov.rst
> > * zh_CN/dev-tools/index.rst
> > and link them to zh_CN/index.rst
> > 
> > Signed-off-by: Bernard Zhao 
> > Reviewed-by: Wu XiangCheng 
> > Signed-off-by: Wu XiangCheng 
> > ---
> > base: linux-next
> > commit 269dd42f4776 ("docs/zh_CN: add riscv to zh_CN index")
> > 
> > Changes since V4:
> > * modified some words under Alex Shi's advices
> > 
> > Changes since V3:
> > * update to newest linux-next
> > * fix ``
> > * fix tags
> > * fix list indent
> > 
> > Changes since V2:
> > * fix some inaccurate translation
> > 
> > Changes since V1:
> > * add index.rst in dev-tools and link to to zh_CN/index.rst
> > * fix some inaccurate translation
> > 
> >  .../translations/zh_CN/dev-tools/gcov.rst | 265 ++
> >  .../translations/zh_CN/dev-tools/index.rst|  35 +++
> >  Documentation/translations/zh_CN/index.rst|   1 +
> >  3 files changed, 301 insertions(+)
> >  create mode 100644 Documentation/translations/zh_CN/dev-tools/gcov.rst
> >  create mode 100644 Documentation/translations/zh_CN/dev-tools/index.rst
> > 
> > diff --git a/Documentation/translations/zh_CN/dev-tools/gcov.rst
> > b/Documentation/translations/zh_CN/dev-tools/gcov.rst
> > new file mode 100644
> > index ..7515b488bc4e
> > --- /dev/null
> > +++ b/Documentation/translations/zh_CN/dev-tools/gcov.rst
> > @@ -0,0 +1,265 @@
> > +.. include:: ../disclaimer-zh_CN.rst
> > +
> > +:Original: Documentation/dev-tools/gcov.rst
> > +:Translator: 赵军奎 Bernard Zhao 
> > +
> > +在Linux内核里使用gcov做代码覆盖率检查
> > +=
> > +
> > +gcov是linux中已经集成的一个分析模块，该模块在内核中对GCC的代码
> > 覆盖率统
> 
> Gcov is a tool/function, misleading for " gcov是linux中已经集成的一个分析
> 模块"
> 
> I 'd suggest:
> "Linux内核中已经集成一个特性支持gcov功能，该特性让用户可以使用gcov
>  工具对内核代码覆盖率进行统计"
> 
> Thanks.

Have rewritten it.

Thanks
Wu

[PATCH 07/15] usb: dwc2: Clear GINTSTS_RESTOREDONE bit after restore is generated.

2021-04-14 Thread Artur Petrosyan

When hibernation exit is performed the dwc2_hib_restore_common()
function is called. In that function we wait until GINTSTS_RESTOREDONE
bit is set. However, after the setting of that bit we get a lot of
(dwc2_hsotg_irq:) interrupts which indicates that (GINTSTS.RstrDoneInt)
restore done interrupt is asserted.

To avoid restore done interrupt storm after restore is generated
clear GINTSTS_RESTOREDONE bit in GINTSTS register.

Signed-off-by: Artur Petrosyan 
Acked-by: Minas Harutyunyan 
---
 drivers/usb/dwc2/core.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/usb/dwc2/core.c b/drivers/usb/dwc2/core.c
index eccd96fa164e..576c262dba55 100644
--- a/drivers/usb/dwc2/core.c
+++ b/drivers/usb/dwc2/core.c
@@ -299,6 +299,12 @@ void dwc2_hib_restore_common(struct dwc2_hsotg *hsotg, int 
rem_wakeup,
__func__);
} else {
dev_dbg(hsotg->dev, "restore done  generated here\n");
+
+   /*
+* To avoid restore done interrupt storm after restore is
+* generated clear GINTSTS_RESTOREDONE bit.
+*/
+   dwc2_writel(hsotg, GINTSTS_RESTOREDONE, GINTSTS);
}
 }
 
-- 
2.25.1

[PATCH 06/15] usb: dwc2: Clear fifo_map when resetting core.

2021-04-14 Thread Artur Petrosyan

When switching from device mode to host mode by disconnecting
the device cable, the core enters and exits from hibernation.
However, the fifo map is not cleared. This results
in a WARNING (WARNING: CPU: 5 PID: 0 at drivers/usb/dwc2/
gadget.c:307 dwc2_hsotg_init_fifo+0x12/0x152 [dwc2])
if in host mode we disconnect the micro a to b host
cable, because a core reset occurs.

To avoid the WARNING, fifo_map should be cleared
in dwc2_core_reset() function by taking into account configs.
fifo_map must be cleared only if driver is configured in
"CONFIG_USB_DWC2_PERIPHERAL" or "CONFIG_USB_DWC2_DUAL_ROLE"
mode.

- Added "static inline void dwc2_clear_fifo_map()" helper
function to clear fifo_map with peripheral or dual role mode.

- Added a dummy version of "dwc2_clear_fifo_map()" helper
for host-only mode.

Signed-off-by: Artur Petrosyan 
Acked-by: Minas Harutyunyan 
---
 drivers/usb/dwc2/core.c | 16 
 drivers/usb/dwc2/core.h |  3 +++
 2 files changed, 19 insertions(+)

diff --git a/drivers/usb/dwc2/core.c b/drivers/usb/dwc2/core.c
index cb65f7f60573..eccd96fa164e 100644
--- a/drivers/usb/dwc2/core.c
+++ b/drivers/usb/dwc2/core.c
@@ -470,6 +470,22 @@ int dwc2_core_reset(struct dwc2_hsotg *hsotg, bool 
skip_wait)
dwc2_writel(hsotg, greset, GRSTCTL);
}
 
+   /*
+* Switching from device mode to host mode by disconnecting
+* device cable core enters and exits form hibernation.
+* However, the fifo map remains not cleared. It results
+* to a WARNING (WARNING: CPU: 5 PID: 0 at drivers/usb/dwc2/
+* gadget.c:307 dwc2_hsotg_init_fifo+0x12/0x152 [dwc2])
+* if in host mode we disconnect the micro a to b host
+* cable. Because core reset occurs.
+* To avoid the WARNING, fifo_map should be cleared
+* in dwc2_core_reset() function by taking into account configs.
+* fifo_map must be cleared only if driver is configured in
+* "CONFIG_USB_DWC2_PERIPHERAL" or "CONFIG_USB_DWC2_DUAL_ROLE"
+* mode.
+*/
+   dwc2_clear_fifo_map(hsotg);
+
/* Wait for AHB master IDLE state */
if (dwc2_hsotg_wait_bit_set(hsotg, GRSTCTL, GRSTCTL_AHBIDLE, 1)) {
dev_warn(hsotg->dev, "%s: HANG! AHB Idle timeout GRSTCTL 
GRSTCTL_AHBIDLE\n",
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index 8c12b3061f7f..e1f432095565 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -1423,6 +1423,8 @@ int dwc2_hsotg_tx_fifo_total_depth(struct dwc2_hsotg 
*hsotg);
 int dwc2_hsotg_tx_fifo_average_depth(struct dwc2_hsotg *hsotg);
 void dwc2_gadget_init_lpm(struct dwc2_hsotg *hsotg);
 void dwc2_gadget_program_ref_clk(struct dwc2_hsotg *hsotg);
+static inline void dwc2_clear_fifo_map(struct dwc2_hsotg *hsotg)
+{ hsotg->fifo_map = 0; }
 #else
 static inline int dwc2_hsotg_remove(struct dwc2_hsotg *dwc2)
 { return 0; }
@@ -1467,6 +1469,7 @@ static inline int dwc2_hsotg_tx_fifo_average_depth(struct 
dwc2_hsotg *hsotg)
 { return 0; }
 static inline void dwc2_gadget_init_lpm(struct dwc2_hsotg *hsotg) {}
 static inline void dwc2_gadget_program_ref_clk(struct dwc2_hsotg *hsotg) {}
+static inline void dwc2_clear_fifo_map(struct dwc2_hsotg *hsotg) {}
 #endif
 
 #if IS_ENABLED(CONFIG_USB_DWC2_HOST) || IS_ENABLED(CONFIG_USB_DWC2_DUAL_ROLE)
-- 
2.25.1

[PATCH 05/15] usb: dwc2: Allow exiting hibernation from gpwrdn rst detect

2021-04-14 Thread Artur Petrosyan

When device cable is disconnected core receives suspend
interrupt and enters hibernation. After entering
into hibernation GPWRDN_RST_DET and GPWRDN_STS_CHGINT
interrupts are asserted.

Allowed exit from gadget hibernation from
GPWRDN_RST_DET by checking only linestate.

Changed the return type of "dwc2_handle_gpwrdn_intr()"
function from void to int because exit from hibernation
functions have a return value.

Signed-off-by: Artur Petrosyan 
Acked-by: Minas Harutyunyan 
---
 drivers/usb/dwc2/core_intr.c | 23 ++-
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c
index 27d729fad227..f8963c0cf6af 100644
--- a/drivers/usb/dwc2/core_intr.c
+++ b/drivers/usb/dwc2/core_intr.c
@@ -751,10 +751,11 @@ static inline void dwc_handle_gpwrdn_disc_det(struct 
dwc2_hsotg *hsotg,
  * The GPWRDN interrupts are those that occur in both Host and
  * Device mode while core is in hibernated state.
  */
-static void dwc2_handle_gpwrdn_intr(struct dwc2_hsotg *hsotg)
+static int dwc2_handle_gpwrdn_intr(struct dwc2_hsotg *hsotg)
 {
u32 gpwrdn;
int linestate;
+   int ret = 0;
 
gpwrdn = dwc2_readl(hsotg, GPWRDN);
/* clear all interrupt */
@@ -778,17 +779,27 @@ static void dwc2_handle_gpwrdn_intr(struct dwc2_hsotg 
*hsotg)
if (hsotg->hw_params.hibernation &&
hsotg->hibernated) {
if (gpwrdn & GPWRDN_IDSTS) {
-   dwc2_exit_hibernation(hsotg, 0, 0, 0);
+   ret = dwc2_exit_hibernation(hsotg, 0, 0, 0);
+   if (ret)
+   dev_err(hsotg->dev,
+   "exit hibernation failed.\n");
call_gadget(hsotg, resume);
} else {
-   dwc2_exit_hibernation(hsotg, 1, 0, 1);
+   ret = dwc2_exit_hibernation(hsotg, 1, 0, 1);
+   if (ret)
+   dev_err(hsotg->dev,
+   "exit hibernation failed.\n");
}
}
} else if ((gpwrdn & GPWRDN_RST_DET) &&
   (gpwrdn & GPWRDN_RST_DET_MSK)) {
dev_dbg(hsotg->dev, "%s: GPWRDN_RST_DET\n", __func__);
-   if (!linestate && (gpwrdn & GPWRDN_BSESSVLD))
-   dwc2_exit_hibernation(hsotg, 0, 1, 0);
+   if (!linestate) {
+   ret = dwc2_exit_hibernation(hsotg, 0, 1, 0);
+   if (ret)
+   dev_err(hsotg->dev,
+   "exit hibernation failed.\n");
+   }
} else if ((gpwrdn & GPWRDN_STS_CHGINT) &&
   (gpwrdn & GPWRDN_STS_CHGINT_MSK)) {
dev_dbg(hsotg->dev, "%s: GPWRDN_STS_CHGINT\n", __func__);
@@ -800,6 +811,8 @@ static void dwc2_handle_gpwrdn_intr(struct dwc2_hsotg 
*hsotg)
 */
dwc_handle_gpwrdn_disc_det(hsotg, gpwrdn);
}
+
+   return ret;
 }
 
 /*
-- 
2.25.1

[PATCH 04/15] usb: dwc2: Fix hibernation between host and device modes.

2021-04-14 Thread Artur Petrosyan

When the core was in hibernation in host mode and a device cable
was connected, the driver exited from device hibernation.
However, the registers had been saved for host mode, so on exit from
device hibernation the register restore was done for device
registers, which was wrong because no device registers had been
stored to restore.

- Added dwc_handle_gpwrdn_disc_det() function which handles
  gpwrdn disconnect detect flow and exits hibernation
  without restoring the registers.
- Updated exiting from hibernation in GPWRDN_STS_CHGINT with
  calling dwc_handle_gpwrdn_disc_det() function. Here no register
  is restored which is the solution described above.

Fixes: 65c9c4c6b01f ("usb: dwc2: Add dwc2_handle_gpwrdn_intr() handler")
Signed-off-by: Artur Petrosyan 
Signed-off-by: Minas Harutyunyan 
Acked-by: Minas Harutyunyan 
---
 drivers/usb/dwc2/core_intr.c | 154 +++
 1 file changed, 83 insertions(+), 71 deletions(-)

diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c
index 550c52c1a0c7..27d729fad227 100644
--- a/drivers/usb/dwc2/core_intr.c
+++ b/drivers/usb/dwc2/core_intr.c
@@ -680,6 +680,71 @@ static u32 dwc2_read_common_intr(struct dwc2_hsotg *hsotg)
return 0;
 }
 
+/**
+ * dwc_handle_gpwrdn_disc_det() - Handles the gpwrdn disconnect detect.
+ * Exits hibernation without restoring registers.
+ *
+ * @hsotg: Programming view of DWC_otg controller
+ * @gpwrdn: GPWRDN register
+ */
+static inline void dwc_handle_gpwrdn_disc_det(struct dwc2_hsotg *hsotg,
+ u32 gpwrdn)
+{
+   u32 gpwrdn_tmp;
+
+   /* Switch-on voltage to the core */
+   gpwrdn_tmp = dwc2_readl(hsotg, GPWRDN);
+   gpwrdn_tmp &= ~GPWRDN_PWRDNSWTCH;
+   dwc2_writel(hsotg, gpwrdn_tmp, GPWRDN);
+   udelay(5);
+
+   /* Reset core */
+   gpwrdn_tmp = dwc2_readl(hsotg, GPWRDN);
+   gpwrdn_tmp &= ~GPWRDN_PWRDNRSTN;
+   dwc2_writel(hsotg, gpwrdn_tmp, GPWRDN);
+   udelay(5);
+
+   /* Disable Power Down Clamp */
+   gpwrdn_tmp = dwc2_readl(hsotg, GPWRDN);
+   gpwrdn_tmp &= ~GPWRDN_PWRDNCLMP;
+   dwc2_writel(hsotg, gpwrdn_tmp, GPWRDN);
+   udelay(5);
+
+   /* Deassert reset core */
+   gpwrdn_tmp = dwc2_readl(hsotg, GPWRDN);
+   gpwrdn_tmp |= GPWRDN_PWRDNRSTN;
+   dwc2_writel(hsotg, gpwrdn_tmp, GPWRDN);
+   udelay(5);
+
+   /* Disable PMU interrupt */
+   gpwrdn_tmp = dwc2_readl(hsotg, GPWRDN);
+   gpwrdn_tmp &= ~GPWRDN_PMUINTSEL;
+   dwc2_writel(hsotg, gpwrdn_tmp, GPWRDN);
+
+   /* De-assert Wakeup Logic */
+   gpwrdn_tmp = dwc2_readl(hsotg, GPWRDN);
+   gpwrdn_tmp &= ~GPWRDN_PMUACTV;
+   dwc2_writel(hsotg, gpwrdn_tmp, GPWRDN);
+
+   hsotg->hibernated = 0;
+   hsotg->bus_suspended = 0;
+
+   if (gpwrdn & GPWRDN_IDSTS) {
+   hsotg->op_state = OTG_STATE_B_PERIPHERAL;
+   dwc2_core_init(hsotg, false);
+   dwc2_enable_global_interrupts(hsotg);
+   dwc2_hsotg_core_init_disconnected(hsotg, false);
+   dwc2_hsotg_core_connect(hsotg);
+   } else {
+   hsotg->op_state = OTG_STATE_A_HOST;
+
+   /* Initialize the Core for Host mode */
+   dwc2_core_init(hsotg, false);
+   dwc2_enable_global_interrupts(hsotg);
+   dwc2_hcd_start(hsotg);
+   }
+}
+
 /*
  * GPWRDN interrupt handler.
  *
@@ -701,64 +766,14 @@ static void dwc2_handle_gpwrdn_intr(struct dwc2_hsotg 
*hsotg)
 
if ((gpwrdn & GPWRDN_DISCONN_DET) &&
(gpwrdn & GPWRDN_DISCONN_DET_MSK) && !linestate) {
-   u32 gpwrdn_tmp;
-
dev_dbg(hsotg->dev, "%s: GPWRDN_DISCONN_DET\n", __func__);
-
-   /* Switch-on voltage to the core */
-   gpwrdn_tmp = dwc2_readl(hsotg, GPWRDN);
-   gpwrdn_tmp &= ~GPWRDN_PWRDNSWTCH;
-   dwc2_writel(hsotg, gpwrdn_tmp, GPWRDN);
-   udelay(10);
-
-   /* Reset core */
-   gpwrdn_tmp = dwc2_readl(hsotg, GPWRDN);
-   gpwrdn_tmp &= ~GPWRDN_PWRDNRSTN;
-   dwc2_writel(hsotg, gpwrdn_tmp, GPWRDN);
-   udelay(10);
-
-   /* Disable Power Down Clamp */
-   gpwrdn_tmp = dwc2_readl(hsotg, GPWRDN);
-   gpwrdn_tmp &= ~GPWRDN_PWRDNCLMP;
-   dwc2_writel(hsotg, gpwrdn_tmp, GPWRDN);
-   udelay(10);
-
-   /* Deassert reset core */
-   gpwrdn_tmp = dwc2_readl(hsotg, GPWRDN);
-   gpwrdn_tmp |= GPWRDN_PWRDNRSTN;
-   dwc2_writel(hsotg, gpwrdn_tmp, GPWRDN);
-   udelay(10);
-
-   /* Disable PMU interrupt */
-   gpwrdn_tmp = dwc2_readl(hsotg, GPWRDN);
-   gpwrdn_tmp &= ~GPWRDN_PMUINTSEL;
-   dwc2_writel(hsotg, gpwrdn_tmp, GPWRDN);
-
-   /* De-assert Wakeup Logic */
-   gpwrdn_tmp = dwc2_readl(hsotg, GPWRDN);
-

[PATCH 03/15] usb: dwc2: Fix host mode hibernation exit with remote wakeup flow.

2021-04-14 Thread Artur Petrosyan

Added setting "port_connect_status_change" flag to "1" in order
to re-enumerate, because after exit from hibernation port
connection status is not detected.

Fixes: c5c403dc4336 ("usb: dwc2: Add host/device hibernation functions")
Signed-off-by: Artur Petrosyan 
---
 drivers/usb/dwc2/hcd.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index cda3f931195d..ff945c40ef8a 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -5650,7 +5650,15 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, 
int rem_wakeup,
return ret;
}
 
-   dwc2_hcd_rem_wakeup(hsotg);
+   if (rem_wakeup) {
+   dwc2_hcd_rem_wakeup(hsotg);
+   /*
+* Change "port_connect_status_change" flag to re-enumerate,
+* because after exit from hibernation port connection status
+* is not detected.
+*/
+   hsotg->flags.b.port_connect_status_change = 1;
+   }
 
hsotg->hibernated = 0;
hsotg->bus_suspended = 0;
-- 
2.25.1

[PATCH 01/15] usb: dwc2: Update exit hibernation when port reset is asserted

2021-04-14 Thread Artur Petrosyan

No need to check for "DWC2_POWER_DOWN_PARAM_HIBERNATION" param
as "hsotg->hibernated" flag is already enough for exiting from
hibernation mode.

- Removes checking of "DWC2_POWER_DOWN_PARAM_HIBERNATION" param.

- For code readability Hibernation exit code moved after
debug message print.

- Added "dwc2_exit_hibernation()" function error checking.

Signed-off-by: Artur Petrosyan 
---
 drivers/usb/dwc2/hcd.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index 04a1b53d65af..cda3f931195d 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -3668,9 +3668,17 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg 
*hsotg, u16 typereq,
break;
 
case USB_PORT_FEAT_RESET:
-   if (hsotg->params.power_down == 
DWC2_POWER_DOWN_PARAM_HIBERNATION &&
-   hsotg->hibernated)
-   dwc2_exit_hibernation(hsotg, 0, 1, 1);
+   dev_dbg(hsotg->dev,
+   "SetPortFeature - USB_PORT_FEAT_RESET\n");
+
+   hprt0 = dwc2_read_hprt0(hsotg);
+
+   if (hsotg->hibernated) {
+   retval = dwc2_exit_hibernation(hsotg, 0, 1, 1);
+   if (retval)
+   dev_err(hsotg->dev,
+   "exit hibernation failed\n");
+   }
 
if (hsotg->in_ppd) {
retval = dwc2_exit_partial_power_down(hsotg, 1,
@@ -3684,9 +3692,6 @@ static int dwc2_hcd_hub_control(struct dwc2_hsotg *hsotg, 
u16 typereq,
DWC2_POWER_DOWN_PARAM_NONE && hsotg->bus_suspended)
dwc2_host_exit_clock_gating(hsotg, 0);
 
-   hprt0 = dwc2_read_hprt0(hsotg);
-   dev_dbg(hsotg->dev,
-   "SetPortFeature - USB_PORT_FEAT_RESET\n");
pcgctl = dwc2_readl(hsotg, PCGCTL);
pcgctl &= ~(PCGCTL_ENBL_SLEEP_GATING | PCGCTL_STOPPCLK);
dwc2_writel(hsotg, pcgctl, PCGCTL);
-- 
2.25.1

[PATCH 02/15] usb: dwc2: Reset DEVADDR after exiting gadget hibernation.

2021-04-14 Thread Artur Petrosyan

Initially resetting device address was done in dwc2_hsotg_irq()
interrupt handler. However, when core is hibernated USB RESET
is not handled in dwc2_hsotg_irq() handler, instead USB RESET
interrupt is handled in dwc2_handle_gpwrdn_intr() handler.

- Added reset device address to zero when core exits from gadget
  hibernation.

Signed-off-by: Artur Petrosyan 
Signed-off-by: Minas Harutyunyan 
Acked-by: Minas Harutyunyan 
---
 drivers/usb/dwc2/gadget.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 2f50f3e62caa..e6bb1bdb2760 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -5305,6 +5305,10 @@ int dwc2_gadget_exit_hibernation(struct dwc2_hsotg 
*hsotg,
dwc2_writel(hsotg, dr->dcfg, DCFG);
dwc2_writel(hsotg, dr->dctl, DCTL);
 
+   /* On USB Reset, reset device address to zero */
+   if (reset)
+   dwc2_clear_bit(hsotg, DCFG, DCFG_DEVADDR_MASK);
+
/* De-assert Wakeup Logic */
gpwrdn = dwc2_readl(hsotg, GPWRDN);
gpwrdn &= ~GPWRDN_PMUACTV;
-- 
2.25.1

Re: [PATCH v5] docs/zh_CN: add translations in zh_CN/dev-tools/gcov

2021-04-14 Thread Wu X.C.

On Wed, Apr 14, 2021 at 10:07:55PM -0700, Fangrui Song wrote:
> Reviewed-by: Fangrui Song 
> 
> Inlined some suggestions.

Thanks for your review!

> 
> On 2021-04-14, Alex Shi wrote:
> > Reviewed-by: Alex Shi 
> > 
> > On 2021/4/14 下午9:21, Wu XiangCheng wrote:
> > > From: Bernard Zhao 
> > > 
> > > Add new zh translations
> > > * zh_CN/dev-tools/gcov.rst
> > > * zh_CN/dev-tools/index.rst
> > > and link them to zh_CN/index.rst
> > > 
> > > Signed-off-by: Bernard Zhao 
> > > Reviewed-by: Wu XiangCheng 
> > > Signed-off-by: Wu XiangCheng 
> > > ---
> > > +
[...]
> > > +在Linux内核里使用gcov做代码覆盖率检查
> > > +=
> > > +
> > > +gcov是linux中已经集成的一个分析模块，该模块在内核中对GCC的代码覆盖率统
> 
> instrumentation 一般译作 插桩，而非 分析。

Have rewritten this paragraph.
Here is "gcov profiling kernel"，gcov分析核心

> 
> > > +计提供了支持。
> > > +linux内核运行时的代码覆盖率数据会以gcov兼容的格式存储在debug-fs中，可
> 
> 专有名词 Linux 应大写。

Done.

> 
> > > +以通过gcov的 ``-o`` 选项（如下示例）获得指定文件的代码运行覆盖率统计数据
> > > +（需要跳转到内核编译路径下并且要有root权限）::
> > > +
> > > +# cd /tmp/linux-out
> > > +# gcov -o /sys/kernel/debug/gcov/tmp/linux-out/kernel spinlock.c
> > > +
> > > +这将在当前目录中创建带有执行计数注释的源代码文件。
> > > +在获得这些统计文件后，可以使用图形化的 gcov_ 前端工具（比如 lcov_ ），来实现
> > > +自动化处理linux内核的覆盖率运行数据，同时生成易于阅读的HTML格式文件。
> > > +
> > > +可能的用途:
> > > +
> > > +* 调试（用来判断每一行的代码是否已经运行过）
> > > +* 测试改进（如何修改测试代码，尽可能地覆盖到没有运行过的代码）
> > > +* 内核配置优化（对于某一个选项配置，如果关联的代码从来没有运行过，是
> > > +  否还需要这个配置）
> 
> minimizing: 优化 -> 最小化/简化

Done.

> 
> > > +.. _gcov: https://gcc.gnu.org/onlinedocs/gcc/Gcov.html
> > > +.. _lcov: http://ltp.sourceforge.net/coverage/lcov.php
> > > +
> > > +
> > > +准备
> > > +
> > > +
> > > +内核打开如下配置::
> > > +
> > > +CONFIG_DEBUG_FS=y
> > > +CONFIG_GCOV_KERNEL=y
> > > +
> > > +获取整个内核的覆盖率数据，还需要打开::
> > > +
> > > +CONFIG_GCOV_PROFILE_ALL=y
> > > +
> > > +需要注意的是，整个内核开启覆盖率统计会造成内核镜像文件尺寸的增大，
> > > +同时内核运行的也会变慢一些。
> 
> s/的//

Done.

> 
> > > +另外，并不是所有的架构都支持整个内核开启覆盖率统计。
> > > +
> > > +代码运行覆盖率数据只在debugfs挂载完成后才可以访问::
> > > +
[...]
> > > +
> > > +编译机和测试机分离
> > > +--
> > > +
> > > +gcov的内核分析架构支持内核的编译和运行是在同一台机器上，也可以编译和运
> 
> 分析 -> 插桩

Done.

> 
> > > +行是在不同的机器上。
[...]

Thanks!

Wu


signature.asc
Description: PGP signature

Re: [RFC] Improve workload error in 'perf record'

2021-04-14 Thread Ian Rogers

On Wed, Apr 14, 2021 at 6:16 AM Arnaldo Carvalho de Melo
 wrote:
>
> Hi,
>
> Please take a look,
>
> Best regards,

Acked-by: Ian Rogers 

Having been confused by this for a case in the past, thanks! It'd be
nice for code coverage's sake to have a shell test on this.

Thanks,
Ian

> - Arnaldo
>
> Arnaldo Carvalho de Melo (2):
>   perf evlist: Add a method to return the list of evsels as a string
>   perf record: Improve 'Workload failed' message printing events + what
> was exec'ed
>
>  tools/perf/builtin-record.c |  8 ++--
>  tools/perf/util/evlist.c| 19 +++
>  tools/perf/util/evlist.h|  2 ++
>  3 files changed, 27 insertions(+), 2 deletions(-)
>
> --
> 2.26.2
>

Re: Candidate Linux ABI for Intel AMX and hypothetical new related features

2021-04-14 Thread Willy Tarreau

On Thu, Apr 15, 2021 at 06:43:43AM +0200, Borislav Petkov wrote:
> On Wed, Apr 14, 2021 at 05:57:22PM -0400, Len Brown wrote:
> > I'm pretty sure that the "it isn't my use case of interest, so it
> > doesn't matter" line of reasoning has long been established as -EINVAL
> > ;-)
> 
> I have only a very faint idea what you're trying to say here. Please
> explain properly and more verbosely what exactly has been established
> where?

What Len is saying is that not being interested in a feature is not an
argument for rejecting its adoption, which I'm perfectly fine with. But
conversely not being interested in a feature is also an argument for
insisting that its adoption doesn't harm other use cases (generally
speaking, not this specific case here).

Willy

[git pull] Input updates for v5.12-rc7

2021-04-14 Thread Dmitry Torokhov

Hi Linus,

Please pull from:

git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input.git for-linus

to receive updates for the input subsystem. Just a few driver fixes
here.

Changelog:
-

Arnd Bergmann (1):
  Input: i8042 - fix Pegatron C15B ID entry

Caleb Connolly (1):
  Input: s6sy761 - fix coordinate read bit shift

Dmitry Osipenko (2):
  Input: elants_i2c - fix division by zero if firmware reports zero phys 
size
  Input: elants_i2c - drop zero-checking of ABS_MT_TOUCH_MAJOR resolution

Fabian Vogt (1):
  Input: nspire-keypad - enable interrupts only when opened

Wei Yongjun (1):
  Input: n64joy - fix return value check in n64joy_probe()

Diffstat:


 drivers/input/joystick/n64joy.c|  4 +--
 drivers/input/keyboard/nspire-keypad.c | 56 +++---
 drivers/input/serio/i8042-x86ia64io.h  |  1 +
 drivers/input/touchscreen/elants_i2c.c |  5 ++-
 drivers/input/touchscreen/s6sy761.c|  4 +--
 5 files changed, 38 insertions(+), 32 deletions(-)

Thanks.


-- 
Dmitry

Re: Re: [PATCH] [v2] spi: spi-zynqmp-gqspi: Fix runtime PM imbalance in zynqmp_qspi_probe

2021-04-14 Thread dinghao . liu

> Hi Dinghao,
> On Mon, Apr 12, 2021 at 03:31:54PM +0800, Dinghao Liu wrote:
> > There is a PM usage counter decrement after zynqmp_qspi_init_hw()
> > without any refcount increment, which leads to refcount leak. Add
> > a refcount increment to balance the refcount. Also set
> > auto_runtime_pm to resume suspended spi controller.
> > 
> > Signed-off-by: Dinghao Liu 
> > ---
> > changelog:
> > 
> > v2: - Add a refcount increment to fix refcount leak instead of the
> >   refcount decrement on error.
> >   Set ctlr->auto_runtime_pm = true.
> > ---
> >  drivers/spi/spi-zynqmp-gqspi.c | 2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c
> > index c8fa6ee18ae7..8b21c7b0e7eb 100644
> > --- a/drivers/spi/spi-zynqmp-gqspi.c
> > +++ b/drivers/spi/spi-zynqmp-gqspi.c
> > @@ -1160,6 +1160,7 @@ static int zynqmp_qspi_probe(struct platform_device 
> > *pdev)
> > pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT);
> > pm_runtime_set_active(&pdev->dev);
> > pm_runtime_enable(&pdev->dev);
> > +   pm_runtime_get_sync(&pdev->dev);
> Please check the return value here, if ret is "< 0", goto error label,
> and a pm_runtime_put_sync is needed in error label
> > /* QSPI controller initializations */
> > zynqmp_qspi_init_hw(xqspi);
> >  
> > @@ -1187,6 +1188,7 @@ static int zynqmp_qspi_probe(struct platform_device 
> > *pdev)
> > ctlr->mode_bits = SPI_CPOL | SPI_CPHA | SPI_RX_DUAL | SPI_RX_QUAD |
> > SPI_TX_DUAL | SPI_TX_QUAD;
> > ctlr->dev.of_node = np;
> > +   ctlr->auto_runtime_pm = true;
> >  
> > ret = devm_spi_register_controller(&pdev->dev, ctlr);
> > if (ret) {
> These 2 functions
>  pm_runtime_mark_last_busy(&pdev->dev);
>   pm_runtime_put_autosuspend(&pdev->dev);
> should be the last operations in the probe function, since once they run,
> the spi_controller will enter suspend state and disable clks after 3s
> have passed. So please move them just before "return 0".
> 
> And would you please cc me when you send V3? I am preparing to send a patch 
> series
> to fix clk and suspend/resume issues which bases on the pm_runtime issue.
> 

Thanks for your advice and I will send a new patch soon.

Regards,
Dinghao

[PATCH] sound: virtio: correct the function name in kernel-doc comment

2021-04-14 Thread Randy Dunlap

Fix kernel-doc warning that the wrong function name is used in a
kernel-doc comment:

../sound/virtio/virtio_ctl_msg.c:70: warning: expecting prototype for 
virtsnd_ctl_msg_request(). Prototype was for virtsnd_ctl_msg_response() instead

Signed-off-by: Randy Dunlap 
Cc: Anton Yakovlev 
Cc: "Michael S. Tsirkin" 
Cc: virtualizat...@lists.linux-foundation.org
Cc: alsa-de...@alsa-project.org
---
 sound/virtio/virtio_ctl_msg.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- linux-next-20210414.orig/sound/virtio/virtio_ctl_msg.c
+++ linux-next-20210414/sound/virtio/virtio_ctl_msg.c
@@ -61,7 +61,7 @@ void *virtsnd_ctl_msg_request(struct vir
 }
 
 /**
- * virtsnd_ctl_msg_request() - Get a pointer to the response header.
+ * virtsnd_ctl_msg_response() - Get a pointer to the response header.
  * @msg: Control message.
  *
  * Context: Any context.

Re: [PATCH 37/57] staging: rtl8188eu: os_dep: ioctl_linux: Move 2 large data buffers into the heap

2021-04-14 Thread Dan Carpenter

On Thu, Apr 15, 2021 at 08:20:16AM +0300, Dan Carpenter wrote:
> On Wed, Apr 14, 2021 at 07:11:09PM +0100, Lee Jones wrote:
> > ---
> >  drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 12 +++-
> >  1 file changed, 11 insertions(+), 1 deletion(-)
> > 
> > diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c 
> > b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
> > index c95ae4d6a3b6b..cc14f00947781 100644
> > --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
> > +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
> > @@ -224,7 +224,7 @@ static char *translate_scan(struct adapter *padapter,
> > /* parsing WPA/WPA2 IE */
> > {
> > u8 *buf;
> > -   u8 wpa_ie[255], rsn_ie[255];
> > +   u8 *wpa_ie, *rsn_ie;
> > u16 wpa_len = 0, rsn_len = 0;
> > u8 *p;
> >  
> > @@ -232,6 +232,14 @@ static char *translate_scan(struct adapter *padapter,
> > if (!buf)
> > return start;

Arnd, added this return...  I don't understand why we aren't returning
-ENOMEM here.

> >  
> > +   wpa_ie = kzalloc(255, GFP_ATOMIC);
> > +   if (!wpa_ie)
> > +   return start;
> 
> kfree(buf);
> 
> > +
> > +   rsn_ie = kzalloc(255, GFP_ATOMIC);
> > +   if (!rsn_ie)
> > +   return start;
> 

regards,
dan carpenter

Re: [PATCH 1/5] mm/swapfile: add percpu_ref support for swap

2021-04-14 Thread Huang, Ying

Dennis Zhou  writes:

> On Wed, Apr 14, 2021 at 01:44:58PM +0800, Huang, Ying wrote:
>> Dennis Zhou  writes:
>> 
>> > On Wed, Apr 14, 2021 at 11:59:03AM +0800, Huang, Ying wrote:
>> >> Dennis Zhou  writes:
>> >> 
>> >> > Hello,
>> >> >
>> >> > On Wed, Apr 14, 2021 at 10:06:48AM +0800, Huang, Ying wrote:
>> >> >> Miaohe Lin  writes:
>> >> >> 
>> >> >> > On 2021/4/14 9:17, Huang, Ying wrote:
>> >> >> >> Miaohe Lin  writes:
>> >> >> >> 
>> >> >> >>> On 2021/4/12 15:24, Huang, Ying wrote:
>> >> >>  "Huang, Ying"  writes:
>> >> >> 
>> >> >> > Miaohe Lin  writes:
>> >> >> >
>> >> >> >> We will use percpu-refcount to serialize against concurrent 
>> >> >> >> swapoff. This
>> >> >> >> patch adds the percpu_ref support for later fixup.
>> >> >> >>
>> >> >> >> Signed-off-by: Miaohe Lin 
>> >> >> >> ---
>> >> >> >>  include/linux/swap.h |  2 ++
>> >> >> >>  mm/swapfile.c| 25 ++---
>> >> >> >>  2 files changed, 24 insertions(+), 3 deletions(-)
>> >> >> >>
>> >> >> >> diff --git a/include/linux/swap.h b/include/linux/swap.h
>> >> >> >> index 144727041e78..849ba5265c11 100644
>> >> >> >> --- a/include/linux/swap.h
>> >> >> >> +++ b/include/linux/swap.h
>> >> >> >> @@ -240,6 +240,7 @@ struct swap_cluster_list {
>> >> >> >>   * The in-memory structure used to track swap areas.
>> >> >> >>   */
>> >> >> >>  struct swap_info_struct {
>> >> >> >> +   struct percpu_ref users;/* serialization 
>> >> >> >> against concurrent swapoff */
>> >> >> >> unsigned long   flags;  /* SWP_USED etc: see 
>> >> >> >> above */
>> >> >> >> signed shortprio;   /* swap priority of 
>> >> >> >> this type */
>> >> >> >> struct plist_node list; /* entry in 
>> >> >> >> swap_active_head */
>> >> >> >> @@ -260,6 +261,7 @@ struct swap_info_struct {
>> >> >> >> struct block_device *bdev;  /* swap device or bdev 
>> >> >> >> of swap file */
>> >> >> >> struct file *swap_file; /* seldom referenced */
>> >> >> >> unsigned int old_block_size;/* seldom referenced */
>> >> >> >> +   struct completion comp; /* seldom referenced */
>> >> >> >>  #ifdef CONFIG_FRONTSWAP
>> >> >> >> unsigned long *frontswap_map;   /* frontswap in-use, 
>> >> >> >> one bit per page */
>> >> >> >> atomic_t frontswap_pages;   /* frontswap pages 
>> >> >> >> in-use counter */
>> >> >> >> diff --git a/mm/swapfile.c b/mm/swapfile.c
>> >> >> >> index 149e77454e3c..724173cd7d0c 100644
>> >> >> >> --- a/mm/swapfile.c
>> >> >> >> +++ b/mm/swapfile.c
>> >> >> >> @@ -39,6 +39,7 @@
>> >> >> >>  #include 
>> >> >> >>  #include 
>> >> >> >>  #include 
>> >> >> >> +#include 
>> >> >> >>  
>> >> >> >>  #include 
>> >> >> >>  #include 
>> >> >> >> @@ -511,6 +512,15 @@ static void swap_discard_work(struct 
>> >> >> >> work_struct *work)
>> >> >> >> spin_unlock(>lock);
>> >> >> >>  }
>> >> >> >>  
>> >> >> >> +static void swap_users_ref_free(struct percpu_ref *ref)
>> >> >> >> +{
>> >> >> >> +   struct swap_info_struct *si;
>> >> >> >> +
>> >> >> >> +   si = container_of(ref, struct swap_info_struct, users);
>> >> >> >> +   complete(&si->comp);
>> >> >> >> +   percpu_ref_exit(&si->users);
>> >> >> >
>> >> >> > Because percpu_ref_exit() is used, we cannot use 
>> >> >> > percpu_ref_tryget() in
>> >> >> > get_swap_device(), better to add comments there.
>> >> >> 
>> >> >>  I just noticed that the comments of percpu_ref_tryget_live() says,
>> >> >> 
>> >> >>   * This function is safe to call as long as @ref is between init 
>> >> >>  and exit.
>> >> >> 
>> >> >>  While we need to call get_swap_device() almost at any time, so 
>> >> >>  it's
>> >> >>  better to avoid to call percpu_ref_exit() at all.  This will 
>> >> >>  waste some
>> >> >>  memory, but we need to follow the API definition to avoid 
>> >> >>  potential
>> >> >>  issues in the long term.
>> >> >> >>>
>> >> >> >>> I have to admit that I'am not really familiar with percpu_ref. So 
>> >> >> >>> I read the
>> >> >> >>> implementation code of the percpu_ref and found 
>> >> >> >>> percpu_ref_tryget_live() could
>> >> >> >>> be called after exit now. But you're right we need to follow the 
>> >> >> >>> API definition
>> >> >> >>> to avoid potential issues in the long term.
>> >> >> >>>
>> >> >> 
>> >> >>  And we need to call percpu_ref_init() before insert the 
>> >> >>  swap_info_struct
>> >> >>  into the swap_info[].
>> >> >> >>>
>> >> >> >>> If we remove the call to percpu_ref_exit(), we should not use 
>> >> >> >>> percpu_ref_init()
>> >> >> >>> here because *percpu_ref->data is assumed to be NULL* in 
>> >> >> >>>

Re: High kmalloc-32 slab cache consumption with 10k containers

2021-04-14 Thread Bharata B Rao

On Wed, Apr 07, 2021 at 08:28:07AM +1000, Dave Chinner wrote:
> 
> Another approach may be to identify filesystem types that do not
> need memcg awareness and feed that into alloc_super() to set/clear
> the SHRINKER_MEMCG_AWARE flag. This could be based on fstype - most
> virtual filesystems that expose system information do not really
> need full memcg awareness because they are generally only visible to
> a single memcg instance...

Would something like below be appropriate?

>From f314083ad69fde2a420a1b74febd6d3f7a25085f Mon Sep 17 00:00:00 2001
From: Bharata B Rao 
Date: Wed, 14 Apr 2021 11:21:24 +0530
Subject: [PATCH 1/1] fs: Let filesystems opt out of memcg awareness

All filesystem mounts by default are memcg aware and hence
end up creating shrinker list_lrus for all the memcgs. Due to
the way the memcg_nr_cache_ids grow and the list_lru heads are
allocated for all memcgs, huge amount of memory gets consumed
by kmalloc-32 slab cache when running thousands of containers.

Improve this situation by allowing filesystems to opt out
of memcg awareness. In this patch, tmpfs, proc and ramfs
opt out of memcg awareness. This leads to considerable memory
savings when running 10k containers.

Signed-off-by: Bharata B Rao 
---
 fs/proc/root.c |  1 +
 fs/ramfs/inode.c   |  1 +
 fs/super.c | 27 +++
 include/linux/fs_context.h |  2 ++
 mm/shmem.c |  1 +
 5 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/fs/proc/root.c b/fs/proc/root.c
index c7e3b1350ef8..7856bc2ca9f4 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -257,6 +257,7 @@ static int proc_init_fs_context(struct fs_context *fc)
fc->user_ns = get_user_ns(ctx->pid_ns->user_ns);
fc->fs_private = ctx;
fc->ops = _fs_context_ops;
+   fc->memcg_optout = true;
return 0;
 }
 
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 9ebd17d7befb..576a88bb7407 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -278,6 +278,7 @@ int ramfs_init_fs_context(struct fs_context *fc)
fsi->mount_opts.mode = RAMFS_DEFAULT_MODE;
fc->s_fs_info = fsi;
fc->ops = _context_ops;
+   fc->memcg_optout = true;
return 0;
 }
 
diff --git a/fs/super.c b/fs/super.c
index 8c1baca35c16..59aa22c678e6 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -198,7 +198,8 @@ static void destroy_unused_super(struct super_block *s)
  * returns a pointer new superblock or %NULL if allocation had failed.
  */
 static struct super_block *alloc_super(struct file_system_type *type, int 
flags,
-  struct user_namespace *user_ns)
+  struct user_namespace *user_ns,
+  bool memcg_optout)
 {
struct super_block *s = kzalloc(sizeof(struct super_block),  GFP_USER);
static const struct super_operations default_op;
@@ -266,13 +267,22 @@ static struct super_block *alloc_super(struct 
file_system_type *type, int flags,
s->s_shrink.scan_objects = super_cache_scan;
s->s_shrink.count_objects = super_cache_count;
s->s_shrink.batch = 1024;
-   s->s_shrink.flags = SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE;
+   s->s_shrink.flags = SHRINKER_NUMA_AWARE;
+   if (!memcg_optout)
+   s->s_shrink.flags |= SHRINKER_MEMCG_AWARE;
if (prealloc_shrinker(>s_shrink))
goto fail;
-   if (list_lru_init_memcg(>s_dentry_lru, >s_shrink))
-   goto fail;
-   if (list_lru_init_memcg(>s_inode_lru, >s_shrink))
-   goto fail;
+   if (memcg_optout) {
+   if (list_lru_init(>s_dentry_lru))
+   goto fail;
+   if (list_lru_init(>s_inode_lru))
+   goto fail;
+   } else {
+   if (list_lru_init_memcg(>s_dentry_lru, >s_shrink))
+   goto fail;
+   if (list_lru_init_memcg(>s_inode_lru, >s_shrink))
+   goto fail;
+   }
return s;
 
 fail:
@@ -527,7 +537,8 @@ struct super_block *sget_fc(struct fs_context *fc,
}
if (!s) {
spin_unlock(_lock);
-   s = alloc_super(fc->fs_type, fc->sb_flags, user_ns);
+   s = alloc_super(fc->fs_type, fc->sb_flags, user_ns,
+   fc->memcg_optout);
if (!s)
return ERR_PTR(-ENOMEM);
goto retry;
@@ -610,7 +621,7 @@ struct super_block *sget(struct file_system_type *type,
}
if (!s) {
spin_unlock(_lock);
-   s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns);
+   s = alloc_super(type, (flags & ~SB_SUBMOUNT), user_ns, false);
if (!s)
return ERR_PTR(-ENOMEM);
goto retry;
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index

Re: [PATCH v2 7/8] cxl/port: Introduce cxl_port objects

2021-04-14 Thread Dan Williams

On Tue, Apr 13, 2021 at 6:15 PM Bjorn Helgaas  wrote:
>
> On Thu, Apr 08, 2021 at 07:13:38PM -0700, Dan Williams wrote:
> > Hi Bjorn, thanks for taking a look.
> >
> > On Thu, Apr 8, 2021 at 3:42 PM Bjorn Helgaas  wrote:
> > >
> > > [+cc Greg, Rafael, Matthew: device model questions]
> > >
> > > Hi Dan,
> > >
> > > On Thu, Apr 01, 2021 at 07:31:20AM -0700, Dan Williams wrote:
> > > > Once the cxl_root is established then other ports in the hierarchy can
> > > > be attached. The cxl_port object, unlike cxl_root that is associated
> > > > with host bridges, is associated with PCIE Root Ports or PCIE Switch
> > > > Ports. Add cxl_port instances for all PCIE Root Ports in an ACPI0016
> > > > host bridge.
>
> Incidentally, "PCIe" is the abbreviation used in the PCIe specs, so I
> try to use that instead of "PCIE" in drivers/pci/.

Noted.

>
> > > I'm not a device model expert, but I'm not sure about adding a new
> > > /sys/bus/cxl/devices hierarchy.  I'm under the impression that CXL
> > > devices will be enumerated by the PCI core as PCIe devices.
> >
> > Yes, PCIe is involved, but mostly only for the CXL.io slow path
> > (configuration and provisioning via mailbox) when we're talking about
> > memory expander devices (CXL calls these Type-3). So-called "Type-3"
> > support is the primary driver of this infrastructure.
> >
> > You might be thinking of CXL accelerator devices that will look like
> > plain PCIe devices that happen to participate in the CPU cache
> > hierarchy (CXL calls these Type-1). There will also be accelerator
> > devices that want to share coherent memory with the system (CXL calls
> > these Type-2).
>
> IIUC all these CXL devices will be enumerated by the PCI core.  They
> seem to have regular PCI BARs (separate from the HDM stuff), so the
> PCI core will presumably manage address allocation for them.  It looks
> like Function Level Reset and hotplug are supposed to use the regular
> PCIe code.  I guess this will all be visible via lspci just like
> regular PCI devices, right?

Yes. the CXL.io protocol is synonymous with PCIe. Hotplug is native
PCIe hotplug to negotiate getting the card online and offline.
Although, for offline an additional constraint is to deny removal
whenever the card has active pages in the page allocator. Similar to
what happens today for ACPI memory hotplug where the OS can say "nope,
there's still active pages in the range you asked to eject".

FLR has no effect on CXL.cache or CXL.mem state, only CXL.io.

> > The infrastructure being proposed here is primarily for the memory
> > expander (Type-3) device case where the PCI sysfs hierarchy is wholly
> > unsuited for modeling it. A single CXL memory region device may span
> > multiple endpoints, switches, and host bridges. It poses similar
> > stress to an OS device model as RAID where there is a driver for the
> > component contributors to an upper level device / driver that exposes
> > the RAID Volume (CXL memory region interleave set). The CXL memory
> > decode space (HDM: Host Managed Device Memory) is independent of the
> > PCIe MMIO BAR space.
>
> It looks like you add a cxl_port for each ACPI0016 device and every
> PCIe Root Port below it.  So I guess the upper level spanning is at a
> higher level than cxl_port?

A memory interleave can span any level of the hierarchy. It can be
across host bridges at the top level, but also incorporate a leaf
device at the bottom of a CXL switch hierarchy. There will be a
cxl_port instance for each side of each link.

> > That's where the /sys/bus/cxl hierarchy is needed, to manage the HDM
> > space across the CXL topology in a way that is foreign to PCIE (HDM
> > Decoder hierarchy).
>
> When we do FLR on the PCIe device, what happens to these CXL clients?
> Do they care?  Are they notified?  Do they need to do anything before
> or after the FLR?

Per CXL Spec:

"FLR has no effect on the CXL.cache and CXL.mem protocol. Any
CXL.cache and CXL.mem related control registers including CXL DVSEC
structures and state held by the CXL device are not affected by FLR.
The memory controller hosting the HDM is not reset by FLR."

> What about hotplug?  Spec says it leverages PCIe hotplug, but it looks
> like maybe this all requires ACPI hotplug (acpiphp) for adding
> ACPI0017 devices and notifying of hot remove requests?  If it uses
> PCIe native hotplug (pciehp), what connects the CXL side to the PCI
> side?

No, ACPI hotplug is not involved. ACPI0017 is essentially just a dummy
anchor device to hang the interleave set coordination. The connection
from native hotplug to CXL is the cxl_mem driver. When it detects
a new device it walks the cxl_port hierarchy to see if one is a parent
of this endpoint. Then it registers its HDM decoders with the CXL core
and the CXL core can online it as a standalone interleave set or
consolidate it with others to make a wider set. For persistent memory
there is on-device metadata to recall whether this device was part of
a set previously. For

Re: [PATCH 37/57] staging: rtl8188eu: os_dep: ioctl_linux: Move 2 large data buffers into the heap

2021-04-14 Thread Dan Carpenter

On Wed, Apr 14, 2021 at 07:11:09PM +0100, Lee Jones wrote:
> ---
>  drivers/staging/rtl8188eu/os_dep/ioctl_linux.c | 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c 
> b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
> index c95ae4d6a3b6b..cc14f00947781 100644
> --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
> +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
> @@ -224,7 +224,7 @@ static char *translate_scan(struct adapter *padapter,
>   /* parsing WPA/WPA2 IE */
>   {
>   u8 *buf;
> - u8 wpa_ie[255], rsn_ie[255];
> + u8 *wpa_ie, *rsn_ie;
>   u16 wpa_len = 0, rsn_len = 0;
>   u8 *p;
>  
> @@ -232,6 +232,14 @@ static char *translate_scan(struct adapter *padapter,
>   if (!buf)
>   return start;
>  
> + wpa_ie = kzalloc(255, GFP_ATOMIC);
> + if (!wpa_ie)
> + return start;

kfree(buf);

> +
> + rsn_ie = kzalloc(255, GFP_ATOMIC);
> + if (!rsn_ie)
> + return start;

kfree(buf);
kfree(wpa_ie);

> +
>   rtw_get_sec_ie(pnetwork->network.ies, 
> pnetwork->network.ie_length, rsn_ie, _len, wpa_ie, _len);
>   RT_TRACE(_module_rtl871x_mlme_c_, _drv_info_, 
> ("rtw_wx_get_scan: ssid =%s\n", pnetwork->network.ssid.ssid));
>   RT_TRACE(_module_rtl871x_mlme_c_, _drv_info_, 
> ("rtw_wx_get_scan: wpa_len =%d rsn_len =%d\n", wpa_len, rsn_len));

regards,
dan carpenter

Re: [PATCH net v2] i40e: fix the panic when running bpf in xdpdrv mode

2021-04-14 Thread Jason Xing

On Thu, Apr 15, 2021 at 10:08 AM Jesse Brandeburg
 wrote:
>
> Jason Xing wrote:
>
> > On Wed, Apr 14, 2021 at 12:27 AM Jesse Brandeburg
> >  wrote:
> > >
> > > kerneljasonx...@gmail.com wrote:
> > >
> > > > From: Jason Xing 
> > >
> > > Hi Jason,
> > >
> > > Sorry, I missed this on the first time: Added intel-wired-lan,
> > > please include on any future submissions for Intel drivers.
> > > get-maintainers script might help here?
> > >
> >
> > Probably I got this wrong in the last email. Did you mean that I should add
> > intel-wired-lan in the title not the cc list? It seems I should put
> > this together on
> > the next submission like this:
> >
> > [Intel-wired-lan] [PATCH net v4]
>
> Your v3 submittal was correct. My intent was to make sure
> intel-wired-lan was in CC:
>

Well, I get to know more about the whole thing.

> If Kuba or Dave wants us to take the fix in via intel-wired-lan trees,
> then we can do that, or they can apply it directly. I'll ack it on the
> v3.

Thanks, Jesse:)

Jason

>

Re: [PATCH 2/2] ptrace: is_syscall_success: Add syscall return code handling for compat task

2021-04-14 Thread He Zhe

On 4/15/21 12:55 AM, Oleg Nesterov wrote:
> On 04/14, David Laight wrote:
>> From: Oleg Nesterov
>>> Sent: 14 April 2021 16:08
>>>
>>> Add audit maintainers...
>>>
>>> On 04/14, He Zhe wrote:
 When 32-bit userspace application is running on 64-bit kernel, the 32-bit
 syscall return code would be changed from u32 to u64 in regs_return_value
 and then changed to s64. Hence the negative return code would be treated
 as a positive number and results in a non-error in, for example, audit
 like below.
>>> Sorry, can't understand. At least on x86_64 even the 32-bit syscall returns
>>> long, not u32.
>>>
>>> Hmm. And afaics on x86 is_compat_task() is only defined if !CONFIG_COMPAT,
>>> so this patch looks wrong anyway.
>> And, as with the other patch a x64_64 64bit process can make both types
>> of 32bit system call - so it needs to depend on the system call entry type
>> not any type of the task.
> I don't understand... but iirc is_compat_task() used to check TS_COMPAT and
> this is what we need to detect the 32-bit syscall. But it looks deprecated,
> I think in_compat_syscall() should be used instead.
>
> But this doesn't matter, I still can't understand the problem.

Sorry for not enough clarification.

This was found on an arm64 kernel running with 32-bit user-space application.
The arm64 version of regs_return_value returns unsigned long.

static inline unsigned long regs_return_value(struct pt_regs *regs)
{
    return regs->regs[0];
}

But when the syscall fails, with -13 in my case, the return code has been saved
as a 32 bit long negative number, 0xFFFFFFF3, in regs[0] by the time
regs_return_value gets called in audit_syscall_exit.

Then in audit_syscall_exit, the return value of regs_return_value is changed to
a 64 bit signed long, from when on it is treated as a positive number.

Similarly in is_syscall_success, 0xFFFFFFF3 would be out of error
number range, resulting in a "success".

These two patches are to do the sign extension.

David, thanks, in_compat_syscall should be the right one to use. I didn't notice
the difference between in_compat_syscall and is_compat_task and thought
is_compat_task would be harmless to other architectures.

Zhe

>
> Oleg.
>

Re: [PATCH v1] usb: typec: tcpm: Fix error while calculating PPS out values

2021-04-14 Thread Guenter Roeck

On 4/14/21 10:01 PM, Badhri Jagan Sridharan wrote:
> "usb: typec: tcpm: Address incorrect values of tcpm psy for pps supply"
> introduced a regression for req_out_volt and req_op_curr calculation.
> 
> req_out_volt should consider the newly calculated max voltage instead
> of previously accepted max voltage by the port partner. Likewise,
> req_op_curr should consider the newly calculated max current instead
> of previously accepted max current by the port partner.
> 
> Fixes: e3a072022487 ("usb: typec: tcpm: Address incorrect values of tcpm psy 
> for pps supply")
> Signed-off-by: Badhri Jagan Sridharan 

Reviewed-by: Guenter Roeck 

> ---
>  drivers/usb/typec/tcpm/tcpm.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
> index 1c32bdf62852..04652aa1f54e 100644
> --- a/drivers/usb/typec/tcpm/tcpm.c
> +++ b/drivers/usb/typec/tcpm/tcpm.c
> @@ -3132,10 +3132,10 @@ static unsigned int tcpm_pd_select_pps_apdo(struct 
> tcpm_port *port)
>   port->pps_data.req_max_volt = min(pdo_pps_apdo_max_voltage(src),
> 
> pdo_pps_apdo_max_voltage(snk));
>   port->pps_data.req_max_curr = min_pps_apdo_current(src, snk);
> - port->pps_data.req_out_volt = min(port->pps_data.max_volt,
> -   max(port->pps_data.min_volt,
> + port->pps_data.req_out_volt = min(port->pps_data.req_max_volt,
> +   
> max(port->pps_data.req_min_volt,
> 
> port->pps_data.req_out_volt));
> - port->pps_data.req_op_curr = min(port->pps_data.max_curr,
> + port->pps_data.req_op_curr = min(port->pps_data.req_max_curr,
>port->pps_data.req_op_curr);
>   }
>  
>

Re: [PATCH v5] docs/zh_CN: add translations in zh_CN/dev-tools/gcov

2021-04-14 Thread Fangrui Song


Reviewed-by: Fangrui Song 

Inlined some suggestions.

On 2021-04-14, Alex Shi wrote:

Reviewed-by: Alex Shi 

On 2021/4/14 下午9:21, Wu XiangCheng wrote:

From: Bernard Zhao 

Add new zh translations
* zh_CN/dev-tools/gcov.rst
* zh_CN/dev-tools/index.rst
and link them to zh_CN/index.rst

Signed-off-by: Bernard Zhao 
Reviewed-by: Wu XiangCheng 
Signed-off-by: Wu XiangCheng 
---
base: linux-next
commit 269dd42f4776 ("docs/zh_CN: add riscv to zh_CN index")

Changes since V4:
* modified some words under Alex Shi's advices

Changes since V3:
* update to newest linux-next
* fix ``
* fix tags
* fix list indent

Changes since V2:
* fix some inaccurate translation

Changes since V1:
* add index.rst in dev-tools and link to zh_CN/index.rst
* fix some inaccurate translation

 .../translations/zh_CN/dev-tools/gcov.rst | 265 ++
 .../translations/zh_CN/dev-tools/index.rst|  35 +++
 Documentation/translations/zh_CN/index.rst|   1 +
 3 files changed, 301 insertions(+)
 create mode 100644 Documentation/translations/zh_CN/dev-tools/gcov.rst
 create mode 100644 Documentation/translations/zh_CN/dev-tools/index.rst

diff --git a/Documentation/translations/zh_CN/dev-tools/gcov.rst 
b/Documentation/translations/zh_CN/dev-tools/gcov.rst
new file mode 100644
index ..7515b488bc4e
--- /dev/null
+++ b/Documentation/translations/zh_CN/dev-tools/gcov.rst
@@ -0,0 +1,265 @@
+.. include:: ../disclaimer-zh_CN.rst
+
+:Original: Documentation/dev-tools/gcov.rst
+:Translator: 赵军奎 Bernard Zhao 
+
+在Linux内核里使用gcov做代码覆盖率检查
+=
+
+gcov是linux中已经集成的一个分析模块，该模块在内核中对GCC的代码覆盖率统


instrumentation 一般译作 插桩，而非 分析。


+计提供了支持。
+linux内核运行时的代码覆盖率数据会以gcov兼容的格式存储在debug-fs中，可


专有名词 Linux 应大写。


+以通过gcov的 ``-o`` 选项（如下示例）获得指定文件的代码运行覆盖率统计数据
+（需要跳转到内核编译路径下并且要有root权限）::
+
+# cd /tmp/linux-out
+# gcov -o /sys/kernel/debug/gcov/tmp/linux-out/kernel spinlock.c
+
+这将在当前目录中创建带有执行计数注释的源代码文件。
+在获得这些统计文件后，可以使用图形化的 gcov_ 前端工具（比如 lcov_ ），来实现
+自动化处理linux内核的覆盖率运行数据，同时生成易于阅读的HTML格式文件。
+
+可能的用途:
+
+* 调试（用来判断每一行的代码是否已经运行过）
+* 测试改进（如何修改测试代码，尽可能地覆盖到没有运行过的代码）
+* 内核配置优化（对于某一个选项配置，如果关联的代码从来没有运行过，是
+  否还需要这个配置）


minimizing: 优化 -> 最小化/简化


+.. _gcov: https://gcc.gnu.org/onlinedocs/gcc/Gcov.html
+.. _lcov: http://ltp.sourceforge.net/coverage/lcov.php
+
+
+准备
+
+
+内核打开如下配置::
+
+CONFIG_DEBUG_FS=y
+CONFIG_GCOV_KERNEL=y
+
+获取整个内核的覆盖率数据，还需要打开::
+
+CONFIG_GCOV_PROFILE_ALL=y
+
+需要注意的是，整个内核开启覆盖率统计会造成内核镜像文件尺寸的增大，
+同时内核运行的也会变慢一些。


s/的//


+另外，并不是所有的架构都支持整个内核开启覆盖率统计。
+
+代码运行覆盖率数据只在debugfs挂载完成后才可以访问::
+
+mount -t debugfs none /sys/kernel/debug
+
+
+定制化
+--
+
+如果要单独针对某一个路径或者文件进行代码覆盖率统计，可以在内核相应路
+径的Makefile中增加如下的配置:
+
+- 单独统计单个文件（例如main.o）::
+
+GCOV_PROFILE_main.o := y
+
+- 单独统计某一个路径::
+
+GCOV_PROFILE := y
+
+如果要在整个内核的覆盖率统计（开启CONFIG_GCOV_PROFILE_ALL）中单独排除
+某一个文件或者路径，可以使用如下的方法::
+
+GCOV_PROFILE_main.o := n
+
+和::
+
+GCOV_PROFILE := n
+
+此机制仅支持链接到内核镜像或编译为内核模块的文件。
+
+
+相关文件
+
+
+gcov功能需要在debugfs中创建如下文件:
+
+``/sys/kernel/debug/gcov``
+gcov相关功能的根路径
+
+``/sys/kernel/debug/gcov/reset``
+全局复位文件:向该文件写入数据后会将所有的gcov统计数据清0
+
+``/sys/kernel/debug/gcov/path/to/compile/dir/file.gcda``
+gcov工具可以识别的覆盖率统计数据文件，向该文件写入数据后
+ 会将本文件的gcov统计数据清0
+
+``/sys/kernel/debug/gcov/path/to/compile/dir/file.gcno``
+gcov工具需要的软连接文件（指向编译时生成的信息统计文件），这个文件是
+在gcc编译时如果配置了选项 ``-ftest-coverage`` 时生成的。
+
+
+针对模块的统计
+--
+
+内核中的模块会动态的加载和卸载，模块卸载时对应的数据会被清除掉。
+gcov提供了一种机制，通过保留相关数据的副本来收集这部分卸载模块的覆盖率数据。
+模块卸载后这些备份数据在debugfs中会继续存在。
+一旦这个模块重新加载，模块关联的运行统计会被初始化成debugfs中备份的数据。
+
+可以通过对内核参数gcov_persist的修改来停用gcov对模块的备份机制::
+
+gcov_persist = 0
+
+在运行时，用户还可以通过写入模块的数据文件或者写入gcov复位文件来丢弃已卸
+载模块的数据。
+
+
+编译机和测试机分离
+--
+
+gcov的内核分析架构支持内核的编译和运行是在同一台机器上，也可以编译和运


分析 -> 插桩


+行是在不同的机器上。
+如果内核编译和运行是不同的机器，那么需要额外的准备工作，这取决于gcov工具
+是在哪里使用的:
+
+.. _gcov-test_zh:
+
+a) 若gcov运行在测试机上
+
+测试机上面gcov工具的版本必须要跟内核编译机器使用的gcc版本相兼容，
+同时下面的文件要从编译机拷贝到测试机上:
+
+从源代码中:
+  - 所有的C文件和头文件
+
+从编译目录中:
+  - 所有的C文件和头文件
+  - 所有的.gcda文件和.gcno文件
+  - 所有目录的链接
+
+特别需要注意，测试机器上面的目录结构跟编译机器上面的目录机构必须
+完全一致。
+如果文件是软链接，需要替换成真正的目录文件（这是由make的当前工作
+目录变量CURDIR引起的）。
+
+.. _gcov-build_zh:
+
+b) 若gcov运行在编译机上
+
+测试用例运行结束后，如下的文件需要从测试机中拷贝到编译机上:
+
+从sysfs中的gcov目录中:
+  - 所有的.gcda文件
+  - 所有的.gcno文件软链接
+
+这些文件可以拷贝到编译机的任意目录下，gcov使用-o选项指定拷贝的
+目录。
+
+比如一个是示例的目录结构如下::
+
+  /tmp/linux:内核源码目录
+  /tmp/out:  内核编译文件路径（make O=指定）
+  /tmp/coverage: 从测试机器上面拷贝的数据文件路径
+
+  [user@build] cd /tmp/out
+  [user@build] gcov -o /tmp/coverage/tmp/out/init main.c
+
+
+关于编译器的注意事项
+
+
+GCC和LLVM gcov工具不一定兼容。
+如果编译器是GCC，使用 gcov_ 来处理.gcno和.gcda文件，如果是Clang编译器，
+则使用 llvm-cov_ 。
+
+.. _gcov: https://gcc.gnu.org/onlinedocs/gcc/Gcov.html
+.. _llvm-cov: https://llvm.org/docs/CommandGuide/llvm-cov.html
+
+GCC和Clang gcov之间的版本差异由Kconfig处理的。
+kconfig会根据编译工具链的检查自动选择合适的gcov格式。
+
+问题定位
+
+
+可能出现的问题1
+编译到链接阶段报错终止
+
+问题原因
+

Re: [PATCH v1] usb: typec: tcpm: Fix error while calculating PPS out values

2021-04-14 Thread Badhri Jagan Sridharan

On Wed, Apr 14, 2021 at 10:01 PM Badhri Jagan Sridharan
 wrote:
>
> "usb: typec: tcpm: Address incorrect values of tcpm psy for pps supply"
> introduced a regression for req_out_volt and req_op_curr calculation.
>
> req_out_volt should consider the newly calculated max voltage instead
> of previously accepted max voltage by the port partner. Likewise,
> req_op_curr should consider the newly calculated max current instead
> of previously accepted max current by the port partner.
>
> Fixes: e3a072022487 ("usb: typec: tcpm: Address incorrect values of tcpm psy 
> for pps supply")
> Signed-off-by: Badhri Jagan Sridharan 
> ---
>  drivers/usb/typec/tcpm/tcpm.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
> index 1c32bdf62852..04652aa1f54e 100644
> --- a/drivers/usb/typec/tcpm/tcpm.c
> +++ b/drivers/usb/typec/tcpm/tcpm.c
> @@ -3132,10 +3132,10 @@ static unsigned int tcpm_pd_select_pps_apdo(struct 
> tcpm_port *port)
> port->pps_data.req_max_volt = 
> min(pdo_pps_apdo_max_voltage(src),
>   
> pdo_pps_apdo_max_voltage(snk));
> port->pps_data.req_max_curr = min_pps_apdo_current(src, snk);
> -   port->pps_data.req_out_volt = min(port->pps_data.max_volt,
> - max(port->pps_data.min_volt,
> +   port->pps_data.req_out_volt = min(port->pps_data.req_max_volt,
> + 
> max(port->pps_data.req_min_volt,
>   
> port->pps_data.req_out_volt));
> -   port->pps_data.req_op_curr = min(port->pps_data.max_curr,
> +   port->pps_data.req_op_curr = min(port->pps_data.req_max_curr,
>  port->pps_data.req_op_curr);
> }
>
> --
> 2.31.1.295.g9ea45b61b8-goog
>

Re: [tip: core/rcu] softirq: Don't try waking ksoftirqd before it has been spawned

2021-04-14 Thread Paul E. McKenney

On Thu, Apr 15, 2021 at 01:54:18AM +0200, Thomas Gleixner wrote:
> Paul,
> 
> On Wed, Apr 14 2021 at 11:11, Paul E. McKenney wrote:
> > On Wed, Apr 14, 2021 at 10:57:57AM +0200, Uladzislau Rezki wrote:
> >> On Wed, Apr 14, 2021 at 09:13:22AM +0200, Sebastian Andrzej Siewior wrote:
> >> At the same time Paul made another patch:
> >> 
> >> softirq: Don't try waking ksoftirqd before it has been spawned
> >> 
> >> it allows us to keep RCU-tasks initialization before even
> >> early_initcall() where it is now and let our rcu-self-test
> >> to be completed without any hanging.
> >
> > In short, this window of time in which it is not possible to reliably
> > wait on a softirq handler has caused trouble, just as several other
> > similar boot-sequence time windows have caused trouble in the past.
> > It therefore makes sense to just eliminate this problem, and prevent
> > future developers from facing inexplicable silent boot-time hangs.
> >
> > We can move the spawning of ksoftirqd kthreads earlier, but that
> > simply narrows the window.  It does not eliminate the problem.
> >
> > I can easily believe that this might have -rt consequences that need
> > attention.  For your amusement, I will make a few guesses as to what
> > these might be:
> >
> > o   Back-of-interrupt softirq handlers degrade real-time response.
> > This should not be a problem this early in boot, and once the
> > ksoftirqd kthreads are spawned, there will never be another
> > back-of-interrupt softirq handler in kernels that have
> > force_irqthreads set, which includes -rt kernels.
> 
> Not a problem obviously.
> 
> > o   That !__this_cpu_read(ksoftirqd) check remains at runtime, even
> > though it always evaluates to false.  I would be surprised if
> > this overhead is measurable at the system level, but if it is,
> > static branches should take care of this.
> 
> Agreed.
> 
> > o   There might be a -rt lockdep check that isn't happy with
> > back-of-interrupt softirq handlers.  But such a lockdep check
> > could be conditioned on __this_cpu_read(ksoftirqd), thus
> > preventing it from firing during that short window at boot time.
> 
> It's not like there are only a handful of lockdep invocations which need
> to be taken care of. The lockdep checks are mostly inside of lock
> operations and if lockdep has recorded back-of-interrupt context once
> during boot it will complain about irqs enabled context usage later on
> no matter what.
> 
> If you can come up with a reasonable implementation of that without
> losing valuable lockdep coverage and without creating a major mess in
> the code then I'm all ears.

My naive thought was something vaguely like this in invoke_softirq():

static inline void invoke_softirq(void)
{
if (ksoftirqd_running(local_softirq_pending()))
return;

if (!force_irqthreads || !__this_cpu_read(ksoftirqd)) {
if (force_irqthreads && !__this_cpu_read(ksoftirqd))
lockdep_off();
#ifdef CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK
/*
 * We can safely execute softirq on the current stack if
 * it is the irq stack, because it should be near empty
 * at this stage.
 */
__do_softirq();
#else
/*
 * Otherwise, irq_exit() is called on the task stack that can
 * be potentially deep already. So call softirq in its own stack
 * to prevent from any overrun.
 */
do_softirq_own_stack();
#endif
if (force_irqthreads && !__this_cpu_read(ksoftirqd))
lockdep_on();
} else {
wakeup_softirqd();
}
}

If I am reading the code correctly (ha!), this prevents locks from being
recorded during that short piece of the boot process, but vanilla kernels
would collect lockdep information during that time as well.

Similar changes would be needed elsewhere, which could easily get into
"mess" territory, and maybe even "major mess" territory.

> But lockdep is just one of the problems
> 
> > o   The -rt kernels might be using locks to implement things like
> > local_bh_disable(), in which case back-of-interrupt softirq
> > handlers could result in self-deadlock.  This could be addressed
> > by disabling bh the old way up to the time that the ksoftirqd
> > kthreads are created.  Because these are created while the system
> > is running on a single CPU (right?), a simple flag (or static
> > branch) could be used to switch this behavior into lock-only
> > mode long before the first real-time application can be spawned.
> 
> That has absolutely nothing to do with the first real-time application
> at all and just looking at the local_bh_disable() part does not cut it
> either.
> 
> The point is that the fundamental assumption of RT to break the non-rt
> semantics of interrupts

Re: [PATCH] block: fix io hung by block throttle

2021-04-14 Thread Junxiao Bi


On 4/14/21 9:11 PM, Hillf Danton wrote:


On Wed, 14 Apr 2021 14:18:30 Junxiao Bi wrote:

There is a race bug which can cause an io hang when multiple processes
run in parallel in rq_qos_wait().
Let's assume there were 4 processes P1/P2/P3/P4, P1/P2 were at the entry
of rq_qos_wait, and P3/P4 were waiting for io done, 2 io were inflight,
the inflight io limit was 2. See race below.

void rq_qos_wait()
{
...
 bool has_sleeper;

 P3/P4 were in sleeper list, has_sleeper was true for both P1 and 
P2.
 has_sleeper = wq_has_sleeper(&rqw->wait);
 if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
 return;

 2 inflight io done, P3/P4 were woken up to issue 2 new io.
 2 new io done, no inflight io.

 P1/P2 were added to the sleeper list, 2 entry in the list
 prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);

 P1/P2 were in the sleeper list, has_sleeper was true for P1/P2.
 has_sleeper = !wq_has_single_sleeper(&rqw->wait);
 do {
 /* The memory barrier in set_task_state saves us here. */
 if (data.got_token)
 break;
 if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
 finish_wait(&rqw->wait, &data.wq);

 /*
  * We raced with wbt_wake_function() getting a token,
  * which means we now have two. Put our local token
  * and wake anyone else potentially waiting for one.
  */
 smp_rmb();
 if (data.got_token)
 cleanup_cb(rqw, private_data);
 break;
 }

 P1/P2 hung here forever. New io requests will also hang here.
 io_schedule();
 has_sleeper = true;
 set_current_state(TASK_UNINTERRUPTIBLE);
 } while (1);
 finish_wait(&rqw->wait, &data.wq);
}

Cc: sta...@vger.kernel.org
Signed-off-by: Junxiao Bi 
---
  block/blk-rq-qos.c | 9 +++--
  1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 656460636ad3..04d888c99bc0 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -260,19 +260,17 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
.cb = acquire_inflight_cb,
.private_data = private_data,
};
-   bool has_sleeper;
  
-	has_sleeper = wq_has_sleeper(>wait);

-   if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
+   if (!wq_has_sleeper(>wait)
+   && acquire_inflight_cb(rqw, private_data))
return;
  
  	prepare_to_wait_exclusive(>wait, , TASK_UNINTERRUPTIBLE);

-   has_sleeper = !wq_has_single_sleeper(>wait);
do {
/* The memory barrier in set_task_state saves us here. */
if (data.got_token)
break;
-   if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
+   if (acquire_inflight_cb(rqw, private_data)) {
finish_wait(>wait, );
  
  			/*

@@ -286,7 +284,6 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
break;
}
io_schedule();
-   has_sleeper = true;
set_current_state(TASK_UNINTERRUPTIBLE);
} while (1);
finish_wait(>wait, );
--
2.24.3 (Apple Git-128)


No wakeup may cause the hang.

--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -287,7 +287,8 @@ void rq_qos_wait(struct rq_wait *rqw, vo
}
io_schedule();
has_sleeper = true;
-   set_current_state(TASK_UNINTERRUPTIBLE);
+   prepare_to_wait_exclusive(>wait, ,
+   TASK_UNINTERRUPTIBLE);


From rq_qos_wake_function(), the process can be woken up and removed 
from the sleeper list only when it gets the budget. It does not look 
necessary to re-add it to the sleeper list again.


Thanks,

Junxiao.


} while (1);
finish_wait(>wait, );
  }

[PATCH v1] usb: typec: tcpm: Fix error while calculating PPS out values

2021-04-14 Thread Badhri Jagan Sridharan

"usb: typec: tcpm: Address incorrect values of tcpm psy for pps supply"
introduced a regression for req_out_volt and req_op_curr calculation.

req_out_volt should consider the newly calculated max voltage instead
of previously accepted max voltage by the port partner. Likewise,
req_op_curr should consider the newly calculated max current instead
of previously accepted max current by the port partner.

Fixes: e3a072022487 ("usb: typec: tcpm: Address incorrect values of tcpm psy 
for pps supply")
Signed-off-by: Badhri Jagan Sridharan 
---
 drivers/usb/typec/tcpm/tcpm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 1c32bdf62852..04652aa1f54e 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -3132,10 +3132,10 @@ static unsigned int tcpm_pd_select_pps_apdo(struct 
tcpm_port *port)
port->pps_data.req_max_volt = min(pdo_pps_apdo_max_voltage(src),
  
pdo_pps_apdo_max_voltage(snk));
port->pps_data.req_max_curr = min_pps_apdo_current(src, snk);
-   port->pps_data.req_out_volt = min(port->pps_data.max_volt,
- max(port->pps_data.min_volt,
+   port->pps_data.req_out_volt = min(port->pps_data.req_max_volt,
+ 
max(port->pps_data.req_min_volt,
  
port->pps_data.req_out_volt));
-   port->pps_data.req_op_curr = min(port->pps_data.max_curr,
+   port->pps_data.req_op_curr = min(port->pps_data.req_max_curr,
 port->pps_data.req_op_curr);
}
 
-- 
2.31.1.295.g9ea45b61b8-goog

Re: [PATCH] kconfig: redo fake deps at include/config/*.h

2021-04-14 Thread Masahiro Yamada

On Thu, Apr 15, 2021 at 7:01 AM Alexey Dobriyan  wrote:
>
> Make include/config/foo/bar.h fake deps files generation simpler.
>
> * delete .h suffix
> those aren't header files, shorten filenames,
>
> * delete tolower()
> Linux filesystems can deal with both upper and lowercase
> filenames very well,
>
> * put everything in 1 directory
> Presumably 'mkdir -p' split is from dark times when filesystems
> handled huge directories badly, disks were round adding to
> seek times.


I am not sure about the impact of this change
given various file systems in the wild,
but this simplification is attractive.

With a quick search, I found a comment
'performance issues past 10,000' on ext2  [1]
but that may not be what we care about much...

[1]: 
https://webmasters.stackexchange.com/questions/99539/what-is-a-recommended-maximum-number-of-files-in-a-directory-on-your-webserver





> @@ -124,36 +124,12 @@ static void xprintf(const char *format, ...)
> va_end(ap);
>  }
>
> -static void xputchar(int c)
> -{
> -   int ret;
> -
> -   ret = putchar(c);
> -   if (ret == EOF) {
> -   perror("fixdep");
> -   exit(1);
> -   }
> -}
> -
>  /*
>   * Print out a dependency path from a symbol name
>   */
>  static void print_dep(const char *m, int slen, const char *dir)
>  {
> -   int c, prev_c = '/', i;
> -
> -   xprintf("$(wildcard %s/", dir);
> -   for (i = 0; i < slen; i++) {
> -   c = m[i];
> -   if (c == '_')
> -   c = '/';
> -   else
> -   c = tolower(c);
> -   if (c != '/' || prev_c != '/')
> -   xputchar(c);
> -   prev_c = c;
> -   }
> -   xprintf(".h) \\\n");
> +   xprintf("$(wildcard %s/%.*s) \\\n", dir, slen, m);



Since this function now contains just one line,
can you hard-code

xprintf("$(wildcard include/config/%.*s) \\\n", slen, m);

in use_config() ?



>  }
>
>  struct item {
> --- a/scripts/kconfig/confdata.c
> +++ b/scripts/kconfig/confdata.c
> @@ -130,19 +130,14 @@ static size_t depfile_prefix_len;
>  static int conf_touch_dep(const char *name)
>  {
> int fd, ret;
> -   const char *s;
> -   char *d, c;
> +   char *d;
>
> /* check overflow: prefix + name + ".h" + '\0' must fit in buffer. */
> if (depfile_prefix_len + strlen(name) + 3 > sizeof(depfile_path))

Since you dropped the ".h" suffix,
please fix up this line.





Also, you can fix

  # changed, Kconfig touches the corresponding timestamp file
include/config/*.h.

in kernel/gen_kheaders.sh





> return -1;
>
> d = depfile_path + depfile_prefix_len;
> -   s = name;
> -
> -   while ((c = *s++))
> -   *d++ = (c == '_') ? '/' : tolower(c);
> -   strcpy(d, ".h");
> +   strcpy(d, name);
>
> /* Assume directory path already exists. */
> fd = open(depfile_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
> @@ -465,7 +460,7 @@ int conf_read_simple(const char *name, int def)
>  * Reading from 
> include/config/auto.conf
>  * If CONFIG_FOO previously existed in
>  * auto.conf but it is missing now,
> -* include/config/foo.h must be 
> touched.
> +* include/config/FOO must be touched.
>  */
> conf_touch_dep(line + 
> strlen(CONFIG_));
> else



--
Best Regards
Masahiro Yamada

Re: [Ping for Dmitry] Re: [PATCH v5 3/3] iio: adc: add ADC driver for the TI TSC2046 controller

2021-04-14 Thread Dmitry Torokhov

Hi Oleksij, Jonathan,

On Tue, Apr 13, 2021 at 11:31:05AM +0200, Oleksij Rempel wrote:
> Hi Dmitry,
> 
> probably this mail passed under your radar. Can you please add your
> statement here.

Sorry, my bad, I saw "iio" and thought there is nothing for me to
comment on ;)

> 
> On Mon, Mar 29, 2021 at 11:58:26AM +0100, Jonathan Cameron wrote:
> > On Mon, 29 Mar 2021 09:31:31 +0200
> > Oleksij Rempel  wrote:
> > 
> > > Basically the TI TSC2046 touchscreen controller is 8 channel ADC 
> > > optimized for
> > > the touchscreen use case. By implementing it as an IIO ADC device, we can
> > > make use of resistive-adc-touch and iio-hwmon drivers.
> > > 
> > > Polled readings are currently not implemented to keep this patch small, so
> > > iio-hwmon will not work out of the box for now.
> > > 
> > > So far, this driver was tested with a custom version of 
> > > resistive-adc-touch driver,
> > > since it needs to be extended to make use of Z1 and Z2 channels. The X/Y
> > > are working without additional changes.
> > > 
> > > Signed-off-by: Oleksij Rempel 
> > > Reviewed-by: Andy Shevchenko 
> > Hi Oleksij,
> > 
> > Couple of things in here I missed before, but big question is still whether
> > Dmitry is happy with what you mention in the cover letter:
> > 
> > "This driver can replace drivers/input/touchscreen/ads7846.c and has
> > following advantages over it:
> > - less code to maintain
> > - shared code paths (resistive-adc-touch, iio-hwmon, etc)
> > - can be used as plain IIO ADC to investigate signaling issues or test
> >   real capacity of the plates and attached low-pass filters
> >   (or use the touchscreen as a microphone if you like ;) )"

I am all for code unification and reuse, so please go ahead. If there
are regressions we can re-evaluate and see if they can be addressed in
this driver or if we need to resurrect ads7846.

Thanks.

-- 
Dmitry

Re: [PATCH v5 1/3] riscv: Move kernel mapping outside of linear mapping

2021-04-14 Thread Alex Ghiti


Le 4/15/21 à 12:20 AM, Palmer Dabbelt a écrit :

On Sun, 11 Apr 2021 09:41:44 PDT (-0700), a...@ghiti.fr wrote:

This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at PAGE_OFFSET address therefore we could 
use

the linear mapping for the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear 
mapping,
two different virtual addresses cannot point to the same physical 
address,
the kernel mapping needs to lie outside the linear mapping so that we 
don't

have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space, BPF
is now always after the kernel and modules use the 2GB memory range right
before the kernel, so BPF and modules regions do not overlap. KASLR
implementation will simply have to move the kernel in the last 2GB range
and just take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann 
Signed-off-by: Alexandre Ghiti 
---
 arch/riscv/boot/loader.lds.S    |  3 +-
 arch/riscv/include/asm/page.h   | 17 +-
 arch/riscv/include/asm/pgtable.h    | 37 
 arch/riscv/include/asm/set_memory.h |  1 +
 arch/riscv/kernel/head.S    |  3 +-
 arch/riscv/kernel/module.c  |  6 +-
 arch/riscv/kernel/setup.c   |  5 ++
 arch/riscv/kernel/vmlinux.lds.S |  3 +-
 arch/riscv/mm/fault.c   | 13 +
 arch/riscv/mm/init.c    | 87 ++---
 arch/riscv/mm/kasan_init.c  |  9 +++
 arch/riscv/mm/physaddr.c    |  2 +-
 12 files changed, 146 insertions(+), 40 deletions(-)

diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
index 47a5003c2e28..62d94696a19c 100644
--- a/arch/riscv/boot/loader.lds.S
+++ b/arch/riscv/boot/loader.lds.S
@@ -1,13 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */

 #include 
+#include 

 OUTPUT_ARCH(riscv)
 ENTRY(_start)

 SECTIONS
 {
-    . = PAGE_OFFSET;
+    . = KERNEL_LINK_ADDR;

 .payload : {
 *(.payload)
diff --git a/arch/riscv/include/asm/page.h 
b/arch/riscv/include/asm/page.h

index adc9d26f3d75..22cfb2be60dc 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -90,15 +90,28 @@ typedef struct page *pgtable_t;

 #ifdef CONFIG_MMU
 extern unsigned long va_pa_offset;
+extern unsigned long va_kernel_pa_offset;
 extern unsigned long pfn_base;
 #define ARCH_PFN_OFFSET    (pfn_base)
 #else
 #define va_pa_offset    0
+#define va_kernel_pa_offset    0
 #define ARCH_PFN_OFFSET    (PAGE_OFFSET >> PAGE_SHIFT)
 #endif /* CONFIG_MMU */

-#define __pa_to_va_nodebug(x)    ((void *)((unsigned long) (x) + 
va_pa_offset))

-#define __va_to_pa_nodebug(x)    ((unsigned long)(x) - va_pa_offset)
+extern unsigned long kernel_virt_addr;
+
+#define linear_mapping_pa_to_va(x)    ((void *)((unsigned long)(x) + 
va_pa_offset))
+#define kernel_mapping_pa_to_va(x)    ((void *)((unsigned long)(x) + 
va_kernel_pa_offset))

+#define __pa_to_va_nodebug(x)    linear_mapping_pa_to_va(x)
+
+#define linear_mapping_va_to_pa(x)    ((unsigned long)(x) - 
va_pa_offset)
+#define kernel_mapping_va_to_pa(x)    ((unsigned long)(x) - 
va_kernel_pa_offset)

+#define __va_to_pa_nodebug(x)    ({    \
+    unsigned long _x = x;    \
+    (_x < kernel_virt_addr) ?    \
+    linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x);    \
+    })

 #ifdef CONFIG_DEBUG_VIRTUAL
 extern phys_addr_t __virt_to_phys(unsigned long x);
diff --git a/arch/riscv/include/asm/pgtable.h 
b/arch/riscv/include/asm/pgtable.h

index ebf817c1bdf4..80e63a93e903 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -11,23 +11,30 @@

 #include 

-#ifndef __ASSEMBLY__
-
-/* Page Upper Directory not used in RISC-V */
-#include 
-#include 
-#include 
-#include 
+#ifndef CONFIG_MMU
+#define KERNEL_LINK_ADDR    PAGE_OFFSET
+#else

-#ifdef CONFIG_MMU
+#define ADDRESS_SPACE_END    (UL(-1))
+/*
+ * Leave 2GB for kernel and BPF at the end of the address space
+ */
+#define KERNEL_LINK_ADDR    (ADDRESS_SPACE_END - SZ_2G + 1)

 #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
 #define VMALLOC_END  (PAGE_OFFSET - 1)
 #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)

+/* KASLR should leave at least 128MB for BPF after the kernel */
 #define BPF_JIT_REGION_SIZE    (SZ_128M)
-#define BPF_JIT_REGION_START    (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
-#define BPF_JIT_REGION_END    (VMALLOC_END)
+#define BPF_JIT_REGION_START    PFN_ALIGN((unsigned long)&_end)
+#define BPF_JIT_REGION_END    (BPF_JIT_REGION_START + 
BPF_JIT_REGION_SIZE)

+
+/* Modules always live before the kernel */
+#ifdef CONFIG_64BIT
+#define MODULES_VADDR    (PFN_ALIGN((unsigned long)&_end) - SZ_2G)

Re: Candidate Linux ABI for Intel AMX and hypothetical new related features

2021-04-14 Thread Borislav Petkov

On Wed, Apr 14, 2021 at 05:57:22PM -0400, Len Brown wrote:
> I'm pretty sure that the "it isn't my use case of interest, so it
> doesn't matter" line of reasoning has long been established as -EINVAL
> ;-)

I have only a very faint idea what you're trying to say here. Please
explain properly and more verbosely what exactly has been established
where?

Thx.

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette

Re: [PATCH] mm: Define ARCH_HAS_FIRST_USER_ADDRESS

2021-04-14 Thread Anshuman Khandual

On 4/14/21 11:40 AM, Christophe Leroy wrote:
> 
> 
> Le 14/04/2021 à 07:59, Anshuman Khandual a écrit :
>>
>>
>> On 4/14/21 10:52 AM, Christophe Leroy wrote:
>>>
>>>
>>> Le 14/04/2021 à 04:54, Anshuman Khandual a écrit :
 Currently most platforms define FIRST_USER_ADDRESS as 0UL duplicating the
 same code all over. Instead define a new option ARCH_HAS_FIRST_USER_ADDRESS
 for those platforms which would override generic default FIRST_USER_ADDRESS
 value 0UL. This makes it much cleaner with reduced code.

 Cc: linux-al...@vger.kernel.org
 Cc: linux-snps-...@lists.infradead.org
 Cc: linux-arm-ker...@lists.infradead.org
 Cc: linux-c...@vger.kernel.org
 Cc: linux-hexa...@vger.kernel.org
 Cc: linux-i...@vger.kernel.org
 Cc: linux-m...@lists.linux-m68k.org
 Cc: linux-m...@vger.kernel.org
 Cc: openr...@lists.librecores.org
 Cc: linux-par...@vger.kernel.org
 Cc: linuxppc-...@lists.ozlabs.org
 Cc: linux-ri...@lists.infradead.org
 Cc: linux-s...@vger.kernel.org
 Cc: linux...@vger.kernel.org
 Cc: sparcli...@vger.kernel.org
 Cc: linux...@lists.infradead.org
 Cc: linux-xte...@linux-xtensa.org
 Cc: x...@kernel.org
 Cc: linux...@kvack.org
 Cc: linux-kernel@vger.kernel.org
 Signed-off-by: Anshuman Khandual 
 ---
    arch/alpha/include/asm/pgtable.h | 1 -
    arch/arc/include/asm/pgtable.h   | 6 --
    arch/arm/Kconfig | 1 +
    arch/arm64/include/asm/pgtable.h | 2 --
    arch/csky/include/asm/pgtable.h  | 1 -
    arch/hexagon/include/asm/pgtable.h   | 3 ---
    arch/ia64/include/asm/pgtable.h  | 1 -
    arch/m68k/include/asm/pgtable_mm.h   | 1 -
    arch/microblaze/include/asm/pgtable.h    | 2 --
    arch/mips/include/asm/pgtable-32.h   | 1 -
    arch/mips/include/asm/pgtable-64.h   | 1 -
    arch/nds32/Kconfig   | 1 +
    arch/nios2/include/asm/pgtable.h | 2 --
    arch/openrisc/include/asm/pgtable.h  | 1 -
    arch/parisc/include/asm/pgtable.h    | 2 --
    arch/powerpc/include/asm/book3s/pgtable.h    | 1 -
    arch/powerpc/include/asm/nohash/32/pgtable.h | 1 -
    arch/powerpc/include/asm/nohash/64/pgtable.h | 2 --
    arch/riscv/include/asm/pgtable.h | 2 --
    arch/s390/include/asm/pgtable.h  | 2 --
    arch/sh/include/asm/pgtable.h    | 2 --
    arch/sparc/include/asm/pgtable_32.h  | 1 -
    arch/sparc/include/asm/pgtable_64.h  | 3 ---
    arch/um/include/asm/pgtable-2level.h | 1 -
    arch/um/include/asm/pgtable-3level.h | 1 -
    arch/x86/include/asm/pgtable_types.h | 2 --
    arch/xtensa/include/asm/pgtable.h    | 1 -
    include/linux/mm.h   | 4 
    mm/Kconfig   | 4 
    29 files changed, 10 insertions(+), 43 deletions(-)

 diff --git a/include/linux/mm.h b/include/linux/mm.h
 index 8ba434287387..47098ccd715e 100644
 --- a/include/linux/mm.h
 +++ b/include/linux/mm.h
 @@ -46,6 +46,10 @@ extern int sysctl_page_lock_unfairness;
      void init_mm_internals(void);
    +#ifndef ARCH_HAS_FIRST_USER_ADDRESS
>>>
>>> I guess you didn't test it . :)
>>
>> In fact I did :) Though just booted it on arm64 and cross compiled on
>> multiple others platforms.

I guess for all platforms, ARCH_HAS_FIRST_USER_ADDRESS would have just
evaluated to be false hence falling back on the generic definition. So
this never complained during build any where or during boot on arm64.

>>
>>>
>>> should be #ifndef CONFIG_ARCH_HAS_FIRST_USER_ADDRESS
>>
>> Right, meant that instead.
>>
>>>
 +#define FIRST_USER_ADDRESS    0UL
 +#endif
>>>
>>> But why do we need a config option at all for that ?
>>>
>>> Why not just:
>>>
>>> #ifndef FIRST_USER_ADDRESS
>>> #define FIRST_USER_ADDRESS    0UL
>>> #endif
>>
>> This sounds simpler. But just wondering, would not there be any possibility
>> of build problems due to compilation sequence between arch and generic code ?
>>
> 
> For sure it has to be addressed carefully, but there are already a lot of 
> stuff like that around pgtables.h
> 
> For instance, pte_offset_kernel() has a generic definition in 
> linux/pgtables.h based on whether it is already defined or not.
> 
> Taking into account that FIRST_USER_ADDRESS is today in the architectures's 
> asm/pgtables.h, I think putting the fallback definition in linux/pgtable.h 
> would do the trick.

Agreed, <linux/pgtable.h> includes <asm/pgtable.h> at the beginning and
if the arch defines FIRST_USER_ADDRESS, the generic one afterwards would
be skipped. The following change builds on multiple platforms.

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ad086e6d7155..5da96f5df48f 100644
---

Re: [PATCH 1/5] mm/swapfile: add percpu_ref support for swap

2021-04-14 Thread Dennis Zhou

On Thu, Apr 15, 2021 at 11:16:42AM +0800, Miaohe Lin wrote:
> On 2021/4/14 22:53, Dennis Zhou wrote:
> > On Wed, Apr 14, 2021 at 01:44:58PM +0800, Huang, Ying wrote:
> >> Dennis Zhou  writes:
> >>
> >>> On Wed, Apr 14, 2021 at 11:59:03AM +0800, Huang, Ying wrote:
>  Dennis Zhou  writes:
> 
> > Hello,
> >
> > On Wed, Apr 14, 2021 at 10:06:48AM +0800, Huang, Ying wrote:
> >> Miaohe Lin  writes:
> >>
> >>> On 2021/4/14 9:17, Huang, Ying wrote:
>  Miaohe Lin  writes:
> 
> > On 2021/4/12 15:24, Huang, Ying wrote:
> >> "Huang, Ying"  writes:
> >>
> >>> Miaohe Lin  writes:
> >>>
>  We will use percpu-refcount to serialize against concurrent 
>  swapoff. This
>  patch adds the percpu_ref support for later fixup.
> 
>  Signed-off-by: Miaohe Lin 
>  ---
>   include/linux/swap.h |  2 ++
>   mm/swapfile.c| 25 ++---
>   2 files changed, 24 insertions(+), 3 deletions(-)
> 
>  diff --git a/include/linux/swap.h b/include/linux/swap.h
>  index 144727041e78..849ba5265c11 100644
>  --- a/include/linux/swap.h
>  +++ b/include/linux/swap.h
>  @@ -240,6 +240,7 @@ struct swap_cluster_list {
>    * The in-memory structure used to track swap areas.
>    */
>   struct swap_info_struct {
>  +struct percpu_ref users;/* serialization 
>  against concurrent swapoff */
>   unsigned long   flags;  /* SWP_USED etc: see 
>  above */
>   signed shortprio;   /* swap priority of 
>  this type */
>   struct plist_node list; /* entry in 
>  swap_active_head */
>  @@ -260,6 +261,7 @@ struct swap_info_struct {
>   struct block_device *bdev;  /* swap device or bdev 
>  of swap file */
>   struct file *swap_file; /* seldom referenced */
>   unsigned int old_block_size;/* seldom referenced */
>  +struct completion comp; /* seldom referenced */
>   #ifdef CONFIG_FRONTSWAP
>   unsigned long *frontswap_map;   /* frontswap in-use, 
>  one bit per page */
>   atomic_t frontswap_pages;   /* frontswap pages 
>  in-use counter */
>  diff --git a/mm/swapfile.c b/mm/swapfile.c
>  index 149e77454e3c..724173cd7d0c 100644
>  --- a/mm/swapfile.c
>  +++ b/mm/swapfile.c
>  @@ -39,6 +39,7 @@
>   #include 
>   #include 
>   #include 
>  +#include 
>   
>   #include 
>   #include 
>  @@ -511,6 +512,15 @@ static void swap_discard_work(struct 
>  work_struct *work)
>   spin_unlock(>lock);
>   }
>   
>  +static void swap_users_ref_free(struct percpu_ref *ref)
>  +{
>  +struct swap_info_struct *si;
>  +
>  +si = container_of(ref, struct swap_info_struct, users);
>  +complete(>comp);
>  +percpu_ref_exit(>users);
> >>>
> >>> Because percpu_ref_exit() is used, we cannot use 
> >>> percpu_ref_tryget() in
> >>> get_swap_device(), better to add comments there.
> >>
> >> I just noticed that the comments of percpu_ref_tryget_live() says,
> >>
> >>  * This function is safe to call as long as @ref is between init 
> >> and exit.
> >>
> >> While we need to call get_swap_device() almost at any time, so it's
> >> better to avoid to call percpu_ref_exit() at all.  This will waste 
> >> some
> >> memory, but we need to follow the API definition to avoid potential
> >> issues in the long term.
> >
> > > I have to admit that I'm not really familiar with percpu_ref. So I 
> > read the
> > implementation code of the percpu_ref and found 
> > percpu_ref_tryget_live() could
> > be called after exit now. But you're right we need to follow the 
> > API definition
> > to avoid potential issues in the long term.
> >
> >>
> >> And we need to call percpu_ref_init() before insert the 
> >> swap_info_struct
> >> into the swap_info[].
> >
> > If we remove the call to percpu_ref_exit(), we should not use 
> > percpu_ref_init()
> > here because *percpu_ref->data is assumed to be NULL* in 
> > percpu_ref_init() while
> > this

Re: [PATCH v5 1/3] riscv: Move kernel mapping outside of linear mapping

2021-04-14 Thread Palmer Dabbelt


On Sun, 11 Apr 2021 09:41:44 PDT (-0700), a...@ghiti.fr wrote:

This is a preparatory patch for relocatable kernel and sv48 support.

The kernel used to be linked at PAGE_OFFSET address therefore we could use
the linear mapping for the kernel mapping. But the relocated kernel base
address will be different from PAGE_OFFSET and since in the linear mapping,
two different virtual addresses cannot point to the same physical address,
the kernel mapping needs to lie outside the linear mapping so that we don't
have to copy it at the same physical offset.

The kernel mapping is moved to the last 2GB of the address space, BPF
is now always after the kernel and modules use the 2GB memory range right
before the kernel, so BPF and modules regions do not overlap. KASLR
implementation will simply have to move the kernel in the last 2GB range
and just take care of leaving enough space for BPF.

In addition, by moving the kernel to the end of the address space, both
sv39 and sv48 kernels will be exactly the same without needing to be
relocated at runtime.

Suggested-by: Arnd Bergmann 
Signed-off-by: Alexandre Ghiti 
---
 arch/riscv/boot/loader.lds.S|  3 +-
 arch/riscv/include/asm/page.h   | 17 +-
 arch/riscv/include/asm/pgtable.h| 37 
 arch/riscv/include/asm/set_memory.h |  1 +
 arch/riscv/kernel/head.S|  3 +-
 arch/riscv/kernel/module.c  |  6 +-
 arch/riscv/kernel/setup.c   |  5 ++
 arch/riscv/kernel/vmlinux.lds.S |  3 +-
 arch/riscv/mm/fault.c   | 13 +
 arch/riscv/mm/init.c| 87 ++---
 arch/riscv/mm/kasan_init.c  |  9 +++
 arch/riscv/mm/physaddr.c|  2 +-
 12 files changed, 146 insertions(+), 40 deletions(-)

diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
index 47a5003c2e28..62d94696a19c 100644
--- a/arch/riscv/boot/loader.lds.S
+++ b/arch/riscv/boot/loader.lds.S
@@ -1,13 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */

 #include 
+#include 

 OUTPUT_ARCH(riscv)
 ENTRY(_start)

 SECTIONS
 {
-   . = PAGE_OFFSET;
+   . = KERNEL_LINK_ADDR;

.payload : {
*(.payload)
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index adc9d26f3d75..22cfb2be60dc 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -90,15 +90,28 @@ typedef struct page *pgtable_t;

 #ifdef CONFIG_MMU
 extern unsigned long va_pa_offset;
+extern unsigned long va_kernel_pa_offset;
 extern unsigned long pfn_base;
 #define ARCH_PFN_OFFSET(pfn_base)
 #else
 #define va_pa_offset   0
+#define va_kernel_pa_offset0
 #define ARCH_PFN_OFFSET(PAGE_OFFSET >> PAGE_SHIFT)
 #endif /* CONFIG_MMU */

-#define __pa_to_va_nodebug(x)  ((void *)((unsigned long) (x) + va_pa_offset))
-#define __va_to_pa_nodebug(x)  ((unsigned long)(x) - va_pa_offset)
+extern unsigned long kernel_virt_addr;
+
+#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + 
va_pa_offset))
+#define kernel_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + 
va_kernel_pa_offset))
+#define __pa_to_va_nodebug(x)  linear_mapping_pa_to_va(x)
+
+#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset)
+#define kernel_mapping_va_to_pa(x) ((unsigned long)(x) - 
va_kernel_pa_offset)
+#define __va_to_pa_nodebug(x)  ({  
\
+   unsigned long _x = x;   
\
+   (_x < kernel_virt_addr) ?\
+   linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x);  
\
+   })

 #ifdef CONFIG_DEBUG_VIRTUAL
 extern phys_addr_t __virt_to_phys(unsigned long x);
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index ebf817c1bdf4..80e63a93e903 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -11,23 +11,30 @@

 #include 

-#ifndef __ASSEMBLY__
-
-/* Page Upper Directory not used in RISC-V */
-#include 
-#include 
-#include 
-#include 
+#ifndef CONFIG_MMU
+#define KERNEL_LINK_ADDR   PAGE_OFFSET
+#else

-#ifdef CONFIG_MMU
+#define ADDRESS_SPACE_END  (UL(-1))
+/*
+ * Leave 2GB for kernel and BPF at the end of the address space
+ */
+#define KERNEL_LINK_ADDR   (ADDRESS_SPACE_END - SZ_2G + 1)

 #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
 #define VMALLOC_END  (PAGE_OFFSET - 1)
 #define VMALLOC_START(PAGE_OFFSET - VMALLOC_SIZE)

+/* KASLR should leave at least 128MB for BPF after the kernel */
 #define BPF_JIT_REGION_SIZE(SZ_128M)
-#define BPF_JIT_REGION_START   (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
-#define BPF_JIT_REGION_END (VMALLOC_END)
+#define BPF_JIT_REGION_START   PFN_ALIGN((unsigned long)&_end)
+#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
+
+/* Modules always live before the kernel */
+#ifdef CONFIG_64BIT
+#define

Re: [PATCH v3 2/4] mfd: simple-mfd-i2c: Adds Ampere's Altra SMpro support

2021-04-14 Thread Quan Nguyen


On 14/04/2021 19:21, Guenter Roeck wrote:

On Fri, Apr 09, 2021 at 10:13:30AM +0700, Quan Nguyen wrote:

Adds an MFD driver for SMpro found on the Mt.Jade hardware reference
platform with Ampere's Altra processor family.

Signed-off-by: Quan Nguyen 
Reported-by: kernel test robot 
---
  drivers/mfd/Kconfig  | 10 ++
  drivers/mfd/simple-mfd-i2c.c |  6 ++
  2 files changed, 16 insertions(+)

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index d07e8cf93286..f7a6460f7aa0 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -77,6 +77,16 @@ config MFD_AS3711
help
  Support for the AS3711 PMIC from AMS
  
+config MFD_SMPRO

+   tristate "Ampere Computing MFD SMpro core driver"
+   select MFD_SIMPLE_MFD_I2C


This is missing "depends on I2C".


Thanks Guenter to help pointing this out.
This fixes the issue reported by kernel test robot. Will include this in 
next version.



+   help
+ Say yes here to enable SMpro driver support for Ampere's Altra
+ processor family.
+
+ Ampere's Altra SMpro exposes an I2C regmap interface that can
+ be accessed by child devices.
+
  config MFD_AS3722
tristate "ams AS3722 Power Management IC"
select MFD_CORE
diff --git a/drivers/mfd/simple-mfd-i2c.c b/drivers/mfd/simple-mfd-i2c.c
index 87f684cff9a1..9a44655f5592 100644
--- a/drivers/mfd/simple-mfd-i2c.c
+++ b/drivers/mfd/simple-mfd-i2c.c
@@ -21,6 +21,11 @@ static const struct regmap_config simple_regmap_config = {
.val_bits = 8,
  };
  
+static const struct regmap_config simple_word_regmap_config = {

+   .reg_bits = 8,
+   .val_bits = 16,
+};
+
  static int simple_mfd_i2c_probe(struct i2c_client *i2c)
  {
const struct regmap_config *config;
@@ -39,6 +44,7 @@ static int simple_mfd_i2c_probe(struct i2c_client *i2c)
  
  static const struct of_device_id simple_mfd_i2c_of_match[] = {

{ .compatible = "kontron,sl28cpld" },
+   { .compatible = "ampere,smpro", .data = _word_regmap_config },
{}
  };
  MODULE_DEVICE_TABLE(of, simple_mfd_i2c_of_match);

Re: [PATCH v3 3/4] hwmon: smpro: Add Ampere's Altra smpro-hwmon driver

2021-04-14 Thread Quan Nguyen


On 14/04/2021 19:33, Guenter Roeck wrote:

On Fri, Apr 09, 2021 at 10:13:31AM +0700, Quan Nguyen wrote:

This commit adds support for Ampere SMpro hwmon driver. This driver
supports accessing various CPU sensors provided by the SMpro co-processor
including temperature, power, voltages, and current.

Signed-off-by: Quan Nguyen 
---


Change log goes here. You are making it difficult to review your patches.


Will add changelog here in next version.


  drivers/hwmon/Kconfig   |   8 +
  drivers/hwmon/Makefile  |   1 +
  drivers/hwmon/smpro-hwmon.c | 491 
  3 files changed, 500 insertions(+)
  create mode 100644 drivers/hwmon/smpro-hwmon.c

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 0ddc974b102e..ba4b5a911baf 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -67,6 +67,14 @@ config SENSORS_ABITUGURU3
  This driver can also be built as a module. If so, the module
  will be called abituguru3.
  
+config SENSORS_SMPRO

+   tristate "Ampere's Altra SMpro hardware monitoring driver"
+   depends on MFD_SMPRO
+   help
+ If you say yes here you get support for the thermal, voltage,
+ current and power sensors of Ampere's Altra processor family SoC
+ with SMpro co-processor.
+
  config SENSORS_AD7314
tristate "Analog Devices AD7314 and compatibles"
depends on SPI
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 59e78bc212cf..b25391f9c651 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -174,6 +174,7 @@ obj-$(CONFIG_SENSORS_SHT3x) += sht3x.o
  obj-$(CONFIG_SENSORS_SHTC1)   += shtc1.o
  obj-$(CONFIG_SENSORS_SIS5595) += sis5595.o
  obj-$(CONFIG_SENSORS_SMM665)  += smm665.o
+obj-$(CONFIG_SENSORS_SMPRO)+= smpro-hwmon.o
  obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o
  obj-$(CONFIG_SENSORS_SMSC47M1)+= smsc47m1.o
  obj-$(CONFIG_SENSORS_SMSC47M192)+= smsc47m192.o
diff --git a/drivers/hwmon/smpro-hwmon.c b/drivers/hwmon/smpro-hwmon.c
new file mode 100644
index ..a3389fcbad82
--- /dev/null
+++ b/drivers/hwmon/smpro-hwmon.c
@@ -0,0 +1,491 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ampere Computing SoC's SMPro Hardware Monitoring Driver
+ *
+ * Copyright (c) 2021, Ampere Computing LLC
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Identification Registers */
+#define MANUFACTURER_ID_REG0x02
+#define AMPERE_MANUFACTURER_ID 0xCD3A
+
+/* Logical Power Sensor Registers */
+#define SOC_TEMP   0x00
+#define SOC_VRD_TEMP   0x01
+#define DIMM_VRD_TEMP  0x02
+#define CORE_VRD_TEMP  0x03
+#define CH0_DIMM_TEMP  0x04
+#define CH1_DIMM_TEMP  0x05
+#define CH2_DIMM_TEMP  0x06
+#define CH3_DIMM_TEMP  0x07
+#define CH4_DIMM_TEMP  0x08
+#define CH5_DIMM_TEMP  0x09
+#define CH6_DIMM_TEMP  0x0A
+#define CH7_DIMM_TEMP  0x0B
+#define RCA_VRD_TEMP   0x0C
+
+#define CORE_VRD_PWR   0x10
+#define SOC_PWR0x11
+#define DIMM_VRD1_PWR  0x12
+#define DIMM_VRD2_PWR  0x13
+#define CORE_VRD_PWR_MW0x16
+#define SOC_PWR_MW 0x17
+#define DIMM_VRD1_PWR_MW   0x18
+#define DIMM_VRD2_PWR_MW   0x19
+#define RCA_VRD_PWR0x1A
+#define RCA_VRD_PWR_MW 0x1B
+
+#define MEM_HOT_THRESHOLD  0x22
+#define SOC_VR_HOT_THRESHOLD   0x23
+#define CORE_VRD_VOLT  0x24
+#define SOC_VRD_VOLT   0x25
+#define DIMM_VRD1_VOLT 0x26
+#define DIMM_VRD2_VOLT 0x27
+#define RCA_VRD_VOLT   0x28
+
+#define CORE_VRD_CURR  0x29
+#define SOC_VRD_CURR   0x2A
+#define DIMM_VRD1_CURR 0x2B
+#define DIMM_VRD2_CURR 0x2C
+#define RCA_VRD_CURR   0x2D
+
+struct smpro_hwmon {
+   struct regmap *regmap;
+   u32 offset;
+};
+
+struct smpro_sensor {
+   const u8 reg;
+   const u8 reg_ext;
+   const char *label;
+};
+
+static const struct smpro_sensor temperature[] = {
+   {
+   .reg = SOC_TEMP,
+   .label = "temp1 SoC"
+   },
+   {
+   .reg = SOC_VRD_TEMP,
+   .reg_ext = SOC_VR_HOT_THRESHOLD,
+   .label = "temp2 SoC VRD"
+   },
+   {
+   .reg = DIMM_VRD_TEMP,
+   .label = "temp3 DIMM VRD"
+   },
+   {
+   .reg = CORE_VRD_TEMP,
+   .label = "temp4 CORE VRD"
+   },
+   {
+   .reg = CH0_DIMM_TEMP,
+   .reg_ext = MEM_HOT_THRESHOLD,
+   .label = "temp5 CH0 DIMM"
+   },
+   {
+   .reg = CH1_DIMM_TEMP,
+   .reg_ext = MEM_HOT_THRESHOLD,
+   .label = "temp6 CH1 DIMM"
+   },
+   {
+   .reg = CH2_DIMM_TEMP,
+   .reg_ext = MEM_HOT_THRESHOLD,
+   .label = "temp7

Re: [PATCH 02/10] mm/numa: automatically generate node migration order

2021-04-14 Thread Wei Xu

On Wed, Apr 14, 2021 at 1:08 AM Oscar Salvador  wrote:
>
> Hi Wei Xu,
>
> I have some questions about it
>
> Fast class/memory are pictured as those nodes with CPUs, while Slow 
> class/memory
> are PMEM, right?
> Then, what stands for medium class/memory?

That is Dave's example.  I think David's guess makes sense (HBM - fast, DRAM -
medium, PMEM - slow).  It may also be possible that we have DDR5 as fast,
CXL-DDR4 as medium, and CXL-PMEM as slow.  But the most likely use cases for
now should be just two tiers: DRAM vs PMEM or other types of slower
memory devices.

> In Dave's example, list is created in a way that stays local to the socket,
> and we go from the fast one to the slow one.
> In yours, lists are created taking the fastest nodes from all sockets and
> we work our way down, which means have cross-socket nodes in the list.
> How much of a penalty is that?

Cross-socket demotion is certainly more expensive.  But because it is
sequential access
and can also be optimized with non-temporal stores, it may not be much
slower than
demotion to a local node in the next tier.  The actual penalty will
depend on the devices.

> And while I get your point, I am not sure if that is what we pretend here.
> This patchset aims to place cold pages that are about to be reclaimed in slower
> nodes to give them a second chance, while your design seems more to have kind
> of different memory classes and be able to place applications in one of those 
> tiers
> depending on its demands or sysadmin-demand.
>
> Could you expand some more?

Sure.  What I have described has the same goal as Dave's patchset,
i.e., to demote
cold pages to the slower nodes when they are about to be reclaimed.  The only
difference is that in my suggestion the demotion target of a fast tier
node is expanded
from a single node to a set of nodes from the slow tier and one node
in such a set
can be marked as the preferred/local demotion target.   This can help
enable more
flexible demotion policies to be configured, such as to allow a cgroup
to allocate from
all fast tier nodes, but only demote to a local slow tier node.  Such
a policy can reduce
memory stranding at the fast tier (compared to if memory hardwall is
used) and still
allow demotion from all fast tier nodes without incurring the expensive random
accesses to the demoted pages if they were demoted to remote slow tier nodes.

I understand that Dave started this patchset with a simplified
demotion path definition,
which I agree.  Meanwhile, I think this more generalized definition of
demotion path
is useful and can also be important for some use cases.

[PATCH v3 2/4] dt-bindings: arm: imx: Add i.mx6q DaSheng COM-9XX SBC

2021-04-14 Thread dillon . minfei

From: dillon min 

The DaSheng Com-9xx is an ARM based single board computer (SBC)
featuring:
- i.MX6Q
- 2GiB LPDDR3 DRAM
- 8GiB eMMC 5.0 FLASH
- 4MiB SPI Flash
- USB 2.0 Host/Device
- Multiple multi-protocol RS232/RS485 Serial ports
- microSD socket
- 5V DC power input
- HDMI1.4a,1080p@60
- RGMIIx1 Gigabit Ethernet
- CSI0x1, connect with ov2659

Signed-off-by: dillon min 
Cc: Krzysztof Kozlowski 
---
v3: move 'ds,imx6q-sbc' after 'dmo,imx6q-edmqmx6' to follow the alphabetical
order.

 Documentation/devicetree/bindings/arm/fsl.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml 
b/Documentation/devicetree/bindings/arm/fsl.yaml
index 297c87f45db8..598fb44c1bb8 100644
--- a/Documentation/devicetree/bindings/arm/fsl.yaml
+++ b/Documentation/devicetree/bindings/arm/fsl.yaml
@@ -197,6 +197,7 @@ properties:
   - boundary,imx6q-nitrogen6x
   - compulab,cm-fx6   # CompuLab CM-FX6
   - dmo,imx6q-edmqmx6 # Data Modul eDM-QMX6 Board
+  - ds,imx6q-sbc  # Da Sheng COM-9XX Modules
   - embest,imx6q-marsboard# Embest MarS Board i.MX6Dual
   - emtrion,emcon-mx6 # emCON-MX6D or emCON-MX6Q SoM
   - emtrion,emcon-mx6-avari   # emCON-MX6D or emCON-MX6Q SoM on 
Avari Base
-- 
2.7.4

[PATCH v3 3/4] arm: dts: imx: Add i.mx6q DaSheng COM-9XX SBC board support

2021-04-14 Thread dillon . minfei

From: dillon min 

The DaSheng Com-9xx is an ARM based single board computer (SBC)
featuring:
- i.MX6Q
- 2GiB LPDDR3 DRAM
- 8GiB eMMC 5.0 FLASH
- 4MiB SPI Flash
- USB 2.0 Host/Device
- Multiple multi-protocol RS232/RS485 Serial ports
- microSD socket
- 5V DC power input
- HDMI1.4a,1080p@60
- RGMIIx1 Gigabit Ethernet
- CSI0x1, connect with ov2659

Signed-off-by: dillon min 
---
v3: move imx6q-ds.dtb after imx6q-dms-ba16.dtb to follow the alphabetical order

 arch/arm/boot/dts/Makefile|   1 +
 arch/arm/boot/dts/imx6q-ds.dts|  17 ++
 arch/arm/boot/dts/imx6qdl-ds.dtsi | 465 ++
 3 files changed, 483 insertions(+)
 create mode 100644 arch/arm/boot/dts/imx6q-ds.dts
 create mode 100644 arch/arm/boot/dts/imx6qdl-ds.dtsi

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index a19c5ab9df84..425fe17ef7c1 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -510,6 +510,7 @@ dtb-$(CONFIG_SOC_IMX6Q) += \
imx6q-display5-tianma-tm070-1280x768.dtb \
imx6q-dmo-edmqmx6.dtb \
imx6q-dms-ba16.dtb \
+   imx6q-ds.dtb \
imx6q-emcon-avari.dtb \
imx6q-evi.dtb \
imx6q-gk802.dtb \
diff --git a/arch/arm/boot/dts/imx6q-ds.dts b/arch/arm/boot/dts/imx6q-ds.dts
new file mode 100644
index ..b0a63a133977
--- /dev/null
+++ b/arch/arm/boot/dts/imx6q-ds.dts
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2021 Dillon Min 
+//
+// Based on imx6qdl-sabresd.dtsi which is:
+// Copyright 2012 Freescale Semiconductor, Inc.
+// Copyright 2011 Linaro Ltd.
+
+/dts-v1/;
+
+#include "imx6q.dtsi"
+#include "imx6qdl-ds.dtsi"
+
+/ {
+   model = "DaSheng i.MX6 Quad Com-9xx Board";
+   compatible = "ds,imx6q-sbc", "fsl,imx6q";
+};
diff --git a/arch/arm/boot/dts/imx6qdl-ds.dtsi 
b/arch/arm/boot/dts/imx6qdl-ds.dtsi
new file mode 100644
index ..d28e065349cd
--- /dev/null
+++ b/arch/arm/boot/dts/imx6qdl-ds.dtsi
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2021 Dillon Min 
+//
+// Based on imx6qdl-sabresd.dtsi which is:
+// Copyright 2012 Freescale Semiconductor, Inc.
+// Copyright 2011 Linaro Ltd.
+
+#include 
+#include 
+#include 
+
+/ {
+   chosen {
+   stdout-path = 
+   };
+
+   memory@1000 {
+   device_type = "memory";
+   reg = <0x1000 0x8000>;
+   };
+
+   reg_usb_otg_vbus: regulator-usb-otg-vbus {
+   compatible = "regulator-fixed";
+   regulator-name = "usb_otg_vbus";
+   regulator-min-microvolt = <500>;
+   regulator-max-microvolt = <500>;
+   regulator-always-on;
+   };
+
+   reg_usb_h1_vbus: regulator-usb-h1-vbus {
+   compatible = "regulator-fixed";
+   regulator-name = "usb_h1_vbus";
+   regulator-min-microvolt = <500>;
+   regulator-max-microvolt = <500>;
+   regulator-always-on;
+   };
+
+   leds {
+   compatible = "gpio-leds";
+   pinctrl-names = "default";
+   pinctrl-0 = <_gpio_leds>;
+
+   green {
+   gpios = < 8 0>;
+   default-state = "on";
+   linux,default-trigger = "heartbeat";
+   };
+   };
+};
+
+_csi0_from_ipu1_csi0_mux {
+   bus-width = <8>;
+   data-shift = <12>; /* Lines 19:12 used */
+   hsync-active = <1>;
+   vsync-active = <1>;
+};
+
+_csi0_mux_from_parallel_sensor {
+   remote-endpoint = <_to_ipu1_csi0_mux>;
+};
+
+_csi0 {
+   pinctrl-names = "default";
+   pinctrl-0 = <_ipu1_csi0>;
+   status = "okay";
+};
+
+ {
+   assigned-clocks = < IMX6QDL_CLK_LDB_DI0_SEL>,
+ < IMX6QDL_CLK_LDB_DI1_SEL>;
+   assigned-clock-parents = < IMX6QDL_CLK_PLL3_USB_OTG>,
+< IMX6QDL_CLK_PLL3_USB_OTG>;
+};
+
+ {
+   cs-gpios = < 9 GPIO_ACTIVE_LOW>;
+   pinctrl-names = "default";
+   pinctrl-0 = <_ecspi1>;
+   status = "okay";
+
+   flash: m25p80@0 {
+   #address-cells = <1>;
+   #size-cells = <1>;
+   compatible = "st,m25p80", "jedec,spi-nor";
+   spi-max-frequency = <2000>;
+   reg = <0>;
+   };
+};
+
+ {
+   pinctrl-names = "default";
+   pinctrl-0 = <_enet>;
+   phy-mode = "rgmii-id";
+   phy-handle = <>;
+   fsl,magic-packet;
+   status = "okay";
+
+   mdio {
+   #address-cells = <1>;
+   #size-cells = <0>;
+
+   phy: ethernet-phy@1 {
+   reg = <1>;
+   qca,clk-out-frequency = <12500>;
+   reset-gpios = < 10 GPIO_ACTIVE_LOW>;
+   reset-assert-us = <1>;
+   };
+   };
+};
+
+ {
+   pinctrl-names = "default";
+   pinctrl-0 = <_hdmi_cec>;

[PATCH v3 4/4] media: i2c: ov2659: Use clk_{prepare_enable,disable_unprepare}() to set xvclk on/off

2021-04-14 Thread dillon . minfei

From: dillon min 

On some platforms (imx6q), xvclk might not be switched on in advance.
Also, to save power, xvclk should not be always on.
So add clk_prepare_enable() and clk_disable_unprepare() on the driver
side to switch xvclk on/off at the proper stage.

Add following changes:
- add 'struct clk *clk;' in 'struct ov2659 {}'
- enable xvclk in ov2659_power_on()
- disable xvclk in ov2659_power_off()

Signed-off-by: dillon min 
---
v3: optimize commit message

 drivers/media/i2c/ov2659.c | 24 ++--
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/media/i2c/ov2659.c b/drivers/media/i2c/ov2659.c
index 42f64175a6df..fb78a1cedc03 100644
--- a/drivers/media/i2c/ov2659.c
+++ b/drivers/media/i2c/ov2659.c
@@ -204,6 +204,7 @@ struct ov2659 {
struct i2c_client *client;
struct v4l2_ctrl_handler ctrls;
struct v4l2_ctrl *link_frequency;
+   struct clk *clk;
const struct ov2659_framesize *frame_size;
struct sensor_register *format_ctrl_regs;
struct ov2659_pll_ctrl pll;
@@ -1270,6 +1271,8 @@ static int ov2659_power_off(struct device *dev)
 
gpiod_set_value(ov2659->pwdn_gpio, 1);
 
+   clk_disable_unprepare(ov2659->clk);
+
return 0;
 }
 
@@ -1278,9 +1281,17 @@ static int ov2659_power_on(struct device *dev)
struct i2c_client *client = to_i2c_client(dev);
struct v4l2_subdev *sd = i2c_get_clientdata(client);
struct ov2659 *ov2659 = to_ov2659(sd);
+   int ret;
 
dev_dbg(>dev, "%s:\n", __func__);
 
+   ret = clk_prepare_enable(ov2659->clk);
+   if (ret) {
+   dev_err(>dev, "%s: failed to enable clock\n",
+   __func__);
+   return ret;
+   }
+
gpiod_set_value(ov2659->pwdn_gpio, 0);
 
if (ov2659->resetb_gpio) {
@@ -1425,7 +1436,6 @@ static int ov2659_probe(struct i2c_client *client)
const struct ov2659_platform_data *pdata = ov2659_get_pdata(client);
struct v4l2_subdev *sd;
struct ov2659 *ov2659;
-   struct clk *clk;
int ret;
 
if (!pdata) {
@@ -1440,11 +1450,11 @@ static int ov2659_probe(struct i2c_client *client)
ov2659->pdata = pdata;
ov2659->client = client;
 
-   clk = devm_clk_get(>dev, "xvclk");
-   if (IS_ERR(clk))
-   return PTR_ERR(clk);
+   ov2659->clk = devm_clk_get(>dev, "xvclk");
+   if (IS_ERR(ov2659->clk))
+   return PTR_ERR(ov2659->clk);
 
-   ov2659->xvclk_frequency = clk_get_rate(clk);
+   ov2659->xvclk_frequency = clk_get_rate(ov2659->clk);
if (ov2659->xvclk_frequency < 600 ||
ov2659->xvclk_frequency > 2700)
return -EINVAL;
@@ -1506,7 +1516,9 @@ static int ov2659_probe(struct i2c_client *client)
ov2659->frame_size = _framesizes[2];
ov2659->format_ctrl_regs = ov2659_formats[0].format_ctrl_regs;
 
-   ov2659_power_on(>dev);
+   ret = ov2659_power_on(>dev);
+   if (ret < 0)
+   goto error;
 
ret = ov2659_detect(sd);
if (ret < 0)
-- 
2.7.4

[PATCH v3 0/4] arm: imx: Add i.mx6q DaSheng COM-9XX SBC board support

2021-04-14 Thread dillon . minfei

From: dillon min 

This patchset aims to add kernel support for the DaSheng COM-9XX SBC board and
to optimize the ov2659 driver to handle xvclk on/off at the proper stage to save power.

changes based on master branch, since commit id:
89698becf06d341a700913c3d89ce2a914af69a2

The DaSheng Com-9xx is an ARM based single board computer (SBC)
featuring:
- i.MX6Q
- 2GiB LPDDR3 DRAM
- 8GiB eMMC 5.0 FLASH
- 4MiB SPI Flash
- USB 2.0 Host/Device
- Multiple multi-protocol RS232/RS485 Serial ports
- microSD socket
- 5V DC power input
- HDMI1.4a,1080p@60
- RGMIIx1 Gigabit Ethernet
- CSI0x1, connect with ov2659

---
v3:
- optimize commit message for patch v3 ov2659 part
- move 'imx6q-ds.dtb' after 'imx6q-dms-ba16.dtb' to follow the alphabetical 
order
  (arch/arm/boot/dts/Makefile)
- move 'ds,imx6q-sbc' after 'dmo,imx6q-edmqmx6' to follow the alphabetical
  order. (Documentation/devicetree/bindings/arm/fsl.yaml)
- move v2 patch 4 to v3 patch 1
- add 'Reviewed-by: Krzysztof Kozlowski '
  for [PATCH v3 1/4]

- v2 link:
https://lore.kernel.org/linux-arm-kernel/1618383117-17179-1-git-send-email-dillon.min...@gmail.com/T/#md2dc86ac665ed7f10cffe4909825b97608805d3f

v2:
- add "[PATCH v2 4/4] dt-bindings: add dasheng vendor prefix" to fix
  checkpatch.pl warning.


dillon min (4):
  dt-bindings: add dasheng vendor prefix
  dt-bindings: arm: imx: Add i.mx6q DaSheng COM-9XX SBC
  arm: dts: imx: Add i.mx6q DaSheng COM-9XX SBC board support
  media: i2c: ov2659: Use clk_{prepare_enable,disable_unprepare}() to
set xvclk on/off

 Documentation/devicetree/bindings/arm/fsl.yaml |   1 +
 .../devicetree/bindings/vendor-prefixes.yaml   |   2 +
 arch/arm/boot/dts/Makefile |   1 +
 arch/arm/boot/dts/imx6q-ds.dts |  17 +
 arch/arm/boot/dts/imx6qdl-ds.dtsi  | 465 +
 drivers/media/i2c/ov2659.c |  24 +-
 6 files changed, 504 insertions(+), 6 deletions(-)
 create mode 100644 arch/arm/boot/dts/imx6q-ds.dts
 create mode 100644 arch/arm/boot/dts/imx6qdl-ds.dtsi

-- 
2.7.4

[PATCH v3 1/4] dt-bindings: add dasheng vendor prefix

2021-04-14 Thread dillon . minfei

From: dillon min 

Add vendor prefix for DaSheng, Inc.

Signed-off-by: dillon min 
Reviewed-by: Krzysztof Kozlowski 
---
v3:
- move v2 patch 4 to v3 patch 1
- add 'Reviewed-by: Krzysztof Kozlowski '

 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml 
b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index f6064d84a424..4ec28488c963 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -309,6 +309,8 @@ patternProperties:
 description: DPTechnics
   "^dragino,.*":
 description: Dragino Technology Co., Limited
+  "^ds,.*":
+description: DaSheng, Inc.
   "^dserve,.*":
 description: dServe Technology B.V.
   "^dynaimage,.*":
-- 
2.7.4

答复: [PATCH v5] docs/zh_CN: add translations in zh_CN/dev-tools/gcov

2021-04-14 Thread Zengtao (B)

> -邮件原件-
> 发件人: Wu XiangCheng [mailto:bob...@email.cn]
> 发送时间: 2021年4月14日 21:21
> 收件人: Alex Shi ; Bernard Zhao 
> 抄送: Jonathan Corbet ; YanTeng Si
> ; Nathan Chancellor ; Nick
> Desaulniers ; linux-...@vger.kernel.org;
> linux-kernel@vger.kernel.org; clang-built-li...@googlegroups.com
> 主题: [PATCH v5] docs/zh_CN: add translations in zh_CN/dev-tools/gcov
> 
> From: Bernard Zhao 
> 
> Add new zh translations
> * zh_CN/dev-tools/gcov.rst
> * zh_CN/dev-tools/index.rst
> and link them to zh_CN/index.rst
> 
> Signed-off-by: Bernard Zhao 
> Reviewed-by: Wu XiangCheng 
> Signed-off-by: Wu XiangCheng 
> ---
> base: linux-next
> commit 269dd42f4776 ("docs/zh_CN: add riscv to zh_CN index")
> 
> Changes since V4:
> * modified some words under Alex Shi's advices
> 
> Changes since V3:
> * update to newest linux-next
> * fix ``
> * fix tags
> * fix list indent
> 
> Changes since V2:
> * fix some inaccurate translation
> 
> Changes since V1:
> * add index.rst in dev-tools and link to to zh_CN/index.rst
> * fix some inaccurate translation
> 
>  .../translations/zh_CN/dev-tools/gcov.rst | 265 ++
>  .../translations/zh_CN/dev-tools/index.rst|  35 +++
>  Documentation/translations/zh_CN/index.rst|   1 +
>  3 files changed, 301 insertions(+)
>  create mode 100644 Documentation/translations/zh_CN/dev-tools/gcov.rst
>  create mode 100644 Documentation/translations/zh_CN/dev-tools/index.rst
> 
> diff --git a/Documentation/translations/zh_CN/dev-tools/gcov.rst
> b/Documentation/translations/zh_CN/dev-tools/gcov.rst
> new file mode 100644
> index ..7515b488bc4e
> --- /dev/null
> +++ b/Documentation/translations/zh_CN/dev-tools/gcov.rst
> @@ -0,0 +1,265 @@
> +.. include:: ../disclaimer-zh_CN.rst
> +
> +:Original: Documentation/dev-tools/gcov.rst
> +:Translator: 赵军奎 Bernard Zhao 
> +
> +在Linux内核里使用gcov做代码覆盖率检查
> +=
> +
> +gcov是linux中已经集成的一个分析模块，该模块在内核中对GCC的代码
> 覆盖率统

Gcov is a tool/function, misleading for " gcov是linux中已经集成的一个分析
模块"

I 'd suggest:
"Linux内核中已经集成一个特性支持gcov功能，该特性让用户可以使用gcov
 工具对内核代码覆盖率进行统计"

Thanks.

[QUESTION] Will the pahole tar source code with corresponding libbpf submodule codes be released as well in the future?

2021-04-14 Thread Tiezhu Yang


(1) tools/bpf/bpftool build failed due to the following reason:

Error: failed to load BTF from /boot/vmlinux-5.12.0-rc2: No such file or 
directory

make: *** [Makefile:158: vmlinux.h] Error 2

(2) When set CONFIG_DEBUG_INFO_BTF=y, failed to generate BTF for vmlinux
due to pahole is not available

BTF: .tmp_vmlinux.btf: pahole (pahole) is not available
Failed to generate BTF for vmlinux
Try to disable CONFIG_DEBUG_INFO_BTF
make: *** [Makefile:1197: vmlinux] Error 1

(3) When build pahole from tar.gz source code, it still failed
due to no libbpf submodule.

loongson@linux:~$ wget 
https://git.kernel.org/pub/scm/devel/pahole/pahole.git/snapshot/pahole-1.21.tar.gz

loongson@linux:~$ tar xf pahole-1.21.tar.gz
loongson@linux:~$ cd pahole-1.21
loongson@linux:~/pahole-1.21$ mkdir build
loongson@linux:~/pahole-1.21$ cd build/
loongson@linux:~/pahole-1.21/build$ cmake -D__LIB=lib ..
-- The C compiler identification is GNU 10.2.1
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /usr/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Checking availability of DWARF and ELF development libraries
-- Looking for dwfl_module_build_id in elf
-- Looking for dwfl_module_build_id in elf - found
-- Found dwarf.h header: /usr/include
-- Found elfutils/libdw.h header: /usr/include
-- Found libdw library: /usr/lib/mips64el-linux-gnuabi64/libdw.so
-- Found libelf library: /usr/lib/mips64el-linux-gnuabi64/libelf.so
-- Checking availability of DWARF and ELF development libraries - done
-- Found ZLIB: /usr/lib/mips64el-linux-gnuabi64/libz.so (found version 
"1.2.11")

CMake Error at CMakeLists.txt:60 (message):
  The submodules were not downloaded! GIT_SUBMODULE was turned off or 
failed.

  Please update submodules and try again.

-- Configuring incomplete, errors occurred!
See also "/home/loongson/pahole-1.21/build/CMakeFiles/CMakeOutput.log".

(4) I notice that the pahole git source code can be built successfully because
it clones libbpf automatically:

-- Submodule update
Submodule 'lib/bpf' (https://github.com/libbpf/libbpf) registered for 
path 'lib/bpf'

Cloning into '/home/loongson/pahole/lib/bpf'...
Submodule path 'lib/bpf': checked out 
'986962fade5dfa89c2890f3854eb040d2a64ab38'

-- Submodule update - done

(5) So, will the pahole source tarball be released together with the
corresponding libbpf submodule code in the future, just like bcc?
https://github.com/iovisor/bcc/releases
https://github.com/iovisor/bcc/commit/708f786e3784dc32570a079f2ed74c35731664ea

Thanks,
Tiezhu

[RFC PATCH 2/2] bfq/mq-deadline: remove redundant check for passthrough request

2021-04-14 Thread Lin Feng

Since commit 01e99aeca39796003 'blk-mq: insert passthrough request into
hctx->dispatch directly', passthrough request should not appear in
IO-scheduler any more, so blk_rq_is_passthrough checking in addon IO
schedulers is redundant.

(Note: this patch passes a generic IO load test with hdds under a SAS
controller and hdds under an AHCI controller, but obviously does not cover all cases.
Not sure if passthrough request can still escape into IO scheduler from
blk_mq_sched_insert_requests, which is used by blk_mq_flush_plug_list and
has lots of indirect callers.)

Signed-off-by: Lin Feng 
---
 block/bfq-iosched.c | 2 +-
 block/mq-deadline.c | 7 ++-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 95586137194e..b827c9212b02 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5627,7 +5627,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx 
*hctx, struct request *rq,
 
spin_lock_irq(>lock);
bfqq = bfq_init_rq(rq);
-   if (!bfqq || at_head || blk_rq_is_passthrough(rq)) {
+   if (!bfqq || at_head) {
if (at_head)
list_add(>queuelist, >dispatch);
else
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index f3631a287466..04aded71ead2 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -500,11 +500,8 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, 
struct request *rq,
 
trace_block_rq_insert(rq);
 
-   if (at_head || blk_rq_is_passthrough(rq)) {
-   if (at_head)
-   list_add(>queuelist, >dispatch);
-   else
-   list_add_tail(>queuelist, >dispatch);
+   if (at_head) {
+   list_add(>queuelist, >dispatch);
} else {
deadline_add_rq_rb(dd, rq);
 
-- 
2.30.2

Re: [PATCH v10] i2c: virtio: add a virtio i2c frontend driver

2021-04-14 Thread Jason Wang




在 2021/3/23 下午10:19, Jie Deng 写道:

Add an I2C bus driver for virtio para-virtualization.

The controller can be emulated by the backend driver in
any device model software by following the virtio protocol.

The device specification can be found on
https://lists.oasis-open.org/archives/virtio-comment/202101/msg8.html.

By following the specification, people may implement different
backend drivers to emulate different controllers according to
their needs.

Co-developed-by: Conghui Chen 
Signed-off-by: Conghui Chen 
Signed-off-by: Jie Deng 
---
Changes in v10:
 - Fix some typo errors.
 - Refined the virtio_i2c_complete_reqs to use less code lines.

Changes in v9:
 - Remove the virtio_adapter and update its members in probe.
 - Refined the virtio_i2c_complete_reqs for buf free.

Changes in v8:
 - Make virtio_i2c.adap a pointer.
 - Mark members in virtio_i2c_req with cacheline_aligned.

Changes in v7:
 - Remove unused headers.
 - Update Makefile and Kconfig.
 - Add the cleanup after completing reqs.
 - Avoid memcpy for data marked with I2C_M_DMA_SAFE.
 - Fix something reported by kernel test robot.

Changes in v6:
 - Move struct virtio_i2c_req into the driver.
 - Use only one buf in struct virtio_i2c_req.

Changes in v5:
 - The first version based on the acked specification.

  drivers/i2c/busses/Kconfig  |  11 ++
  drivers/i2c/busses/Makefile |   3 +
  drivers/i2c/busses/i2c-virtio.c | 276 
  include/uapi/linux/virtio_i2c.h |  40 ++
  include/uapi/linux/virtio_ids.h |   1 +
  5 files changed, 331 insertions(+)
  create mode 100644 drivers/i2c/busses/i2c-virtio.c
  create mode 100644 include/uapi/linux/virtio_i2c.h

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index 05ebf75..cb8d0d8 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -21,6 +21,17 @@ config I2C_ALI1535
  This driver can also be built as a module.  If so, the module
  will be called i2c-ali1535.
  
+config I2C_VIRTIO

+   tristate "Virtio I2C Adapter"
+   select VIRTIO
+   help
+ If you say yes to this option, support will be included for the virtio
+ I2C adapter driver. The hardware can be emulated by any device model
+ software according to the virtio protocol.
+
+ This driver can also be built as a module. If so, the module
+ will be called i2c-virtio.
+
  config I2C_ALI1563
tristate "ALI 1563"
depends on PCI
diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 615f35e..efdd3f3 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile
@@ -145,4 +145,7 @@ obj-$(CONFIG_I2C_XGENE_SLIMPRO) += i2c-xgene-slimpro.o
  obj-$(CONFIG_SCx200_ACB)  += scx200_acb.o
  obj-$(CONFIG_I2C_FSI) += i2c-fsi.o
  
+# VIRTIO I2C host controller driver

+obj-$(CONFIG_I2C_VIRTIO)   += i2c-virtio.o
+
  ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG
diff --git a/drivers/i2c/busses/i2c-virtio.c b/drivers/i2c/busses/i2c-virtio.c
new file mode 100644
index 000..99a1e30
--- /dev/null
+++ b/drivers/i2c/busses/i2c-virtio.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Virtio I2C Bus Driver
+ *
+ * The Virtio I2C Specification:
+ * 
https://raw.githubusercontent.com/oasis-tcs/virtio-spec/master/virtio-i2c.tex
+ *
+ * Copyright (c) 2021 Intel Corporation. All rights reserved.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/**
+ * struct virtio_i2c - virtio I2C data
+ * @vdev: virtio device for this controller
+ * @completion: completion of virtio I2C message
+ * @adap: I2C adapter for this controller
+ * @lock: lock for virtqueue processing
+ * @vq: the virtio virtqueue for communication
+ */
+struct virtio_i2c {
+   struct virtio_device *vdev;
+   struct completion completion;
+   struct i2c_adapter adap;
+   struct mutex lock;
+   struct virtqueue *vq;
+};
+
+/**
+ * struct virtio_i2c_req - the virtio I2C request structure
+ * @out_hdr: the OUT header of the virtio I2C message
+ * @buf: the buffer into which data is read, or from which it's written
+ * @in_hdr: the IN header of the virtio I2C message
+ */
+struct virtio_i2c_req {
+   struct virtio_i2c_out_hdr out_hdr   cacheline_aligned;
+   uint8_t *bufcacheline_aligned;
+   struct virtio_i2c_in_hdr in_hdr cacheline_aligned;
+};
+
+static void virtio_i2c_msg_done(struct virtqueue *vq)
+{
+   struct virtio_i2c *vi = vq->vdev->priv;
+
+   complete(>completion);
+}
+
+static int virtio_i2c_send_reqs(struct virtqueue *vq,
+   struct virtio_i2c_req *reqs,
+   struct i2c_msg *msgs, int nr)
+{
+   struct scatterlist *sgs[3],

[PATCH 1/2] blk-mq: bypass IO scheduler's limit_depth for passthrough request

2021-04-14 Thread Lin Feng

Commit 01e99aeca39796003 ("blk-mq: insert passthrough request into
hctx->dispatch directly") gives high priority to passthrough requests and
bypasses the underlying IO scheduler. But when we allocate a tag for such a
request, it still runs the IO scheduler's limit_depth callback, while what we
really want is to give the full sbitmap depth capability to such a request for
acquiring an available tag.
blktrace shows PC requests(dmraid -s -c -i) hit bfq's limit_depth:
  8,020 0.0 39952 1,0  m   N bfq [bfq_limit_depth] 
wr_busy 0 sync 0 depth 8
  8,021 0.08134 39952  D   R 4 [dmraid]
  8,022 0.2153824  C   R [0]
  8,020 0.35442 39952 1,0  m   N bfq [bfq_limit_depth] 
wr_busy 0 sync 0 depth 8
  8,023 0.38813 39952  D   R 24 [dmraid]
  8,024 0.4435624  C   R [0]

This patch introduces a new wrapper to make the code less ugly.

Signed-off-by: Lin Feng 
---
 block/blk-mq.c | 3 ++-
 include/linux/blkdev.h | 6 ++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d4d7c1caa439..927189a55575 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -361,11 +361,12 @@ static struct request *__blk_mq_alloc_request(struct 
blk_mq_alloc_data *data)
 
if (e) {
/*
-* Flush requests are special and go directly to the
+* Flush/passthrough requests are special and go directly to the
 * dispatch list. Don't include reserved tags in the
 * limiting, as it isn't useful.
 */
if (!op_is_flush(data->cmd_flags) &&
+   !blk_op_is_passthrough(data->cmd_flags) &&
e->type->ops.limit_depth &&
!(data->flags & BLK_MQ_REQ_RESERVED))
e->type->ops.limit_depth(data->cmd_flags, data);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 158aefae1030..0d81eed39833 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -272,6 +272,12 @@ static inline bool bio_is_passthrough(struct bio *bio)
return blk_op_is_scsi(op) || blk_op_is_private(op);
 }
 
+static inline bool blk_op_is_passthrough(unsigned int op)
+{
+   return (blk_op_is_scsi(op & REQ_OP_MASK) ||
+   blk_op_is_private(op & REQ_OP_MASK));
+}
+
 static inline unsigned short req_get_ioprio(struct request *req)
 {
return req->ioprio;
-- 
2.30.2

Re: [v2 1/2] dt-bindings: Add bindings for aspeed pwm-tach and pwm.

2021-04-14 Thread Billy Tsai

Hi Rob,

On 2021/4/15, 6:16 AM, Rob Herring wrote:

On Wed, Apr 14, 2021 at 06:49:38PM +0800, Billy Tsai wrote:
>> This patch adds device bindings for aspeed pwm-tach device which is a
>> multi-function device including pwm and tach functions, and the pwm device which
>> should be the sub-node of pwm-tach device.
>> 
>> Signed-off-by: Billy Tsai 
>> Change-Id: I18d9dea14c3a04e1b7e38ffecd49d45917b9b545
>
>Drop
>
>> ---
>>  .../bindings/mfd/aspeed,ast2600-pwm-tach.yaml | 60 +++
>>  .../bindings/pwm/aspeed,ast2600-pwm.yaml  | 44 ++
>>  2 files changed, 104 insertions(+)
>>  create mode 100644 
Documentation/devicetree/bindings/mfd/aspeed,ast2600-pwm-tach.yaml
>>  create mode 100644 
Documentation/devicetree/bindings/pwm/aspeed,ast2600-pwm.yaml
>> 
>> diff --git 
a/Documentation/devicetree/bindings/mfd/aspeed,ast2600-pwm-tach.yaml 
b/Documentation/devicetree/bindings/mfd/aspeed,ast2600-pwm-tach.yaml
>> new file mode 100644
>> index ..eaf8bdf8d44e
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/mfd/aspeed,ast2600-pwm-tach.yaml
>> @@ -0,0 +1,60 @@
>> +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
>> +# Copyright (C) 2021 ASPEED, Inc.
>> +%YAML 1.2
>> +---
>> +$id: http://devicetree.org/schemas/mfd/aspeed,ast2600-pwm-tach.yaml#
>> +$schema: http://devicetree.org/meta-schemas/core.yaml#
>> +
>> +title: PWM Tach controller Device Tree Bindings
>> +
>> +description: |
>> +  The PWM Tach controller is represented as a multi-function device 
which
>> +  includes:
>> +PWM
>> +Tach

> But is it really? A PWM and tach sounds like a fan controller. Look at 

Our PWM is not only for fans but is also used for motors, LEDs, buzzers, and so 
on. 
So I want to split the function into two devices under a multi-function device: 
one for PWM output and one for the tach monitor.

> other existing PWM+tach bindings we have for fans.

I didn't find the PWM+tach bindings. Could you give me an example? Thanks.

>> +
>> +maintainers:
>> +  - Billy Tsai 
>> +
>> +properties:
>> +  compatible:
>> +items:
>> +  - enum:
>> +  - aspeed,ast2600-pwm-tach
>> +  - const: syscon
>> +  - const: simple-mfd
>> +  reg:
>> +maxItems: 1
>> +  "#address-cells":
>> +const: 1
>> +  "#size-cells":
>> +const: 1
>> +
>> +required:
>> +  - compatible
>> +  - reg
>> +  - "#address-cells"
>> +  - "#size-cells"
>> +
>> +additionalProperties:
>> +  type: object

> As you know the 2 node names, they should be documented. However, see 
> below.

>> +
>> +examples:
>> +  - |
>> +pwm_tach: pwm_tach@1e61 {
>> +  compatible = "aspeed,ast2600-pwm-tach", "syscon", "simple-mfd";
>> +  #address-cells = <1>;
>> +  #size-cells = <1>;
>> +  reg = <0x1e61 0x100>;
>> +
>> +  pwm: pwm@0 {
>> +compatible = "aspeed,ast2600-pwm";
>> +#pwm-cells = <3>;
>> +reg = <0x0 0x100>;
>> +  };
>> +
>> +  tach: tach@1 {
>> +compatible = "aspeed,ast2600-tach";
>> +reg = <0x0 0x100>;

> You have 2 nodes at the same address. Not valid.

Our pwm and tach use the same base address, and the offsets are laid out as below:

PWM0 used 0x0 0x4, Tach0 used 0x8 0xc
PWM1 used 0x10 0x14, Tach1 used 0x18 0x1c
...

I will remove the reg property from pwm and tach node and remove the 
"#address-cells" and
"#size-cells" from the parent node.

>> +  };

> There's no real need for 2 child nodes. The parent node can be a PWM 
> provider.

However, in our usage, the parent node is an MFD, not just a simple PWM device. 
I don't want to
combine the different functions into one device node.


>> +};
>> diff --git 
a/Documentation/devicetree/bindings/pwm/aspeed,ast2600-pwm.yaml 
b/Documentation/devicetree/bindings/pwm/aspeed,ast2600-pwm.yaml
>> new file mode 100644
>> index ..97923e68ccb9
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/pwm/aspeed,ast2600-pwm.yaml
>> @@ -0,0 +1,44 @@
>> +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
>> +# Copyright (C) 2021 ASPEED, Inc.
>> +%YAML 1.2
>> +---
>> +$id: http://devicetree.org/schemas/pwm/aspeed,ast2600-pwm.yaml#
>> +$schema: http://devicetree.org/meta-schemas/core.yaml#
>> +
>> +title: ASPEED AST2600 PWM controller
>> +
>> +maintainers:
>> +  - Billy Tsai 
>> +
>> +description: |
>> +  The ASPEED PWM controller can support upto 16 PWM outputs.
>> +
>> +properties:
>> +  compatible:
>> +enum:
>> +  - aspeed,ast2600-pwm
>> +
>> +  "#pwm-cells":
>> +const: 3
>> +
>> +  reg:

drivers/bus/mhi/core/pm.c:740:6: warning: stack frame size of 3568 bytes in function 'mhi_pm_st_worker'

2021-04-14 Thread kernel test robot

Hi Bhaumik,

FYI, the error/warning still remains.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 
master
head:   7f75285ca572eaabc028cf78c6ab5473d0d160be
commit: 556bbb442bbb44f429dbaa9f8b48e0b4cda6e088 bus: mhi: core: Separate 
system error and power down handling
date:   5 months ago
config: powerpc64-randconfig-r034-20210414 (attached as .config)
compiler: clang version 13.0.0 (https://github.com/llvm/llvm-project 
9829f5e6b1bca9b61efc629770d28bb9014dec45)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install powerpc64 cross compiling tool for clang build
# apt-get install binutils-powerpc64-linux-gnu
# 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=556bbb442bbb44f429dbaa9f8b48e0b4cda6e088
git remote add linus 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
git fetch --no-tags linus master
git checkout 556bbb442bbb44f429dbaa9f8b48e0b4cda6e088
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 
ARCH=powerpc64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

   ^
   arch/powerpc/include/asm/io.h:522:62: note: expanded from macro '__do_outl'
   #define __do_outl(val, port)writel(val,(PCI_IO_ADDR)_IO_BASE+port);
  ~^
   In file included from drivers/bus/mhi/core/pm.c:10:
   In file included from include/linux/dma-mapping.h:10:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/powerpc/include/asm/io.h:604:
   arch/powerpc/include/asm/io-defs.h:43:1: warning: performing pointer 
arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
   DEF_PCI_AC_NORET(insb, (unsigned long p, void *b, unsigned long c),
   ^~~
   arch/powerpc/include/asm/io.h:601:3: note: expanded from macro 
'DEF_PCI_AC_NORET'
   __do_##name al; \
   ^~
   :236:1: note: expanded from here
   __do_insb
   ^
   arch/powerpc/include/asm/io.h:541:56: note: expanded from macro '__do_insb'
   #define __do_insb(p, b, n)  readsb((PCI_IO_ADDR)_IO_BASE+(p), (b), (n))
  ~^
   In file included from drivers/bus/mhi/core/pm.c:10:
   In file included from include/linux/dma-mapping.h:10:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/powerpc/include/asm/io.h:604:
   arch/powerpc/include/asm/io-defs.h:45:1: warning: performing pointer 
arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
   DEF_PCI_AC_NORET(insw, (unsigned long p, void *b, unsigned long c),
   ^~~
   arch/powerpc/include/asm/io.h:601:3: note: expanded from macro 
'DEF_PCI_AC_NORET'
   __do_##name al; \
   ^~
   :238:1: note: expanded from here
   __do_insw
   ^
   arch/powerpc/include/asm/io.h:542:56: note: expanded from macro '__do_insw'
   #define __do_insw(p, b, n)  readsw((PCI_IO_ADDR)_IO_BASE+(p), (b), (n))
  ~^
   In file included from drivers/bus/mhi/core/pm.c:10:
   In file included from include/linux/dma-mapping.h:10:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/powerpc/include/asm/io.h:604:
   arch/powerpc/include/asm/io-defs.h:47:1: warning: performing pointer 
arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
   DEF_PCI_AC_NORET(insl, (unsigned long p, void *b, unsigned long c),
   ^~~
   arch/powerpc/include/asm/io.h:601:3: note: expanded from macro 
'DEF_PCI_AC_NORET'
   __do_##name al; \
   ^~
   :2:1: note: expanded from here
   __do_insl
   ^
   arch/powerpc/include/asm/io.h:543:56: note: expanded from macro '__do_insl'
   #define __do_insl(p, b, n)  readsl((PCI_IO_ADDR)_IO_BASE+(p), (b), (n))
  ~^
   In file included from drivers/bus/mhi/core/pm.c:10:
   In file included from include/linux/dma-mapping.h:10:
   In file included from include/linux/scatterlist.h:9:
   In file included from arch/powerpc/include/asm/io.h:604:
   arch/powerpc/include/asm/io-defs.h:49:1: warning: performing pointer 
arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
   DEF_PCI_AC

Re: [PATCH 3/3] vDPA/ifcvf: get_config_size should return dev specific config size

2021-04-14 Thread Jason Wang




在 2021/4/14 下午5:18, Zhu Lingshan 写道:

get_config_size() should return the size based on the detected
device type.

Signed-off-by: Zhu Lingshan 



Acked-by: Jason Wang 



---
  drivers/vdpa/ifcvf/ifcvf_main.c | 11 ++-
  1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 9b6a38b798fa..b48b9789b69e 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -347,7 +347,16 @@ static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device 
*vdpa_dev)
  
  static size_t ifcvf_vdpa_get_config_size(struct vdpa_device *vdpa_dev)

  {
-   return sizeof(struct virtio_net_config);
+   struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+   size_t size;
+
+   if (vf->dev_type == VIRTIO_ID_NET)
+   size = sizeof(struct virtio_net_config);
+
+   if (vf->dev_type == VIRTIO_ID_BLOCK)
+   size = sizeof(struct virtio_blk_config);
+
+   return size;
  }
  
  static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,

Re: [PATCH 2/3] vDPA/ifcvf: enable Intel C5000X-PL virtio-block for vDPA

2021-04-14 Thread Jason Wang




在 2021/4/14 下午5:18, Zhu Lingshan 写道:

This commit enabled Intel FPGA SmartNIC C5000X-PL virtio-block
for vDPA.

Signed-off-by: Zhu Lingshan 
---
  drivers/vdpa/ifcvf/ifcvf_base.h | 17 -
  drivers/vdpa/ifcvf/ifcvf_main.c | 10 +-
  2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index 1c04cd256fa7..8b403522bf06 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -15,6 +15,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  
@@ -28,7 +29,12 @@

  #define C5000X_PL_SUBSYS_VENDOR_ID0x8086
  #define C5000X_PL_SUBSYS_DEVICE_ID0x0001
  
-#define IFCVF_SUPPORTED_FEATURES \

+#define C5000X_PL_BLK_VENDOR_ID0x1AF4
+#define C5000X_PL_BLK_DEVICE_ID0x1001
+#define C5000X_PL_BLK_SUBSYS_VENDOR_ID 0x8086
+#define C5000X_PL_BLK_SUBSYS_DEVICE_ID 0x0002
+
+#define IFCVF_NET_SUPPORTED_FEATURES \
((1ULL << VIRTIO_NET_F_MAC)   | \
 (1ULL << VIRTIO_F_ANY_LAYOUT)| \
 (1ULL << VIRTIO_F_VERSION_1) | \
@@ -37,6 +43,15 @@
 (1ULL << VIRTIO_F_ACCESS_PLATFORM)   | \
 (1ULL << VIRTIO_NET_F_MRG_RXBUF))
  
+#define IFCVF_BLK_SUPPORTED_FEATURES \

+   ((1ULL << VIRTIO_BLK_F_SIZE_MAX)  | \
+(1ULL << VIRTIO_BLK_F_SEG_MAX)   | \
+(1ULL << VIRTIO_BLK_F_BLK_SIZE)  | \
+(1ULL << VIRTIO_BLK_F_TOPOLOGY)  | \
+(1ULL << VIRTIO_BLK_F_MQ)| \
+(1ULL << VIRTIO_F_VERSION_1) | \
+(1ULL << VIRTIO_F_ACCESS_PLATFORM))



I think we've discussed this sometime in the past, but what's the reason 
for such a whitelist, considering there's already a get_features() implementation?


E.g. is there any reason to block VIRTIO_BLK_F_WRITE_ZEROS or VIRTIO_F_RING_PACKED?

Thanks



+
  /* Only one queue pair for now. */
  #define IFCVF_MAX_QUEUE_PAIRS 1
  
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c

index 99b0a6b4c227..9b6a38b798fa 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -171,7 +171,11 @@ static u64 ifcvf_vdpa_get_features(struct vdpa_device 
*vdpa_dev)
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
u64 features;
  
-	features = ifcvf_get_features(vf) & IFCVF_SUPPORTED_FEATURES;

+   if (vf->dev_type == VIRTIO_ID_NET)
+   features = ifcvf_get_features(vf) & 
IFCVF_NET_SUPPORTED_FEATURES;
+
+   if (vf->dev_type == VIRTIO_ID_BLOCK)
+   features = ifcvf_get_features(vf) & 
IFCVF_BLK_SUPPORTED_FEATURES;
  
  	return features;

  }
@@ -509,6 +513,10 @@ static struct pci_device_id ifcvf_pci_ids[] = {
 C5000X_PL_DEVICE_ID,
 C5000X_PL_SUBSYS_VENDOR_ID,
 C5000X_PL_SUBSYS_DEVICE_ID) },
+   { PCI_DEVICE_SUB(C5000X_PL_BLK_VENDOR_ID,
+C5000X_PL_BLK_DEVICE_ID,
+C5000X_PL_BLK_SUBSYS_VENDOR_ID,
+C5000X_PL_BLK_SUBSYS_DEVICE_ID) },
  
  	{ 0 },

  };

Re: [PATCH 1/3] vDPA/ifcvf: deduce VIRTIO device ID when probe

2021-04-14 Thread Jason Wang




在 2021/4/14 下午5:18, Zhu Lingshan 写道:

This commit deduces the VIRTIO device ID as the device type when probing,
then ifcvf_vdpa_get_device_id() can simply return the ID.
ifcvf_vdpa_get_features() and ifcvf_vdpa_get_config_size()
can work properly based on the device ID.

Signed-off-by: Zhu Lingshan 
---
  drivers/vdpa/ifcvf/ifcvf_base.h |  1 +
  drivers/vdpa/ifcvf/ifcvf_main.c | 22 ++
  2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h
index b2eeb16b9c2c..1c04cd256fa7 100644
--- a/drivers/vdpa/ifcvf/ifcvf_base.h
+++ b/drivers/vdpa/ifcvf/ifcvf_base.h
@@ -84,6 +84,7 @@ struct ifcvf_hw {
u32 notify_off_multiplier;
u64 req_features;
u64 hw_features;
+   u32 dev_type;
struct virtio_pci_common_cfg __iomem *common_cfg;
void __iomem *net_cfg;
struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2];
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index 44d7586019da..99b0a6b4c227 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -323,19 +323,9 @@ static u32 ifcvf_vdpa_get_generation(struct vdpa_device 
*vdpa_dev)
  
  static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev)

  {
-   struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
-   struct pci_dev *pdev = adapter->pdev;
-   u32 ret = -ENODEV;
-
-   if (pdev->device < 0x1000 || pdev->device > 0x107f)
-   return ret;
-
-   if (pdev->device < 0x1040)
-   ret =  pdev->subsystem_device;
-   else
-   ret =  pdev->device - 0x1040;
+   struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
  
-	return ret;

+   return vf->dev_type;
  }
  
  static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev)

@@ -466,6 +456,14 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
pci_set_drvdata(pdev, adapter);
  
  	vf = >vf;

+   if (pdev->device < 0x1000 || pdev->device > 0x107f)
+   return -EOPNOTSUPP;
+
+   if (pdev->device < 0x1040)
+   vf->dev_type =  pdev->subsystem_device;
+   else
+   vf->dev_type =  pdev->device - 0x1040;



So a question here: is the device a transitional device or a modern one?

If it's a transitional one, can it switch endianness automatically or not?

Thanks



+
vf->base = pcim_iomap_table(pdev);
  
  	adapter->pdev = pdev;

[PATCH v5 2/2] HID: google: Add of_match table to Whiskers switch device.

2021-04-14 Thread Ikjoon Jang

Add a device tree match table for "cros-cbas" switch device.

Signed-off-by: Ikjoon Jang 
Reviewed-by: Dmitry Torokhov 
Acked-by: Jiri Kosina 

---

(no changes since v1)

Please note that v3 was submitted in 28 Oct 2019, 1.5yrs ago.
Link(v2): 
https://patchwork.kernel.org/project/linux-input/patch/20191021030158.32464-1-i...@chromium.org/

---
 drivers/hid/hid-google-hammer.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c
index d9319622da44..be4f9f3dbbba 100644
--- a/drivers/hid/hid-google-hammer.c
+++ b/drivers/hid/hid-google-hammer.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -272,12 +273,21 @@ static const struct acpi_device_id cbas_ec_acpi_ids[] = {
 };
 MODULE_DEVICE_TABLE(acpi, cbas_ec_acpi_ids);
 
+#ifdef CONFIG_OF
+static const struct of_device_id cbas_ec_of_match[] = {
+   { .compatible = "google,cros-cbas" },
+   { },
+};
+MODULE_DEVICE_TABLE(of, cbas_ec_of_match);
+#endif
+
 static struct platform_driver cbas_ec_driver = {
.probe = cbas_ec_probe,
.remove = cbas_ec_remove,
.driver = {
.name = "cbas_ec",
.acpi_match_table = ACPI_PTR(cbas_ec_acpi_ids),
+   .of_match_table = of_match_ptr(cbas_ec_of_match),
.pm = _ec_pm_ops,
},
 };
-- 
2.31.1.295.g9ea45b61b8-goog

[PATCH v5 1/2] mfd: google,cros-ec: add DT bindings for a baseboard's switch device

2021-04-14 Thread Ikjoon Jang

This is for ChromeOS tablets which have a 'cros_cbas' switch device
in the "Whiskers" base board. This device can be instantiated only by
device tree on ARM platforms. ChromeOS EC doesn't provide a way to
probe the device.

Signed-off-by: Ikjoon Jang 

---

Changes in v5:
 - Add missing blank lines and change the description property's position.
 - Add a note to description: "this device cannot be detected at runtime."

Changes in v4:
Define cros-cbase bindings inside google,cros-ec.yaml instead of
a separated binding document.

 .../bindings/mfd/google,cros-ec.yaml  | 20 +++
 1 file changed, 20 insertions(+)

diff --git a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml 
b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
index 76bf16ee27ec..8dcce176b72e 100644
--- a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
+++ b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml
@@ -114,6 +114,22 @@ properties:
   - "#address-cells"
   - "#size-cells"
 
+  cbas:
+type: object
+
+description:
+  This device is used to signal when a detachable base is attached
+  to a Chrome OS tablet. This device cannot be detected at runtime.
+
+properties:
+  compatible:
+const: google,cros-cbas
+
+required:
+  - compatible
+
+additionalProperties: false
+
 patternProperties:
   "^i2c-tunnel[0-9]*$":
 type: object
@@ -180,6 +196,10 @@ examples:
 interrupts = <99 0>;
 interrupt-parent = <>;
 spi-max-frequency = <500>;
+
+base_detection: cbas {
+compatible = "google,cros-cbas";
+};
 };
 };
 
-- 
2.31.1.295.g9ea45b61b8-goog

[PATCH v5 0/2] HID: google: add device tree bindings for Whiskers switch device

2021-04-14 Thread Ikjoon Jang

Add a device tree binding for a "cros-cbas" switch device of
ChromeOS tablets with Whiskers base board.

Changes in v5:
 - Add missing blank lines and change the description property's position.
 - Add a note to description: "this device cannot be detected at runtime."

Changes in v4:
Define cros-cbase bindings inside google,cros-ec.yaml instead of
a separated binding document.

Ikjoon Jang (2):
  mfd: google,cros-ec: add DT bindings for a baseboard's switch device
  HID: google: Add of_match table to Whiskers switch device.

 .../bindings/mfd/google,cros-ec.yaml  | 20 +++
 drivers/hid/hid-google-hammer.c   | 10 ++
 2 files changed, 30 insertions(+)

-- 
2.31.1.295.g9ea45b61b8-goog

Re: [PATCH v3 5/5] mm/memcg: Optimize user context object stock access

2021-04-14 Thread Masayoshi Mizuma

On Tue, Apr 13, 2021 at 09:20:27PM -0400, Waiman Long wrote:
> Most kmem_cache_alloc() calls are from user context. With instrumentation
> enabled, the measured amount of kmem_cache_alloc() calls from non-task
> context was about 0.01% of the total.
> 
> The irq disable/enable sequence used in this case to access content
> from object stock is slow.  To optimize for user context access, there
> are now two object stocks for task context and interrupt context access
> respectively.
> 
> The task context object stock can be accessed after disabling preemption
> which is cheap in non-preempt kernel. The interrupt context object stock
> can only be accessed after disabling interrupt. User context code can
> access interrupt object stock, but not vice versa.
> 
> The mod_objcg_state() function is also modified to make sure that memcg
> and lruvec stat updates are done with interrupts disabled.
> 
> The downside of this change is that there are more data stored in local
> object stocks and not reflected in the charge counter and the vmstat
> arrays.  However, this is a small price to pay for better performance.
> 
> Signed-off-by: Waiman Long 
> Acked-by: Roman Gushchin 
> Reviewed-by: Shakeel Butt 
> ---
>  mm/memcontrol.c | 74 +++--
>  1 file changed, 59 insertions(+), 15 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 69f728383efe..8875e896e52b 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2229,7 +2229,8 @@ struct obj_stock {
>  struct memcg_stock_pcp {
>   struct mem_cgroup *cached; /* this never be root cgroup */
>   unsigned int nr_pages;
> - struct obj_stock obj;
> + struct obj_stock task_obj;
> + struct obj_stock irq_obj;
>  
>   struct work_struct work;
>   unsigned long flags;
> @@ -2254,11 +2255,48 @@ static bool obj_stock_flush_required(struct 
> memcg_stock_pcp *stock,
>  }
>  #endif
>  
> +/*
> + * Most kmem_cache_alloc() calls are from user context. The irq 
> disable/enable
> + * sequence used in this case to access content from object stock is slow.
> + * To optimize for user context access, there are now two object stocks for
> + * task context and interrupt context access respectively.
> + *
> + * The task context object stock can be accessed by disabling preemption only
> + * which is cheap in non-preempt kernel. The interrupt context object stock
> + * can only be accessed after disabling interrupt. User context code can
> + * access interrupt object stock, but not vice versa.
> + */
>  static inline struct obj_stock *current_obj_stock(void)
>  {
>   struct memcg_stock_pcp *stock = this_cpu_ptr(_stock);
>  
> - return >obj;
> + return in_task() ? >task_obj : >irq_obj;
> +}
> +
> +#define get_obj_stock(flags) \
> +({   \
> + struct memcg_stock_pcp *stock;  \
> + struct obj_stock *obj_stock;\
> + \
> + if (in_task()) {\
> + preempt_disable();  \
> + (flags) = -1L;  \
> + stock = this_cpu_ptr(_stock); \
> + obj_stock = >task_obj;   \
> + } else {\
> + local_irq_save(flags);  \
> + stock = this_cpu_ptr(_stock); \
> + obj_stock = >irq_obj;\
> + }   \
> + obj_stock;  \
> +})
> +
> +static inline void put_obj_stock(unsigned long flags)
> +{
> + if (flags == -1L)
> + preempt_enable();
> + else
> + local_irq_restore(flags);
>  }
>  
>  /**
> @@ -2327,7 +2365,9 @@ static void drain_local_stock(struct work_struct *dummy)
>   local_irq_save(flags);
>  
>   stock = this_cpu_ptr(_stock);
> - drain_obj_stock(>obj);
> + drain_obj_stock(>irq_obj);
> + if (in_task())
> + drain_obj_stock(>task_obj);
>   drain_stock(stock);
>   clear_bit(FLUSHING_CACHED_CHARGE, >flags);
>  
> @@ -3183,7 +3223,7 @@ static inline void mod_objcg_state(struct obj_cgroup 
> *objcg,
>   memcg = obj_cgroup_memcg(objcg);
>   if (pgdat)
>   lruvec = mem_cgroup_lruvec(memcg, pgdat);
> - __mod_memcg_lruvec_state(memcg, lruvec, idx, nr);
> + mod_memcg_lruvec_state(memcg, lruvec, idx, nr);
>   rcu_read_unlock();
>  }
>  
> @@ -3193,15 +3233,14 @@ static bool consume_obj_stock(struct obj_cgroup 
> *objcg, unsigned int nr_bytes)
>   unsigned long flags;
>   bool ret = false;
>  
> - local_irq_save(flags);
> + stock = get_obj_stock(flags);
>  
> - stock = current_obj_stock();
>   if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
>   stock->nr_bytes -= nr_bytes;
>

Re: [PATCH v3 4/5] mm/memcg: Separate out object stock data into its own struct

2021-04-14 Thread Masayoshi Mizuma

On Tue, Apr 13, 2021 at 09:20:26PM -0400, Waiman Long wrote:
> The object stock data stored in struct memcg_stock_pcp are independent
> of the other page based data stored there. Separating them out into
> their own struct to highlight the independency.
> 
> Signed-off-by: Waiman Long 
> Acked-by: Roman Gushchin 
> Reviewed-by: Shakeel Butt 
> ---
>  mm/memcontrol.c | 41 ++---
>  1 file changed, 26 insertions(+), 15 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 539c3b632e47..69f728383efe 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2214,17 +2214,22 @@ void unlock_page_memcg(struct page *page)
>  }
>  EXPORT_SYMBOL(unlock_page_memcg);
>  
> -struct memcg_stock_pcp {
> - struct mem_cgroup *cached; /* this never be root cgroup */
> - unsigned int nr_pages;
> -
> +struct obj_stock {
>  #ifdef CONFIG_MEMCG_KMEM
>   struct obj_cgroup *cached_objcg;
>   struct pglist_data *cached_pgdat;
>   unsigned int nr_bytes;
>   int vmstat_idx;
>   int vmstat_bytes;
> +#else
> + int dummy[0];
>  #endif
> +};
> +
> +struct memcg_stock_pcp {
> + struct mem_cgroup *cached; /* this never be root cgroup */
> + unsigned int nr_pages;
> + struct obj_stock obj;
>  
>   struct work_struct work;
>   unsigned long flags;
> @@ -2234,12 +2239,12 @@ static DEFINE_PER_CPU(struct memcg_stock_pcp, 
> memcg_stock);
>  static DEFINE_MUTEX(percpu_charge_mutex);
>  
>  #ifdef CONFIG_MEMCG_KMEM
> -static void drain_obj_stock(struct memcg_stock_pcp *stock);
> +static void drain_obj_stock(struct obj_stock *stock);
>  static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
>struct mem_cgroup *root_memcg);
>  
>  #else
> -static inline void drain_obj_stock(struct memcg_stock_pcp *stock)
> +static inline void drain_obj_stock(struct obj_stock *stock)
>  {
>  }
>  static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
> @@ -2249,6 +2254,13 @@ static bool obj_stock_flush_required(struct 
> memcg_stock_pcp *stock,
>  }
>  #endif
>  
> +static inline struct obj_stock *current_obj_stock(void)
> +{
> + struct memcg_stock_pcp *stock = this_cpu_ptr(_stock);
> +
> + return >obj;
> +}
> +
>  /**
>   * consume_stock: Try to consume stocked charge on this cpu.
>   * @memcg: memcg to consume from.
> @@ -2315,7 +2327,7 @@ static void drain_local_stock(struct work_struct *dummy)
>   local_irq_save(flags);
>  
>   stock = this_cpu_ptr(_stock);
> - drain_obj_stock(stock);
> + drain_obj_stock(>obj);
>   drain_stock(stock);
>   clear_bit(FLUSHING_CACHED_CHARGE, >flags);
>  
> @@ -3177,13 +3189,13 @@ static inline void mod_objcg_state(struct obj_cgroup 
> *objcg,
>  
>  static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int 
> nr_bytes)
>  {
> - struct memcg_stock_pcp *stock;
> + struct obj_stock *stock;
>   unsigned long flags;
>   bool ret = false;
>  
>   local_irq_save(flags);
>  
> - stock = this_cpu_ptr(_stock);
> + stock = current_obj_stock();
>   if (objcg == stock->cached_objcg && stock->nr_bytes >= nr_bytes) {
>   stock->nr_bytes -= nr_bytes;
>   ret = true;
> @@ -3194,7 +3206,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, 
> unsigned int nr_bytes)
>   return ret;
>  }
>  
> -static void drain_obj_stock(struct memcg_stock_pcp *stock)
> +static void drain_obj_stock(struct obj_stock *stock)
>  {
>   struct obj_cgroup *old = stock->cached_objcg;
>  
> @@ -3242,8 +3254,8 @@ static bool obj_stock_flush_required(struct 
> memcg_stock_pcp *stock,
>  {
>   struct mem_cgroup *memcg;
>  
> - if (stock->cached_objcg) {
> - memcg = obj_cgroup_memcg(stock->cached_objcg);
> + if (stock->obj.cached_objcg) {
> + memcg = obj_cgroup_memcg(stock->obj.cached_objcg);
>   if (memcg && mem_cgroup_is_descendant(memcg, root_memcg))
>   return true;
>   }
> @@ -3253,9 +3265,8 @@ static bool obj_stock_flush_required(struct 
> memcg_stock_pcp *stock,
>  
>  static void __refill_obj_stock(struct obj_cgroup *objcg, unsigned int 
> nr_bytes)
>  {
> - struct memcg_stock_pcp *stock;
> + struct obj_stock *stock = current_obj_stock();
>  
> - stock = this_cpu_ptr(_stock);
>   if (stock->cached_objcg != objcg) { /* reset if necessary */
>   drain_obj_stock(stock);
>   obj_cgroup_get(objcg);
> @@ -3280,7 +3291,7 @@ static void refill_obj_stock(struct obj_cgroup *objcg, 
> unsigned int nr_bytes)
>  static void __mod_obj_stock_state(struct obj_cgroup *objcg,
> struct pglist_data *pgdat, int idx, int nr)
>  {
> - struct memcg_stock_pcp *stock = this_cpu_ptr(_stock);
> + struct obj_stock *stock = current_obj_stock();
>  
>   if (stock->cached_objcg != objcg) {
>   /* Output the current data as is */
> -- 
> 2.18.1
> 
Please

Re: [PATCH v3 3/5] mm/memcg: Cache vmstat data in percpu memcg_stock_pcp

2021-04-14 Thread Masayoshi Mizuma

On Tue, Apr 13, 2021 at 09:20:25PM -0400, Waiman Long wrote:
> Before the new slab memory controller with per object byte charging,
> charging and vmstat data update happen only when new slab pages are
> allocated or freed. Now they are done with every kmem_cache_alloc()
> and kmem_cache_free(). This causes additional overhead for workloads
> that generate a lot of alloc and free calls.
> 
> The memcg_stock_pcp is used to cache byte charge for a specific
> obj_cgroup to reduce that overhead. To further reducing it, this patch
> makes the vmstat data cached in the memcg_stock_pcp structure as well
> until it accumulates a page size worth of update or when other cached
> data change.
> 
> On a 2-socket Cascade Lake server with instrumentation enabled and this
> patch applied, it was found that about 17% (946796 out of 5515184) of the
> time when __mod_obj_stock_state() is called leads to an actual call to
> mod_objcg_state() after initial boot. When doing parallel kernel build,
> the figure was about 16% (21894614 out of 139780628). So caching the
> vmstat data reduces the number of calls to mod_objcg_state() by more
> than 80%.
> 
> Signed-off-by: Waiman Long 
> Reviewed-by: Shakeel Butt 
> ---
>  mm/memcontrol.c | 78 +++--
>  mm/slab.h   | 26 +++--
>  2 files changed, 79 insertions(+), 25 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index b19100c68aa0..539c3b632e47 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2220,7 +2220,10 @@ struct memcg_stock_pcp {
>  
>  #ifdef CONFIG_MEMCG_KMEM
>   struct obj_cgroup *cached_objcg;
> + struct pglist_data *cached_pgdat;
>   unsigned int nr_bytes;
> + int vmstat_idx;
> + int vmstat_bytes;
>  #endif
>  
>   struct work_struct work;
> @@ -3157,6 +3160,21 @@ void __memcg_kmem_uncharge_page(struct page *page, int 
> order)
>   css_put(>css);
>  }
>  
> +static inline void mod_objcg_state(struct obj_cgroup *objcg,
> +struct pglist_data *pgdat,
> +enum node_stat_item idx, int nr)
> +{
> + struct mem_cgroup *memcg;
> + struct lruvec *lruvec = NULL;
> +
> + rcu_read_lock();
> + memcg = obj_cgroup_memcg(objcg);
> + if (pgdat)
> + lruvec = mem_cgroup_lruvec(memcg, pgdat);
> + __mod_memcg_lruvec_state(memcg, lruvec, idx, nr);
> + rcu_read_unlock();
> +}
> +
>  static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int 
> nr_bytes)
>  {
>   struct memcg_stock_pcp *stock;
> @@ -3207,6 +3225,14 @@ static void drain_obj_stock(struct memcg_stock_pcp 
> *stock)
>   stock->nr_bytes = 0;
>   }
>  
> + if (stock->vmstat_bytes) {
> + mod_objcg_state(old, stock->cached_pgdat, stock->vmstat_idx,
> + stock->vmstat_bytes);
> + stock->vmstat_bytes = 0;
> + stock->vmstat_idx = 0;
> + stock->cached_pgdat = NULL;
> + }
> +
>   obj_cgroup_put(old);
>   stock->cached_objcg = NULL;
>  }
> @@ -3251,6 +3277,48 @@ static void refill_obj_stock(struct obj_cgroup *objcg, 
> unsigned int nr_bytes)
>   local_irq_restore(flags);
>  }
>  
> +static void __mod_obj_stock_state(struct obj_cgroup *objcg,
> +   struct pglist_data *pgdat, int idx, int nr)
> +{
> + struct memcg_stock_pcp *stock = this_cpu_ptr(_stock);
> +
> + if (stock->cached_objcg != objcg) {
> + /* Output the current data as is */
> + } else if (!stock->vmstat_bytes) {
> + /* Save the current data */
> + stock->vmstat_bytes = nr;
> + stock->vmstat_idx = idx;
> + stock->cached_pgdat = pgdat;
> + nr = 0;
> + } else if ((stock->cached_pgdat != pgdat) ||
> +(stock->vmstat_idx != idx)) {
> + /* Output the cached data & save the current data */
> + swap(nr, stock->vmstat_bytes);
> + swap(idx, stock->vmstat_idx);
> + swap(pgdat, stock->cached_pgdat);
> + } else {
> + stock->vmstat_bytes += nr;
> + if (abs(nr) > PAGE_SIZE) {
> + nr = stock->vmstat_bytes;
> + stock->vmstat_bytes = 0;
> + } else {
> + nr = 0;
> + }
> + }
> + if (nr)
> + mod_objcg_state(objcg, pgdat, idx, nr);
> +}
> +
> +void mod_obj_stock_state(struct obj_cgroup *objcg, struct pglist_data *pgdat,
> +  int idx, int nr)
> +{
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + __mod_obj_stock_state(objcg, pgdat, idx, nr);
> + local_irq_restore(flags);
> +}
> +
>  int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
>  {
>   struct mem_cgroup *memcg;
> @@ -3300,18 +3368,10 @@ void obj_cgroup_uncharge_mod_state(struct obj_cgroup 
> *objcg, size_t size,
>

Re: [PATCH v3 2/5] mm/memcg: Introduce obj_cgroup_uncharge_mod_state()

2021-04-14 Thread Masayoshi Mizuma

On Tue, Apr 13, 2021 at 09:20:24PM -0400, Waiman Long wrote:
> In memcg_slab_free_hook()/pcpu_memcg_free_hook(), obj_cgroup_uncharge()
> is followed by mod_objcg_state()/mod_memcg_state(). Each of these
> function calls goes through a separate irq_save/irq_restore cycle. That
> is inefficient.  Introduce a new function obj_cgroup_uncharge_mod_state()
> that combines them with a single irq_save/irq_restore cycle.
> 
> Signed-off-by: Waiman Long 
> Reviewed-by: Shakeel Butt 
> Acked-by: Roman Gushchin 
> ---
>  include/linux/memcontrol.h |  2 ++
>  mm/memcontrol.c| 31 +++
>  mm/percpu.c|  9 ++---
>  mm/slab.h  |  6 +++---
>  4 files changed, 34 insertions(+), 14 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 95f12996e66c..6890f999c1a3 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -1592,6 +1592,8 @@ struct obj_cgroup *get_obj_cgroup_from_current(void);
>  
>  int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size);
>  void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size);
> +void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
> +struct pglist_data *pgdat, int idx);
>  
>  extern struct static_key_false memcg_kmem_enabled_key;
>  
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index d66e1e38f8ac..b19100c68aa0 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3225,12 +3225,9 @@ static bool obj_stock_flush_required(struct 
> memcg_stock_pcp *stock,
>   return false;
>  }
>  
> -static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
> +static void __refill_obj_stock(struct obj_cgroup *objcg, unsigned int 
> nr_bytes)
>  {
>   struct memcg_stock_pcp *stock;
> - unsigned long flags;
> -
> - local_irq_save(flags);
>  
>   stock = this_cpu_ptr(_stock);
>   if (stock->cached_objcg != objcg) { /* reset if necessary */
> @@ -3243,7 +3240,14 @@ static void refill_obj_stock(struct obj_cgroup *objcg, 
> unsigned int nr_bytes)
>  
>   if (stock->nr_bytes > PAGE_SIZE)
>   drain_obj_stock(stock);
> +}
> +
> +static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
> +{
> + unsigned long flags;
>  
> + local_irq_save(flags);
> + __refill_obj_stock(objcg, nr_bytes);
>   local_irq_restore(flags);
>  }
>  
> @@ -3292,6 +3296,25 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, 
> size_t size)
>   refill_obj_stock(objcg, size);
>  }
>  
> +void obj_cgroup_uncharge_mod_state(struct obj_cgroup *objcg, size_t size,
> +struct pglist_data *pgdat, int idx)
> +{
> + unsigned long flags;
> + struct mem_cgroup *memcg;
> + struct lruvec *lruvec = NULL;
> +
> + local_irq_save(flags);
> + __refill_obj_stock(objcg, size);
> +
> + rcu_read_lock();
> + memcg = obj_cgroup_memcg(objcg);
> + if (pgdat)
> + lruvec = mem_cgroup_lruvec(memcg, pgdat);
> + __mod_memcg_lruvec_state(memcg, lruvec, idx, -(int)size);
> + rcu_read_unlock();
> + local_irq_restore(flags);
> +}
> +
>  #endif /* CONFIG_MEMCG_KMEM */
>  
>  /*
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 23308113a5ff..fd7aad6d7f90 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1631,13 +1631,8 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk 
> *chunk, int off, size_t size)
>   objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT];
>   chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL;
>  
> - obj_cgroup_uncharge(objcg, size * num_possible_cpus());
> -
> - rcu_read_lock();
> - mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_PERCPU_B,
> - -(size * num_possible_cpus()));
> - rcu_read_unlock();
> -
> + obj_cgroup_uncharge_mod_state(objcg, size * num_possible_cpus(),
> +   NULL, MEMCG_PERCPU_B);
>   obj_cgroup_put(objcg);
>  }
>  
> diff --git a/mm/slab.h b/mm/slab.h
> index bc6c7545e487..677cdc52e641 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -366,9 +366,9 @@ static inline void memcg_slab_free_hook(struct kmem_cache 
> *s_orig,
>   continue;
>  
>   objcgs[off] = NULL;
> - obj_cgroup_uncharge(objcg, obj_full_size(s));
> - mod_objcg_state(objcg, page_pgdat(page), cache_vmstat_idx(s),
> - -obj_full_size(s));
> + obj_cgroup_uncharge_mod_state(objcg, obj_full_size(s),
> +   page_pgdat(page),
> +   cache_vmstat_idx(s));
>   obj_cgroup_put(objcg);
>   }
>  }
> -- 
> 2.18.1
> 

Please feel free to add:

Tested-by: Masayoshi Mizuma 

Thanks!
Masa

Re: [PATCH v3 1/5] mm/memcg: Pass both memcg and lruvec to mod_memcg_lruvec_state()

2021-04-14 Thread Masayoshi Mizuma

On Tue, Apr 13, 2021 at 09:20:23PM -0400, Waiman Long wrote:
> The caller of mod_memcg_lruvec_state() has both memcg and lruvec readily
> available. So both of them are now passed to mod_memcg_lruvec_state()
> and __mod_memcg_lruvec_state(). The __mod_memcg_lruvec_state() is
> updated to allow either of the two parameters to be set to null. This
> makes mod_memcg_lruvec_state() equivalent to mod_memcg_state() if lruvec
> is null.
> 
> The new __mod_memcg_lruvec_state() function will be used in the next
> patch as a replacement of mod_memcg_state() in mm/percpu.c for the
> consolidation of the memory uncharge and vmstat update functions in
> the kmem_cache_free() path.
> 
> Signed-off-by: Waiman Long 
> Acked-by: Roman Gushchin 
> Reviewed-by: Shakeel Butt 
> ---
>  include/linux/memcontrol.h | 12 +++-
>  mm/memcontrol.c| 19 +--
>  mm/slab.h  |  2 +-
>  3 files changed, 21 insertions(+), 12 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 0c04d39a7967..95f12996e66c 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -955,8 +955,8 @@ static inline unsigned long 
> lruvec_page_state_local(struct lruvec *lruvec,
>   return x;
>  }
>  
> -void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
> -   int val);
> +void __mod_memcg_lruvec_state(struct mem_cgroup *memcg, struct lruvec 
> *lruvec,
> +   enum node_stat_item idx, int val);
>  void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val);
>  
>  static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
> @@ -969,13 +969,14 @@ static inline void mod_lruvec_kmem_state(void *p, enum 
> node_stat_item idx,
>   local_irq_restore(flags);
>  }
>  
> -static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
> +static inline void mod_memcg_lruvec_state(struct mem_cgroup *memcg,
> +   struct lruvec *lruvec,
> enum node_stat_item idx, int val)
>  {
>   unsigned long flags;
>  
>   local_irq_save(flags);
> - __mod_memcg_lruvec_state(lruvec, idx, val);
> + __mod_memcg_lruvec_state(memcg, lruvec, idx, val);
>   local_irq_restore(flags);
>  }
>  
> @@ -1369,7 +1370,8 @@ static inline unsigned long 
> lruvec_page_state_local(struct lruvec *lruvec,
>   return node_page_state(lruvec_pgdat(lruvec), idx);
>  }
>  
> -static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
> +static inline void __mod_memcg_lruvec_state(struct mem_cgroup *memcg,
> + struct lruvec *lruvec,
>   enum node_stat_item idx, int val)
>  {
>  }
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index e064ac0d850a..d66e1e38f8ac 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -799,20 +799,27 @@ parent_nodeinfo(struct mem_cgroup_per_node *pn, int nid)
>   return mem_cgroup_nodeinfo(parent, nid);
>  }
>  
> -void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
> -   int val)
> +/*
> + * Either one of memcg or lruvec can be NULL, but not both.
> + */
> +void __mod_memcg_lruvec_state(struct mem_cgroup *memcg, struct lruvec 
> *lruvec,
> +   enum node_stat_item idx, int val)
>  {
>   struct mem_cgroup_per_node *pn;
> - struct mem_cgroup *memcg;
>   long x, threshold = MEMCG_CHARGE_BATCH;
>  
> + /* Update lruvec */
>   pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> - memcg = pn->memcg;
> +
> + if (!memcg)
> + memcg = pn->memcg;
>  
>   /* Update memcg */
>   __mod_memcg_state(memcg, idx, val);
>  
> - /* Update lruvec */
> + if (!lruvec)
> + return;
> +
>   __this_cpu_add(pn->lruvec_stat_local->count[idx], val);
>  
>   if (vmstat_item_in_bytes(idx))
> @@ -848,7 +855,7 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum 
> node_stat_item idx,
>  
>   /* Update memcg and lruvec */
>   if (!mem_cgroup_disabled())
> - __mod_memcg_lruvec_state(lruvec, idx, val);
> + __mod_memcg_lruvec_state(NULL, lruvec, idx, val);
>  }
>  
>  void __mod_lruvec_page_state(struct page *page, enum node_stat_item idx,
> diff --git a/mm/slab.h b/mm/slab.h
> index 076582f58f68..bc6c7545e487 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -293,7 +293,7 @@ static inline void mod_objcg_state(struct obj_cgroup 
> *objcg,
>   rcu_read_lock();
>   memcg = obj_cgroup_memcg(objcg);
>   lruvec = mem_cgroup_lruvec(memcg, pgdat);
> - mod_memcg_lruvec_state(lruvec, idx, nr);
> + mod_memcg_lruvec_state(memcg, lruvec, idx, nr);
>   rcu_read_unlock();
>  }
>  
> -- 
> 2.18.1
> 

Please feel free to add:

Tested-by: Masayoshi Mizuma 

Thanks!
Masa

Re: [PATCH v3 0/5] mm/memcg: Reduce kmemcache memory accounting overhead

2021-04-14 Thread Masayoshi Mizuma

On Tue, Apr 13, 2021 at 09:20:22PM -0400, Waiman Long wrote:
>  v3:
>   - Add missing "inline" qualifier to the alternate mod_obj_stock_state()
> in patch 3.
>   - Remove redundant current_obj_stock() call in patch 5.
> 
>  v2:
>   - Fix bug found by test robot in patch 5.
>   - Update cover letter and commit logs.
> 
> With the recent introduction of the new slab memory controller, we
> eliminate the need for having separate kmemcaches for each memory
> cgroup and reduce overall kernel memory usage. However, we also add
> additional memory accounting overhead to each call of kmem_cache_alloc()
> and kmem_cache_free().
> 
> For workloads that require a lot of kmemcache allocations and
> de-allocations, they may experience performance regression as illustrated
> in [1] and [2].
> 
> A simple kernel module that performs repeated loop of 100,000,000
> kmem_cache_alloc() and kmem_cache_free() of a 64-byte object at module
> init time is used for benchmarking. The test was run on a CascadeLake
> server with turbo-boosting disabled to reduce run-to-run variation.
> 
> With memory accounting disabled, the run time was 2.848s. With memory
> accounting enabled, the run times with the application of various
> patches in the patchset were:
> 
>   Applied patches   Run time   Accounting overhead   Overhead %age
>   ---------------   --------   -------------------   -------------
>None  10.800s 7.952s  100.0%
> 1-2   9.140s 6.292s   79.1%
> 1-3   7.641s 4.793s   60.3%
> 1-5   6.801s 3.953s   49.7%
> 
> Note that this is the best case scenario where most updates happen only
> to the percpu stocks. Real workloads will likely have a certain amount
> of updates to the memcg charges and vmstats. So the performance benefit
> will be less.
> 
> It was found that a big part of the memory accounting overhead
> was caused by the local_irq_save()/local_irq_restore() sequences in
> updating local stock charge bytes and vmstat array, at least in x86
> systems. There are two such sequences in kmem_cache_alloc() and two
> in kmem_cache_free(). This patchset tries to reduce the use of such
> sequences as much as possible. In fact, it eliminates them in the common
> case. Another part of this patchset is to cache the vmstat data update in
> the local stock as well, which also helps.
> 
> [1] 
> https://lore.kernel.org/linux-mm/20210408193948.vfktg3azh2wrt56t@gabell/T/#u

Hi Longman,

Thank you for your patches.
I reran the benchmark with your patches; it seems that the reduction
is small... The total durations of the sendto() and recvfrom() system calls
during the benchmark are as follows.

- sendto
  - v5.8 vanilla:  2576.056 msec (100%)
  - v5.12-rc7 vanilla: 2988.911 msec (116%)
  - v5.12-rc7 with your patches (1-5): 2984.307 msec (115%)

- recvfrom
  - v5.8 vanilla:  2113.156 msec (100%)
  - v5.12-rc7 vanilla: 2305.810 msec (109%)
  - v5.12-rc7 with your patches (1-5): 2287.351 msec (108%)

kmem_cache_alloc()/kmem_cache_free() are called around 1,400,000 times during
the benchmark. I ran a loop in a kernel module as follows. The duration
is actually reduced by your patches.

  ---
  dummy_cache = KMEM_CACHE(dummy, SLAB_ACCOUNT);
  for (i = 0; i < 1400000; i++) {
p = kmem_cache_alloc(dummy_cache, GFP_KERNEL);
kmem_cache_free(dummy_cache, p);
  }
  ---

- v5.12-rc7 vanilla: 110 msec (100%)
- v5.12-rc7 with your patches (1-5):  85 msec (77%)

It seems that the reduction is small for the benchmark though...
Anyway, I can see your patches reduce the overhead.
Please feel free to add:

Tested-by: Masayoshi Mizuma 

Thanks!
Masa

> [2] https://lore.kernel.org/lkml/20210114025151.GA22932@xsang-OptiPlex-9020/
> 
> Waiman Long (5):
>   mm/memcg: Pass both memcg and lruvec to mod_memcg_lruvec_state()
>   mm/memcg: Introduce obj_cgroup_uncharge_mod_state()
>   mm/memcg: Cache vmstat data in percpu memcg_stock_pcp
>   mm/memcg: Separate out object stock data into its own struct
>   mm/memcg: Optimize user context object stock access
> 
>  include/linux/memcontrol.h |  14 ++-
>  mm/memcontrol.c| 199 -
>  mm/percpu.c|   9 +-
>  mm/slab.h  |  32 +++---
>  4 files changed, 196 insertions(+), 58 deletions(-)
> 
> -- 
> 2.18.1
>

[PATCH] power: reset: Remove unneeded semicolon

2021-04-14 Thread Wan Jiabing

Fix the following coccicheck warning:

./drivers/power/reset/vexpress-poweroff.c:136:2-3: Unneeded semicolon

Signed-off-by: Wan Jiabing 
---
 drivers/power/reset/vexpress-poweroff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/reset/vexpress-poweroff.c 
b/drivers/power/reset/vexpress-poweroff.c
index 1fdbcbd95fc2..447ffdacddf9 100644
--- a/drivers/power/reset/vexpress-poweroff.c
+++ b/drivers/power/reset/vexpress-poweroff.c
@@ -133,7 +133,7 @@ static int vexpress_reset_probe(struct platform_device 
*pdev)
case FUNC_REBOOT:
ret = _vexpress_register_restart_handler(>dev);
break;
-   };
+   }
 
return ret;
 }
-- 
2.25.1

Re: [PATCH v2 4/4] dt-bindings: add dasheng vendor prefix

2021-04-14 Thread dillon min

On Wed, Apr 14, 2021 at 9:01 PM Krzysztof Kozlowski
 wrote:
>
> On 14/04/2021 08:51, dillon.min...@gmail.com wrote:
> > From: dillon min 
> >
> > Add vendor prefix for DaSheng, Inc.
> >
> > Signed-off-by: dillon min 
> > ---
> > v2: new add
> >
> >  Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
> >  1 file changed, 2 insertions(+)
>
> This should be the first patch in the series.
OK, will place this patch first in v3. Thanks.

Best regards
Dillon,
>
> Reviewed-by: Krzysztof Kozlowski 
>
>
> Best regards,
> Krzysztof

[PATCH] mmc: sdhci-pci-gli: Enlarge ASPM L1 entry delay of GL975x

2021-04-14 Thread Ben Chuang

GL975x enters ASPM L1 state after a short idle period by default.
Enlarge the idle period to 7.9us for improving the R/W performance.

Signed-off-by: Ben Chuang 
---
 drivers/mmc/host/sdhci-pci-gli.c | 32 
 1 file changed, 32 insertions(+)

diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c
index eb1ebb67e113..592d79082f58 100644
--- a/drivers/mmc/host/sdhci-pci-gli.c
+++ b/drivers/mmc/host/sdhci-pci-gli.c
@@ -22,6 +22,10 @@
 #define   GLI_9750_WT_EN_ON0x1
 #define   GLI_9750_WT_EN_OFF   0x0
 
+#define SDHCI_GLI_9750_CFG2  0x848
+#define   SDHCI_GLI_9750_CFG2_L1DLYGENMASK(28, 24)
+#define   GLI_9750_CFG2_L1DLY_VALUE0x1F
+
 #define SDHCI_GLI_9750_DRIVING  0x860
 #define   SDHCI_GLI_9750_DRIVING_1GENMASK(11, 0)
 #define   SDHCI_GLI_9750_DRIVING_2GENMASK(27, 26)
@@ -113,6 +117,10 @@
 #define   PCI_GLI_9755_LFCLKGENMASK(14, 12)
 #define   PCI_GLI_9755_DMACLK   BIT(29)
 
+#define PCI_GLI_9755_CFG2  0x48
+#define   PCI_GLI_9755_CFG2_L1DLYGENMASK(28, 24)
+#define   GLI_9755_CFG2_L1DLY_VALUE  0x1F
+
 #define PCI_GLI_9755_PLL0x64
 #define   PCI_GLI_9755_PLL_LDIV   GENMASK(9, 0)
 #define   PCI_GLI_9755_PLL_PDIV   GENMASK(14, 12)
@@ -408,6 +416,22 @@ static void sdhci_gl9750_set_clock(struct sdhci_host 
*host, unsigned int clock)
sdhci_enable_clk(host, clk);
 }
 
+static void gl9750_hw_setting(struct sdhci_host *host)
+{
+   u32 value;
+
+   gl9750_wt_on(host);
+
+   value = sdhci_readl(host, SDHCI_GLI_9750_CFG2);
+   value &= ~SDHCI_GLI_9750_CFG2_L1DLY;
+   /* set ASPM L1 entry delay to 7.9us */
+   value |= FIELD_PREP(SDHCI_GLI_9750_CFG2_L1DLY,
+   GLI_9750_CFG2_L1DLY_VALUE);
+   sdhci_writel(host, value, SDHCI_GLI_9750_CFG2);
+
+   gl9750_wt_off(host);
+}
+
 static void gli_pcie_enable_msi(struct sdhci_pci_slot *slot)
 {
int ret;
@@ -555,6 +579,13 @@ static void gl9755_hw_setting(struct sdhci_pci_slot *slot)
value &= ~PCI_GLI_9755_SCP_DIS;
pci_write_config_dword(pdev, PCI_GLI_9755_SerDes, value);
 
+   pci_read_config_dword(pdev, PCI_GLI_9755_CFG2, );
+   value &= ~PCI_GLI_9755_CFG2_L1DLY;
+   /* set ASPM L1 entry delay to 7.9us */
+   value |= FIELD_PREP(PCI_GLI_9755_CFG2_L1DLY,
+   GLI_9755_CFG2_L1DLY_VALUE);
+   pci_write_config_dword(pdev, PCI_GLI_9755_CFG2, value);
+
gl9755_wt_off(pdev);
 }
 
@@ -562,6 +593,7 @@ static int gli_probe_slot_gl9750(struct sdhci_pci_slot 
*slot)
 {
struct sdhci_host *host = slot->host;
 
+   gl9750_hw_setting(host);
gli_pcie_enable_msi(slot);
slot->host->mmc->caps2 |= MMC_CAP2_NO_SDIO;
sdhci_enable_v4_mode(host);
-- 
2.30.0

Re: [PATCH v4 01/16] perf/x86/intel: Add x86_pmu.pebs_vmx for Ice Lake Servers

2021-04-14 Thread Like Xu


On 2021/4/15 10:49, Liuxiangdong wrote:



On 2021/4/15 9:38, Xu, Like wrote:

On 2021/4/14 22:49, Liuxiangdong wrote:

Hi Like,

On 2021/4/9 16:46, Like Xu wrote:

Hi Liuxiangdong,

On 2021/4/9 16:33, Liuxiangdong (Aven, Cloud Infrastructure Service 
Product Dept.) wrote:

Do you have any comments or ideas about it ?

https://lore.kernel.org/kvm/606e5ef6.2060...@huawei.com/


My expectation is that there may be many fewer PEBS samples
on Skylake without any soft lockup.

You may need to confirm the statement

"All that matters is that the EPT pages don't get
unmapped ever while PEBS is active"

is true in the kernel level.

Try "-overcommit mem-lock=on" for your qemu.



Sorry, in fact, I don't quite understand
"My expectation is that there may be many fewer PEBS samples on Skylake 
without any soft lockup. "


For testcase: perf record -e instructions:pp ./workload

We can get 2242 samples on the ICX guest, but
only 17 samples or less on the Skylake guest.

In my testcase on Skylake, neither the host nor the guest triggered the 
soft lock.




Thanks for your explanation!
Could you please show your complete qemu command and qemu version used on 
Skylake?

I hope I can test it again according to your qemu cmd and version.


A new version is released and you may have a try.

qemu command: "-enable-kvm -cpu host,migratable=no"
qemu base commit: db55d2c9239d445cb7f1fa8ede8e42bd339058f4
kvm base commit: f96be2deac9bca3ef5a2b0b66b71fcef8bad586d

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 63c55f45ca92..727f55400eaf 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -5618,6 +5618,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_KABYLAKE:
case INTEL_FAM6_COMETLAKE_L:
case INTEL_FAM6_COMETLAKE:
+   x86_pmu.pebs_vmx = 1;
x86_add_quirk(intel_pebs_isolation_quirk);
x86_pmu.late_ack = true;
 		memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, 
sizeof(hw_cache_event_ids));

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 100a749251b8..9e37e3dbe3ae 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -150,9 +150,8 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, 
u32 type,

 * the accuracy of the PEBS profiling result, because the "event 
IP"
 * in the PEBS record is calibrated on the guest side.
 */
-   attr.precise_ip = 1;
-   if (x86_match_cpu(vmx_icl_pebs_cpu) && pmc->idx == 32)
-   attr.precise_ip = 3;
+   attr.precise_ip = x86_match_cpu(vmx_icl_pebs_cpu) ?
+   ((pmc->idx == 32) ? 3 : 1) : ((pmc->idx == 1) ? 3 : 1);
}

event = perf_event_create_kernel_counter(, -1, current,






And, I have used "-overcommit mem-lock=on"  when soft lockup happens.


I misunderstood the use of "mem-lock=on". It is not the same as the
guest mem pin and I believe more kernel patches are needed.




Now, I have tried to configure 1G-hugepages for 2G-mem vm. Each of guest 
numa nodes has 1G mem.
When I use pebs(perf record -e cycles:pp) in guest, there are successful 
pebs samples just for a while and

then I cannot get pebs samples. Host doesn't soft lockup in this process.


In the worst case, no samples are expected.



Is there something wrong on Skylake, given that we can only get a few samples?
IRQ?  Or is using hugepages not effective?


The few samples come from a hardware limitation.
Skylake doesn't have this "EPT-Friendly PEBS" capability, and
some PEBS records will be lost when used by guests.



Thanks!




On 2021/4/6 13:14, Xu, Like wrote:

Hi Xiangdong,

On 2021/4/6 11:24, Liuxiangdong (Aven, Cloud Infrastructure Service 
Product Dept.) wrote:

Hi, Like.
Some questions about this new PEBS patch set:
https://lore.kernel.org/kvm/20210329054137.120994-2-like...@linux.intel.com/ 



The new hardware facility supporting guest PEBS is only available
on Intel Ice Lake Server platforms for now.


Yes, we have documented this "EPT-friendly PEBS" capability in the SDM
18.3.10.1 Processor Event Based Sampling (PEBS) Facility

And again, this patch set doesn't officially support guest PEBS on 
the Skylake.





AFAIK, Icelake supports adaptive PEBS and extended PEBS, which
Skylake doesn't.
But we can still use IA32_PEBS_ENABLE MSR to indicate 
general-purpose counter in Skylake.


For Skylake, only the PMC0-PMC3 are valid for PEBS and you may
mask the other unsupported bits in the pmu->pebs_enable_mask.


Is there anything else that only Icelake supports in this patches set?


The PDIR counter on the Ice Lake is the fixed counter 0
while the PDIR counter on the Sky Lake is the gp counter 1.

You may also expose x86_pmu.pebs_vmx for Skylake in the 1st patch.




Besides, we have tried this patches set in Icelake.  We can use 
pebs(eg: "perf record -e cycles:pp")
when guest is kernel-5.11, but can't when kernel-4.18. Is there a 
minimum

Re: [PATCH v2 1/4] dt-bindings: arm: imx: Add i.mx6q DaSheng COM-9XX SBC board dts support

2021-04-14 Thread dillon min

On Wed, Apr 14, 2021 at 9:00 PM Krzysztof Kozlowski
 wrote:
>
> On 14/04/2021 08:51, dillon.min...@gmail.com wrote:
> > From: dillon min 
> >
> > The DaSheng Com-9xx is an ARM based single board computer (SBC)
> > featuring:
> > - i.MX6Q
> > - 2GiB LPDDR3 DRAM
> > - 8GiB eMMC 5.0 FLASH
> > - 4MiB SPI Flash
> > - USB 2.0 Host/Device
> > - Multiple multi-protocol RS232/RS485 Serial ports
> > - microSD socket
> > - 5V DC power input
> > - HDMI1.4a,1080p@60
> > - RGMIIx1 Gigabit Ethernet
> > - CSI0x1, connect with ov2659
> >
> > Signed-off-by: dillon min 
> > Cc: Krzysztof Kozlowski 
> > ---
> > v2: no changes
> >
> >  Documentation/devicetree/bindings/arm/fsl.yaml | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/Documentation/devicetree/bindings/arm/fsl.yaml 
> > b/Documentation/devicetree/bindings/arm/fsl.yaml
> > index 297c87f45db8..24bdfbd4853f 100644
> > --- a/Documentation/devicetree/bindings/arm/fsl.yaml
> > +++ b/Documentation/devicetree/bindings/arm/fsl.yaml
> > @@ -206,6 +206,7 @@ properties:
> >- fsl,imx6q-sabreauto
> >- fsl,imx6q-sabrelite
> >- fsl,imx6q-sabresd
> > +  - ds,imx6q-sbc  # Da Sheng COM-9XX Modules
>
> You break here the alphabetical order. Should be after dmo,imx6q-edmqmx6.
Agreed, will move it below dmo,imx6q-edmqmx6 in v3. Thanks for your patience.

Best regards.
Dillon
>
>
> Best regards,
> Krzysztof

[PATCH v5 16/16] KVM: x86/pmu: Expose CPUIDs feature bits PDCM, DS, DTES64

2021-04-14 Thread Like Xu

The CPUID features PDCM, DS and DTES64 are required for PEBS feature.
KVM would expose CPUID feature PDCM, DS and DTES64 to guest when PEBS
is supported in the KVM on the Ice Lake server platforms.

Originally-by: Andi Kleen 
Co-developed-by: Kan Liang 
Signed-off-by: Kan Liang 
Co-developed-by: Luwei Kang 
Signed-off-by: Luwei Kang 
Signed-off-by: Like Xu 
---
 arch/x86/kvm/vmx/capabilities.h | 26 ++
 arch/x86/kvm/vmx/vmx.c  | 15 +++
 2 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index d1d77985e889..241e41221701 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -5,6 +5,7 @@
 #include 
 
 #include "lapic.h"
+#include "pmu.h"
 
 extern bool __read_mostly enable_vpid;
 extern bool __read_mostly flexpriority_enabled;
@@ -378,20 +379,29 @@ static inline bool vmx_pt_mode_is_host_guest(void)
return pt_mode == PT_MODE_HOST_GUEST;
 }
 
-static inline u64 vmx_get_perf_capabilities(void)
+static inline bool vmx_pebs_supported(void)
 {
-   u64 perf_cap = 0;
-
-   if (boot_cpu_has(X86_FEATURE_PDCM))
-   rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap);
-
-   perf_cap &= PMU_CAP_LBR_FMT;
+   return boot_cpu_has(X86_FEATURE_PEBS) && kvm_pmu_cap.pebs_vmx;
+}
 
+static inline u64 vmx_get_perf_capabilities(void)
+{
/*
 * Since counters are virtualized, KVM would support full
 * width counting unconditionally, even if the host lacks it.
 */
-   return PMU_CAP_FW_WRITES | perf_cap;
+   u64 perf_cap = PMU_CAP_FW_WRITES;
+   u64 host_perf_cap = 0;
+
+   if (boot_cpu_has(X86_FEATURE_PDCM))
+   rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
+
+   perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+
+   if (vmx_pebs_supported())
+   perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
+
+   return perf_cap;
 }
 
 static inline u64 vmx_supported_debugctl(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5ad12bb76296..e44eb57706e2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2261,6 +2261,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
if (!cpuid_model_is_consistent(vcpu))
return 1;
}
+   if (data & PERF_CAP_PEBS_FORMAT) {
+   if ((data & PERF_CAP_PEBS_MASK) !=
+   (vmx_get_perf_capabilities() & PERF_CAP_PEBS_MASK))
+   return 1;
+   if (!guest_cpuid_has(vcpu, X86_FEATURE_DS))
+   return 1;
+   if (!guest_cpuid_has(vcpu, X86_FEATURE_DTES64))
+   return 1;
+   if (!cpuid_model_is_consistent(vcpu))
+   return 1;
+   }
ret = kvm_set_msr_common(vcpu, msr_info);
break;
 
@@ -7287,6 +7298,10 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_clear(X86_FEATURE_INVPCID);
if (vmx_pt_mode_is_host_guest())
kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT);
+   if (vmx_pebs_supported()) {
+   kvm_cpu_cap_check_and_set(X86_FEATURE_DS);
+   kvm_cpu_cap_check_and_set(X86_FEATURE_DTES64);
+   }
 
if (vmx_umip_emulated())
kvm_cpu_cap_set(X86_FEATURE_UMIP);
-- 
2.30.2

[PATCH v5 15/16] KVM: x86/cpuid: Refactor host/guest CPU model consistency check

2021-04-14 Thread Like Xu

For the same purpose, the legacy intel_pmu_lbr_is_compatible() can be
renamed for reuse by more callers, and the comment about the LBR
use case can be deleted along the way.

Signed-off-by: Like Xu 
---
 arch/x86/kvm/cpuid.h |  5 +
 arch/x86/kvm/vmx/pmu_intel.c | 12 +---
 arch/x86/kvm/vmx/vmx.c   |  2 +-
 arch/x86/kvm/vmx/vmx.h   |  1 -
 4 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index ded84d244f19..3114ecff8080 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -278,6 +278,11 @@ static inline int guest_cpuid_model(struct kvm_vcpu *vcpu)
return x86_model(best->eax);
 }
 
+static inline bool cpuid_model_is_consistent(struct kvm_vcpu *vcpu)
+{
+   return boot_cpu_data.x86_model == guest_cpuid_model(vcpu);
+}
+
 static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
 {
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index fb297ffb5481..31e0e5e7d5a5 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -173,16 +173,6 @@ static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu 
*pmu, u32 msr)
return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
 }
 
-bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu)
-{
-   /*
-* As a first step, a guest could only enable LBR feature if its
-* cpu model is the same as the host because the LBR registers
-* would be pass-through to the guest and they're model specific.
-*/
-   return boot_cpu_data.x86_model == guest_cpuid_model(vcpu);
-}
-
 bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
 {
struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
@@ -578,7 +568,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 
nested_vmx_pmu_entry_exit_ctls_update(vcpu);
 
-   if (intel_pmu_lbr_is_compatible(vcpu))
+   if (cpuid_model_is_consistent(vcpu))
x86_perf_get_lbr(_desc->records);
else
lbr_desc->records.nr = 0;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 4f0e35a0cd0f..5ad12bb76296 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2258,7 +2258,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
if ((data & PMU_CAP_LBR_FMT) !=
(vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT))
return 1;
-   if (!intel_pmu_lbr_is_compatible(vcpu))
+   if (!cpuid_model_is_consistent(vcpu))
return 1;
}
ret = kvm_set_msr_common(vcpu, msr_info);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 1311f67046aa..28a588d83a01 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -97,7 +97,6 @@ union vmx_exit_reason {
 #define vcpu_to_lbr_records(vcpu) (_vmx(vcpu)->lbr_desc.records)
 
 void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
-bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu);
 bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu);
 
 int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
-- 
2.30.2

[PATCH v5 14/16] KVM: x86/pmu: Add kvm_pmu_cap to optimize perf_get_x86_pmu_capability

2021-04-14 Thread Like Xu

The information obtained from the interface perf_get_x86_pmu_capability()
doesn't change, so an exportable "struct x86_pmu_capability" is introduced
for all guests in the KVM, and it's initialized before hardware_setup().

Signed-off-by: Like Xu 
---
 arch/x86/kvm/cpuid.c | 24 +++-
 arch/x86/kvm/pmu.c   |  3 +++
 arch/x86/kvm/pmu.h   | 20 
 arch/x86/kvm/vmx/pmu_intel.c | 17 -
 arch/x86/kvm/x86.c   |  9 -
 5 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6bd2f8b830e4..b3c751d425b7 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -680,32 +680,22 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
case 9:
break;
case 0xa: { /* Architectural Performance Monitoring */
-   struct x86_pmu_capability cap;
union cpuid10_eax eax;
union cpuid10_edx edx;
 
-   perf_get_x86_pmu_capability(&cap);
+   eax.split.version_id = kvm_pmu_cap.version;
+   eax.split.num_counters = kvm_pmu_cap.num_counters_gp;
+   eax.split.bit_width = kvm_pmu_cap.bit_width_gp;
+   eax.split.mask_length = kvm_pmu_cap.events_mask_len;
 
-   /*
-* Only support guest architectural pmu on a host
-* with architectural pmu.
-*/
-   if (!cap.version)
-   memset(&cap, 0, sizeof(cap));
-
-   eax.split.version_id = min(cap.version, 2);
-   eax.split.num_counters = cap.num_counters_gp;
-   eax.split.bit_width = cap.bit_width_gp;
-   eax.split.mask_length = cap.events_mask_len;
-
-   edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS);
-   edx.split.bit_width_fixed = cap.bit_width_fixed;
+   edx.split.num_counters_fixed = kvm_pmu_cap.num_counters_fixed;
+   edx.split.bit_width_fixed = kvm_pmu_cap.bit_width_fixed;
edx.split.anythread_deprecated = 1;
edx.split.reserved1 = 0;
edx.split.reserved2 = 0;
 
entry->eax = eax.full;
-   entry->ebx = cap.events_mask;
+   entry->ebx = kvm_pmu_cap.events_mask;
entry->ecx = 0;
entry->edx = edx.full;
break;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 666a5e90a3cb..4798bf991b60 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -19,6 +19,9 @@
 #include "lapic.h"
 #include "pmu.h"
 
+struct x86_pmu_capability __read_mostly kvm_pmu_cap;
+EXPORT_SYMBOL_GPL(kvm_pmu_cap);
+
 /* This is enough to filter the vast majority of currently defined events. */
 #define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
 
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 6c902b2d2d5a..e945cf604c13 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -160,6 +160,24 @@ static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
 }
 
+extern struct x86_pmu_capability kvm_pmu_cap;
+
+static inline void kvm_init_pmu_capability(void)
+{
+   perf_get_x86_pmu_capability(&kvm_pmu_cap);
+
+   /*
+* Only support guest architectural pmu on
+* a host with architectural pmu.
+*/
+   if (!kvm_pmu_cap.version)
+   memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap));
+
+   kvm_pmu_cap.version = min(kvm_pmu_cap.version, 2);
+   kvm_pmu_cap.num_counters_fixed = min(kvm_pmu_cap.num_counters_fixed,
+MAX_FIXED_COUNTERS);
+}
+
 void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
 void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
 void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
@@ -177,9 +195,11 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu);
 void kvm_pmu_cleanup(struct kvm_vcpu *vcpu);
 void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
+void kvm_init_pmu_capability(void);
 
 bool is_vmware_backdoor_pmc(u32 pmc_idx);
 
 extern struct kvm_pmu_ops intel_pmu_ops;
 extern struct kvm_pmu_ops amd_pmu_ops;
+
 #endif /* __KVM_X86_PMU_H */
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 989e7245d790..fb297ffb5481 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -504,8 +504,6 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 {
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct lbr_desc *lbr_desc = vcpu_to_lbr_desc(vcpu);
-
-   struct x86_pmu_capability x86_pmu;
struct kvm_cpuid_entry2 *entry;
union cpuid10_eax eax;
union cpuid10_edx edx;
@@ -532,13 +530,14 @@ static void intel_pmu_refresh(struct kvm_vcpu

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 1382 matches

Mail list logo