[PATCH v7 05/10] PCI: Add support for relative addressing in quirk tables
Allow the PCI quirk tables to be emitted in a way that avoids absolute references to the hook functions. This reduces the size of the entries, and, more importantly, makes them invariant under runtime relocation (e.g., for KASLR). Acked-by: Bjorn Helgaas Signed-off-by: Ard Biesheuvel --- drivers/pci/quirks.c | 13 ++--- include/linux/pci.h | 20 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 10684b17d0bd..b6d51b4d5ce1 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3556,9 +3556,16 @@ static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, f->vendor == (u16) PCI_ANY_ID) && (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) { - calltime = fixup_debug_start(dev, f->hook); - f->hook(dev); - fixup_debug_report(dev, calltime, f->hook); + void (*hook)(struct pci_dev *dev); +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS + hook = (void *)((unsigned long)&f->hook_offset + + f->hook_offset); +#else + hook = f->hook; +#endif + calltime = fixup_debug_start(dev, hook); + hook(dev); + fixup_debug_report(dev, calltime, hook); } } diff --git a/include/linux/pci.h b/include/linux/pci.h index c170c9250c8b..086c3965710b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1792,7 +1792,11 @@ struct pci_fixup { u16 device; /* You can use PCI_ANY_ID here of course */ u32 class; /* You can use PCI_ANY_ID here too */ unsigned int class_shift; /* should be 0, 8, 16 */ +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS + int hook_offset; +#else void (*hook)(struct pci_dev *dev); +#endif }; enum pci_fixup_pass { @@ -1806,12 +1810,28 @@ enum pci_fixup_pass { pci_fixup_suspend_late, /* pci_device_suspend_late() */ }; +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ + class_shift, hook) \ + __ADDRESSABLE(hook) \ + asm(".section " #sec ", \"a\" \n" \ + ".balign 16 \n" \ + ".short " #vendor ", " #device " \n" \ + ".long "#class ", " 
#class_shift " \n" \ + ".long "VMLINUX_SYMBOL_STR(hook) " - . \n" \ + ".previous \n"); +#define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,\ + class_shift, hook)\ + __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ + class_shift, hook) +#else /* Anonymous variables would be nice... */ #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class, \ class_shift, hook)\ static const struct pci_fixup __PASTE(__pci_fixup_##name,__LINE__) __used \ __attribute__((__section__(#section), aligned((sizeof(void *))))) \ = { vendor, device, class, class_shift, hook }; +#endif #define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class, \ class_shift, hook) \ -- 2.11.0
[PATCH v7 05/10] PCI: Add support for relative addressing in quirk tables
Allow the PCI quirk tables to be emitted in a way that avoids absolute references to the hook functions. This reduces the size of the entries, and, more importantly, makes them invariant under runtime relocation (e.g., for KASLR). Acked-by: Bjorn Helgaas Signed-off-by: Ard Biesheuvel --- drivers/pci/quirks.c | 13 ++--- include/linux/pci.h | 20 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 10684b17d0bd..b6d51b4d5ce1 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3556,9 +3556,16 @@ static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, f->vendor == (u16) PCI_ANY_ID) && (f->device == dev->device || f->device == (u16) PCI_ANY_ID)) { - calltime = fixup_debug_start(dev, f->hook); - f->hook(dev); - fixup_debug_report(dev, calltime, f->hook); + void (*hook)(struct pci_dev *dev); +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS + hook = (void *)((unsigned long)&f->hook_offset + + f->hook_offset); +#else + hook = f->hook; +#endif + calltime = fixup_debug_start(dev, hook); + hook(dev); + fixup_debug_report(dev, calltime, hook); } } diff --git a/include/linux/pci.h b/include/linux/pci.h index c170c9250c8b..086c3965710b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1792,7 +1792,11 @@ struct pci_fixup { u16 device; /* You can use PCI_ANY_ID here of course */ u32 class; /* You can use PCI_ANY_ID here too */ unsigned int class_shift; /* should be 0, 8, 16 */ +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS + int hook_offset; +#else void (*hook)(struct pci_dev *dev); +#endif }; enum pci_fixup_pass { @@ -1806,12 +1810,28 @@ enum pci_fixup_pass { pci_fixup_suspend_late, /* pci_device_suspend_late() */ }; +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ + class_shift, hook) \ + __ADDRESSABLE(hook) \ + asm(".section " #sec ", \"a\" \n" \ + ".balign 16 \n" \ + ".short " #vendor ", " #device " \n" \ + ".long "#class ", " 
#class_shift " \n" \ + ".long "VMLINUX_SYMBOL_STR(hook) " - . \n" \ + ".previous \n"); +#define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,\ + class_shift, hook)\ + __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \ + class_shift, hook) +#else /* Anonymous variables would be nice... */ #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class, \ class_shift, hook)\ static const struct pci_fixup __PASTE(__pci_fixup_##name,__LINE__) __used \ __attribute__((__section__(#section), aligned((sizeof(void *))))) \ = { vendor, device, class, class_shift, hook }; +#endif #define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class, \ class_shift, hook) \ -- 2.11.0
Re: [RESEND PATCH v2 08/15] ASoC: qcom: q6asm: add support to audio stream apis
On Thu 14 Dec 09:33 PST 2017, srinivas.kandaga...@linaro.org wrote: > From: Srinivas Kandagatla > > This patch adds support to open, write and media format commands > in the q6asm module. > > Signed-off-by: Srinivas Kandagatla > --- > sound/soc/qcom/qdsp6/q6asm.c | 530 > ++- > sound/soc/qcom/qdsp6/q6asm.h | 42 > 2 files changed, 571 insertions(+), 1 deletion(-) > > diff --git a/sound/soc/qcom/qdsp6/q6asm.c b/sound/soc/qcom/qdsp6/q6asm.c > index 4be92441f524..dabd6509ef99 100644 > --- a/sound/soc/qcom/qdsp6/q6asm.c > +++ b/sound/soc/qcom/qdsp6/q6asm.c > @@ -8,16 +8,34 @@ > #include > #include > #include > +#include > #include > #include > #include > #include "q6asm.h" > #include "common.h" > > +#define ASM_STREAM_CMD_CLOSE 0x00010BCD > +#define ASM_STREAM_CMD_FLUSH 0x00010BCE > +#define ASM_SESSION_CMD_PAUSE 0x00010BD3 > +#define ASM_DATA_CMD_EOS 0x00010BDB > +#define DEFAULT_POPP_TOPOLOGY 0x00010BE4 > +#define ASM_STREAM_CMD_FLUSH_READBUFS 0x00010C09 > #define ASM_CMD_SHARED_MEM_MAP_REGIONS 0x00010D92 > #define ASM_CMDRSP_SHARED_MEM_MAP_REGIONS 0x00010D93 > #define ASM_CMD_SHARED_MEM_UNMAP_REGIONS 0x00010D94 > - > +#define ASM_DATA_CMD_MEDIA_FMT_UPDATE_V2 0x00010D98 > +#define ASM_DATA_EVENT_WRITE_DONE_V2 0x00010D99 > +#define ASM_SESSION_CMD_RUN_V2 0x00010DAA > +#define ASM_MEDIA_FMT_MULTI_CHANNEL_PCM_V2 0x00010DA5 > +#define ASM_DATA_CMD_WRITE_V2 0x00010DAB > +#define ASM_SESSION_CMD_SUSPEND 0x00010DEC > +#define ASM_STREAM_CMD_OPEN_WRITE_V3 0x00010DB3 > + > +#define ASM_LEGACY_STREAM_SESSION 0 > +#define ASM_END_POINT_DEVICE_MATRIX 0 > +#define DEFAULT_APP_TYPE 0 > +#define TUN_WRITE_IO_MODE 0x0008 /* tunnel read write mode */ > #define TUN_READ_IO_MODE 0x0004 /* tunnel read write mode */ > #define SYNC_IO_MODE 0x0001 > #define ASYNC_IO_MODE 0x0002 Probably prettier to reorder these and make them Q6ASM_IO_MODE_xyz [..] 
> > +static int32_t q6asm_callback(struct apr_device *adev, This callback is an extracted part of q6asm_srvc_callback(), can it be given a more descriptive name? > + struct apr_client_data *data, int session_id) > +{ > + struct audio_client *ac;// = (struct audio_client *)priv; > + uint32_t token; > + uint32_t *payload; > + uint32_t wakeup_flag = 1; > + uint32_t client_event = 0; > + struct q6asm *q6asm = dev_get_drvdata(&adev->dev); > + > + if (data == NULL) > + return -EINVAL; > + > + ac = q6asm_get_audio_client(q6asm, session_id); > + if (!q6asm_is_valid_audio_client(ac)) > + return -EINVAL; > + > + payload = data->payload; > + > + if (data->opcode == APR_BASIC_RSP_RESULT) { Move this into the switch. > + token = data->token; > + switch (payload[0]) { This is again that common response struct. > + case ASM_SESSION_CMD_PAUSE: > + client_event = ASM_CLIENT_EVENT_CMD_PAUSE_DONE; > + break; > + case ASM_SESSION_CMD_SUSPEND: > + client_event = ASM_CLIENT_EVENT_CMD_SUSPEND_DONE; > + break; > + case ASM_DATA_CMD_EOS: > + client_event = ASM_CLIENT_EVENT_CMD_EOS_DONE; > + break; > + break; > + case ASM_STREAM_CMD_FLUSH: > + client_event = ASM_CLIENT_EVENT_CMD_FLUSH_DONE; > + break; > + case ASM_SESSION_CMD_RUN_V2: > + client_event = ASM_CLIENT_EVENT_CMD_RUN_DONE; > + break; > + > + case ASM_STREAM_CMD_FLUSH_READBUFS: > + if (token != ac->session) { > + dev_err(ac->dev, "session invalid\n"); > + return -EINVAL; > + } > + case ASM_STREAM_CMD_CLOSE: > + client_event = ASM_CLIENT_EVENT_CMD_CLOSE_DONE; > + break; > + case ASM_STREAM_CMD_OPEN_WRITE_V3: > + case ASM_DATA_CMD_MEDIA_FMT_UPDATE_V2: > + if (payload[1] != 0) { > + dev_err(ac->dev, > + "cmd = 0x%x returned error = 0x%x\n", > + payload[0], payload[1]); > + if (wakeup_flag) { > + ac->cmd_state = payload[1]; > + wake_up(&ac->cmd_wait); > + } > + return 0; > +
Re: [RESEND PATCH v2 08/15] ASoC: qcom: q6asm: add support to audio stream apis
On Thu 14 Dec 09:33 PST 2017, srinivas.kandaga...@linaro.org wrote: > From: Srinivas Kandagatla > > This patch adds support to open, write and media format commands > in the q6asm module. > > Signed-off-by: Srinivas Kandagatla > --- > sound/soc/qcom/qdsp6/q6asm.c | 530 > ++- > sound/soc/qcom/qdsp6/q6asm.h | 42 > 2 files changed, 571 insertions(+), 1 deletion(-) > > diff --git a/sound/soc/qcom/qdsp6/q6asm.c b/sound/soc/qcom/qdsp6/q6asm.c > index 4be92441f524..dabd6509ef99 100644 > --- a/sound/soc/qcom/qdsp6/q6asm.c > +++ b/sound/soc/qcom/qdsp6/q6asm.c > @@ -8,16 +8,34 @@ > #include > #include > #include > +#include > #include > #include > #include > #include "q6asm.h" > #include "common.h" > > +#define ASM_STREAM_CMD_CLOSE 0x00010BCD > +#define ASM_STREAM_CMD_FLUSH 0x00010BCE > +#define ASM_SESSION_CMD_PAUSE 0x00010BD3 > +#define ASM_DATA_CMD_EOS 0x00010BDB > +#define DEFAULT_POPP_TOPOLOGY 0x00010BE4 > +#define ASM_STREAM_CMD_FLUSH_READBUFS 0x00010C09 > #define ASM_CMD_SHARED_MEM_MAP_REGIONS 0x00010D92 > #define ASM_CMDRSP_SHARED_MEM_MAP_REGIONS 0x00010D93 > #define ASM_CMD_SHARED_MEM_UNMAP_REGIONS 0x00010D94 > - > +#define ASM_DATA_CMD_MEDIA_FMT_UPDATE_V2 0x00010D98 > +#define ASM_DATA_EVENT_WRITE_DONE_V2 0x00010D99 > +#define ASM_SESSION_CMD_RUN_V2 0x00010DAA > +#define ASM_MEDIA_FMT_MULTI_CHANNEL_PCM_V2 0x00010DA5 > +#define ASM_DATA_CMD_WRITE_V2 0x00010DAB > +#define ASM_SESSION_CMD_SUSPEND 0x00010DEC > +#define ASM_STREAM_CMD_OPEN_WRITE_V3 0x00010DB3 > + > +#define ASM_LEGACY_STREAM_SESSION 0 > +#define ASM_END_POINT_DEVICE_MATRIX 0 > +#define DEFAULT_APP_TYPE 0 > +#define TUN_WRITE_IO_MODE 0x0008 /* tunnel read write mode */ > #define TUN_READ_IO_MODE 0x0004 /* tunnel read write mode */ > #define SYNC_IO_MODE 0x0001 > #define ASYNC_IO_MODE 0x0002 Probably prettier to reorder these and make them Q6ASM_IO_MODE_xyz [..] 
> > +static int32_t q6asm_callback(struct apr_device *adev, This callback is an extracted part of q6asm_srvc_callback(), can it be given a more descriptive name? > + struct apr_client_data *data, int session_id) > +{ > + struct audio_client *ac;// = (struct audio_client *)priv; > + uint32_t token; > + uint32_t *payload; > + uint32_t wakeup_flag = 1; > + uint32_t client_event = 0; > + struct q6asm *q6asm = dev_get_drvdata(&adev->dev); > + > + if (data == NULL) > + return -EINVAL; > + > + ac = q6asm_get_audio_client(q6asm, session_id); > + if (!q6asm_is_valid_audio_client(ac)) > + return -EINVAL; > + > + payload = data->payload; > + > + if (data->opcode == APR_BASIC_RSP_RESULT) { Move this into the switch. > + token = data->token; > + switch (payload[0]) { This is again that common response struct. > + case ASM_SESSION_CMD_PAUSE: > + client_event = ASM_CLIENT_EVENT_CMD_PAUSE_DONE; > + break; > + case ASM_SESSION_CMD_SUSPEND: > + client_event = ASM_CLIENT_EVENT_CMD_SUSPEND_DONE; > + break; > + case ASM_DATA_CMD_EOS: > + client_event = ASM_CLIENT_EVENT_CMD_EOS_DONE; > + break; > + break; > + case ASM_STREAM_CMD_FLUSH: > + client_event = ASM_CLIENT_EVENT_CMD_FLUSH_DONE; > + break; > + case ASM_SESSION_CMD_RUN_V2: > + client_event = ASM_CLIENT_EVENT_CMD_RUN_DONE; > + break; > + > + case ASM_STREAM_CMD_FLUSH_READBUFS: > + if (token != ac->session) { > + dev_err(ac->dev, "session invalid\n"); > + return -EINVAL; > + } > + case ASM_STREAM_CMD_CLOSE: > + client_event = ASM_CLIENT_EVENT_CMD_CLOSE_DONE; > + break; > + case ASM_STREAM_CMD_OPEN_WRITE_V3: > + case ASM_DATA_CMD_MEDIA_FMT_UPDATE_V2: > + if (payload[1] != 0) { > + dev_err(ac->dev, > + "cmd = 0x%x returned error = 0x%x\n", > + payload[0], payload[1]); > + if (wakeup_flag) { > + ac->cmd_state = payload[1]; > + wake_up(&ac->cmd_wait); > + } > + return 0; > + } > + break; > +
Re: [PATCH 4.9 00/75] 4.9.74-stable review
On Tue, Jan 02, 2018 at 02:11:25PM -0500, Neal Cardwell wrote: > On Tue, Jan 2, 2018 at 1:32 PM, David Miller wrote: > > From: Neal Cardwell > > Date: Tue, 2 Jan 2018 11:57:59 -0500 > > > >> On Mon, Jan 1, 2018 at 9:31 AM, Greg Kroah-Hartman > >> wrote: > >>> This is the start of the stable review cycle for the 4.9.74 release. > >>> There are 75 patches in this series, all will be posted as a response > >>> to this one. If anyone has any issues with these being applied, please > >>> let me know. > >>> > >>> Responses should be made by Wed Jan 3 14:00:03 UTC 2018. > >>> Anything received after that time might be too late. > >>> > >>> The whole patch series can be found in one patch at: > >>> kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.74-rc1.gz > >>> or in the git tree and branch at: > >>> > >>> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git > >>> linux-4.9.y > >>> and the diffstat can be found below. > >> > >> Hi Greg, > >> > >> In looking at the 4.9 and 4.14 patches yesterday, I noticed there were > >> two TCP BBR fixes that made it into 4.14 but not 4.9. Doing an > >> inventory of the TCP BBR fixes, AFAICT we have: > >> > >> c589e69b508d tcp_bbr: record "full bw reached" decision in new > >> full_bw_reached bit > >> - in 4.9 and 4.14 (great) > >> > >> 2f6c498e4f15 tcp_bbr: reset full pipe detection on loss recovery undo > >> - in 4.14 (but not 4.9) > >> > >> 600647d467c6 tcp_bbr: reset long-term bandwidth sampling on loss recovery > >> undo > >> - in 4.14 (but not 4.9) > >> > >> Lacking the second and third patches in 4.9 will not cause any new > >> problems, but it will miss out on some nice fixes. If it's possible to > >> get 2f6c498e4f15 and 600647d467c6 either into 4.9.74 or 4.9.75, I > >> would be very grateful. > > > > These were not straight-forward to backport and I felt the risk outweighed > > the gains. > > > > If you want to do the backport yourself and you feel confident in it, > > feel free. 
> > Thanks, Greg and David. Looks like these 2 patches will cherry-pick > cleanly if cherry-picked in the following sequence, on top of > 4.9.74-rc1, which already has 6c9e73ef9aa7 ("tcp_bbr: record "full bw > reached" decision in new full_bw_reached bit"): > > $ git checkout linux-stable-rc/linux-4.9.y > > $ git cherry-pick 2f6c498e4f15 > Performing inexact rename detection: 100% (17803152/17803152), done. > [detached HEAD 0982234c57e1] tcp_bbr: reset full pipe detection on > loss recovery undo > Date: Thu Dec 7 12:43:31 2017 -0500 > 1 file changed, 4 insertions(+) > > $ git cherry-pick 600647d467c6 > Performing inexact rename detection: 100% (17803152/17803152), done. > [detached HEAD 7e866eccd083] tcp_bbr: reset long-term bandwidth > sampling on loss recovery undo > Date: Thu Dec 7 12:43:32 2017 -0500 > 1 file changed, 1 insertion(+) > > $ git log --oneline --decorate | head -3 > 7e866eccd083 (HEAD) tcp_bbr: reset long-term bandwidth sampling on > loss recovery undo > 0982234c57e1 tcp_bbr: reset full pipe detection on loss recovery undo > 79070be7f1ae (linux-stable-rc/linux-4.9.y) Linux 4.9.74-rc1 > > I verified that this compiles without warnings, and boots, and BBR works. > > Shall I prepare another version of these 2 patches, or do we think > this recipe will be sufficient? (Sorry I am not more familiar with the > backport-to-stable process.) That works, those two patches are now queued up for the next stable release, thanks! greg k-h
Re: [PATCH 4.9 00/75] 4.9.74-stable review
On Tue, Jan 02, 2018 at 02:11:25PM -0500, Neal Cardwell wrote: > On Tue, Jan 2, 2018 at 1:32 PM, David Miller wrote: > > From: Neal Cardwell > > Date: Tue, 2 Jan 2018 11:57:59 -0500 > > > >> On Mon, Jan 1, 2018 at 9:31 AM, Greg Kroah-Hartman > >> wrote: > >>> This is the start of the stable review cycle for the 4.9.74 release. > >>> There are 75 patches in this series, all will be posted as a response > >>> to this one. If anyone has any issues with these being applied, please > >>> let me know. > >>> > >>> Responses should be made by Wed Jan 3 14:00:03 UTC 2018. > >>> Anything received after that time might be too late. > >>> > >>> The whole patch series can be found in one patch at: > >>> kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.74-rc1.gz > >>> or in the git tree and branch at: > >>> > >>> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git > >>> linux-4.9.y > >>> and the diffstat can be found below. > >> > >> Hi Greg, > >> > >> In looking at the 4.9 and 4.14 patches yesterday, I noticed there were > >> two TCP BBR fixes that made it into 4.14 but not 4.9. Doing an > >> inventory of the TCP BBR fixes, AFAICT we have: > >> > >> c589e69b508d tcp_bbr: record "full bw reached" decision in new > >> full_bw_reached bit > >> - in 4.9 and 4.14 (great) > >> > >> 2f6c498e4f15 tcp_bbr: reset full pipe detection on loss recovery undo > >> - in 4.14 (but not 4.9) > >> > >> 600647d467c6 tcp_bbr: reset long-term bandwidth sampling on loss recovery > >> undo > >> - in 4.14 (but not 4.9) > >> > >> Lacking the second and third patches in 4.9 will not cause any new > >> problems, but it will miss out on some nice fixes. If it's possible to > >> get 2f6c498e4f15 and 600647d467c6 either into 4.9.74 or 4.9.75, I > >> would be very grateful. > > > > These were not straight-forward to backport and I felt the risk outweighed > > the gains. > > > > If you want to do the backport yourself and you feel confident in it, > > feel free. 
> > Thanks, Greg and David. Looks like these 2 patches will cherry-pick > cleanly if cherry-picked in the following sequence, on top of > 4.9.74-rc1, which already has 6c9e73ef9aa7 ("tcp_bbr: record "full bw > reached" decision in new full_bw_reached bit"): > > $ git checkout linux-stable-rc/linux-4.9.y > > $ git cherry-pick 2f6c498e4f15 > Performing inexact rename detection: 100% (17803152/17803152), done. > [detached HEAD 0982234c57e1] tcp_bbr: reset full pipe detection on > loss recovery undo > Date: Thu Dec 7 12:43:31 2017 -0500 > 1 file changed, 4 insertions(+) > > $ git cherry-pick 600647d467c6 > Performing inexact rename detection: 100% (17803152/17803152), done. > [detached HEAD 7e866eccd083] tcp_bbr: reset long-term bandwidth > sampling on loss recovery undo > Date: Thu Dec 7 12:43:32 2017 -0500 > 1 file changed, 1 insertion(+) > > $ git log --oneline --decorate | head -3 > 7e866eccd083 (HEAD) tcp_bbr: reset long-term bandwidth sampling on > loss recovery undo > 0982234c57e1 tcp_bbr: reset full pipe detection on loss recovery undo > 79070be7f1ae (linux-stable-rc/linux-4.9.y) Linux 4.9.74-rc1 > > I verified that this compiles without warnings, and boots, and BBR works. > > Shall I prepare another version of these 2 patches, or do we think > this recipe will be sufficient? (Sorry I am not more familiar with the > backport-to-stable process.) That works, those two patches are now queued up for the next stable release, thanks! greg k-h
[PATCH v7 08/10] arm64/kernel: jump_label: use relative references
On a randomly chosen distro kernel build for arm64, vmlinux.o shows the following sections, containing jump label entries, and the associated RELA relocation records, respectively: ... [38088] __jump_table PROGBITS 00e19f30 0002ea10 WA 0 0 8 [38089] .rela__jump_table RELA 01fd8bb0 0008be30 0018 I 38178 38088 8 ... In other words, we have 190 KB worth of 'struct jump_entry' instances, and 573 KB worth of RELA entries to relocate each entry's code, target and key members. This means the RELA section occupies 10% of the .init segment, and the two sections combined represent 5% of vmlinux's entire memory footprint. So let's switch from 64-bit absolute references to 32-bit relative references: this reduces the size of the __jump_table by 50%, and gets rid of the RELA section entirely. Note that this requires some extra care in the sorting routine, given that the offsets change when entries are moved around in the jump_entry table. Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/jump_label.h | 27 arch/arm64/kernel/jump_label.c | 22 +--- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 9d6e46355c89..8f82adeb7b0b 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -30,8 +30,8 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran { asm goto("1: nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" -".align 3\n\t" -".quad 1b, %l[l_yes], %c0\n\t" +".align 2\n\t" +".long 1b - ., %l[l_yes] - ., %c0 - .\n\t" ".popsection\n\t" : : "i"(&((char *)key)[branch]) : : l_yes); @@ -44,8 +44,8 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool { asm goto("1: b %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" -".align 3\n\t" -".quad 1b, %l[l_yes], %c0\n\t" +".align 2\n\t" +".long 1b - ., %l[l_yes] - ., %c0 - .\n\t" ".popsection\n\t" : : "i"(&((char *)key)[branch]) : : l_yes); @@ 
-57,19 +57,26 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool typedef u64 jump_label_t; struct jump_entry { - jump_label_t code; - jump_label_t target; - jump_label_t key; + s32 code; + s32 target; + s32 key; }; static inline jump_label_t jump_entry_code(const struct jump_entry *entry) { - return entry->code; + return (unsigned long)&entry->code + entry->code; +} + +static inline jump_label_t jump_entry_target(const struct jump_entry *entry) +{ + return (unsigned long)&entry->target + entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { - return (struct static_key *)((unsigned long)entry->key & ~1UL); + unsigned long key = (unsigned long)&entry->key + entry->key; + + return (struct static_key *)(key & ~1UL); } static inline bool jump_entry_is_branch(const struct jump_entry *entry) @@ -87,7 +94,7 @@ static inline void jump_entry_set_module_init(struct jump_entry *entry) entry->code = 0; } -#define jump_label_swap NULL +void jump_label_swap(void *a, void *b, int size); #endif /* __ASSEMBLY__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index c2dd1ad3e648..2b8e459e91f7 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -25,12 +25,12 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - void *addr = (void *)entry->code; + void *addr = (void *)jump_entry_code(entry); u32 insn; if (type == JUMP_LABEL_JMP) { - insn = aarch64_insn_gen_branch_imm(entry->code, - entry->target, + insn = aarch64_insn_gen_branch_imm(jump_entry_code(entry), + jump_entry_target(entry), AARCH64_INSN_BRANCH_NOLINK); } else { insn = aarch64_insn_gen_nop(); @@ -50,4 +50,20 @@ void arch_jump_label_transform_static(struct jump_entry *entry, */ } +void jump_label_swap(void *a, void *b, int size) +{ + long delta = (unsigned long)a - (unsigned long)b; + struct jump_entry *jea = a; + struct jump_entry *jeb = b; + 
struct jump_entry tmp = *jea; + + jea->code = jeb->code - delta; +
[PATCH v7 08/10] arm64/kernel: jump_label: use relative references
On a randomly chosen distro kernel build for arm64, vmlinux.o shows the following sections, containing jump label entries, and the associated RELA relocation records, respectively: ... [38088] __jump_table PROGBITS 00e19f30 0002ea10 WA 0 0 8 [38089] .rela__jump_table RELA 01fd8bb0 0008be30 0018 I 38178 38088 8 ... In other words, we have 190 KB worth of 'struct jump_entry' instances, and 573 KB worth of RELA entries to relocate each entry's code, target and key members. This means the RELA section occupies 10% of the .init segment, and the two sections combined represent 5% of vmlinux's entire memory footprint. So let's switch from 64-bit absolute references to 32-bit relative references: this reduces the size of the __jump_table by 50%, and gets rid of the RELA section entirely. Note that this requires some extra care in the sorting routine, given that the offsets change when entries are moved around in the jump_entry table. Signed-off-by: Ard Biesheuvel --- arch/arm64/include/asm/jump_label.h | 27 arch/arm64/kernel/jump_label.c | 22 +--- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 9d6e46355c89..8f82adeb7b0b 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -30,8 +30,8 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran { asm goto("1: nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" -".align 3\n\t" -".quad 1b, %l[l_yes], %c0\n\t" +".align 2\n\t" +".long 1b - ., %l[l_yes] - ., %c0 - .\n\t" ".popsection\n\t" : : "i"(&((char *)key)[branch]) : : l_yes); @@ -44,8 +44,8 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool { asm goto("1: b %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" -".align 3\n\t" -".quad 1b, %l[l_yes], %c0\n\t" +".align 2\n\t" +".long 1b - ., %l[l_yes] - ., %c0 - .\n\t" ".popsection\n\t" : : "i"(&((char *)key)[branch]) : : l_yes); @@ 
-57,19 +57,26 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool typedef u64 jump_label_t; struct jump_entry { - jump_label_t code; - jump_label_t target; - jump_label_t key; + s32 code; + s32 target; + s32 key; }; static inline jump_label_t jump_entry_code(const struct jump_entry *entry) { - return entry->code; + return (unsigned long)&entry->code + entry->code; +} + +static inline jump_label_t jump_entry_target(const struct jump_entry *entry) +{ + return (unsigned long)&entry->target + entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { - return (struct static_key *)((unsigned long)entry->key & ~1UL); + unsigned long key = (unsigned long)&entry->key + entry->key; + + return (struct static_key *)(key & ~1UL); } static inline bool jump_entry_is_branch(const struct jump_entry *entry) @@ -87,7 +94,7 @@ static inline void jump_entry_set_module_init(struct jump_entry *entry) entry->code = 0; } -#define jump_label_swap NULL +void jump_label_swap(void *a, void *b, int size); #endif /* __ASSEMBLY__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index c2dd1ad3e648..2b8e459e91f7 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -25,12 +25,12 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - void *addr = (void *)entry->code; + void *addr = (void *)jump_entry_code(entry); u32 insn; if (type == JUMP_LABEL_JMP) { - insn = aarch64_insn_gen_branch_imm(entry->code, - entry->target, + insn = aarch64_insn_gen_branch_imm(jump_entry_code(entry), + jump_entry_target(entry), AARCH64_INSN_BRANCH_NOLINK); } else { insn = aarch64_insn_gen_nop(); @@ -50,4 +50,20 @@ void arch_jump_label_transform_static(struct jump_entry *entry, */ } +void jump_label_swap(void *a, void *b, int size) +{ + long delta = (unsigned long)a - (unsigned long)b; + struct jump_entry *jea = a; + struct jump_entry *jeb = b; + 
struct jump_entry tmp = *jea; + + jea->code = jeb->code - delta; + jea->target =
[PATCH v7 09/10] x86: jump_label: switch to jump_entry accessors
In preparation of switching x86 to use place-relative references for the code, target and key members of struct jump_entry, replace direct references to the struct member with invocations of the new accessors. This will allow us to make the switch by modifying the accessors only. Signed-off-by: Ard Biesheuvel --- arch/x86/kernel/jump_label.c | 43 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index e56c95be2808..d64296092ef5 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -52,22 +52,24 @@ static void __jump_label_transform(struct jump_entry *entry, * Jump label is enabled for the first time. * So we expect a default_nop... */ - if (unlikely(memcmp((void *)entry->code, default_nop, 5) -!= 0)) - bug_at((void *)entry->code, __LINE__); + if (unlikely(memcmp((void *)jump_entry_code(entry), + default_nop, 5) != 0)) + bug_at((void *)jump_entry_code(entry), + __LINE__); } else { /* * ...otherwise expect an ideal_nop. Otherwise * something went horribly wrong. */ - if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) -!= 0)) - bug_at((void *)entry->code, __LINE__); + if (unlikely(memcmp((void *)jump_entry_code(entry), + ideal_nop, 5) != 0)) + bug_at((void *)jump_entry_code(entry), + __LINE__); } code.jump = 0xe9; - code.offset = entry->target - - (entry->code + JUMP_LABEL_NOP_SIZE); + code.offset = jump_entry_target(entry) - + (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); } else { /* * We are disabling this jump label. If it is not what @@ -76,14 +78,18 @@ static void __jump_label_transform(struct jump_entry *entry, * are converting the default nop to the ideal nop. 
*/ if (init) { - if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0)) - bug_at((void *)entry->code, __LINE__); + if (unlikely(memcmp((void *)jump_entry_code(entry), + default_nop, 5) != 0)) + bug_at((void *)jump_entry_code(entry), + __LINE__); } else { code.jump = 0xe9; - code.offset = entry->target - - (entry->code + JUMP_LABEL_NOP_SIZE); - if (unlikely(memcmp((void *)entry->code, &code, 5) != 0)) - bug_at((void *)entry->code, __LINE__); + code.offset = jump_entry_target(entry) - + (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); + if (unlikely(memcmp((void *)jump_entry_code(entry), + &code, 5) != 0)) + bug_at((void *)jump_entry_code(entry), + __LINE__); } memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); } @@ -97,10 +103,13 @@ static void __jump_label_transform(struct jump_entry *entry, * */ if (poker) - (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); + (*poker)((void *)jump_entry_code(entry), &code, +JUMP_LABEL_NOP_SIZE); else - text_poke_bp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE, -(void *)entry->code + JUMP_LABEL_NOP_SIZE); + text_poke_bp((void *)jump_entry_code(entry), &code, +JUMP_LABEL_NOP_SIZE, +(void *)jump_entry_code(entry) + +JUMP_LABEL_NOP_SIZE); } void arch_jump_label_transform(struct jump_entry *entry, -- 2.11.0
[PATCH v7 10/10] x86/kernel: jump_table: use relative references
Similar to the arm64 case, 64-bit x86 can benefit from using 32-bit relative references rather than 64-bit absolute ones when emitting struct jump_entry instances. Not only does this reduce the memory footprint of the entries themselves by 50%, it also removes the need for carrying relocation metadata on relocatable builds (i.e., for KASLR) which saves a fair chunk of .init space as well (although the savings are not as dramatic as on arm64) Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/jump_label.h | 35 arch/x86/kernel/jump_label.c | 16 + tools/objtool/special.c | 4 +-- 3 files changed, 39 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 009ff2699d07..35fc2c5ec846 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -36,8 +36,8 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran asm_volatile_goto("1:" ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t" + ".balign 4\n\t" + ".long 1b - ., %l[l_yes] - ., %c0 + %c1 - .\n\t" ".popsection \n\t" : : "i" (key), "i" (branch) : : l_yes); @@ -52,8 +52,8 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" "2:\n\t" ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t" + ".balign 4\n\t" + ".long 1b - ., %l[l_yes] - ., %c0 + %c1 - .\n\t" ".popsection \n\t" : : "i" (key), "i" (branch) : : l_yes); @@ -69,19 +69,26 @@ typedef u32 jump_label_t; #endif struct jump_entry { - jump_label_t code; - jump_label_t target; - jump_label_t key; + s32 code; + s32 target; + s32 key; }; static inline jump_label_t jump_entry_code(const struct jump_entry *entry) { - return entry->code; + return (unsigned long)&entry->code + entry->code; +} + +static inline jump_label_t 
jump_entry_target(const struct jump_entry *entry) +{ + return (unsigned long)&entry->target + entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { - return (struct static_key *)((unsigned long)entry->key & ~1UL); + unsigned long key = (unsigned long)&entry->key + entry->key; + + return (struct static_key *)(key & ~1UL); } static inline bool jump_entry_is_branch(const struct jump_entry *entry) @@ -99,7 +106,7 @@ static inline void jump_entry_set_module_init(struct jump_entry *entry) entry->code = 0; } -#define jump_label_swap NULL +void jump_label_swap(void *a, void *b, int size); #else /* __ASSEMBLY__ */ @@ -114,8 +121,8 @@ static inline void jump_entry_set_module_init(struct jump_entry *entry) .byte STATIC_KEY_INIT_NOP .endif .pushsection __jump_table, "aw" - _ASM_ALIGN - _ASM_PTR .Lstatic_jump_\@, \target, \key + .balign 4 + .long .Lstatic_jump_\@ - ., \target - ., \key - . .popsection .endm @@ -130,8 +137,8 @@ static inline void jump_entry_set_module_init(struct jump_entry *entry) .Lstatic_jump_after_\@: .endif .pushsection __jump_table, "aw" - _ASM_ALIGN - _ASM_PTR .Lstatic_jump_\@, \target, \key + 1 + .balign 4 + .long .Lstatic_jump_\@ - ., \target - ., \key + 1 - . 
.popsection .endm diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index d64296092ef5..cc5034b42335 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -149,4 +149,20 @@ __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, __jump_label_transform(entry, type, text_poke_early, 1); } +void jump_label_swap(void *a, void *b, int size) +{ + long delta = (unsigned long)a - (unsigned long)b; + struct jump_entry *jea = a; + struct jump_entry *jeb = b; + struct jump_entry tmp = *jea; + + jea->code = jeb->code - delta; + jea->target = jeb->target - delta; + jea->key= jeb->key - delta; + + jeb->code = tmp.code + delta; + jeb->target = tmp.target + delta; + jeb->key= tmp.key + delta; +} + #endif diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 84f001d52322..98ae55b39037 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -30,9 +30,9 @@ #define EX_ORIG_OFFSET 0 #define EX_NEW_OFFSET 4
[PATCH v7 09/10] x86: jump_label: switch to jump_entry accessors
In preparation of switching x86 to use place-relative references for the code, target and key members of struct jump_entry, replace direct references to the struct member with invocations of the new accessors. This will allow us to make the switch by modifying the accessors only. Signed-off-by: Ard Biesheuvel --- arch/x86/kernel/jump_label.c | 43 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index e56c95be2808..d64296092ef5 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -52,22 +52,24 @@ static void __jump_label_transform(struct jump_entry *entry, * Jump label is enabled for the first time. * So we expect a default_nop... */ - if (unlikely(memcmp((void *)entry->code, default_nop, 5) -!= 0)) - bug_at((void *)entry->code, __LINE__); + if (unlikely(memcmp((void *)jump_entry_code(entry), + default_nop, 5) != 0)) + bug_at((void *)jump_entry_code(entry), + __LINE__); } else { /* * ...otherwise expect an ideal_nop. Otherwise * something went horribly wrong. */ - if (unlikely(memcmp((void *)entry->code, ideal_nop, 5) -!= 0)) - bug_at((void *)entry->code, __LINE__); + if (unlikely(memcmp((void *)jump_entry_code(entry), + ideal_nop, 5) != 0)) + bug_at((void *)jump_entry_code(entry), + __LINE__); } code.jump = 0xe9; - code.offset = entry->target - - (entry->code + JUMP_LABEL_NOP_SIZE); + code.offset = jump_entry_target(entry) - + (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); } else { /* * We are disabling this jump label. If it is not what @@ -76,14 +78,18 @@ static void __jump_label_transform(struct jump_entry *entry, * are converting the default nop to the ideal nop. 
*/ if (init) { - if (unlikely(memcmp((void *)entry->code, default_nop, 5) != 0)) - bug_at((void *)entry->code, __LINE__); + if (unlikely(memcmp((void *)jump_entry_code(entry), + default_nop, 5) != 0)) + bug_at((void *)jump_entry_code(entry), + __LINE__); } else { code.jump = 0xe9; - code.offset = entry->target - - (entry->code + JUMP_LABEL_NOP_SIZE); - if (unlikely(memcmp((void *)entry->code, &code, 5) != 0)) - bug_at((void *)entry->code, __LINE__); + code.offset = jump_entry_target(entry) - + (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); + if (unlikely(memcmp((void *)jump_entry_code(entry), + &code, 5) != 0)) + bug_at((void *)jump_entry_code(entry), + __LINE__); } memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE); } @@ -97,10 +103,13 @@ static void __jump_label_transform(struct jump_entry *entry, * */ if (poker) - (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); + (*poker)((void *)jump_entry_code(entry), &code, +JUMP_LABEL_NOP_SIZE); else - text_poke_bp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE, -(void *)entry->code + JUMP_LABEL_NOP_SIZE); + text_poke_bp((void *)jump_entry_code(entry), &code, +JUMP_LABEL_NOP_SIZE, +(void *)jump_entry_code(entry) + +JUMP_LABEL_NOP_SIZE); } void arch_jump_label_transform(struct jump_entry *entry, -- 2.11.0
[PATCH v7 10/10] x86/kernel: jump_table: use relative references
Similar to the arm64 case, 64-bit x86 can benefit from using 32-bit relative references rather than 64-bit absolute ones when emitting struct jump_entry instances. Not only does this reduce the memory footprint of the entries themselves by 50%, it also removes the need for carrying relocation metadata on relocatable builds (i.e., for KASLR) which saves a fair chunk of .init space as well (although the savings are not as dramatic as on arm64) Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/jump_label.h | 35 arch/x86/kernel/jump_label.c | 16 + tools/objtool/special.c | 4 +-- 3 files changed, 39 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 009ff2699d07..35fc2c5ec846 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -36,8 +36,8 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran asm_volatile_goto("1:" ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t" ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t" + ".balign 4\n\t" + ".long 1b - ., %l[l_yes] - ., %c0 + %c1 - .\n\t" ".popsection \n\t" : : "i" (key), "i" (branch) : : l_yes); @@ -52,8 +52,8 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" "2:\n\t" ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t" + ".balign 4\n\t" + ".long 1b - ., %l[l_yes] - ., %c0 + %c1 - .\n\t" ".popsection \n\t" : : "i" (key), "i" (branch) : : l_yes); @@ -69,19 +69,26 @@ typedef u32 jump_label_t; #endif struct jump_entry { - jump_label_t code; - jump_label_t target; - jump_label_t key; + s32 code; + s32 target; + s32 key; }; static inline jump_label_t jump_entry_code(const struct jump_entry *entry) { - return entry->code; + return (unsigned long)&entry->code + entry->code; +} + +static inline jump_label_t 
jump_entry_target(const struct jump_entry *entry) +{ + return (unsigned long)&entry->target + entry->target; } static inline struct static_key *jump_entry_key(const struct jump_entry *entry) { - return (struct static_key *)((unsigned long)entry->key & ~1UL); + unsigned long key = (unsigned long)&entry->key + entry->key; + + return (struct static_key *)(key & ~1UL); } static inline bool jump_entry_is_branch(const struct jump_entry *entry) @@ -99,7 +106,7 @@ static inline void jump_entry_set_module_init(struct jump_entry *entry) entry->code = 0; } -#define jump_label_swap NULL +void jump_label_swap(void *a, void *b, int size); #else /* __ASSEMBLY__ */ @@ -114,8 +121,8 @@ static inline void jump_entry_set_module_init(struct jump_entry *entry) .byte STATIC_KEY_INIT_NOP .endif .pushsection __jump_table, "aw" - _ASM_ALIGN - _ASM_PTR .Lstatic_jump_\@, \target, \key + .balign 4 + .long .Lstatic_jump_\@ - ., \target - ., \key - . .popsection .endm @@ -130,8 +137,8 @@ static inline void jump_entry_set_module_init(struct jump_entry *entry) .Lstatic_jump_after_\@: .endif .pushsection __jump_table, "aw" - _ASM_ALIGN - _ASM_PTR .Lstatic_jump_\@, \target, \key + 1 + .balign 4 + .long .Lstatic_jump_\@ - ., \target - ., \key + 1 - . 
.popsection .endm diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index d64296092ef5..cc5034b42335 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -149,4 +149,20 @@ __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, __jump_label_transform(entry, type, text_poke_early, 1); } +void jump_label_swap(void *a, void *b, int size) +{ + long delta = (unsigned long)a - (unsigned long)b; + struct jump_entry *jea = a; + struct jump_entry *jeb = b; + struct jump_entry tmp = *jea; + + jea->code = jeb->code - delta; + jea->target = jeb->target - delta; + jea->key= jeb->key - delta; + + jeb->code = tmp.code + delta; + jeb->target = tmp.target + delta; + jeb->key= tmp.key + delta; +} + #endif diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 84f001d52322..98ae55b39037 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -30,9 +30,9 @@ #define EX_ORIG_OFFSET 0 #define EX_NEW_OFFSET 4 -#define JUMP_ENTRY_SIZE
[PATCH v7 04/10] init: allow initcall tables to be emitted using relative references
Allow the initcall tables to be emitted using relative references that are only half the size on 64-bit architectures and don't require fixups at runtime on relocatable kernels. Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: James Morris Cc: "Serge E. Hallyn" Signed-off-by: Ard Biesheuvel --- include/linux/init.h | 44 +++- init/main.c| 32 +++--- kernel/printk/printk.c | 4 +- security/security.c| 4 +- 4 files changed, 53 insertions(+), 31 deletions(-) diff --git a/include/linux/init.h b/include/linux/init.h index ea1b31101d9e..cef8e817e5a5 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -109,8 +109,24 @@ typedef int (*initcall_t)(void); typedef void (*exitcall_t)(void); -extern initcall_t __con_initcall_start[], __con_initcall_end[]; -extern initcall_t __security_initcall_start[], __security_initcall_end[]; +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +typedef int initcall_entry_t; + +static inline initcall_t initcall_from_entry(initcall_entry_t *entry) +{ + return (initcall_t)((unsigned long)entry + *entry); +} +#else +typedef initcall_t initcall_entry_t; + +static inline initcall_t initcall_from_entry(initcall_entry_t *entry) +{ + return *entry; +} +#endif + +extern initcall_entry_t __con_initcall_start[], __con_initcall_end[]; +extern initcall_entry_t __security_initcall_start[], __security_initcall_end[]; /* Used for contructor calls. */ typedef void (*ctor_fn_t)(void); @@ -160,9 +176,20 @@ extern bool initcall_debug; * as KEEP() in the linker script. 
*/ -#define __define_initcall(fn, id) \ +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +#define ___define_initcall(fn, id, __sec) \ + __ADDRESSABLE(fn) \ + asm(".section \"" #__sec ".init\", \"a\" \n" \ + "__initcall_" #fn #id ":\n" \ + ".long "VMLINUX_SYMBOL_STR(fn) " - .\n" \ + ".previous \n"); +#else +#define ___define_initcall(fn, id, __sec) \ static initcall_t __initcall_##fn##id __used \ - __attribute__((__section__(".initcall" #id ".init"))) = fn; + __attribute__((__section__(#__sec ".init"))) = fn; +#endif + +#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id) /* * Early initcalls run before initializing SMP. @@ -201,13 +228,8 @@ extern bool initcall_debug; #define __exitcall(fn) \ static exitcall_t __exitcall_##fn __exit_call = fn -#define console_initcall(fn) \ - static initcall_t __initcall_##fn \ - __used __section(.con_initcall.init) = fn - -#define security_initcall(fn) \ - static initcall_t __initcall_##fn \ - __used __section(.security_initcall.init) = fn +#define console_initcall(fn) ___define_initcall(fn,, .con_initcall) +#define security_initcall(fn) ___define_initcall(fn,, .security_initcall) struct obs_kernel_param { const char *str; diff --git a/init/main.c b/init/main.c index a8100b954839..d81487cc126d 100644 --- a/init/main.c +++ b/init/main.c @@ -848,18 +848,18 @@ int __init_or_module do_one_initcall(initcall_t fn) } -extern initcall_t __initcall_start[]; -extern initcall_t __initcall0_start[]; -extern initcall_t __initcall1_start[]; -extern initcall_t __initcall2_start[]; -extern initcall_t __initcall3_start[]; -extern initcall_t __initcall4_start[]; -extern initcall_t __initcall5_start[]; -extern initcall_t __initcall6_start[]; -extern initcall_t __initcall7_start[]; -extern initcall_t __initcall_end[]; - -static initcall_t *initcall_levels[] __initdata = { +extern initcall_entry_t __initcall_start[]; +extern initcall_entry_t __initcall0_start[]; +extern initcall_entry_t __initcall1_start[]; +extern initcall_entry_t 
__initcall2_start[]; +extern initcall_entry_t __initcall3_start[]; +extern initcall_entry_t __initcall4_start[]; +extern initcall_entry_t __initcall5_start[]; +extern initcall_entry_t __initcall6_start[]; +extern initcall_entry_t __initcall7_start[]; +extern initcall_entry_t __initcall_end[]; + +static initcall_entry_t *initcall_levels[] __initdata = { __initcall0_start, __initcall1_start, __initcall2_start, @@ -885,7 +885,7 @@ static char *initcall_level_names[] __initdata = { static void __init do_initcall_level(int level) { - initcall_t *fn; + initcall_entry_t *fn; strcpy(initcall_command_line, saved_command_line); parse_args(initcall_level_names[level], @@ -895,7 +895,7 @@ static void __init do_initcall_level(int level) NULL, &repair_env_string); for (fn
[PATCH v7 04/10] init: allow initcall tables to be emitted using relative references
Allow the initcall tables to be emitted using relative references that are only half the size on 64-bit architectures and don't require fixups at runtime on relocatable kernels. Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Steven Rostedt Cc: James Morris Cc: "Serge E. Hallyn" Signed-off-by: Ard Biesheuvel --- include/linux/init.h | 44 +++- init/main.c| 32 +++--- kernel/printk/printk.c | 4 +- security/security.c| 4 +- 4 files changed, 53 insertions(+), 31 deletions(-) diff --git a/include/linux/init.h b/include/linux/init.h index ea1b31101d9e..cef8e817e5a5 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -109,8 +109,24 @@ typedef int (*initcall_t)(void); typedef void (*exitcall_t)(void); -extern initcall_t __con_initcall_start[], __con_initcall_end[]; -extern initcall_t __security_initcall_start[], __security_initcall_end[]; +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +typedef int initcall_entry_t; + +static inline initcall_t initcall_from_entry(initcall_entry_t *entry) +{ + return (initcall_t)((unsigned long)entry + *entry); +} +#else +typedef initcall_t initcall_entry_t; + +static inline initcall_t initcall_from_entry(initcall_entry_t *entry) +{ + return *entry; +} +#endif + +extern initcall_entry_t __con_initcall_start[], __con_initcall_end[]; +extern initcall_entry_t __security_initcall_start[], __security_initcall_end[]; /* Used for contructor calls. */ typedef void (*ctor_fn_t)(void); @@ -160,9 +176,20 @@ extern bool initcall_debug; * as KEEP() in the linker script. 
*/ -#define __define_initcall(fn, id) \ +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +#define ___define_initcall(fn, id, __sec) \ + __ADDRESSABLE(fn) \ + asm(".section \"" #__sec ".init\", \"a\" \n" \ + "__initcall_" #fn #id ":\n" \ + ".long "VMLINUX_SYMBOL_STR(fn) " - .\n" \ + ".previous \n"); +#else +#define ___define_initcall(fn, id, __sec) \ static initcall_t __initcall_##fn##id __used \ - __attribute__((__section__(".initcall" #id ".init"))) = fn; + __attribute__((__section__(#__sec ".init"))) = fn; +#endif + +#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id) /* * Early initcalls run before initializing SMP. @@ -201,13 +228,8 @@ extern bool initcall_debug; #define __exitcall(fn) \ static exitcall_t __exitcall_##fn __exit_call = fn -#define console_initcall(fn) \ - static initcall_t __initcall_##fn \ - __used __section(.con_initcall.init) = fn - -#define security_initcall(fn) \ - static initcall_t __initcall_##fn \ - __used __section(.security_initcall.init) = fn +#define console_initcall(fn) ___define_initcall(fn,, .con_initcall) +#define security_initcall(fn) ___define_initcall(fn,, .security_initcall) struct obs_kernel_param { const char *str; diff --git a/init/main.c b/init/main.c index a8100b954839..d81487cc126d 100644 --- a/init/main.c +++ b/init/main.c @@ -848,18 +848,18 @@ int __init_or_module do_one_initcall(initcall_t fn) } -extern initcall_t __initcall_start[]; -extern initcall_t __initcall0_start[]; -extern initcall_t __initcall1_start[]; -extern initcall_t __initcall2_start[]; -extern initcall_t __initcall3_start[]; -extern initcall_t __initcall4_start[]; -extern initcall_t __initcall5_start[]; -extern initcall_t __initcall6_start[]; -extern initcall_t __initcall7_start[]; -extern initcall_t __initcall_end[]; - -static initcall_t *initcall_levels[] __initdata = { +extern initcall_entry_t __initcall_start[]; +extern initcall_entry_t __initcall0_start[]; +extern initcall_entry_t __initcall1_start[]; +extern initcall_entry_t 
__initcall2_start[]; +extern initcall_entry_t __initcall3_start[]; +extern initcall_entry_t __initcall4_start[]; +extern initcall_entry_t __initcall5_start[]; +extern initcall_entry_t __initcall6_start[]; +extern initcall_entry_t __initcall7_start[]; +extern initcall_entry_t __initcall_end[]; + +static initcall_entry_t *initcall_levels[] __initdata = { __initcall0_start, __initcall1_start, __initcall2_start, @@ -885,7 +885,7 @@ static char *initcall_level_names[] __initdata = { static void __init do_initcall_level(int level) { - initcall_t *fn; + initcall_entry_t *fn; strcpy(initcall_command_line, saved_command_line); parse_args(initcall_level_names[level], @@ -895,7 +895,7 @@ static void __init do_initcall_level(int level) NULL, &repair_env_string); for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++) - do_one_initcall(*fn); +
[PATCH v7 07/10] kernel/jump_label: abstract jump_entry member accessors
In preparation of allowing architectures to use relative references in jump_label entries [which can dramatically reduce the memory footprint], introduce abstractions for references to the 'code' and 'key' members of struct jump_entry. Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/jump_label.h | 27 ++ arch/arm64/include/asm/jump_label.h | 27 ++ arch/mips/include/asm/jump_label.h| 27 ++ arch/powerpc/include/asm/jump_label.h | 27 ++ arch/s390/include/asm/jump_label.h| 27 ++ arch/sparc/include/asm/jump_label.h | 27 ++ arch/tile/include/asm/jump_label.h| 27 ++ arch/x86/include/asm/jump_label.h | 27 ++ kernel/jump_label.c | 38 +--- 9 files changed, 232 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index e12d7d096fc0..7b05b404063a 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -45,5 +45,32 @@ struct jump_entry { jump_label_t key; }; +static inline jump_label_t jump_entry_code(const struct jump_entry *entry) +{ + return entry->code; +} + +static inline struct static_key *jump_entry_key(const struct jump_entry *entry) +{ + return (struct static_key *)((unsigned long)entry->key & ~1UL); +} + +static inline bool jump_entry_is_branch(const struct jump_entry *entry) +{ + return (unsigned long)entry->key & 1UL; +} + +static inline bool jump_entry_is_module_init(const struct jump_entry *entry) +{ + return entry->code == 0; +} + +static inline void jump_entry_set_module_init(struct jump_entry *entry) +{ + entry->code = 0; +} + +#define jump_label_swap NULL + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 1b5e0e843c3a..9d6e46355c89 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -62,5 +62,32 @@ struct jump_entry { jump_label_t key; }; +static inline jump_label_t jump_entry_code(const struct jump_entry *entry) +{ + return entry->code; +} + 
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry) +{ + return (struct static_key *)((unsigned long)entry->key & ~1UL); +} + +static inline bool jump_entry_is_branch(const struct jump_entry *entry) +{ + return (unsigned long)entry->key & 1UL; +} + +static inline bool jump_entry_is_module_init(const struct jump_entry *entry) +{ + return entry->code == 0; +} + +static inline void jump_entry_set_module_init(struct jump_entry *entry) +{ + entry->code = 0; +} + +#define jump_label_swap NULL + #endif /* __ASSEMBLY__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index e77672539e8e..70df9293dc49 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -66,5 +66,32 @@ struct jump_entry { jump_label_t key; }; +static inline jump_label_t jump_entry_code(const struct jump_entry *entry) +{ + return entry->code; +} + +static inline struct static_key *jump_entry_key(const struct jump_entry *entry) +{ + return (struct static_key *)((unsigned long)entry->key & ~1UL); +} + +static inline bool jump_entry_is_branch(const struct jump_entry *entry) +{ + return (unsigned long)entry->key & 1UL; +} + +static inline bool jump_entry_is_module_init(const struct jump_entry *entry) +{ + return entry->code == 0; +} + +static inline void jump_entry_set_module_init(struct jump_entry *entry) +{ + entry->code = 0; +} + +#define jump_label_swap NULL + #endif /* __ASSEMBLY__ */ #endif /* _ASM_MIPS_JUMP_LABEL_H */ diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 9a287e0ac8b1..412b2699c9f6 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -59,6 +59,33 @@ struct jump_entry { jump_label_t key; }; +static inline jump_label_t jump_entry_code(const struct jump_entry *entry) +{ + return entry->code; +} + +static inline struct static_key *jump_entry_key(const struct jump_entry 
*entry) +{ + return (struct static_key *)((unsigned long)entry->key & ~1UL); +} + +static inline bool jump_entry_is_branch(const struct jump_entry *entry) +{ + return (unsigned long)entry->key & 1UL; +} + +static inline bool jump_entry_is_module_init(const struct jump_entry *entry) +{ + return entry->code == 0; +} + +static inline void jump_entry_set_module_init(struct jump_entry *entry) +{ + entry->code = 0; +} + +#define jump_label_swap NULL + #else #define ARCH_STATIC_BRANCH(LABEL, KEY) \ 1098: nop;\ diff --git
[PATCH v7 06/10] kernel: tracepoints: add support for relative references
To avoid the need for relocating absolute references to tracepoint structures at boot time when running relocatable kernels (which may take a disproportionate amount of space), add the option to emit these tables as relative references instead. Cc: Ingo Molnar Acked-by: Steven Rostedt (VMware) Signed-off-by: Ard Biesheuvel --- include/linux/tracepoint.h | 19 ++-- kernel/tracepoint.c| 50 +++- 2 files changed, 42 insertions(+), 27 deletions(-) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a26ffbe09e71..d02bf1a695e8 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -228,6 +228,19 @@ extern void syscall_unregfunc(void); return static_key_false(&__tracepoint_##name.key); \ } +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +#define __TRACEPOINT_ENTRY(name)\ + asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ + " .balign 4\n" \ + " .long " VMLINUX_SYMBOL_STR(__tracepoint_##name) " - .\n" \ + " .previous\n") +#else +#define __TRACEPOINT_ENTRY(name)\ + static struct tracepoint * const __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ + &__tracepoint_##name +#endif + /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration @@ -237,11 +250,9 @@ extern void syscall_unregfunc(void); static const char __tpstrtab_##name[]\ __attribute__((section("__tracepoints_strings"))) = #name; \ struct tracepoint __tracepoint_##name\ - __attribute__((section("__tracepoints"))) = \ + __attribute__((section("__tracepoints"), used)) =\ { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\ - static struct tracepoint * const __tracepoint_ptr_##name __used \ - __attribute__((section("__tracepoints_ptrs"))) = \ - &__tracepoint_##name; + __TRACEPOINT_ENTRY(name); #define DEFINE_TRACE(name) \ DEFINE_TRACE_FN(name, NULL, NULL); diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 
685c50ae6300..05649fef106c 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -327,6 +327,28 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); +static void for_each_tracepoint_range(struct tracepoint * const *begin, + struct tracepoint * const *end, + void (*fct)(struct tracepoint *tp, void *priv), + void *priv) +{ + if (!begin) + return; + + if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) { + const int *iter; + + for (iter = (const int *)begin; iter < (const int *)end; iter++) + fct((struct tracepoint *)((unsigned long)iter + *iter), + priv); + } else { + struct tracepoint * const *iter; + + for (iter = begin; iter < end; iter++) + fct(*iter, priv); + } +} + #ifdef CONFIG_MODULES bool trace_module_has_bad_taint(struct module *mod) { @@ -391,15 +413,9 @@ EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier); * Ensure the tracer unregistered the module's probes before the module * teardown is performed. Prevents leaks of probe and data pointers. */ -static void tp_module_going_check_quiescent(struct tracepoint * const *begin, - struct tracepoint * const *end) +static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv) { - struct tracepoint * const *iter; - - if (!begin) - return; - for (iter = begin; iter < end; iter++) - WARN_ON_ONCE((*iter)->funcs); + WARN_ON_ONCE(tp->funcs); } static int tracepoint_module_coming(struct module *mod) @@ -450,8 +466,9 @@ static void tracepoint_module_going(struct module *mod) * Called the going notifier before checking for * quiescence. */ - tp_module_going_check_quiescent(mod->tracepoints_ptrs, - mod->tracepoints_ptrs + mod->num_tracepoints); + for_each_tracepoint_range(mod->tracepoints_ptrs, + mod->tracepoints_ptrs + mod->num_tracepoints, +
[PATCH v7 07/10] kernel/jump_label: abstract jump_entry member accessors
In preparation of allowing architectures to use relative references in jump_label entries [which can dramatically reduce the memory footprint], introduce abstractions for references to the 'code' and 'key' members of struct jump_entry. Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/jump_label.h | 27 ++ arch/arm64/include/asm/jump_label.h | 27 ++ arch/mips/include/asm/jump_label.h| 27 ++ arch/powerpc/include/asm/jump_label.h | 27 ++ arch/s390/include/asm/jump_label.h| 27 ++ arch/sparc/include/asm/jump_label.h | 27 ++ arch/tile/include/asm/jump_label.h| 27 ++ arch/x86/include/asm/jump_label.h | 27 ++ kernel/jump_label.c | 38 +--- 9 files changed, 232 insertions(+), 22 deletions(-) diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h index e12d7d096fc0..7b05b404063a 100644 --- a/arch/arm/include/asm/jump_label.h +++ b/arch/arm/include/asm/jump_label.h @@ -45,5 +45,32 @@ struct jump_entry { jump_label_t key; }; +static inline jump_label_t jump_entry_code(const struct jump_entry *entry) +{ + return entry->code; +} + +static inline struct static_key *jump_entry_key(const struct jump_entry *entry) +{ + return (struct static_key *)((unsigned long)entry->key & ~1UL); +} + +static inline bool jump_entry_is_branch(const struct jump_entry *entry) +{ + return (unsigned long)entry->key & 1UL; +} + +static inline bool jump_entry_is_module_init(const struct jump_entry *entry) +{ + return entry->code == 0; +} + +static inline void jump_entry_set_module_init(struct jump_entry *entry) +{ + entry->code = 0; +} + +#define jump_label_swapNULL + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 1b5e0e843c3a..9d6e46355c89 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -62,5 +62,32 @@ struct jump_entry { jump_label_t key; }; +static inline jump_label_t jump_entry_code(const struct jump_entry *entry) +{ + return entry->code; +} + 
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry) +{ + return (struct static_key *)((unsigned long)entry->key & ~1UL); +} + +static inline bool jump_entry_is_branch(const struct jump_entry *entry) +{ + return (unsigned long)entry->key & 1UL; +} + +static inline bool jump_entry_is_module_init(const struct jump_entry *entry) +{ + return entry->code == 0; +} + +static inline void jump_entry_set_module_init(struct jump_entry *entry) +{ + entry->code = 0; +} + +#define jump_label_swapNULL + #endif /* __ASSEMBLY__ */ #endif /* __ASM_JUMP_LABEL_H */ diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index e77672539e8e..70df9293dc49 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -66,5 +66,32 @@ struct jump_entry { jump_label_t key; }; +static inline jump_label_t jump_entry_code(const struct jump_entry *entry) +{ + return entry->code; +} + +static inline struct static_key *jump_entry_key(const struct jump_entry *entry) +{ + return (struct static_key *)((unsigned long)entry->key & ~1UL); +} + +static inline bool jump_entry_is_branch(const struct jump_entry *entry) +{ + return (unsigned long)entry->key & 1UL; +} + +static inline bool jump_entry_is_module_init(const struct jump_entry *entry) +{ + return entry->code == 0; +} + +static inline void jump_entry_set_module_init(struct jump_entry *entry) +{ + entry->code = 0; +} + +#define jump_label_swapNULL + #endif /* __ASSEMBLY__ */ #endif /* _ASM_MIPS_JUMP_LABEL_H */ diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h index 9a287e0ac8b1..412b2699c9f6 100644 --- a/arch/powerpc/include/asm/jump_label.h +++ b/arch/powerpc/include/asm/jump_label.h @@ -59,6 +59,33 @@ struct jump_entry { jump_label_t key; }; +static inline jump_label_t jump_entry_code(const struct jump_entry *entry) +{ + return entry->code; +} + +static inline struct static_key *jump_entry_key(const struct jump_entry 
*entry) +{ + return (struct static_key *)((unsigned long)entry->key & ~1UL); +} + +static inline bool jump_entry_is_branch(const struct jump_entry *entry) +{ + return (unsigned long)entry->key & 1UL; +} + +static inline bool jump_entry_is_module_init(const struct jump_entry *entry) +{ + return entry->code == 0; +} + +static inline void jump_entry_set_module_init(struct jump_entry *entry) +{ + entry->code = 0; +} + +#define jump_label_swapNULL + #else #define ARCH_STATIC_BRANCH(LABEL, KEY) \ 1098: nop;\ diff --git
[PATCH v7 06/10] kernel: tracepoints: add support for relative references
To avoid the need for relocating absolute references to tracepoint structures at boot time when running relocatable kernels (which may take a disproportionate amount of space), add the option to emit these tables as relative references instead. Cc: Ingo Molnar Acked-by: Steven Rostedt (VMware) Signed-off-by: Ard Biesheuvel --- include/linux/tracepoint.h | 19 ++-- kernel/tracepoint.c| 50 +++- 2 files changed, 42 insertions(+), 27 deletions(-) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a26ffbe09e71..d02bf1a695e8 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -228,6 +228,19 @@ extern void syscall_unregfunc(void); return static_key_false(&__tracepoint_##name.key); \ } +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +#define __TRACEPOINT_ENTRY(name)\ + asm(" .section \"__tracepoints_ptrs\", \"a\" \n" \ + " .balign 4\n" \ + " .long " VMLINUX_SYMBOL_STR(__tracepoint_##name) " - .\n" \ + " .previous\n") +#else +#define __TRACEPOINT_ENTRY(name)\ + static struct tracepoint * const __tracepoint_ptr_##name __used \ + __attribute__((section("__tracepoints_ptrs"))) = \ + &__tracepoint_##name +#endif + /* * We have no guarantee that gcc and the linker won't up-align the tracepoint * structures, so we create an array of pointers that will be used for iteration @@ -237,11 +250,9 @@ extern void syscall_unregfunc(void); static const char __tpstrtab_##name[]\ __attribute__((section("__tracepoints_strings"))) = #name; \ struct tracepoint __tracepoint_##name\ - __attribute__((section("__tracepoints"))) = \ + __attribute__((section("__tracepoints"), used)) =\ { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\ - static struct tracepoint * const __tracepoint_ptr_##name __used \ - __attribute__((section("__tracepoints_ptrs"))) = \ - &__tracepoint_##name; + __TRACEPOINT_ENTRY(name); #define DEFINE_TRACE(name) \ DEFINE_TRACE_FN(name, NULL, NULL); diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 
685c50ae6300..05649fef106c 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -327,6 +327,28 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data) } EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); +static void for_each_tracepoint_range(struct tracepoint * const *begin, + struct tracepoint * const *end, + void (*fct)(struct tracepoint *tp, void *priv), + void *priv) +{ + if (!begin) + return; + + if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) { + const int *iter; + + for (iter = (const int *)begin; iter < (const int *)end; iter++) + fct((struct tracepoint *)((unsigned long)iter + *iter), + priv); + } else { + struct tracepoint * const *iter; + + for (iter = begin; iter < end; iter++) + fct(*iter, priv); + } +} + #ifdef CONFIG_MODULES bool trace_module_has_bad_taint(struct module *mod) { @@ -391,15 +413,9 @@ EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier); * Ensure the tracer unregistered the module's probes before the module * teardown is performed. Prevents leaks of probe and data pointers. */ -static void tp_module_going_check_quiescent(struct tracepoint * const *begin, - struct tracepoint * const *end) +static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv) { - struct tracepoint * const *iter; - - if (!begin) - return; - for (iter = begin; iter < end; iter++) - WARN_ON_ONCE((*iter)->funcs); + WARN_ON_ONCE(tp->funcs); } static int tracepoint_module_coming(struct module *mod) @@ -450,8 +466,9 @@ static void tracepoint_module_going(struct module *mod) * Called the going notifier before checking for * quiescence. */ - tp_module_going_check_quiescent(mod->tracepoints_ptrs, - mod->tracepoints_ptrs + mod->num_tracepoints); + for_each_tracepoint_range(mod->tracepoints_ptrs, + mod->tracepoints_ptrs + mod->num_tracepoints, + tp_module_going_check_quiescent, NULL); break; }
[PATCH v7 02/10] module: allow symbol exports to be disabled
To allow existing C code to be incorporated into the decompressor or the UEFI stub, introduce a CPP macro that turns all EXPORT_SYMBOL_xxx declarations into nops, and #define it in places where such exports are undesirable. Note that this gets rid of a rather dodgy redefine of linux/export.h's header guard. Cc: m...@codeblueprint.co.uk Cc: keesc...@chromium.org Cc: j...@kernel.org Signed-off-by: Ard Biesheuvel --- arch/x86/boot/compressed/kaslr.c | 5 + drivers/firmware/efi/libstub/Makefile | 3 ++- include/linux/export.h| 9 + 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 8199a6187251..3a2a6d7049e4 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -23,11 +23,8 @@ * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h. * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL * which is meaningless and will cause compiling error in some cases. - * So do not include linux/export.h and define EXPORT_SYMBOL(sym) - * as empty. 
*/ -#define _LINUX_EXPORT_H -#define EXPORT_SYMBOL(sym) +#define __DISABLE_EXPORTS #include "misc.h" #include "error.h" diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index adaa4a964f0c..312bd0b64a61 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -20,7 +20,8 @@ cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ -D__NO_FORTIFY \ $(call cc-option,-ffreestanding) \ - $(call cc-option,-fno-stack-protector) + $(call cc-option,-fno-stack-protector) \ + -D__DISABLE_EXPORTS GCOV_PROFILE := n KASAN_SANITIZE := n diff --git a/include/linux/export.h b/include/linux/export.h index 1a1dfdb2a5c6..6dba2fb08f77 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -83,6 +83,15 @@ extern struct module __this_module; */ #define __EXPORT_SYMBOL(sym, sec) === __KSYM_##sym === +#elif defined(__DISABLE_EXPORTS) + +/* + * Allow symbol exports to be disabled completely so that C code may + * be reused in other execution contexts such as the UEFI stub or the + * decompressor. + */ +#define __EXPORT_SYMBOL(sym, sec) + #elif defined(CONFIG_TRIM_UNUSED_KSYMS) #include -- 2.11.0
[PATCH v7 02/10] module: allow symbol exports to be disabled
To allow existing C code to be incorporated into the decompressor or the UEFI stub, introduce a CPP macro that turns all EXPORT_SYMBOL_xxx declarations into nops, and #define it in places where such exports are undesirable. Note that this gets rid of a rather dodgy redefine of linux/export.h's header guard. Cc: m...@codeblueprint.co.uk Cc: keesc...@chromium.org Cc: j...@kernel.org Signed-off-by: Ard Biesheuvel --- arch/x86/boot/compressed/kaslr.c | 5 + drivers/firmware/efi/libstub/Makefile | 3 ++- include/linux/export.h| 9 + 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 8199a6187251..3a2a6d7049e4 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -23,11 +23,8 @@ * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h. * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL * which is meaningless and will cause compiling error in some cases. - * So do not include linux/export.h and define EXPORT_SYMBOL(sym) - * as empty. 
*/ -#define _LINUX_EXPORT_H -#define EXPORT_SYMBOL(sym) +#define __DISABLE_EXPORTS #include "misc.h" #include "error.h" diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index adaa4a964f0c..312bd0b64a61 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -20,7 +20,8 @@ cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \ -D__NO_FORTIFY \ $(call cc-option,-ffreestanding) \ - $(call cc-option,-fno-stack-protector) + $(call cc-option,-fno-stack-protector) \ + -D__DISABLE_EXPORTS GCOV_PROFILE := n KASAN_SANITIZE := n diff --git a/include/linux/export.h b/include/linux/export.h index 1a1dfdb2a5c6..6dba2fb08f77 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -83,6 +83,15 @@ extern struct module __this_module; */ #define __EXPORT_SYMBOL(sym, sec) === __KSYM_##sym === +#elif defined(__DISABLE_EXPORTS) + +/* + * Allow symbol exports to be disabled completely so that C code may + * be reused in other execution contexts such as the UEFI stub or the + * decompressor. + */ +#define __EXPORT_SYMBOL(sym, sec) + #elif defined(CONFIG_TRIM_UNUSED_KSYMS) #include -- 2.11.0
[PATCH v7 03/10] module: use relative references for __ksymtab entries
An ordinary arm64 defconfig build has ~64 KB worth of __ksymtab entries, each consisting of two 64-bit fields containing absolute references, to the symbol itself and to a char array containing its name, respectively. When we build the same configuration with KASLR enabled, we end up with an additional ~192 KB of relocations in the .init section, i.e., one 24 byte entry for each absolute reference, which all need to be processed at boot time. Given how the struct kernel_symbol that describes each entry is completely local to module.c (except for the references emitted by EXPORT_SYMBOL() itself), we can easily modify it to contain two 32-bit relative references instead. This reduces the size of the __ksymtab section by 50% for all 64-bit architectures, and gets rid of the runtime relocations entirely for architectures implementing KASLR, either via standard PIE linking (arm64) or using custom host tools (x86). Note that the binary search involving __ksymtab contents relies on each section being sorted by symbol name. This is implemented based on the input section names, not the names in the ksymtab entries, so this patch does not interfere with that. Given that the use of place-relative relocations requires support both in the toolchain and in the module loader, we cannot enable this feature for all architectures. So make it dependent on whether CONFIG_HAVE_ARCH_PREL32_RELOCATIONS is defined. 
Cc: Arnd Bergmann Cc: Andrew Morton Cc: Ingo Molnar Cc: Kees Cook Cc: Thomas Garnier Cc: Nicolas Pitre Acked-by: Jessica Yu Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/export.h | 5 --- include/asm-generic/export.h | 12 - include/linux/compiler.h | 10 + include/linux/export.h| 46 +++- kernel/module.c | 33 +++--- 6 files changed, 83 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 5d6a53fd7521..3e8a88dcaa1d 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -9,5 +9,6 @@ generated-y += xen-hypercalls.h generic-y += clkdev.h generic-y += dma-contiguous.h generic-y += early_ioremap.h +generic-y += export.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h deleted file mode 100644 index 2a51d66689c5.. --- a/arch/x86/include/asm/export.h +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_64BIT -#define KSYM_ALIGN 16 -#endif -#include diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h index 719db1968d81..97ce606459ae 100644 --- a/include/asm-generic/export.h +++ b/include/asm-generic/export.h @@ -5,12 +5,10 @@ #define KSYM_FUNC(x) x #endif #ifdef CONFIG_64BIT -#define __put .quad #ifndef KSYM_ALIGN #define KSYM_ALIGN 8 #endif #else -#define __put .long #ifndef KSYM_ALIGN #define KSYM_ALIGN 4 #endif @@ -25,6 +23,16 @@ #define KSYM(name) name #endif +.macro __put, val, name +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS + .long \val - ., \name - . +#elif defined(CONFIG_64BIT) + .quad \val, \name +#else + .long \val, \name +#endif +.endm + /* * note on .section use: @progbits vs %progbits nastiness doesn't matter, * since we immediately emit into those sections anyway. 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 52e611ab9a6c..79db4aa87d75 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -327,4 +327,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s compiletime_assert(__native_word(t),\ "Need native word sized stores/loads for atomicity.") +/* + * Force the compiler to emit 'sym' as a symbol, so that we can reference + * it from inline assembler. Necessary in case 'sym' could be inlined + * otherwise, or eliminated entirely due to lack of references that are + * visible to the compiler. + */ +#define __ADDRESSABLE(sym) \ + static void * const __attribute__((section(".discard"), used)) \ + __PASTE(__addressable_##sym, __LINE__) = (void *) + #endif /* __LINUX_COMPILER_H */ diff --git a/include/linux/export.h b/include/linux/export.h index 6dba2fb08f77..4744cf4736b0 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -24,12 +24,6 @@ #define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x) #ifndef __ASSEMBLY__ -struct kernel_symbol -{ - unsigned long value; - const char *name; -}; - #ifdef MODULE extern struct module __this_module; #define THIS_MODULE (&__this_module) @@ -60,17 +54,47 @@ extern struct module __this_module; #define __CRC_SYMBOL(sym, sec) #endif +#ifdef
[PATCH v7 03/10] module: use relative references for __ksymtab entries
An ordinary arm64 defconfig build has ~64 KB worth of __ksymtab entries, each consisting of two 64-bit fields containing absolute references, to the symbol itself and to a char array containing its name, respectively. When we build the same configuration with KASLR enabled, we end up with an additional ~192 KB of relocations in the .init section, i.e., one 24 byte entry for each absolute reference, which all need to be processed at boot time. Given how the struct kernel_symbol that describes each entry is completely local to module.c (except for the references emitted by EXPORT_SYMBOL() itself), we can easily modify it to contain two 32-bit relative references instead. This reduces the size of the __ksymtab section by 50% for all 64-bit architectures, and gets rid of the runtime relocations entirely for architectures implementing KASLR, either via standard PIE linking (arm64) or using custom host tools (x86). Note that the binary search involving __ksymtab contents relies on each section being sorted by symbol name. This is implemented based on the input section names, not the names in the ksymtab entries, so this patch does not interfere with that. Given that the use of place-relative relocations requires support both in the toolchain and in the module loader, we cannot enable this feature for all architectures. So make it dependent on whether CONFIG_HAVE_ARCH_PREL32_RELOCATIONS is defined. 
Cc: Arnd Bergmann Cc: Andrew Morton Cc: Ingo Molnar Cc: Kees Cook Cc: Thomas Garnier Cc: Nicolas Pitre Acked-by: Jessica Yu Signed-off-by: Ard Biesheuvel --- arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/export.h | 5 --- include/asm-generic/export.h | 12 - include/linux/compiler.h | 10 + include/linux/export.h| 46 +++- kernel/module.c | 33 +++--- 6 files changed, 83 insertions(+), 24 deletions(-) diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 5d6a53fd7521..3e8a88dcaa1d 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -9,5 +9,6 @@ generated-y += xen-hypercalls.h generic-y += clkdev.h generic-y += dma-contiguous.h generic-y += early_ioremap.h +generic-y += export.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h deleted file mode 100644 index 2a51d66689c5.. --- a/arch/x86/include/asm/export.h +++ /dev/null @@ -1,5 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_64BIT -#define KSYM_ALIGN 16 -#endif -#include diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h index 719db1968d81..97ce606459ae 100644 --- a/include/asm-generic/export.h +++ b/include/asm-generic/export.h @@ -5,12 +5,10 @@ #define KSYM_FUNC(x) x #endif #ifdef CONFIG_64BIT -#define __put .quad #ifndef KSYM_ALIGN #define KSYM_ALIGN 8 #endif #else -#define __put .long #ifndef KSYM_ALIGN #define KSYM_ALIGN 4 #endif @@ -25,6 +23,16 @@ #define KSYM(name) name #endif +.macro __put, val, name +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS + .long \val - ., \name - . +#elif defined(CONFIG_64BIT) + .quad \val, \name +#else + .long \val, \name +#endif +.endm + /* * note on .section use: @progbits vs %progbits nastiness doesn't matter, * since we immediately emit into those sections anyway. 
diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 52e611ab9a6c..79db4aa87d75 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -327,4 +327,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s compiletime_assert(__native_word(t),\ "Need native word sized stores/loads for atomicity.") +/* + * Force the compiler to emit 'sym' as a symbol, so that we can reference + * it from inline assembler. Necessary in case 'sym' could be inlined + * otherwise, or eliminated entirely due to lack of references that are + * visible to the compiler. + */ +#define __ADDRESSABLE(sym) \ + static void * const __attribute__((section(".discard"), used)) \ + __PASTE(__addressable_##sym, __LINE__) = (void *) + #endif /* __LINUX_COMPILER_H */ diff --git a/include/linux/export.h b/include/linux/export.h index 6dba2fb08f77..4744cf4736b0 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -24,12 +24,6 @@ #define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x) #ifndef __ASSEMBLY__ -struct kernel_symbol -{ - unsigned long value; - const char *name; -}; - #ifdef MODULE extern struct module __this_module; #define THIS_MODULE (&__this_module) @@ -60,17 +54,47 @@ extern struct module __this_module; #define __CRC_SYMBOL(sym, sec) #endif +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS +#include +/* + * Emit the ksymtab entry as a pair of relative references: this reduces + * the size by half on 64-bit
[PATCH v7 00/10] add support for relative references in special sections
This adds support for emitting special sections such as initcall arrays, PCI fixups and tracepoints as relative references rather than absolute references. This reduces the size by 50% on 64-bit architectures, but more importantly, it removes the need for carrying relocation metadata for these sections in relocatable kernels (e.g., for KASLR) that need to fix up these absolute references at boot time. On arm64, this reduces the vmlinux footprint of such a reference by 8x (8 byte absolute reference + 24 byte RELA entry vs 4 byte relative reference) Patch #3 was sent out before as a single patch. This series supersedes the previous submission. This version makes relative ksymtab entries dependent on the new Kconfig symbol HAVE_ARCH_PREL32_RELOCATIONS rather than trying to infer from kbuild test robot replies for which architectures it should be blacklisted. Patch #1 introduces the new Kconfig symbol HAVE_ARCH_PREL32_RELOCATIONS, and sets it for the main architectures that are expected to benefit the most from this feature, i.e., 64-bit architectures or ones that use runtime relocations. Patches #4 - #6 implement relative references for initcalls, PCI fixups and tracepoints, respectively, all of which produce sections with order ~1000 entries on an arm64 defconfig kernel with tracing enabled. This means we save about 28 KB of vmlinux space for each of these patches. Patches #7 - #10 have been added in v5, and implement relative references in jump tables for arm64 and x86. On arm64, this results in significant space savings (650+ KB on a typical distro kernel). On x86, the savings are not as impressive, but still worthwhile. 
(Note that these patches do not rely on CONFIG_HAVE_ARCH_PREL32_RELOCATIONS, given that the inline asm that is emitted is already per-arch) For the arm64 kernel, all patches combined reduce the memory footprint of vmlinux by about 1.3 MB (using a config copied from Ubuntu that has KASLR enabled), of which ~1 MB is the size reduction of the RELA section in .init, and the remaining 300 KB is reduction of .text/.data. Branch: git://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git relative-special-sections-v7 Changes since v6: - drop S390 from patch #1 introducing HAVE_ARCH_PREL32_RELOCATIONS: kbuild robot threw me some s390 curveballs, and given that s390 does not define CONFIG_RELOCATABLE in the first place, it does not benefit as much from relative references as arm64, x86 and power do - add patch to allow symbol exports to be disabled at compilation unit granularity (#2) - get rid of arm64 vmlinux.lds.S hunk to ensure code generated by __ADDRESSABLE gets discarded from the EFI stub - it is no longer needed after adding #2 (#1) - change _ADDRESSABLE() to emit a data reference, not a code reference - this is another simplification made possible by patch #2 (#3) - add Steven's ack to #6 - split x86 jump_label patch into two (#9, #10) Changes since v5: - add missing jump_label prototypes to s390 jump_label.h (#6) - fix inverted condition in call to jump_entry_is_module_init() (#6) Changes since v4: - add patches to convert x86 and arm64 to use relative references for jump tables (#6 - #8) - rename PCI patch and add Bjorn's ack (#4) - rebase onto v4.15-rc5 Changes since v3: - fix module unload issue in patch #5 reported by Jessica, by reusing the updated routine for_each_tracepoint_range() for the quiescent check at module unload time; this requires this routine to be moved before tracepoint_module_going() in kernel/tracepoint.c - add Jessica's ack to #2 - rebase onto v4.14-rc1 Changes since v2: - Revert my slightly misguided attempt to appease checkpatch, which 
resulted in needless churn and worse code. This v3 is based on v1 with a few tweaks that were actually reasonable checkpatch warnings: unnecessary braces (as pointed out by Ingo) and other minor whitespace misdemeanors. Changes since v1: - Remove checkpatch errors to the extent feasible: in some cases, this involves moving extern declarations into C files, and switching to struct definitions rather than typedefs. Some errors are impossible to fix: please find the remaining ones after the diffstat. - Used 'int' instead of 'signed int' for the various offset fields: there is no ambiguity between architectures regarding its signedness (unlike 'char') - Refactor the different patches to be more uniform in the way they define the section entry type and accessors in the .h file, and avoid the need to add #ifdefs to the C code. Cc: "H. Peter Anvin" Cc: Ralf Baechle Cc: Arnd Bergmann Cc: Heiko Carstens Cc: Kees Cook Cc: Will Deacon Cc: Michael Ellerman Cc: Thomas Garnier Cc: Thomas Gleixner Cc: "Serge E. Hallyn" Cc: Bjorn Helgaas Cc: Benjamin
[PATCH v7 01/10] arch: enable relative relocations for arm64, power and x86
Before updating certain subsystems to use place relative 32-bit relocations in special sections, to save space and reduce the number of absolute relocations that need to be processed at runtime by relocatable kernels, introduce the Kconfig symbol and define it for some architectures that should be able to support and benefit from it. Cc: Catalin Marinas Cc: Will Deacon Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x...@kernel.org Signed-off-by: Ard Biesheuvel --- arch/Kconfig | 10 ++ arch/arm64/Kconfig | 1 + arch/powerpc/Kconfig | 1 + arch/x86/Kconfig | 1 + 4 files changed, 13 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 400b9e1b2f27..dbc036a7bd1b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -959,4 +959,14 @@ config REFCOUNT_FULL against various use-after-free conditions that can be used in security flaw exploits. +config HAVE_ARCH_PREL32_RELOCATIONS + bool + help + May be selected by an architecture if it supports place-relative + 32-bit relocations, both in the toolchain and in the module loader, + in which case relative references can be used in special sections + for PCI fixup, initcalls etc which are only half the size on 64 bit + architectures, and don't require runtime relocation on relocatable + kernels. 
+ source "kernel/gcov/Kconfig" diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c9a7e9e1414f..66c7b9ab2a3d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -89,6 +89,7 @@ config ARM64 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c51e6ce42e7a..e172478e2ae7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -177,6 +177,7 @@ config PPC select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d4fc98c50378..9f2bb853aedb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -115,6 +115,7 @@ config X86 select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT + select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE -- 2.11.0
[PATCH v7 00/10] add support for relative references in special sections
This adds support for emitting special sections such as initcall arrays, PCI fixups and tracepoints as relative references rather than absolute references. This reduces the size by 50% on 64-bit architectures, but more importantly, it removes the need for carrying relocation metadata for these sections in relocatable kernels (e.g., for KASLR) that need to fix up these absolute references at boot time. On arm64, this reduces the vmlinux footprint of such a reference by 8x (8 byte absolute reference + 24 byte RELA entry vs 4 byte relative reference) Patch #3 was sent out before as a single patch. This series supersedes the previous submission. This version makes relative ksymtab entries dependent on the new Kconfig symbol HAVE_ARCH_PREL32_RELOCATIONS rather than trying to infer from kbuild test robot replies for which architectures it should be blacklisted. Patch #1 introduces the new Kconfig symbol HAVE_ARCH_PREL32_RELOCATIONS, and sets it for the main architectures that are expected to benefit the most from this feature, i.e., 64-bit architectures or ones that use runtime relocations. Patches #4 - #6 implement relative references for initcalls, PCI fixups and tracepoints, respectively, all of which produce sections with order ~1000 entries on an arm64 defconfig kernel with tracing enabled. This means we save about 28 KB of vmlinux space for each of these patches. Patches #7 - #10 have been added in v5, and implement relative references in jump tables for arm64 and x86. On arm64, this results in significant space savings (650+ KB on a typical distro kernel). On x86, the savings are not as impressive, but still worthwhile. 
(Note that these patches do not rely on CONFIG_HAVE_ARCH_PREL32_RELOCATIONS, given that the inline asm that is emitted is already per-arch) For the arm64 kernel, all patches combined reduce the memory footprint of vmlinux by about 1.3 MB (using a config copied from Ubuntu that has KASLR enabled), of which ~1 MB is the size reduction of the RELA section in .init, and the remaining 300 KB is reduction of .text/.data. Branch: git://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git relative-special-sections-v7 Changes since v6: - drop S390 from patch #1 introducing HAVE_ARCH_PREL32_RELOCATIONS: kbuild robot threw me some s390 curveballs, and given that s390 does not define CONFIG_RELOCATABLE in the first place, it does not benefit as much from relative references as arm64, x86 and power do - add patch to allow symbol exports to be disabled at compilation unit granularity (#2) - get rid of arm64 vmlinux.lds.S hunk to ensure code generated by __ADDRESSABLE gets discarded from the EFI stub - it is no longer needed after adding #2 (#1) - change _ADDRESSABLE() to emit a data reference, not a code reference - this is another simplification made possible by patch #2 (#3) - add Steven's ack to #6 - split x86 jump_label patch into two (#9, #10) Changes since v5: - add missing jump_label prototypes to s390 jump_label.h (#6) - fix inverted condition in call to jump_entry_is_module_init() (#6) Changes since v4: - add patches to convert x86 and arm64 to use relative references for jump tables (#6 - #8) - rename PCI patch and add Bjorn's ack (#4) - rebase onto v4.15-rc5 Changes since v3: - fix module unload issue in patch #5 reported by Jessica, by reusing the updated routine for_each_tracepoint_range() for the quiescent check at module unload time; this requires this routine to be moved before tracepoint_module_going() in kernel/tracepoint.c - add Jessica's ack to #2 - rebase onto v4.14-rc1 Changes since v2: - Revert my slightly misguided attempt to appease checkpatch, which 
resulted in needless churn and worse code. This v3 is based on v1 with a few tweaks that were actually reasonable checkpatch warnings: unnecessary braces (as pointed out by Ingo) and other minor whitespace misdemeanors. Changes since v1: - Remove checkpatch errors to the extent feasible: in some cases, this involves moving extern declarations into C files, and switching to struct definitions rather than typedefs. Some errors are impossible to fix: please find the remaining ones after the diffstat. - Used 'int' instead if 'signed int' for the various offset fields: there is no ambiguity between architectures regarding its signedness (unlike 'char') - Refactor the different patches to be more uniform in the way they define the section entry type and accessors in the .h file, and avoid the need to add #ifdefs to the C code. Cc: "H. Peter Anvin" Cc: Ralf Baechle Cc: Arnd Bergmann Cc: Heiko Carstens Cc: Kees Cook Cc: Will Deacon Cc: Michael Ellerman Cc: Thomas Garnier Cc: Thomas Gleixner Cc: "Serge E. Hallyn" Cc: Bjorn Helgaas Cc: Benjamin Herrenschmidt Cc: Russell King Cc: Paul Mackerras Cc: Catalin Marinas Cc: "David S. Miller" Cc: Petr Mladek Cc: Ingo Molnar Cc: James Morris Cc: Andrew Morton Cc: Nicolas Pitre Cc: Josh Poimboeuf Cc: Steven Rostedt Cc:
[PATCH v7 01/10] arch: enable relative relocations for arm64, power and x86
Before updating certain subsystems to use place relative 32-bit relocations in special sections, to save space and reduce the number of absolute relocations that need to be processed at runtime by relocatable kernels, introduce the Kconfig symbol and define it for some architectures that should be able to support and benefit from it. Cc: Catalin Marinas Cc: Will Deacon Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: x...@kernel.org Signed-off-by: Ard Biesheuvel --- arch/Kconfig | 10 ++ arch/arm64/Kconfig | 1 + arch/powerpc/Kconfig | 1 + arch/x86/Kconfig | 1 + 4 files changed, 13 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 400b9e1b2f27..dbc036a7bd1b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -959,4 +959,14 @@ config REFCOUNT_FULL against various use-after-free conditions that can be used in security flaw exploits. +config HAVE_ARCH_PREL32_RELOCATIONS + bool + help + May be selected by an architecture if it supports place-relative + 32-bit relocations, both in the toolchain and in the module loader, + in which case relative references can be used in special sections + for PCI fixup, initcalls etc which are only half the size on 64 bit + architectures, and don't require runtime relocation on relocatable + kernels. 
+ source "kernel/gcov/Kconfig" diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c9a7e9e1414f..66c7b9ab2a3d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -89,6 +89,7 @@ config ARM64 select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c51e6ce42e7a..e172478e2ae7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -177,6 +177,7 @@ config PPC select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d4fc98c50378..9f2bb853aedb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -115,6 +115,7 @@ config X86 select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT + select HAVE_ARCH_PREL32_RELOCATIONS select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE -- 2.11.0
Re: [alsa-devel] [PATCH -next] soundwire: Fix typo in return value check of sdw_read()
On 1/1/18 10:08 PM, Wei Yongjun wrote: Fix the typo, 'status' should be instead of 'status2'. well spotted, thanks! Acked-by: Pierre-Louis Bossart Fixes: b0a9c37b0178 ("soundwire: Add slave status handling") Signed-off-by: Wei Yongjun --- drivers/soundwire/bus.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 4c34519..266d2b3 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -671,8 +671,8 @@ static int sdw_handle_dp0_interrupt(struct sdw_slave *slave, u8 *slave_status) status2 = sdw_read(slave, SDW_DP0_INT); if (status2 < 0) { dev_err(slave->bus->dev, - "SDW_DP0_INT read failed:%d", status); - return status; + "SDW_DP0_INT read failed:%d", status2); + return status2; } status &= status2; @@ -741,10 +741,10 @@ static int sdw_handle_port_interrupt(struct sdw_slave *slave, /* Read DPN interrupt again */ status2 = sdw_read(slave, addr); - if (status < 0) { + if (status2 < 0) { dev_err(slave->bus->dev, - "SDW_DPN_INT read failed:%d", status); - return status; + "SDW_DPN_INT read failed:%d", status2); + return status2; } status &= status2; ___ Alsa-devel mailing list alsa-de...@alsa-project.org http://mailman.alsa-project.org/mailman/listinfo/alsa-devel
Re: [alsa-devel] [PATCH -next] soundwire: Fix typo in return value check of sdw_read()
On 1/1/18 10:08 PM, Wei Yongjun wrote: Fix the typo, 'status' should be instead of 'status2'. well spotted, thanks! Acked-by: Pierre-Louis Bossart Fixes: b0a9c37b0178 ("soundwire: Add slave status handling") Signed-off-by: Wei Yongjun --- drivers/soundwire/bus.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index 4c34519..266d2b3 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -671,8 +671,8 @@ static int sdw_handle_dp0_interrupt(struct sdw_slave *slave, u8 *slave_status) status2 = sdw_read(slave, SDW_DP0_INT); if (status2 < 0) { dev_err(slave->bus->dev, - "SDW_DP0_INT read failed:%d", status); - return status; + "SDW_DP0_INT read failed:%d", status2); + return status2; } status &= status2; @@ -741,10 +741,10 @@ static int sdw_handle_port_interrupt(struct sdw_slave *slave, /* Read DPN interrupt again */ status2 = sdw_read(slave, addr); - if (status < 0) { + if (status2 < 0) { dev_err(slave->bus->dev, - "SDW_DPN_INT read failed:%d", status); - return status; + "SDW_DPN_INT read failed:%d", status2); + return status2; } status &= status2; ___ Alsa-devel mailing list alsa-de...@alsa-project.org http://mailman.alsa-project.org/mailman/listinfo/alsa-devel
Re: linux-next: build failure after merge of the clk tree
On 01/02, Stephen Rothwell wrote: > Hi all, > > After merging the clk tree, today's linux-next build (x86_64 allmodconfig) > failed like this: > > ERROR: "clk_regmap_mux_div_ops" [drivers/clk/qcom/apcs-msm8916.ko] undefined! > ERROR: "__mux_div_set_src_div" [drivers/clk/qcom/apcs-msm8916.ko] undefined! > > Caused by commit > > 8a77f61118a2 ("clk: qcom: Add APCS clock controller support") > > I have used the clk tree from next-20171222 for today. > Thanks for the report. I've fixed it up and I'm going to roll more build coverage into my scripts now. -- Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
Re: linux-next: build failure after merge of the clk tree
On 01/02, Stephen Rothwell wrote: > Hi all, > > After merging the clk tree, today's linux-next build (x86_64 allmodconfig) > failed like this: > > ERROR: "clk_regmap_mux_div_ops" [drivers/clk/qcom/apcs-msm8916.ko] undefined! > ERROR: "__mux_div_set_src_div" [drivers/clk/qcom/apcs-msm8916.ko] undefined! > > Caused by commit > > 8a77f61118a2 ("clk: qcom: Add APCS clock controller support") > > I have used the clk tree from next-20171222 for today. > Thanks for the report. I've fixed it up and I'm going to roll more build coverage into my scripts now. -- Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
Re: [Intel-wired-lan] [PATCH] i40e: Delete an error message for a failed memory allocation in i40e_init_interrupt_scheme()
On Mon, 1 Jan 2018 20:43:35 +0100 SF Markus Elfring wrote: > From: Markus Elfring > Date: Mon, 1 Jan 2018 20:38:14 +0100 > > Omit an extra message for a memory allocation failure in this function. > > This issue was detected by using the Coccinelle software. > > Signed-off-by: Markus Elfring Thanks for the patch. Acked-by: Jesse Brandeburg
Re: [Intel-wired-lan] [PATCH] i40e: Delete an error message for a failed memory allocation in i40e_init_interrupt_scheme()
On Mon, 1 Jan 2018 20:43:35 +0100 SF Markus Elfring wrote: > From: Markus Elfring > Date: Mon, 1 Jan 2018 20:38:14 +0100 > > Omit an extra message for a memory allocation failure in this function. > > This issue was detected by using the Coccinelle software. > > Signed-off-by: Markus Elfring Thanks for the patch. Acked-by: Jesse Brandeburg
Re: [alsa-devel] [PATCH 15/27] ALSA: hda - Use timecounter_initialize interface
On 1/2/18 12:21 PM, Richard Cochran wrote: On Tue, Jan 02, 2018 at 11:15:45AM -0600, Pierre-Louis Bossart wrote: I wrote the code for HDaudio and I remember wasting time trying to figure out the gory details of the cycle counter stuff when all I wanted was a conversion from a 24MHz counter to ns values using a 125/3 operation in the right order - as explained in the comments Would using clocks_calc_mult_shift() work for you? In theory yes, but I'd need to re-check what the results would be. I remember applying the 1/3 factor separately to avoid wrap-around after 4 hours [1], but I can't remember the details on the analysis. I can't figure out what the 'maxsec' argument should be either. [1] http://elixir.free-electrons.com/linux/latest/source/sound/hda/hdac_stream.c#L486
Re: [alsa-devel] [PATCH 15/27] ALSA: hda - Use timecounter_initialize interface
On 1/2/18 12:21 PM, Richard Cochran wrote: On Tue, Jan 02, 2018 at 11:15:45AM -0600, Pierre-Louis Bossart wrote: I wrote the code for HDaudio and I remember wasting time trying to figure out the gory details of the cycle counter stuff when all I wanted was a conversion from a 24MHz counter to ns values using a 125/3 operation in the right order - as explained in the comments Would using clocks_calc_mult_shift() work for you? In theory yes, but I'd need to re-check what the results would be. I remember applying the 1/3 factor separately to avoid wrap-around after 4 hours [1], but I can't remember the details on the analysis. I can't figure out what the 'maxsec' argument should be either. [1] http://elixir.free-electrons.com/linux/latest/source/sound/hda/hdac_stream.c#L486
Re: [PATCH v3 net-next 2/5] net: tracepoint: replace tcp_set_state tracepoint with inet_sock_set_state tracepoint
From: Brendan Gregg Date: Tue, 2 Jan 2018 11:46:26 -0800 > If I'm to use sock:inet_sock_set_state for TCP tracing, I'd like > sk->sk_protocol exposed as a tracepoint argument so I can match on > IPPROTO_TCP. Agreed.
Re: [PATCH v3 net-next 2/5] net: tracepoint: replace tcp_set_state tracepoint with inet_sock_set_state tracepoint
From: Brendan Gregg Date: Tue, 2 Jan 2018 11:46:26 -0800 > If I'm to use sock:inet_sock_set_state for TCP tracing, I'd like > sk->sk_protocol exposed as a tracepoint argument so I can match on > IPPROTO_TCP. Agreed.
Re: [PATCH] NET: usb: qmi_wwan: add support for YUGA CLM920-NC5 PID 0x9625
From: SZ Lin (林上智) Date: Fri, 29 Dec 2017 17:02:17 +0800 > This patch adds support for PID 0x9625 of YUGA CLM920-NC5. > > YUGA CLM920-NC5 needs to enable QMI_WWAN_QUIRK_DTR before QMI operation. > > qmicli -d /dev/cdc-wdm0 -p --dms-get-revision > [/dev/cdc-wdm0] Device revision retrieved: > Revision: 'CLM920_NC5-V1 1 [Oct 23 2016 19:00:00]' > > Signed-off-by: SZ Lin (林上智) Applied, thank you.
Re: [PATCH] NET: usb: qmi_wwan: add support for YUGA CLM920-NC5 PID 0x9625
From: SZ Lin (林上智) Date: Fri, 29 Dec 2017 17:02:17 +0800 > This patch adds support for PID 0x9625 of YUGA CLM920-NC5. > > YUGA CLM920-NC5 needs to enable QMI_WWAN_QUIRK_DTR before QMI operation. > > qmicli -d /dev/cdc-wdm0 -p --dms-get-revision > [/dev/cdc-wdm0] Device revision retrieved: > Revision: 'CLM920_NC5-V1 1 [Oct 23 2016 19:00:00]' > > Signed-off-by: SZ Lin (林上智) Applied, thank you.
Re: [PATCH v3 net-next 2/5] net: tracepoint: replace tcp_set_state tracepoint with inet_sock_set_state tracepoint
On Sat, Dec 30, 2017 at 7:06 PM, Yafang Shao wrote: > On Sun, Dec 31, 2017 at 6:33 AM, Brendan Gregg > wrote: >> On Tue, Dec 19, 2017 at 7:12 PM, Yafang Shao wrote: >>> As sk_state is a common field for struct sock, so the state >>> transition tracepoint should not be a TCP specific feature. >>> Currently it traces all AF_INET state transition, so I rename this >>> tracepoint to inet_sock_set_state tracepoint with some minor changes and >>> move it >>> into trace/events/sock.h. >> >> The tcp:tcp_set_state probe is tcp_set_state(), so it's only going to >> fire for TCP sessions. It's not broken, and we could add a >> sctp:sctp_set_state as well. Replacing tcp:tcp_set_state with >> inet_sk_set_state is feeling like we might be baking too much >> implementation detail into the tracepoint API. >> >> If we must have inet_sk_set_state, then must we also delete >> tcp:tcp_set_state? >> > > Hi Brendan, > > The reason we have to make this change could be got from this mail > thread, https://patchwork.kernel.org/patch/10099243/ . > > The original tcp:tcp_set_state probe doesn't traced all TCP state transitions. > There're some state transitions in inet_connection_sock.c and > inet_hashtables.c are missed. > So we have to place this probe into these two files to fix the issue. > But as inet_connection_sock.c and inet_hashtables.c are common files > for all IPv4 protocols, not only for TCP, so it is not proper to place > a tcp_ function in these two files. > That's why we decide to rename tcp:tcp_set_state probe to > sock:inet_sock_set_state. It kinda feels like we are fixing one exposing-implementation problem (the missing state changes, which I'm happy to see fixed), by exposing another (there's no tcp:tcp_set_state because we don't want to put tcp functions in inet*.c files). Anyway... If I'm to use sock:inet_sock_set_state for TCP tracing, I'd like sk->sk_protocol exposed as a tracepoint argument so I can match on IPPROTO_TCP. 
Otherwise I'll have to keep digging it out of (void *)skaddr. (And if we're adding arguments, maybe consider sk_family as well, to make it easier to see which address arguments to use). Brendan
Re: [PATCH v3 net-next 2/5] net: tracepoint: replace tcp_set_state tracepoint with inet_sock_set_state tracepoint
On Sat, Dec 30, 2017 at 7:06 PM, Yafang Shao wrote: > On Sun, Dec 31, 2017 at 6:33 AM, Brendan Gregg > wrote: >> On Tue, Dec 19, 2017 at 7:12 PM, Yafang Shao wrote: >>> As sk_state is a common field for struct sock, so the state >>> transition tracepoint should not be a TCP specific feature. >>> Currently it traces all AF_INET state transition, so I rename this >>> tracepoint to inet_sock_set_state tracepoint with some minor changes and >>> move it >>> into trace/events/sock.h. >> >> The tcp:tcp_set_state probe is tcp_set_state(), so it's only going to >> fire for TCP sessions. It's not broken, and we could add a >> sctp:sctp_set_state as well. Replacing tcp:tcp_set_state with >> inet_sk_set_state is feeling like we might be baking too much >> implementation detail into the tracepoint API. >> >> If we must have inet_sk_set_state, then must we also delete >> tcp:tcp_set_state? >> > > Hi Brendan, > > The reason we have to make this change could be got from this mail > thread, https://patchwork.kernel.org/patch/10099243/ . > > The original tcp:tcp_set_state probe doesn't traced all TCP state transitions. > There're some state transitions in inet_connection_sock.c and > inet_hashtables.c are missed. > So we have to place this probe into these two files to fix the issue. > But as inet_connection_sock.c and inet_hashtables.c are common files > for all IPv4 protocols, not only for TCP, so it is not proper to place > a tcp_ function in these two files. > That's why we decide to rename tcp:tcp_set_state probe to > sock:inet_sock_set_state. It kinda feels like we are fixing one exposing-implementation problem (the missing state changes, which I'm happy to see fixed), by exposing another (there's no tcp:tcp_set_state because we don't want to put tcp functions in inet*.c files). Anyway... If I'm to use sock:inet_sock_set_state for TCP tracing, I'd like sk->sk_protocol exposed as a tracepoint argument so I can match on IPPROTO_TCP. 
Otherwise I'll have to keep digging it out of (void *)skaddr. (And if we're adding arguments, maybe consider sk_family as well, to make it easier to see which address arguments to use). Brendan
Re: pci driver loads right after unload
On Tue, Jan 02, 2018 at 01:00:03PM -0600, Bjorn Helgaas wrote: > [+cc Greg, linux-kernel] > > Hi Max, > > Thanks for the report! > > On Tue, Jan 02, 2018 at 01:50:23AM +0200, Max Gurtovoy wrote: > > hi all, > > I encountered a strange phenomena using 2 different pci drivers > > (nvme and mlx5_core) since 4.15-rc1: > > when I try to unload the modules using "modprobe -r" cmd it calls > > the .probe function right after calling the .remove function and the > > module is not realy unloaded. > > I think there is some race condition because when I added a > > msleep(1000) after "pci_unregister_driver(_driver);" (in the > > nvme module testing, it also worked in the mlx5_core), the issue > > seems to dissapear. > > You say "since 4.15-rc1". Does that mean it's a regression? If so, > what's the most recent kernel that does not have this problem? Worst > case, you could bisect to find where it broke. > > I don't see anything obvious in the drivers/pci changes between v4.14 > and v4.15-rc1. Module loading and driver binding is mostly driven by > the driver core and udev. Maybe you could learn something with > "udevadm monitor" or by turning on the some of the debug in > lib/kobject_uevent.c? This should be resolved in 4.15-rc6, there was a regression in -rc1 in this area when dealing with uevents over netlink. Max, can you test -rc6 to verify if this is really fixed or not? thanks, greg k-h
Re: [PATCH] Nokia N9: add support for up/down keys in the dts
Hi, On Tue, Jan 02, 2018 at 01:59:48PM +0100, Pavel Machek wrote: > > This adds support for volume up/down keys in the dts. > > Signed-off-by: Pavel Machek Reviewed-by: Sebastian Reichel -- Sebastian > > diff --git a/arch/arm/boot/dts/omap3-n9.dts b/arch/arm/boot/dts/omap3-n9.dts > index 39e35f8..57a6679 100644 > --- a/arch/arm/boot/dts/omap3-n9.dts > +++ b/arch/arm/boot/dts/omap3-n9.dts > @@ -11,9 +11,10 @@ > /dts-v1/; > > #include "omap3-n950-n9.dtsi" > +#include > > / { > model = "Nokia N9"; > compatible = "nokia,omap3-n9", "ti,omap36xx", "ti,omap3"; > }; > > @@ -72,3 +133,9 @@ > st,max-limit-y = <46>; > st,max-limit-z = <46>; > }; > + > +_keypad { > + linux,keymap = < MATRIX_KEY(6, 8, KEY_VOLUMEUP) > + MATRIX_KEY(7, 8, KEY_VOLUMEDOWN) > + >; > +}; > > > -- > (english) http://www.livejournal.com/~pavelmachek > (cesky, pictures) > http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html signature.asc Description: PGP signature
Re: pci driver loads right after unload
On Tue, Jan 02, 2018 at 01:00:03PM -0600, Bjorn Helgaas wrote: > [+cc Greg, linux-kernel] > > Hi Max, > > Thanks for the report! > > On Tue, Jan 02, 2018 at 01:50:23AM +0200, Max Gurtovoy wrote: > > hi all, > > I encountered a strange phenomena using 2 different pci drivers > > (nvme and mlx5_core) since 4.15-rc1: > > when I try to unload the modules using "modprobe -r" cmd it calls > > the .probe function right after calling the .remove function and the > > module is not realy unloaded. > > I think there is some race condition because when I added a > > msleep(1000) after "pci_unregister_driver(_driver);" (in the > > nvme module testing, it also worked in the mlx5_core), the issue > > seems to dissapear. > > You say "since 4.15-rc1". Does that mean it's a regression? If so, > what's the most recent kernel that does not have this problem? Worst > case, you could bisect to find where it broke. > > I don't see anything obvious in the drivers/pci changes between v4.14 > and v4.15-rc1. Module loading and driver binding is mostly driven by > the driver core and udev. Maybe you could learn something with > "udevadm monitor" or by turning on the some of the debug in > lib/kobject_uevent.c? This should be resolved in 4.15-rc6, there was a regression in -rc1 in this area when dealing with uevents over netlink. Max, can you test -rc6 to verify if this is really fixed or not? thanks, greg k-h
Re: [PATCH] Nokia N9: add support for up/down keys in the dts
Hi, On Tue, Jan 02, 2018 at 01:59:48PM +0100, Pavel Machek wrote: > > This adds support for volume up/down keys in the dts. > > Signed-off-by: Pavel Machek Reviewed-by: Sebastian Reichel -- Sebastian > > diff --git a/arch/arm/boot/dts/omap3-n9.dts b/arch/arm/boot/dts/omap3-n9.dts > index 39e35f8..57a6679 100644 > --- a/arch/arm/boot/dts/omap3-n9.dts > +++ b/arch/arm/boot/dts/omap3-n9.dts > @@ -11,9 +11,10 @@ > /dts-v1/; > > #include "omap3-n950-n9.dtsi" > +#include > > / { > model = "Nokia N9"; > compatible = "nokia,omap3-n9", "ti,omap36xx", "ti,omap3"; > }; > > @@ -72,3 +133,9 @@ > st,max-limit-y = <46>; > st,max-limit-z = <46>; > }; > + > +_keypad { > + linux,keymap = < MATRIX_KEY(6, 8, KEY_VOLUMEUP) > + MATRIX_KEY(7, 8, KEY_VOLUMEDOWN) > + >; > +}; > > > -- > (english) http://www.livejournal.com/~pavelmachek > (cesky, pictures) > http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html signature.asc Description: PGP signature
Re: [Intel-gfx] Graphics on thinkpad x270 after dock/undock works only for the first time (CPU pipe B FIFO underrun)
Quoting Rodrigo Vivi (2018-01-02 19:21:08) > On Sat, Dec 30, 2017 at 12:53:58PM +, Jiri Kosina wrote: > > On Sat, 30 Dec 2017, Jiri Kosina wrote: > > > > > Seems like disabling RC6 on the kernel command line works this around, > > > and > > > I can dock / undock several times in a row with the image always coming > > > up properly on the external display. > > > > > > On the first undock, the WARN_ONCE() below triggers, so I believe each > > > undock leaks memory. > > > > > > [ 38.755084] Failed to release pages: bind_count=1, pages_pin_count=1, > > > pin_global=0 > > > [ 38.755138] WARNING: CPU: 3 PID: 96 at > > > ../drivers/gpu/drm/i915/i915_gem_userptr.c:89 cancel_userptr+0xe5/0xf0 > > > [i915] Not a leak, just a warning the shadow pte are stale due to someone pinning a (gfx) vma. > > OK, I am seeing this warning with current Linus' tree (5aa90a845) even > > without any attempt to dock/undock, so it's probably unrelated to external > > outputs and it only by coincidence appeared originally at the same time I > > docked the machine. > > > > So there are two separate issues on this machine with latest kernel > > (neither of them probably being regression): > > > > - I have to disable i915 RC6 at the kernel cmdline, otherwise external > > (dock) display gets output only randomly (seems like always only on > > first dock) > > Joonas, Chris, time to bring rc6_enable back on next-fixes before we > remove this support entirely? No. It's precisely because of this mistake (thinking that rc6 has anything to do with the issue when it's the consequential shotgun disabling of rpm) that the modparam is not coming back. -Chris
Re: [Intel-gfx] Graphics on thinkpad x270 after dock/undock works only for the first time (CPU pipe B FIFO underrun)
Quoting Rodrigo Vivi (2018-01-02 19:21:08) > On Sat, Dec 30, 2017 at 12:53:58PM +, Jiri Kosina wrote: > > On Sat, 30 Dec 2017, Jiri Kosina wrote: > > > > > Seems like disabling RC6 on the kernel command line works this around, > > > and > > > I can dock / undock several times in a row with the image always coming > > > up properly on the external display. > > > > > > On the first undock, the WARN_ONCE() below triggers, so I believe each > > > undock leaks memory. > > > > > > [ 38.755084] Failed to release pages: bind_count=1, pages_pin_count=1, > > > pin_global=0 > > > [ 38.755138] WARNING: CPU: 3 PID: 96 at > > > ../drivers/gpu/drm/i915/i915_gem_userptr.c:89 cancel_userptr+0xe5/0xf0 > > > [i915] Not a leak, just a warning the shadow pte are stale due to someone pinning a (gfx) vma. > > OK, I am seeing this warning with current Linus' tree (5aa90a845) even > > without any attempt to dock/undock, so it's probably unrelated to external > > outputs and it only by coincidence appeared originally at the same time I > > docked the machine. > > > > So there are two separate issues on this machine with latest kernel > > (neither of them probably being regression): > > > > - I have to disable i915 RC6 at the kernel cmdline, otherwise external > > (dock) display gets output only randomly (seems like always only on > > first dock) > > Joonas, Chris, time to bring rc6_enable back on next-fixes before we > remove this support entirely? No. It's precisely because of this mistake (thinking that rc6 has anything to do with the issue when it's the consequential shotgun disabling of rpm) that the modparam is not coming back. -Chris
Re: [PATCH 2/2] serial: imx: fix endless loop during suspend
Hi Martin, On Tue, Jan 2, 2018 at 2:15 PM, Martin Kaiser wrote: > Fabio, could you post the output of > > cat /sys/kernel/debug/suspend_stats > > after supend failed, to confirm that we're failing below > device_suspend_noirq()? Here it goes: # cat /sys/kernel/debug/suspend_stats success: 0 fail: 1 failed_freeze: 0 failed_prepare: 0 failed_suspend: 0 failed_suspend_late: 0 failed_suspend_noirq: 1 failed_resume: 0 failed_resume_early: 0 failed_resume_noirq: 0 failures: last_failed_dev: last_failed_errno:-16 0 last_failed_step: suspend_noirq
Re: [PATCH 2/2] serial: imx: fix endless loop during suspend
Hi Martin, On Tue, Jan 2, 2018 at 2:15 PM, Martin Kaiser wrote: > Fabio, could you post the output of > > cat /sys/kernel/debug/suspend_stats > > after supend failed, to confirm that we're failing below > device_suspend_noirq()? Here it goes: # cat /sys/kernel/debug/suspend_stats success: 0 fail: 1 failed_freeze: 0 failed_prepare: 0 failed_suspend: 0 failed_suspend_late: 0 failed_suspend_noirq: 1 failed_resume: 0 failed_resume_early: 0 failed_resume_noirq: 0 failures: last_failed_dev: last_failed_errno:-16 0 last_failed_step: suspend_noirq
Re: [PATCH 0/3] Ktest: add email support
On Tue, 2 Jan 2018 11:08:00 -0800 Tim Tianyang Chen wrote: > Hi Steve, did your mailer find all the patches? I made sure they all > reply to the same mail ID this time. > Yes, sorry due to end of year work, these were put on the back burner. I'll see if I can get to them sometime this week. -- Steve
Re: [PATCH 0/3] Ktest: add email support
On Tue, 2 Jan 2018 11:08:00 -0800 Tim Tianyang Chen wrote: > Hi Steve, did your mailer find all the patches? I made sure they all > reply to the same mail ID this time. > Yes, sorry due to end of year work, these were put on the back burner. I'll see if I can get to them sometime this week. -- Steve
Re: [PATCH net-next v7 0/6] net: tcp: sctp: dccp: Replace jprobe usage with trace events
From: Masami Hiramatsu Date: Fri, 29 Dec 2017 11:45:20 +0900 > This series is v7 of the replacement of jprobe usage with trace > events. This version fixes net/dccp/trace.h to avoid sparse > warning. Since the TP_STORE_ADDR_PORTS macro can be shared > with trace/events/tcp.h, it also introduce a new common header > file and move the definition of that macro. > > Previous version is here; > https://lkml.org/lkml/2017/12/28/7 > > Changes from v6: > [5/6]: Avoid preprocessor directives in tracepoint macro args Series applied, thank you.
Re: [PATCH net-next v7 0/6] net: tcp: sctp: dccp: Replace jprobe usage with trace events
From: Masami Hiramatsu Date: Fri, 29 Dec 2017 11:45:20 +0900 > This series is v7 of the replacement of jprobe usage with trace > events. This version fixes net/dccp/trace.h to avoid sparse > warning. Since the TP_STORE_ADDR_PORTS macro can be shared > with trace/events/tcp.h, it also introduce a new common header > file and move the definition of that macro. > > Previous version is here; > https://lkml.org/lkml/2017/12/28/7 > > Changes from v6: > [5/6]: Avoid preprocessor directives in tracepoint macro args Series applied, thank you.
Re: [PATCH v4] f2fs: add reserved blocks for root user
This patch allows root to reserve some blocks via mount option. "-o reserve_root=N" means N x 4KB-sized blocks for root only. Signed-off-by: Jaegeuk Kim--- Change log from v3: - fix 0.2% calculation - preserve reserve_root=%u from remount_fs fs/f2fs/f2fs.h | 26 ++ fs/f2fs/super.c | 35 ++- fs/f2fs/sysfs.c | 3 ++- 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5f7f42267221..9dde05c62b1c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -95,6 +95,7 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_PRJQUOTA0x0020 #define F2FS_MOUNT_QUOTA 0x0040 #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x0080 +#define F2FS_MOUNT_RESERVE_ROOT0x0100 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) @@ -1110,6 +,7 @@ struct f2fs_sb_info { block_t last_valid_block_count; /* for recovery */ block_t reserved_blocks;/* configurable reserved blocks */ block_t current_reserved_blocks;/* current reserved blocks */ + block_t root_reserved_blocks; /* root reserved blocks */ unsigned int nquota_files; /* # of quota sysfile */ @@ -1562,6 +1564,12 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } +static inline block_t reserve_root_limit(struct f2fs_sb_info *sbi) +{ + /* limit is 0.2% */ + return (sbi->user_block_count << 1) / 1000; +} + static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) @@ -1591,11 +1599,17 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, sbi->total_valid_block_count += (block_t)(*count); avail_user_block_count = sbi->user_block_count - sbi->current_reserved_blocks; + + if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE))) + avail_user_block_count -= sbi->root_reserved_blocks; + if (unlikely(sbi->total_valid_block_count 
> avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; + if (diff > *count) + diff = *count; *count -= diff; release = diff; - sbi->total_valid_block_count = avail_user_block_count; + sbi->total_valid_block_count -= diff; if (!*count) { spin_unlock(>stat_lock); percpu_counter_sub(>alloc_valid_block_count, diff); @@ -1784,9 +1798,13 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, spin_lock(>stat_lock); - valid_block_count = sbi->total_valid_block_count + 1; - if (unlikely(valid_block_count + sbi->current_reserved_blocks > - sbi->user_block_count)) { + valid_block_count = sbi->total_valid_block_count + + sbi->current_reserved_blocks + 1; + + if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE))) + valid_block_count += sbi->root_reserved_blocks; + + if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(>stat_lock); goto enospc; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cb876d905ca5..3c62492b6a0d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -107,6 +107,7 @@ enum { Opt_noextent_cache, Opt_noinline_data, Opt_data_flush, + Opt_reserve_root, Opt_mode, Opt_io_size_bits, Opt_fault_injection, @@ -157,6 +158,7 @@ static match_table_t f2fs_tokens = { {Opt_noextent_cache, "noextent_cache"}, {Opt_noinline_data, "noinline_data"}, {Opt_data_flush, "data_flush"}, + {Opt_reserve_root, "reserve_root=%u"}, {Opt_mode, "mode=%s"}, {Opt_io_size_bits, "io_bits=%u"}, {Opt_fault_injection, "fault_injection=%u"}, @@ -488,6 +490,18 @@ static int parse_options(struct super_block *sb, char *options) case Opt_data_flush: set_opt(sbi, DATA_FLUSH); break; + case Opt_reserve_root: + if (args->from && match_int(args, )) + return -EINVAL; + if (test_opt(sbi, RESERVE_ROOT)) { + f2fs_msg(sb, KERN_INFO, + "Preserve previous reserve_root=%u", + sbi->root_reserved_blocks); +
Re: [PATCH v4] f2fs: add reserved blocks for root user
This patch allows root to reserve some blocks via mount option. "-o reserve_root=N" means N x 4KB-sized blocks for root only. Signed-off-by: Jaegeuk Kim --- Change log from v3: - fix 0.2% calculation - preserve reserve_root=%u from remount_fs fs/f2fs/f2fs.h | 26 ++ fs/f2fs/super.c | 35 ++- fs/f2fs/sysfs.c | 3 ++- 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5f7f42267221..9dde05c62b1c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -95,6 +95,7 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_PRJQUOTA0x0020 #define F2FS_MOUNT_QUOTA 0x0040 #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x0080 +#define F2FS_MOUNT_RESERVE_ROOT0x0100 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) @@ -1110,6 +,7 @@ struct f2fs_sb_info { block_t last_valid_block_count; /* for recovery */ block_t reserved_blocks;/* configurable reserved blocks */ block_t current_reserved_blocks;/* current reserved blocks */ + block_t root_reserved_blocks; /* root reserved blocks */ unsigned int nquota_files; /* # of quota sysfile */ @@ -1562,6 +1564,12 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } +static inline block_t reserve_root_limit(struct f2fs_sb_info *sbi) +{ + /* limit is 0.2% */ + return (sbi->user_block_count << 1) / 1000; +} + static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) @@ -1591,11 +1599,17 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, sbi->total_valid_block_count += (block_t)(*count); avail_user_block_count = sbi->user_block_count - sbi->current_reserved_blocks; + + if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE))) + avail_user_block_count -= sbi->root_reserved_blocks; + if 
(unlikely(sbi->total_valid_block_count > avail_user_block_count)) { diff = sbi->total_valid_block_count - avail_user_block_count; + if (diff > *count) + diff = *count; *count -= diff; release = diff; - sbi->total_valid_block_count = avail_user_block_count; + sbi->total_valid_block_count -= diff; if (!*count) { spin_unlock(>stat_lock); percpu_counter_sub(>alloc_valid_block_count, diff); @@ -1784,9 +1798,13 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, spin_lock(>stat_lock); - valid_block_count = sbi->total_valid_block_count + 1; - if (unlikely(valid_block_count + sbi->current_reserved_blocks > - sbi->user_block_count)) { + valid_block_count = sbi->total_valid_block_count + + sbi->current_reserved_blocks + 1; + + if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE))) + valid_block_count += sbi->root_reserved_blocks; + + if (unlikely(valid_block_count > sbi->user_block_count)) { spin_unlock(>stat_lock); goto enospc; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cb876d905ca5..3c62492b6a0d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -107,6 +107,7 @@ enum { Opt_noextent_cache, Opt_noinline_data, Opt_data_flush, + Opt_reserve_root, Opt_mode, Opt_io_size_bits, Opt_fault_injection, @@ -157,6 +158,7 @@ static match_table_t f2fs_tokens = { {Opt_noextent_cache, "noextent_cache"}, {Opt_noinline_data, "noinline_data"}, {Opt_data_flush, "data_flush"}, + {Opt_reserve_root, "reserve_root=%u"}, {Opt_mode, "mode=%s"}, {Opt_io_size_bits, "io_bits=%u"}, {Opt_fault_injection, "fault_injection=%u"}, @@ -488,6 +490,18 @@ static int parse_options(struct super_block *sb, char *options) case Opt_data_flush: set_opt(sbi, DATA_FLUSH); break; + case Opt_reserve_root: + if (args->from && match_int(args, )) + return -EINVAL; + if (test_opt(sbi, RESERVE_ROOT)) { + f2fs_msg(sb, KERN_INFO, + "Preserve previous reserve_root=%u", + sbi->root_reserved_blocks); +
Re: [f2fs-dev] [PATCH v3] f2fs: add reserved blocks for root user
On 01/02, Chao Yu wrote: > On 2018/1/1 9:29, Jaegeuk Kim wrote: > > This patch allows root to reserve some blocks via mount option. > > > > "-o reserve_root=N" means N x 4KB-sized blocks for root only. > > > > Signed-off-by: Jaegeuk Kim> > --- > > > > Change log from v2: > > - wrong submission. :P > > > > fs/f2fs/f2fs.h | 26 ++ > > fs/f2fs/super.c | 26 +++--- > > fs/f2fs/sysfs.c | 3 ++- > > 3 files changed, 47 insertions(+), 8 deletions(-) > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > > index 5f7f42267221..123d875f7293 100644 > > --- a/fs/f2fs/f2fs.h > > +++ b/fs/f2fs/f2fs.h > > @@ -95,6 +95,7 @@ extern char *fault_name[FAULT_MAX]; > > #define F2FS_MOUNT_PRJQUOTA0x0020 > > #define F2FS_MOUNT_QUOTA 0x0040 > > #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x0080 > > +#define F2FS_MOUNT_RESERVE_ROOT0x0100 > > > > #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= > > ~F2FS_MOUNT_##option) > > #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= > > F2FS_MOUNT_##option) > > @@ -1110,6 +,7 @@ struct f2fs_sb_info { > > block_t last_valid_block_count; /* for recovery */ > > block_t reserved_blocks;/* configurable reserved blocks > > */ > > block_t current_reserved_blocks;/* current reserved blocks */ > > + block_t root_reserved_blocks; /* root reserved blocks */ > > > > unsigned int nquota_files; /* # of quota sysfile */ > > > > @@ -1562,6 +1564,12 @@ static inline bool f2fs_has_xattr_block(unsigned int > > ofs) > > return ofs == XATTR_NODE_OFFSET; > > } > > > > +static inline block_t reserve_root_limit(struct f2fs_sb_info *sbi) > > +{ > > + /* limit is 0.2% */ > > Should be 2% according to below calculation? Oh, right. Fixed. 
> > > + return (sbi->user_block_count << 1) / 100; > > +} > > + > > static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, > > bool); > > static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, > > struct inode *inode, blkcnt_t *count) > > @@ -1591,11 +1599,17 @@ static inline int inc_valid_block_count(struct > > f2fs_sb_info *sbi, > > sbi->total_valid_block_count += (block_t)(*count); > > avail_user_block_count = sbi->user_block_count - > > sbi->current_reserved_blocks; > > + > > + if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE))) > > How about adding uid & gid verification also like ext4? Again, that's another feature which requires a mount option. I think it'd be better to add that, once we have a use-case. > As this is a mount option, in ->remount_fs, we should consider to recover > original reserved block number if we encounter some error during remount. Yup, agreed. Thanks, > > Thanks, > > > + avail_user_block_count -= sbi->root_reserved_blocks; > > + > > if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { > > diff = sbi->total_valid_block_count - avail_user_block_count; > > + if (diff > *count) > > + diff = *count; > > *count -= diff; > > release = diff; > > - sbi->total_valid_block_count = avail_user_block_count; > > + sbi->total_valid_block_count -= diff; > > if (!*count) { > > spin_unlock(>stat_lock); > > percpu_counter_sub(>alloc_valid_block_count, diff); > > @@ -1784,9 +1798,13 @@ static inline int inc_valid_node_count(struct > > f2fs_sb_info *sbi, > > > > spin_lock(>stat_lock); > > > > - valid_block_count = sbi->total_valid_block_count + 1; > > - if (unlikely(valid_block_count + sbi->current_reserved_blocks > > > - sbi->user_block_count)) { > > + valid_block_count = sbi->total_valid_block_count + > > + sbi->current_reserved_blocks + 1; > > + > > + if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE))) > > + valid_block_count += sbi->root_reserved_blocks; > > + > > + if 
(unlikely(valid_block_count > sbi->user_block_count)) { > > spin_unlock(>stat_lock); > > goto enospc; > > } > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c > > index cb876d905ca5..9221b013db98 100644 > > --- a/fs/f2fs/super.c > > +++ b/fs/f2fs/super.c > > @@ -107,6 +107,7 @@ enum { > > Opt_noextent_cache, > > Opt_noinline_data, > > Opt_data_flush, > > + Opt_reserve_root, > > Opt_mode, > > Opt_io_size_bits, > > Opt_fault_injection, > > @@ -157,6 +158,7 @@ static match_table_t f2fs_tokens = { > > {Opt_noextent_cache, "noextent_cache"}, > > {Opt_noinline_data, "noinline_data"}, > > {Opt_data_flush, "data_flush"}, > > + {Opt_reserve_root, "reserve_root=%u"}, > > {Opt_mode,
Re: [f2fs-dev] [PATCH v3] f2fs: add reserved blocks for root user
On 01/02, Chao Yu wrote: > On 2018/1/1 9:29, Jaegeuk Kim wrote: > > This patch allows root to reserve some blocks via mount option. > > > > "-o reserve_root=N" means N x 4KB-sized blocks for root only. > > > > Signed-off-by: Jaegeuk Kim > > --- > > > > Change log from v2: > > - wrong submission. :P > > > > fs/f2fs/f2fs.h | 26 ++ > > fs/f2fs/super.c | 26 +++--- > > fs/f2fs/sysfs.c | 3 ++- > > 3 files changed, 47 insertions(+), 8 deletions(-) > > > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > > index 5f7f42267221..123d875f7293 100644 > > --- a/fs/f2fs/f2fs.h > > +++ b/fs/f2fs/f2fs.h > > @@ -95,6 +95,7 @@ extern char *fault_name[FAULT_MAX]; > > #define F2FS_MOUNT_PRJQUOTA0x0020 > > #define F2FS_MOUNT_QUOTA 0x0040 > > #define F2FS_MOUNT_INLINE_XATTR_SIZE 0x0080 > > +#define F2FS_MOUNT_RESERVE_ROOT0x0100 > > > > #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= > > ~F2FS_MOUNT_##option) > > #define set_opt(sbi, option) ((sbi)->mount_opt.opt |= > > F2FS_MOUNT_##option) > > @@ -1110,6 +,7 @@ struct f2fs_sb_info { > > block_t last_valid_block_count; /* for recovery */ > > block_t reserved_blocks;/* configurable reserved blocks > > */ > > block_t current_reserved_blocks;/* current reserved blocks */ > > + block_t root_reserved_blocks; /* root reserved blocks */ > > > > unsigned int nquota_files; /* # of quota sysfile */ > > > > @@ -1562,6 +1564,12 @@ static inline bool f2fs_has_xattr_block(unsigned int > > ofs) > > return ofs == XATTR_NODE_OFFSET; > > } > > > > +static inline block_t reserve_root_limit(struct f2fs_sb_info *sbi) > > +{ > > + /* limit is 0.2% */ > > Should be 2% according to below calculation? Oh, right. Fixed. 
> > > + return (sbi->user_block_count << 1) / 100; > > +} > > + > > static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, > > bool); > > static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, > > struct inode *inode, blkcnt_t *count) > > @@ -1591,11 +1599,17 @@ static inline int inc_valid_block_count(struct > > f2fs_sb_info *sbi, > > sbi->total_valid_block_count += (block_t)(*count); > > avail_user_block_count = sbi->user_block_count - > > sbi->current_reserved_blocks; > > + > > + if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE))) > > How about adding uid & gid verification also like ext4? Again, that's another feature which requires a mount option. I think it'd be better to add that, once we have a use-case. > As this is a mount option, in ->remount_fs, we should consider to recover > original reserved block number if we encounter some error during remount. Yup, agreed. Thanks, > > Thanks, > > > + avail_user_block_count -= sbi->root_reserved_blocks; > > + > > if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { > > diff = sbi->total_valid_block_count - avail_user_block_count; > > + if (diff > *count) > > + diff = *count; > > *count -= diff; > > release = diff; > > - sbi->total_valid_block_count = avail_user_block_count; > > + sbi->total_valid_block_count -= diff; > > if (!*count) { > > spin_unlock(>stat_lock); > > percpu_counter_sub(>alloc_valid_block_count, diff); > > @@ -1784,9 +1798,13 @@ static inline int inc_valid_node_count(struct > > f2fs_sb_info *sbi, > > > > spin_lock(>stat_lock); > > > > - valid_block_count = sbi->total_valid_block_count + 1; > > - if (unlikely(valid_block_count + sbi->current_reserved_blocks > > > - sbi->user_block_count)) { > > + valid_block_count = sbi->total_valid_block_count + > > + sbi->current_reserved_blocks + 1; > > + > > + if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE))) > > + valid_block_count += sbi->root_reserved_blocks; > > + > > + if 
(unlikely(valid_block_count > sbi->user_block_count)) { > > spin_unlock(>stat_lock); > > goto enospc; > > } > > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c > > index cb876d905ca5..9221b013db98 100644 > > --- a/fs/f2fs/super.c > > +++ b/fs/f2fs/super.c > > @@ -107,6 +107,7 @@ enum { > > Opt_noextent_cache, > > Opt_noinline_data, > > Opt_data_flush, > > + Opt_reserve_root, > > Opt_mode, > > Opt_io_size_bits, > > Opt_fault_injection, > > @@ -157,6 +158,7 @@ static match_table_t f2fs_tokens = { > > {Opt_noextent_cache, "noextent_cache"}, > > {Opt_noinline_data, "noinline_data"}, > > {Opt_data_flush, "data_flush"}, > > + {Opt_reserve_root, "reserve_root=%u"}, > > {Opt_mode, "mode=%s"}, > >
Re: [Intel-gfx] [PATCH v2] drm/i915: Try EDID bitbanging on HDMI after failed read
Quoting Rodrigo Vivi (2018-01-02 19:12:18) > On Sun, Dec 31, 2017 at 10:34:54PM +, Stefan Brüns wrote: > > + edid = drm_get_edid(connector, i2c); > > + > > + if (!edid && !intel_gmbus_is_forced_bit(i2c)) { > > + DRM_DEBUG_KMS("HDMI GMBUS EDID read failed, retry using GPIO > > bit-banging\n"); > > + intel_gmbus_force_bit(i2c, true); > > + edid = drm_get_edid(connector, i2c); > > + intel_gmbus_force_bit(i2c, false); > > + } > > Approach seems fine for this case. > I just wonder what would be the risks of forcing this bit and edid read when > nothing is present on the other end? Should be no more risky than using GMBUS as the bit-banging is the underlying HW protocol; it should just be adding an extra delay to the disconnected probe. Offset against the chance that it fixes detection of borderline devices. I would say that given the explanation above, the question is why not apply it universally? (Bonus points for including the explanation as comments.) -Chris
Re: [Intel-gfx] [PATCH v2] drm/i915: Try EDID bitbanging on HDMI after failed read
Quoting Rodrigo Vivi (2018-01-02 19:12:18) > On Sun, Dec 31, 2017 at 10:34:54PM +, Stefan Brüns wrote: > > + edid = drm_get_edid(connector, i2c); > > + > > + if (!edid && !intel_gmbus_is_forced_bit(i2c)) { > > + DRM_DEBUG_KMS("HDMI GMBUS EDID read failed, retry using GPIO > > bit-banging\n"); > > + intel_gmbus_force_bit(i2c, true); > > + edid = drm_get_edid(connector, i2c); > > + intel_gmbus_force_bit(i2c, false); > > + } > > Approach seems fine for this case. > I just wonder what would be the risks of forcing this bit and edid read when > nothing is present on the other end? Should be no more risky than using GMBUS as the bit-banging is the underlying HW protocol; it should just be adding an extra delay to the disconnected probe. Offset against the chance that it fixes detection of borderline devices. I would say that given the explanation above, the question is why not apply it universally? (Bonus points for including the explanation as comments.) -Chris
Re: [PATCH v5] x86/microcode/intel: Blacklist the specific BDW-EP for late loading
On Mon, Jan 01, 2018 at 11:10:56PM +0100, Borislav Petkov wrote: > On Mon, Jan 01, 2018 at 10:04:47AM +0800, Jia Zhang wrote: > > Ok, I went and massaged your version, here's what I committed: > > Signed-off-by: Jia Zhang > Acked-by: Tony Luck > Cc: x86-ml > Link: > http://lkml.kernel.org/r/1514772287-92959-1-git-send-email-qianyue...@alibaba-inc.com > [ Heavily massage commit message and pr_* statements. ] > Signed-off-by: Borislav Petkov Should there be a "Cc: stable ..." in there? The original patch that disables for all broadwell seems to be getting applied to a bunch of older trees, I think we want this one to chase after it. Perhaps a Fixes: 723f2828a98c ("x86/microcode/intel: Disable late loading on model 79") too? -Tony
Re: [PATCH v5] x86/microcode/intel: Blacklist the specific BDW-EP for late loading
On Mon, Jan 01, 2018 at 11:10:56PM +0100, Borislav Petkov wrote: > On Mon, Jan 01, 2018 at 10:04:47AM +0800, Jia Zhang wrote: > > Ok, I went and massaged your version, here's what I committed: > > Signed-off-by: Jia Zhang > Acked-by: Tony Luck > Cc: x86-ml > Link: > http://lkml.kernel.org/r/1514772287-92959-1-git-send-email-qianyue...@alibaba-inc.com > [ Heavily massage commit message and pr_* statements. ] > Signed-off-by: Borislav Petkov Should there be a "Cc: stable ..." in there? The original patch that disables for all broadwell seems to be getting applied to a bunch of older trees, I think we want this one to chase after it. Perhaps a Fixes: 723f2828a98c ("x86/microcode/intel: Disable late loading on model 79") too? -Tony
Re: [PATCH v2 0/6] wl1251: Fix MAC address for Nokia N900
On Friday 10 November 2017 00:38:22 Pali Rohár wrote: > This patch series fix processing MAC address for wl1251 chip found in Nokia > N900. > > Changes since v1: > * Added Acked-by for Pavel Machek > * Fixed grammar > * Magic numbers for NVS offsets are replaced by defines > * Check for validity of mac address NVS data is moved into function > * Changed order of patches as Pavel requested > > Pali Rohár (6): > wl1251: Update wl->nvs_len after wl->nvs is valid > wl1251: Generate random MAC address only if driver does not have > valid > wl1251: Parse and use MAC address from supplied NVS data > wl1251: Set generated MAC address back to NVS data > firmware: Add request_firmware_prefer_user() function > wl1251: Use request_firmware_prefer_user() for loading NVS > calibration data > > drivers/base/firmware_class.c | 45 +- > drivers/net/wireless/ti/wl1251/Kconfig |1 + > drivers/net/wireless/ti/wl1251/main.c | 104 > ++-- > include/linux/firmware.h |9 +++ > 4 files changed, 138 insertions(+), 21 deletions(-) Hi! Are there any comments for first 4 patches? If not, could they be accepted and merged? -- Pali Rohár pali.ro...@gmail.com
Re: [PATCH v2 0/6] wl1251: Fix MAC address for Nokia N900
On Friday 10 November 2017 00:38:22 Pali Rohár wrote: > This patch series fix processing MAC address for wl1251 chip found in Nokia > N900. > > Changes since v1: > * Added Acked-by for Pavel Machek > * Fixed grammar > * Magic numbers for NVS offsets are replaced by defines > * Check for validity of mac address NVS data is moved into function > * Changed order of patches as Pavel requested > > Pali Rohár (6): > wl1251: Update wl->nvs_len after wl->nvs is valid > wl1251: Generate random MAC address only if driver does not have > valid > wl1251: Parse and use MAC address from supplied NVS data > wl1251: Set generated MAC address back to NVS data > firmware: Add request_firmware_prefer_user() function > wl1251: Use request_firmware_prefer_user() for loading NVS > calibration data > > drivers/base/firmware_class.c | 45 +- > drivers/net/wireless/ti/wl1251/Kconfig |1 + > drivers/net/wireless/ti/wl1251/main.c | 104 > ++-- > include/linux/firmware.h |9 +++ > 4 files changed, 138 insertions(+), 21 deletions(-) Hi! Are there any comments for first 4 patches? If not, could they be accepted and merged? -- Pali Rohár pali.ro...@gmail.com
Re: [PATCH] clk: Fix debugfs_create_*() usage
On 01/02, Geert Uytterhoeven wrote: > When exposing data access through debugfs, the correct > debugfs_create_*() functions must be used, depending on data type. > > Remove all casts from data pointers passed to debugfs_create_*() > functions, as such casts prevent the compiler from flagging bugs. > > clk_core.rate, .accuracy, and .flags are "unsigned long", hence casting > to "u32 *" exposed the wrong halves on big-endian 64-bit systems. > > Fix .rate and .accuracy, by using debugfs_create_ulong() instead. > > Fix .flags by changing the field to "unsigned int", as a change to > debugfs_create_x64() on 64-bit systems would change the user-visible > formatting in debugfs. > Note that __clk_get_flags() and clk_hw_get_flags() are left unchanged > and still return "unsigned long", to avoid having to change all their > users. Likewise, of_clk_detect_critical() still takes "unsigned long", > but the comment is updated as it is never passed a real pointer to > clk_core.flags. > > Signed-off-by: Geert Uytterhoeven> --- > Looks like none of the 64-bit architectures support common clock yet? arm64 does. > --- > drivers/clk/clk.c | 24 > 1 file changed, 12 insertions(+), 12 deletions(-) > > diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c > index 5ec580914089510a..b23e0249f0e3c634 100644 > --- a/drivers/clk/clk.c > +++ b/drivers/clk/clk.c > @@ -58,7 +58,7 @@ struct clk_core { > unsigned long new_rate; > struct clk_core *new_parent; > struct clk_core *new_child; > - unsigned long flags; > + unsigned intflags; This doesn't look good. > boolorphan; > unsigned intenable_count; > unsigned intprepare_count; > @@ -2600,43 +2600,43 @@ static int clk_debug_create_one(struct clk_core > *core, struct dentry *pdentry) > > core->dentry = d; > > - d = debugfs_create_u32("clk_rate", S_IRUGO, core->dentry, > - (u32 *)>rate); > + d = debugfs_create_ulong("clk_rate", S_IRUGO, core->dentry, > + >rate); As you're changing these lines, can you also change S_IRUGO to the octal values. 
That's the preferred style now. > if (!d) > goto err_out; > > - d = debugfs_create_u32("clk_accuracy", S_IRUGO, core->dentry, > - (u32 *)>accuracy); > + d = debugfs_create_ulong("clk_accuracy", S_IRUGO, core->dentry, > + >accuracy); > if (!d) > goto err_out; > > d = debugfs_create_u32("clk_phase", S_IRUGO, core->dentry, > - (u32 *)>phase); > +>phase); > if (!d) > goto err_out; > > d = debugfs_create_x32("clk_flags", S_IRUGO, core->dentry, > - (u32 *)>flags); > +>flags); Maybe we need a new debugfs API like debugfs_create_ulong_hex() or something that prints out an unsigned long as a hex value? Probably we should change it to pretty print the values and what they correspond to, with words, because that's the least confusing thing to do with regards to endianness. So the clk_flags file would have something like CLK_SET_RATE_PARENT CLK_SET_RATE_GATE if those flags are set. We don't care about ABI here either. This is debugfs. > @@ -3927,7 +3927,7 @@ static int parent_ready(struct device_node *np) > * of_clk_detect_critical() - set CLK_IS_CRITICAL flag from Device Tree > * @np: Device node pointer associated with clock provider > * @index: clock index > - * @flags: pointer to clk_core->flags > + * @flags: pointer to core clock flags Please split this off into another patch. -- Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
Re: [PATCH] clk: Fix debugfs_create_*() usage
On 01/02, Geert Uytterhoeven wrote: > When exposing data access through debugfs, the correct > debugfs_create_*() functions must be used, depending on data type. > > Remove all casts from data pointers passed to debugfs_create_*() > functions, as such casts prevent the compiler from flagging bugs. > > clk_core.rate, .accuracy, and .flags are "unsigned long", hence casting > to "u32 *" exposed the wrong halves on big-endian 64-bit systems. > > Fix .rate and .accuracy, by using debugfs_create_ulong() instead. > > Fix .flags by changing the field to "unsigned int", as a change to > debugfs_create_x64() on 64-bit systems would change the user-visible > formatting in debugfs. > Note that __clk_get_flags() and clk_hw_get_flags() are left unchanged > and still return "unsigned long", to avoid having to change all their > users. Likewise, of_clk_detect_critical() still takes "unsigned long", > but the comment is updated as it is never passed a real pointer to > clk_core.flags. > > Signed-off-by: Geert Uytterhoeven > --- > Looks like none of the 64-bit architectures support common clock yet? arm64 does. > --- > drivers/clk/clk.c | 24 > 1 file changed, 12 insertions(+), 12 deletions(-) > > diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c > index 5ec580914089510a..b23e0249f0e3c634 100644 > --- a/drivers/clk/clk.c > +++ b/drivers/clk/clk.c > @@ -58,7 +58,7 @@ struct clk_core { > unsigned long new_rate; > struct clk_core *new_parent; > struct clk_core *new_child; > - unsigned long flags; > + unsigned intflags; This doesn't look good. > boolorphan; > unsigned intenable_count; > unsigned intprepare_count; > @@ -2600,43 +2600,43 @@ static int clk_debug_create_one(struct clk_core > *core, struct dentry *pdentry) > > core->dentry = d; > > - d = debugfs_create_u32("clk_rate", S_IRUGO, core->dentry, > - (u32 *)>rate); > + d = debugfs_create_ulong("clk_rate", S_IRUGO, core->dentry, > + >rate); As you're changing these lines, can you also change S_IRUGO to the octal values. 
That's the preferred style now. > if (!d) > goto err_out; > > - d = debugfs_create_u32("clk_accuracy", S_IRUGO, core->dentry, > - (u32 *)>accuracy); > + d = debugfs_create_ulong("clk_accuracy", S_IRUGO, core->dentry, > + >accuracy); > if (!d) > goto err_out; > > d = debugfs_create_u32("clk_phase", S_IRUGO, core->dentry, > - (u32 *)>phase); > +>phase); > if (!d) > goto err_out; > > d = debugfs_create_x32("clk_flags", S_IRUGO, core->dentry, > - (u32 *)>flags); > +>flags); Maybe we need a new debugfs API like debugfs_create_ulong_hex() or something that prints out an unsigned long as a hex value? Probably we should change it to pretty print the values and what they correspond to, with words, because that's the least confusing thing to do with regards to endianness. So the clk_flags file would have something like CLK_SET_RATE_PARENT CLK_SET_RATE_GATE if those flags are set. We don't care about ABI here either. This is debugfs. > @@ -3927,7 +3927,7 @@ static int parent_ready(struct device_node *np) > * of_clk_detect_critical() - set CLK_IS_CRITICAL flag from Device Tree > * @np: Device node pointer associated with clock provider > * @index: clock index > - * @flags: pointer to clk_core->flags > + * @flags: pointer to core clock flags Please split this off into another patch. -- Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
Re: [Intel-gfx] Graphics on thinkpad x270 after dock/undock works only for the first time (CPU pipe B FIFO underrun)
On Sat, Dec 30, 2017 at 12:53:58PM +, Jiri Kosina wrote: > On Sat, 30 Dec 2017, Jiri Kosina wrote: > > > Seems like disabling RC6 on the kernel command line works this around, and > > I can dock / undock several times in a row with the image always coming > > up properly on the external display. > > > > On the first undock, the WARN_ONCE() below triggers, so I believe each > > undock leaks memory. > > > > [ 38.755084] Failed to release pages: bind_count=1, pages_pin_count=1, > > pin_global=0 > > [ 38.755138] WARNING: CPU: 3 PID: 96 at > > ../drivers/gpu/drm/i915/i915_gem_userptr.c:89 cancel_userptr+0xe5/0xf0 > > [i915] > > OK, I am seeing this warning with current Linus' tree (5aa90a845) even > without any attempt to dock/undock, so it's probably unrelated to external > outputs and it only by coincidence appeared originally at the same time I > docked the machine. > > So there are two separate issues on this machine with latest kernel > (neither of them probably being regression): > > - I have to disable i915 RC6 at the kernel cmdline, otherwise external > (dock) display gets output only randomly (seems like always only on > first dock) Joonas, Chris, time to bring rc6_enable back on next-fixes before we remove this support entirely? > > - the warning, which triggers at not really deterministic time after boot, > but usually rather quickly Jiri, could you please report these issues separately on bugs.freedesktop.org? Are they regressions? Possible bisect? Please attach the dmesg booting with drm.debug=0x1e > > -- > Jiri Kosina > SUSE Labs > > ___ > Intel-gfx mailing list > intel-...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] Graphics on thinkpad x270 after dock/undock works only for the first time (CPU pipe B FIFO underrun)
On Sat, Dec 30, 2017 at 12:53:58PM +, Jiri Kosina wrote: > On Sat, 30 Dec 2017, Jiri Kosina wrote: > > > Seems like disabling RC6 on the kernel command line works this around, and > > I can dock / undock several times in a row with the image always coming > > up properly on the external display. > > > > On the first undock, the WARN_ONCE() below triggers, so I believe each > > undock leaks memory. > > > > [ 38.755084] Failed to release pages: bind_count=1, pages_pin_count=1, > > pin_global=0 > > [ 38.755138] WARNING: CPU: 3 PID: 96 at > > ../drivers/gpu/drm/i915/i915_gem_userptr.c:89 cancel_userptr+0xe5/0xf0 > > [i915] > > OK, I am seeing this warning with current Linus' tree (5aa90a845) even > without any attempt to dock/undock, so it's probably unrelated to external > outputs and it only by coincidence appeared originally at the same time I > docked the machine. > > So there are two separate issues on this machine with latest kernel > (neither of them probably being regression): > > - I have to disable i915 RC6 at the kernel cmdline, otherwise external > (dock) display gets output only randomly (seems like always only on > first dock) Joonas, Chris, time to bring rc6_enable back on next-fixes before we remove this support entirely? > > - the warning, which triggers at not really deterministic time after boot, > but usually rather quickly Jiri, could you please report these issues separately on bugs.freedesktop.org? Are they regressions? Possible bisect? Please attach the dmesg booting with drm.debug=0x1e > > -- > Jiri Kosina > SUSE Labs > > ___ > Intel-gfx mailing list > intel-...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [PATCH 2/3] dt-bindings: mtd: atmel-quadspi: add an optional property 'dmacap,memcpy'
On Tue, 2018-01-02 at 11:22 +0100, Ludovic Desroches wrote: > On Wed, Dec 27, 2017 at 10:40:00PM +0100, Cyrille Pitchen wrote: > > > Or maybe no change at all is required at the at_xdmac.c driver side: we > > just don't care about the provided flags in the "dmas" property, especially > > the "peripheral id". They would be ignored anyway when the atmel-quadspi.c > > driver later calls dmaengine_prep_dma_memcpy(). So I could simply set the > > dma cells to 0 in the device-tree? > > > > Ludovic, what do you think about that ? > > It may work but I won't do this. Usually, channels requested through the xlate > function have their capabilities set to DMA_SLAVE and not DMA_MEMCPY. > In the at_xdmac case, it won't be an issue but if you have a controller > which has channels which can support only mem-to-mem or peripheral, it > won't work. Maybe one could create an "AT91_XDMAC_DT_" macro to indicate a memcpy channel. There are still unused bits for another flag. It also looks like at_xdmac uses peripheral id 0x3f for memcpy transfers (will that work with memcpy DMA on multiple channels at the same time?). So perhaps perid 0x3f could be the indication of wanting a memcpy channel, rather than another flag bit. But however it's done, one writes: dmas = < AT91_XDMAC_DT_MEMCPY>; dma-names = "rx-tx"; I think one could have the quadspi driver automatically fill in the dma cell in the dma specifier if it is not present in the device tree. So one could write "dmas = <>" and the driver adds the AT91_XDMAC_DT_MEMCPY cell before xlating. I'm not sure if that's a good idea or not.
Re: [PATCH 2/3] dt-bindings: mtd: atmel-quadspi: add an optional property 'dmacap,memcpy'
On Tue, 2018-01-02 at 11:22 +0100, Ludovic Desroches wrote: > On Wed, Dec 27, 2017 at 10:40:00PM +0100, Cyrille Pitchen wrote: > > > Or maybe no change at all is required at the at_xdmac.c driver side: we > > just don't care about the provided flags in the "dmas" property, especially > > the "peripheral id". They would be ignored anyway when the atmel-quadspi.c > > driver later calls dmaengine_prep_dma_memcpy(). So I could simply set the > > dma cells to 0 in the device-tree? > > > > Ludovic, what do you think about that ? > > It may work but I won't do this. Usually, channels requested through the xlate > function have their capabilities set to DMA_SLAVE and not DMA_MEMCPY. > In the at_xdmac case, it won't be an issue but if you have a controller > which has channels which can support only mem-to-mem or peripheral, it > won't work. Maybe one could create an "AT91_XDMAC_DT_" macro to indicate a memcpy channel. There are still unused bits for another flag. It also looks like at_xdmac uses peripheral id 0x3f for memcpy transfers (will that work with memcpy DMA on multiple channels at the same time?). So perhaps perid 0x3f could be the indication of wanting a memcpy channel, rather than another flag bit. But however it's done, one writes: dmas = < AT91_XDMAC_DT_MEMCPY>; dma-names = "rx-tx"; I think one could have the quadspi driver automatically fill in the dma cell in the dma specifier if it is not present in the device tree. So one could write "dmas = <>" and the driver adds the AT91_XDMAC_DT_MEMCPY cell before xlating. I'm not sure if that's a good idea or not.
Re: [PATCH v2 1/9] PCI: Regroup all PCI related entries into drivers/pci/Makefile
On Fri, Dec 29, 2017 at 09:21:56PM +0100, Cyrille Pitchen wrote: > Hi Bjorn, > > Le 28/12/2017 à 23:47, Bjorn Helgaas a écrit : > > On Mon, Dec 18, 2017 at 07:16:01PM +0100, Cyrille Pitchen wrote: > >> This patch cleans drivers/Makefile up by moving the pci/endpoint and > >> pci/dwc entries from drivers/Makefile into drivers/pci/Makefile. > > > > Thanks a lot for doing this! > > > > s/This patch cleans/Clean up/ > > > > updated for the next series > > > Speaking of cleanup, this Makefile has useless comments and blank > > lines. Maybe you could add a new patch to remove them and reorder it > > into a sensible order, with the Intel MID special case at the end and > > the host/dwc/cadence stuff together? > > > > OK, I'm working on it. So right now I'm trying to sort entries by > alphabetical order but the first test has just failed: kernel oops > when calling pcied_init(). Sorting isn't a big deal. The blank lines and useless comments do make it hard to scan, but if we can't sort them, I'm fine with that. But I *am* curious about the failure you observed. That sounds like we might have a dependency bug there and I'd like to fix that. I don't like to rely on link ordering because it's invisible in the source code. Can you post the reordering patch you used so I can investigate it? > I guess there are more hidden dependencies than expected, solved by the > link order. This cleanup might bit risky after all, especially since I > won't be able to test all combinations or with all possible hardwares. Bjorn
Re: [PATCH v2 1/9] PCI: Regroup all PCI related entries into drivers/pci/Makefile
On Fri, Dec 29, 2017 at 09:21:56PM +0100, Cyrille Pitchen wrote: > Hi Bjorn, > > Le 28/12/2017 à 23:47, Bjorn Helgaas a écrit : > > On Mon, Dec 18, 2017 at 07:16:01PM +0100, Cyrille Pitchen wrote: > >> This patch cleans drivers/Makefile up by moving the pci/endpoint and > >> pci/dwc entries from drivers/Makefile into drivers/pci/Makefile. > > > > Thanks a lot for doing this! > > > > s/This patch cleans/Clean up/ > > > > updated for the next series > > > Speaking of cleanup, this Makefile has useless comments and blank > > lines. Maybe you could add a new patch to remove them and reorder it > > into a sensible order, with the Intel MID special case at the end and > > the host/dwc/cadence stuff together? > > > > OK, I'm working on it. So right now I'm trying to sort entries by > alphabetical order but the first test has just failed: kernel oops > when calling pcied_init(). Sorting isn't a big deal. The blank lines and useless comments do make it hard to scan, but if we can't sort them, I'm fine with that. But I *am* curious about the failure you observed. That sounds like we might have a dependency bug there and I'd like to fix that. I don't like to rely on link ordering because it's invisible in the source code. Can you post the reordering patch you used so I can investigate it? > I guess there are more hidden dependencies than expected, solved by the > link order. This cleanup might bit risky after all, especially since I > won't be able to test all combinations or with all possible hardwares. Bjorn
Re: [PATCH] PM / runtime: Rework pm_runtime_force_suspend/resume()
On Tuesday, January 2, 2018 2:04:04 PM CET Lukas Wunner wrote: > On Tue, Jan 02, 2018 at 12:02:18PM +0100, Rafael J. Wysocki wrote: > > On Tue, Jan 2, 2018 at 11:51 AM, Lukas Wunner wrote: > > > On Tue, Jan 02, 2018 at 01:56:28AM +0100, Rafael J. Wysocki wrote: > > >> + if (atomic_read(&dev->power.usage_count) <= 1 && > > >> + atomic_read(&dev->power.child_count) == 0) > > >> + pm_runtime_set_suspended(dev); > > >> > > >> - pm_runtime_set_suspended(dev); > > > > > > The ->runtime_suspend callback *has* been executed at this point. > > > If the status is only updated conditionally, it may not reflect > > > the device's actual power state correctly. That doesn't seem to > > > be a good idea. > > > > It doesn't matter, because this is done with runtime PM disabled, isn't it? > > It might not make a difference for the use case I have in mind, but > pm_runtime_status_suspended() will return an incorrect result and is > called from 47 files in 4.15-rc6 according to lxr.free-electrons.com. Generally, the runtime PM status is only meaningful for devices with runtime PM enabled. There is an exception, which is during system suspend/resume, when runtime PM is automatically disabled by the core, but that only under certain assumptions. Basically, you have to assume that no one else will mess up with the device between the times you call pm_runtime_status_suspended() to check its runtime PM status (or between the first time you do that and the last time runtime PM has been enabled for the device). This patch doesn't change the situation in that respect. > > > The kerneldoc says: > > > > > > Typically this function may be invoked from a system suspend callback > > > to make sure the device is put into low power state. > > > > > > That portion is not modified by your patch. > > > > > > "Typically" implies that it's legal to call pm_runtime_force_suspend() in > > > *other* contexts than as a ->suspend hook. > > > > It should only be used during system suspend anyway, however. 
> > Then the kerneldoc is wrong. It isn't wrong. It may be incomplete, but the information in it is correct. And making it more complete is not part of this patch IMO. > > >> One addition that would be really helpful: pm_runtime_force_suspend() > > >> should also force-suspend all children and consumers of the given > > >> device. Likewise, those should be resumed on pm_runtime_force_resume(). > > >> Then I could just add a device link from the audio PCI device on the GPU > > >> to the graphics PCI device and just call pm_runtime_force_*() on the > > >> graphics device (supplier) to magically power them both off and on. > > > > > > Actually, the assumption is that pm_runtime_force_suspend() must be > > > called for the children before it is called for the parent even > > > without my patch, so it is just not going to work this way. > > > > Moreover, what if those devices have nonzero usage counters? There > > may be other reasons for that than just dependencies, like for example > > user space might have written "on" to their "control" files in sysfs. > > In that case pm_runtime_force_suspend() should return a negative errno. In which case it wouldn't be suitable for the system-wide PM callback role. > I envision amending control_store() so that "off" can be written to the > "control" file, allowing userspace to invoke pm_runtime_force_suspend() > to force certain devices into runtime suspend. But it isn't a good tool for the purpose which I'm trying to tell you. You need something else. > The user would get back > an error if the call failed for some reason (such as an active child or > consumer of the to be force-suspended device). That would be a clean > replacement for the ON/OFF options we currently have for the > vga_switcheroo debugfs control file. Well, it looks like you are looking for an interface to invoke pm_runtime_suspend() for the device as it behaves exactly the way you want. :-) Thanks, Rafael
Re: [PATCH] PM / runtime: Rework pm_runtime_force_suspend/resume()
On Tuesday, January 2, 2018 2:04:04 PM CET Lukas Wunner wrote: > On Tue, Jan 02, 2018 at 12:02:18PM +0100, Rafael J. Wysocki wrote: > > On Tue, Jan 2, 2018 at 11:51 AM, Lukas Wunner wrote: > > > On Tue, Jan 02, 2018 at 01:56:28AM +0100, Rafael J. Wysocki wrote: > > >> + if (atomic_read(&dev->power.usage_count) <= 1 && > > >> + atomic_read(&dev->power.child_count) == 0) > > >> + pm_runtime_set_suspended(dev); > > >> > > >> - pm_runtime_set_suspended(dev); > > > > > > The ->runtime_suspend callback *has* been executed at this point. > > > If the status is only updated conditionally, it may not reflect > > > the device's actual power state correctly. That doesn't seem to > > > be a good idea. > > > > It doesn't matter, because this is done with runtime PM disabled, isn't it? > > It might not make a difference for the use case I have in mind, but > pm_runtime_status_suspended() will return an incorrect result and is > called from 47 files in 4.15-rc6 according to lxr.free-electrons.com. Generally, the runtime PM status is only meaningful for devices with runtime PM enabled. There is an exception, which is during system suspend/resume, when runtime PM is automatically disabled by the core, but that only under certain assumptions. Basically, you have to assume that no one else will mess up with the device between the times you call pm_runtime_status_suspended() to check its runtime PM status (or between the first time you do that and the last time runtime PM has been enabled for the device). This patch doesn't change the situation in that respect. > > > The kerneldoc says: > > > > > > Typically this function may be invoked from a system suspend callback > > > to make sure the device is put into low power state. > > > > > > That portion is not modified by your patch. > > > > > > "Typically" implies that it's legal to call pm_runtime_force_suspend() in > > > *other* contexts than as a ->suspend hook. > > > > It should only be used during system suspend anyway, however. 
> > Then the kerneldoc is wrong. It isn't wrong. It may be incomplete, but the information in it is correct. And making it more complete is not part of this patch IMO. > > >> One addition that would be really helpful: pm_runtime_force_suspend() > > >> should also force-suspend all children and consumers of the given > > >> device. Likewise, those should be resumed on pm_runtime_force_resume(). > > >> Then I could just add a device link from the audio PCI device on the GPU > > >> to the graphics PCI device and just call pm_runtime_force_*() on the > > >> graphics device (supplier) to magically power them both off and on. > > > > > > Actually, the assumption is that pm_runtime_force_suspend() must be > > > called for the children before it is called for the parent even > > > without my patch, so it is just not going to work this way. > > > > Moreover, what if those devices have nonzero usage counters? There > > may be other reasons for that than just dependencies, like for example > > user space might have written "on" to their "control" files in sysfs. > > In that case pm_runtime_force_suspend() should return a negative errno. In which case it wouldn't be suitable for the system-wide PM callback role. > I envision amending control_store() so that "off" can be written to the > "control" file, allowing userspace to invoke pm_runtime_force_suspend() > to force certain devices into runtime suspend. But it isn't a good tool for the purpose which I'm trying to tell you. You need something else. > The user would get back > an error if the call failed for some reason (such as an active child or > consumer of the to be force-suspended device). That would be a clean > replacement for the ON/OFF options we currently have for the > vga_switcheroo debugfs control file. Well, it looks like you are looking for an interface to invoke pm_runtime_suspend() for the device as it behaves exactly the way you want. :-) Thanks, Rafael
Re: [PATCH v2 4/6] clk: ingenic: Add JZ47xx TCU clocks driver
On 01/01, Paul Cercueil wrote: > diff --git a/drivers/clk/ingenic/tcu.c b/drivers/clk/ingenic/tcu.c > new file mode 100644 > index 000000000000..36afe3f02f91 > --- /dev/null > +++ b/drivers/clk/ingenic/tcu.c > @@ -0,0 +1,336 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Ingenic JZ47xx SoC TCU clocks driver > + * Copyright (C) 2018 Paul Cercueil > + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include Used? > + > +#include > + > +enum ingenic_version { > + ID_JZ4740, > + ID_JZ4770, > + ID_JZ4780, > +}; > + > +struct ingenic_tcu { > + struct device_node *np; Is this used? > + struct regmap *map; > + > + struct clk_onecell_data clocks; > +}; > + > +struct ingenic_tcu_clk_info { > + struct clk_init_data init_data; > + u8 gate_bit; > + u8 tcsr_reg; > +}; > + > +struct ingenic_tcu_clk { > + struct clk_hw hw; > + > + struct ingenic_tcu *tcu; > + const struct ingenic_tcu_clk_info *info; > + > + unsigned int idx; > +}; > + > +#define to_tcu_clk(_hw) container_of(_hw, struct ingenic_tcu_clk, hw) > + > +static int ingenic_tcu_enable(struct clk_hw *hw) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + const struct ingenic_tcu_clk_info *info = tcu_clk->info; > + struct ingenic_tcu *tcu = tcu_clk->tcu; > + > + regmap_write(tcu->map, REG_TSCR, BIT(info->gate_bit)); > + return 0; > +} > + > +static void ingenic_tcu_disable(struct clk_hw *hw) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + struct ingenic_tcu *tcu = tcu_clk->tcu; > + const struct ingenic_tcu_clk_info *info = tcu_clk->info; > + > + regmap_write(tcu->map, REG_TSSR, BIT(info->gate_bit)); > +} > + > +static int ingenic_tcu_is_enabled(struct clk_hw *hw) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + struct ingenic_tcu *tcu = tcu_clk->tcu; > + const struct ingenic_tcu_clk_info *info = tcu_clk->info; > + unsigned int value; > + > + regmap_read(tcu->map, REG_TSR, &value); > + > + return !(value & BIT(info->gate_bit)); > +} > + > 
+static u8 ingenic_tcu_get_parent(struct clk_hw *hw) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + struct ingenic_tcu *tcu = tcu_clk->tcu; > + const struct ingenic_tcu_clk_info *info = tcu_clk->info; > + unsigned int val = 0; > + int ret; > + > + ret = regmap_read(tcu->map, info->tcsr_reg, &val); > + WARN_ONCE(ret < 0, "Unable to read TCSR %i", tcu_clk->idx); > + > + return (u8) ffs(val & TCSR_PARENT_CLOCK_MASK) - 1; Is the cast necessary? > +} > + > +static int ingenic_tcu_set_parent(struct clk_hw *hw, u8 idx) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + struct ingenic_tcu *tcu = tcu_clk->tcu; > + const struct ingenic_tcu_clk_info *info = tcu_clk->info; > + int ret; > + > + /* > + * Our clock provider has the CLK_SET_PARENT_GATE flag set, so we know > + * that the clk is in unprepared state. To be able to access TCSR > + * we must ungate the clock supply and we gate it again when done. > + */ > + > + regmap_write(tcu->map, REG_TSCR, BIT(info->gate_bit)); > + > + ret = regmap_update_bits(tcu->map, info->tcsr_reg, > + TCSR_PARENT_CLOCK_MASK, BIT(idx)); > + WARN_ONCE(ret < 0, "Unable to update TCSR %i", tcu_clk->idx); > + > + regmap_write(tcu->map, REG_TSSR, BIT(info->gate_bit)); > + > + return 0; > +} > + > +static unsigned long ingenic_tcu_recalc_rate(struct clk_hw *hw, > + unsigned long parent_rate) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + struct ingenic_tcu *tcu = tcu_clk->tcu; > + const struct ingenic_tcu_clk_info *info = tcu_clk->info; > + unsigned int prescale; > + int ret; > + > + ret = regmap_read(tcu->map, info->tcsr_reg, &prescale); > + WARN_ONCE(ret < 0, "Unable to read TCSR %i", tcu_clk->idx); > + > + prescale = (prescale & TCSR_PRESCALE_MASK) >> TCSR_PRESCALE_LSB; > + > + return parent_rate >> (prescale * 2); > +} > + > +static long ingenic_tcu_round_rate(struct clk_hw *hw, unsigned long req_rate, > + unsigned long *parent_rate) > +{ > + long rate = (long) *parent_rate; Is there a reason why rate is signed 
here? > + unsigned int shift; > + > + if (req_rate > rate) > + return -EINVAL; > + > + for (shift = 0; shift < 10; shift += 2) > + if ((rate >> shift) <= req_rate) > + return rate >> shift; > + > + return rate >> 10; Can it be? for (shift = 0; shift < 10; shift += 2) if ((rate >> shift) <= req_rate) break; return rate >> shift; > +} > + > +static int ingenic_tcu_set_rate(struct clk_hw *hw, unsigned long req_rate, > + unsigned long parent_rate) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + const
Re: [PATCH v2 4/6] clk: ingenic: Add JZ47xx TCU clocks driver
On 01/01, Paul Cercueil wrote: > diff --git a/drivers/clk/ingenic/tcu.c b/drivers/clk/ingenic/tcu.c > new file mode 100644 > index 000000000000..36afe3f02f91 > --- /dev/null > +++ b/drivers/clk/ingenic/tcu.c > @@ -0,0 +1,336 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Ingenic JZ47xx SoC TCU clocks driver > + * Copyright (C) 2018 Paul Cercueil > + */ > + > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include > +#include Used? > + > +#include > + > +enum ingenic_version { > + ID_JZ4740, > + ID_JZ4770, > + ID_JZ4780, > +}; > + > +struct ingenic_tcu { > + struct device_node *np; Is this used? > + struct regmap *map; > + > + struct clk_onecell_data clocks; > +}; > + > +struct ingenic_tcu_clk_info { > + struct clk_init_data init_data; > + u8 gate_bit; > + u8 tcsr_reg; > +}; > + > +struct ingenic_tcu_clk { > + struct clk_hw hw; > + > + struct ingenic_tcu *tcu; > + const struct ingenic_tcu_clk_info *info; > + > + unsigned int idx; > +}; > + > +#define to_tcu_clk(_hw) container_of(_hw, struct ingenic_tcu_clk, hw) > + > +static int ingenic_tcu_enable(struct clk_hw *hw) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + const struct ingenic_tcu_clk_info *info = tcu_clk->info; > + struct ingenic_tcu *tcu = tcu_clk->tcu; > + > + regmap_write(tcu->map, REG_TSCR, BIT(info->gate_bit)); > + return 0; > +} > + > +static void ingenic_tcu_disable(struct clk_hw *hw) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + struct ingenic_tcu *tcu = tcu_clk->tcu; > + const struct ingenic_tcu_clk_info *info = tcu_clk->info; > + > + regmap_write(tcu->map, REG_TSSR, BIT(info->gate_bit)); > +} > + > +static int ingenic_tcu_is_enabled(struct clk_hw *hw) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + struct ingenic_tcu *tcu = tcu_clk->tcu; > + const struct ingenic_tcu_clk_info *info = tcu_clk->info; > + unsigned int value; > + > + regmap_read(tcu->map, REG_TSR, &value); > + > + return !(value & BIT(info->gate_bit)); > +} > + > 
+static u8 ingenic_tcu_get_parent(struct clk_hw *hw) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + struct ingenic_tcu *tcu = tcu_clk->tcu; > + const struct ingenic_tcu_clk_info *info = tcu_clk->info; > + unsigned int val = 0; > + int ret; > + > + ret = regmap_read(tcu->map, info->tcsr_reg, &val); > + WARN_ONCE(ret < 0, "Unable to read TCSR %i", tcu_clk->idx); > + > + return (u8) ffs(val & TCSR_PARENT_CLOCK_MASK) - 1; Is the cast necessary? > +} > + > +static int ingenic_tcu_set_parent(struct clk_hw *hw, u8 idx) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + struct ingenic_tcu *tcu = tcu_clk->tcu; > + const struct ingenic_tcu_clk_info *info = tcu_clk->info; > + int ret; > + > + /* > + * Our clock provider has the CLK_SET_PARENT_GATE flag set, so we know > + * that the clk is in unprepared state. To be able to access TCSR > + * we must ungate the clock supply and we gate it again when done. > + */ > + > + regmap_write(tcu->map, REG_TSCR, BIT(info->gate_bit)); > + > + ret = regmap_update_bits(tcu->map, info->tcsr_reg, > + TCSR_PARENT_CLOCK_MASK, BIT(idx)); > + WARN_ONCE(ret < 0, "Unable to update TCSR %i", tcu_clk->idx); > + > + regmap_write(tcu->map, REG_TSSR, BIT(info->gate_bit)); > + > + return 0; > +} > + > +static unsigned long ingenic_tcu_recalc_rate(struct clk_hw *hw, > + unsigned long parent_rate) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + struct ingenic_tcu *tcu = tcu_clk->tcu; > + const struct ingenic_tcu_clk_info *info = tcu_clk->info; > + unsigned int prescale; > + int ret; > + > + ret = regmap_read(tcu->map, info->tcsr_reg, &prescale); > + WARN_ONCE(ret < 0, "Unable to read TCSR %i", tcu_clk->idx); > + > + prescale = (prescale & TCSR_PRESCALE_MASK) >> TCSR_PRESCALE_LSB; > + > + return parent_rate >> (prescale * 2); > +} > + > +static long ingenic_tcu_round_rate(struct clk_hw *hw, unsigned long req_rate, > + unsigned long *parent_rate) > +{ > + long rate = (long) *parent_rate; Is there a reason why rate is signed 
here? > + unsigned int shift; > + > + if (req_rate > rate) > + return -EINVAL; > + > + for (shift = 0; shift < 10; shift += 2) > + if ((rate >> shift) <= req_rate) > + return rate >> shift; > + > + return rate >> 10; Can it be? for (shift = 0; shift < 10; shift += 2) if ((rate >> shift) <= req_rate) break; return rate >> shift; > +} > + > +static int ingenic_tcu_set_rate(struct clk_hw *hw, unsigned long req_rate, > + unsigned long parent_rate) > +{ > + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw); > + const struct ingenic_tcu_clk_info
Re: [PATCH 4.9 00/75] 4.9.74-stable review
From: Neal Cardwell Date: Tue, 2 Jan 2018 14:11:25 -0500 > Looks like these 2 patches will cherry-pick cleanly if cherry-picked > in the following sequence, on top of 4.9.74-rc1, which already has > 6c9e73ef9aa7 ("tcp_bbr: record "full bw reached" decision in new > full_bw_reached bit"): > > $ git checkout linux-stable-rc/linux-4.9.y > > $ git cherry-pick 2f6c498e4f15 > Performing inexact rename detection: 100% (17803152/17803152), done. > [detached HEAD 0982234c57e1] tcp_bbr: reset full pipe detection on > loss recovery undo > Date: Thu Dec 7 12:43:31 2017 -0500 > 1 file changed, 4 insertions(+) > > $ git cherry-pick 600647d467c6 > Performing inexact rename detection: 100% (17803152/17803152), done. > [detached HEAD 7e866eccd083] tcp_bbr: reset long-term bandwidth > sampling on loss recovery undo > Date: Thu Dec 7 12:43:32 2017 -0500 > 1 file changed, 1 insertion(+) > > $ git log --oneline --decorate | head -3 > 7e866eccd083 (HEAD) tcp_bbr: reset long-term bandwidth sampling on > loss recovery undo > 0982234c57e1 tcp_bbr: reset full pipe detection on loss recovery undo > 79070be7f1ae (linux-stable-rc/linux-4.9.y) Linux 4.9.74-rc1 > > I verified that this compiles without warnings, and boots, and BBR works. > > Shall I prepare another version of these 2 patches, or do we think > this recipe will be sufficient? (Sorry I am not more familiar with the > backport-to-stable process.) If this works and Greg is OK with it, I am fine with it too.
Re: [PATCH 4.9 00/75] 4.9.74-stable review
From: Neal Cardwell Date: Tue, 2 Jan 2018 14:11:25 -0500 > Looks like these 2 patches will cherry-pick cleanly if cherry-picked > in the following sequence, on top of 4.9.74-rc1, which already has > 6c9e73ef9aa7 ("tcp_bbr: record "full bw reached" decision in new > full_bw_reached bit"): > > $ git checkout linux-stable-rc/linux-4.9.y > > $ git cherry-pick 2f6c498e4f15 > Performing inexact rename detection: 100% (17803152/17803152), done. > [detached HEAD 0982234c57e1] tcp_bbr: reset full pipe detection on > loss recovery undo > Date: Thu Dec 7 12:43:31 2017 -0500 > 1 file changed, 4 insertions(+) > > $ git cherry-pick 600647d467c6 > Performing inexact rename detection: 100% (17803152/17803152), done. > [detached HEAD 7e866eccd083] tcp_bbr: reset long-term bandwidth > sampling on loss recovery undo > Date: Thu Dec 7 12:43:32 2017 -0500 > 1 file changed, 1 insertion(+) > > $ git log --oneline --decorate | head -3 > 7e866eccd083 (HEAD) tcp_bbr: reset long-term bandwidth sampling on > loss recovery undo > 0982234c57e1 tcp_bbr: reset full pipe detection on loss recovery undo > 79070be7f1ae (linux-stable-rc/linux-4.9.y) Linux 4.9.74-rc1 > > I verified that this compiles without warnings, and boots, and BBR works. > > Shall I prepare another version of these 2 patches, or do we think > this recipe will be sufficient? (Sorry I am not more familiar with the > backport-to-stable process.) If this works and Greg is OK with it, I am fine with it too.
Re: [PATCH v2] drm/i915: Try EDID bitbanging on HDMI after failed read
On Sun, Dec 31, 2017 at 10:34:54PM +0000, Stefan Brüns wrote: > The ACK/NACK implementation as found in e.g. the G965 has the falling > clock edge and the release of the data line after the ACK for the received > byte happen at the same time. > > This is conformant with the I2C specification, which allows a zero hold > time, see footnote [3]: "A device must internally provide a hold time of > at least 300 ns for the SDA signal (with respect to the V IH(min) of the > SCL signal) to bridge the undefined region of the falling edge of SCL." > > Some HDMI-to-VGA converters apparently fail to adhere to this requirement > and latch SDA at the falling clock edge, so instead of an ACK > sometimes a NACK is read and the slave (i.e. the EDID ROM) ends the > transfer. > > The bitbanging releases the data line for the ACK only 1/4 bit time after > the falling clock edge, so a slave will see the correct value no matter > if it samples at the rising or the falling clock edge or in the center. > > Fallback to bitbanging is already done for the CRT connector. > > Bug: https://bugs.freedesktop.org/show_bug.cgi?id=92685 s/Bug:/Bugzilla: Did we get the confirmation that this also fixes the Skylake issue initially reported? 
> > Signed-off-by: Stefan Brüns > > --- > > Changes in v2: > - Fix/enhance commit message, no code changes > > drivers/gpu/drm/i915/intel_hdmi.c | 14 +++--- > 1 file changed, 11 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_hdmi.c > b/drivers/gpu/drm/i915/intel_hdmi.c > index 4dea833f9d1b..847cda4c017c 100644 > --- a/drivers/gpu/drm/i915/intel_hdmi.c > +++ b/drivers/gpu/drm/i915/intel_hdmi.c > @@ -1573,12 +1573,20 @@ intel_hdmi_set_edid(struct drm_connector *connector) > struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); > struct edid *edid; > bool connected = false; > + struct i2c_adapter *i2c; > > intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); > > - edid = drm_get_edid(connector, > - intel_gmbus_get_adapter(dev_priv, > - intel_hdmi->ddc_bus)); > + i2c = intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus); > + > + edid = drm_get_edid(connector, i2c); > + > + if (!edid && !intel_gmbus_is_forced_bit(i2c)) { > + DRM_DEBUG_KMS("HDMI GMBUS EDID read failed, retry using GPIO > bit-banging\n"); > + intel_gmbus_force_bit(i2c, true); > + edid = drm_get_edid(connector, i2c); > + intel_gmbus_force_bit(i2c, false); > + } Approach seems fine for this case. I just wonder what would be the risks of forcing this bit and edid read when nothing is present on the other end? > > intel_hdmi_dp_dual_mode_detect(connector, edid != NULL); > > -- > 2.15.1 > > ___ > dri-devel mailing list > dri-de...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH v2] drm/i915: Try EDID bitbanging on HDMI after failed read
On Sun, Dec 31, 2017 at 10:34:54PM +0000, Stefan Brüns wrote: > The ACK/NACK implementation as found in e.g. the G965 has the falling > clock edge and the release of the data line after the ACK for the received > byte happen at the same time. > > This is conformant with the I2C specification, which allows a zero hold > time, see footnote [3]: "A device must internally provide a hold time of > at least 300 ns for the SDA signal (with respect to the V IH(min) of the > SCL signal) to bridge the undefined region of the falling edge of SCL." > > Some HDMI-to-VGA converters apparently fail to adhere to this requirement > and latch SDA at the falling clock edge, so instead of an ACK > sometimes a NACK is read and the slave (i.e. the EDID ROM) ends the > transfer. > > The bitbanging releases the data line for the ACK only 1/4 bit time after > the falling clock edge, so a slave will see the correct value no matter > if it samples at the rising or the falling clock edge or in the center. > > Fallback to bitbanging is already done for the CRT connector. > > Bug: https://bugs.freedesktop.org/show_bug.cgi?id=92685 s/Bug:/Bugzilla: Did we get the confirmation that this also fixes the Skylake issue initially reported? 
> > Signed-off-by: Stefan Brüns > > --- > > Changes in v2: > - Fix/enhance commit message, no code changes > > drivers/gpu/drm/i915/intel_hdmi.c | 14 +++--- > 1 file changed, 11 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_hdmi.c > b/drivers/gpu/drm/i915/intel_hdmi.c > index 4dea833f9d1b..847cda4c017c 100644 > --- a/drivers/gpu/drm/i915/intel_hdmi.c > +++ b/drivers/gpu/drm/i915/intel_hdmi.c > @@ -1573,12 +1573,20 @@ intel_hdmi_set_edid(struct drm_connector *connector) > struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); > struct edid *edid; > bool connected = false; > + struct i2c_adapter *i2c; > > intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); > > - edid = drm_get_edid(connector, > - intel_gmbus_get_adapter(dev_priv, > - intel_hdmi->ddc_bus)); > + i2c = intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus); > + > + edid = drm_get_edid(connector, i2c); > + > + if (!edid && !intel_gmbus_is_forced_bit(i2c)) { > + DRM_DEBUG_KMS("HDMI GMBUS EDID read failed, retry using GPIO > bit-banging\n"); > + intel_gmbus_force_bit(i2c, true); > + edid = drm_get_edid(connector, i2c); > + intel_gmbus_force_bit(i2c, false); > + } Approach seems fine for this case. I just wonder what would be the risks of forcing this bit and edid read when nothing is present on the other end? > > intel_hdmi_dp_dual_mode_detect(connector, edid != NULL); > > -- > 2.15.1 > > ___ > dri-devel mailing list > dri-de...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH 4.9 00/75] 4.9.74-stable review
On Tue, Jan 2, 2018 at 1:32 PM, David Miller wrote: > From: Neal Cardwell > Date: Tue, 2 Jan 2018 11:57:59 -0500 > >> On Mon, Jan 1, 2018 at 9:31 AM, Greg Kroah-Hartman >> wrote: >>> This is the start of the stable review cycle for the 4.9.74 release. >>> There are 75 patches in this series, all will be posted as a response >>> to this one. If anyone has any issues with these being applied, please >>> let me know. >>> >>> Responses should be made by Wed Jan 3 14:00:03 UTC 2018. >>> Anything received after that time might be too late. >>> >>> The whole patch series can be found in one patch at: >>> kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.74-rc1.gz >>> or in the git tree and branch at: >>> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git >>> linux-4.9.y >>> and the diffstat can be found below. >> >> Hi Greg, >> >> In looking at the 4.9 and 4.14 patches yesterday, I noticed there were >> two TCP BBR fixes that made it into 4.14 but not 4.9. Doing an >> inventory of the TCP BBR fixes, AFAICT we have: >> >> c589e69b508d tcp_bbr: record "full bw reached" decision in new >> full_bw_reached bit >> - in 4.9 and 4.14 (great) >> >> 2f6c498e4f15 tcp_bbr: reset full pipe detection on loss recovery undo >> - in 4.14 (but not 4.9) >> >> 600647d467c6 tcp_bbr: reset long-term bandwidth sampling on loss recovery >> undo >> - in 4.14 (but not 4.9) >> >> Lacking the second and third patches in 4.9 will not cause any new >> problems, but it will miss out on some nice fixes. If it's possible to >> get 2f6c498e4f15 and 600647d467c6 either into 4.9.74 or 4.9.75, I >> would be very grateful. > > These were not straight-forward to backport and I felt the risk outweighed > the gains. > > If you want to do the backport yourself and you feel confident in it, > feel free. Thanks, Greg and David. 
Looks like these 2 patches will cherry-pick cleanly if cherry-picked in the following sequence, on top of 4.9.74-rc1, which already has 6c9e73ef9aa7 ("tcp_bbr: record "full bw reached" decision in new full_bw_reached bit"): $ git checkout linux-stable-rc/linux-4.9.y $ git cherry-pick 2f6c498e4f15 Performing inexact rename detection: 100% (17803152/17803152), done. [detached HEAD 0982234c57e1] tcp_bbr: reset full pipe detection on loss recovery undo Date: Thu Dec 7 12:43:31 2017 -0500 1 file changed, 4 insertions(+) $ git cherry-pick 600647d467c6 Performing inexact rename detection: 100% (17803152/17803152), done. [detached HEAD 7e866eccd083] tcp_bbr: reset long-term bandwidth sampling on loss recovery undo Date: Thu Dec 7 12:43:32 2017 -0500 1 file changed, 1 insertion(+) $ git log --oneline --decorate | head -3 7e866eccd083 (HEAD) tcp_bbr: reset long-term bandwidth sampling on loss recovery undo 0982234c57e1 tcp_bbr: reset full pipe detection on loss recovery undo 79070be7f1ae (linux-stable-rc/linux-4.9.y) Linux 4.9.74-rc1 I verified that this compiles without warnings, and boots, and BBR works. Shall I prepare another version of these 2 patches, or do we think this recipe will be sufficient? (Sorry I am not more familiar with the backport-to-stable process.) Thanks! neal
Re: [PATCH 4.9 00/75] 4.9.74-stable review
On Tue, Jan 2, 2018 at 1:32 PM, David Miller wrote: > From: Neal Cardwell > Date: Tue, 2 Jan 2018 11:57:59 -0500 > >> On Mon, Jan 1, 2018 at 9:31 AM, Greg Kroah-Hartman >> wrote: >>> This is the start of the stable review cycle for the 4.9.74 release. >>> There are 75 patches in this series, all will be posted as a response >>> to this one. If anyone has any issues with these being applied, please >>> let me know. >>> >>> Responses should be made by Wed Jan 3 14:00:03 UTC 2018. >>> Anything received after that time might be too late. >>> >>> The whole patch series can be found in one patch at: >>> kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.74-rc1.gz >>> or in the git tree and branch at: >>> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git >>> linux-4.9.y >>> and the diffstat can be found below. >> >> Hi Greg, >> >> In looking at the 4.9 and 4.14 patches yesterday, I noticed there were >> two TCP BBR fixes that made it into 4.14 but not 4.9. Doing an >> inventory of the TCP BBR fixes, AFAICT we have: >> >> c589e69b508d tcp_bbr: record "full bw reached" decision in new >> full_bw_reached bit >> - in 4.9 and 4.14 (great) >> >> 2f6c498e4f15 tcp_bbr: reset full pipe detection on loss recovery undo >> - in 4.14 (but not 4.9) >> >> 600647d467c6 tcp_bbr: reset long-term bandwidth sampling on loss recovery >> undo >> - in 4.14 (but not 4.9) >> >> Lacking the second and third patches in 4.9 will not cause any new >> problems, but it will miss out on some nice fixes. If it's possible to >> get 2f6c498e4f15 and 600647d467c6 either into 4.9.74 or 4.9.75, I >> would be very grateful. > > These were not straight-forward to backport and I felt the risk outweighed > the gains. > > If you want to do the backport yourself and you feel confident in it, > feel free. Thanks, Greg and David. 
Looks like these 2 patches will cherry-pick cleanly if cherry-picked in the following sequence, on top of 4.9.74-rc1, which already has 6c9e73ef9aa7 ("tcp_bbr: record "full bw reached" decision in new full_bw_reached bit"): $ git checkout linux-stable-rc/linux-4.9.y $ git cherry-pick 2f6c498e4f15 Performing inexact rename detection: 100% (17803152/17803152), done. [detached HEAD 0982234c57e1] tcp_bbr: reset full pipe detection on loss recovery undo Date: Thu Dec 7 12:43:31 2017 -0500 1 file changed, 4 insertions(+) $ git cherry-pick 600647d467c6 Performing inexact rename detection: 100% (17803152/17803152), done. [detached HEAD 7e866eccd083] tcp_bbr: reset long-term bandwidth sampling on loss recovery undo Date: Thu Dec 7 12:43:32 2017 -0500 1 file changed, 1 insertion(+) $ git log --oneline --decorate | head -3 7e866eccd083 (HEAD) tcp_bbr: reset long-term bandwidth sampling on loss recovery undo 0982234c57e1 tcp_bbr: reset full pipe detection on loss recovery undo 79070be7f1ae (linux-stable-rc/linux-4.9.y) Linux 4.9.74-rc1 I verified that this compiles without warnings, and boots, and BBR works. Shall I prepare another version of these 2 patches, or do we think this recipe will be sufficient? (Sorry I am not more familiar with the backport-to-stable process.) Thanks! neal
[GIT PULL rcu/next] RCU commits for 4.15
Hello, Ingo, and Happy New Year! This pull request contains the following changes: 1. Updates to use cond_resched() instead of cond_resched_rcu_qs() where feasible (currently everywhere except in kernel/rcu and in kernel/torture.c). Also a couple of fixes to avoid sending IPIs to offline CPUs. http://lkml.kernel.org/r/20171201192122.ga19...@linux.vnet.ibm.com 2. Updates to simplify RCU's dyntick-idle handling. http://lkml.kernel.org/r/20171201193625.ga20...@linux.vnet.ibm.com 3. Miscellaneous fixes. http://lkml.kernel.org/r/20171201194139.ga22...@linux.vnet.ibm.com 4. Updates to remove almost all uses of smp_read_barrier_depends() and read_barrier_depends(). http://lkml.kernel.org/r/20171201195053.ga23...@linux.vnet.ibm.com 5. Torture-test updates. http://lkml.kernel.org/r/20171201200819.ga25...@linux.vnet.ibm.com All of these changes have been subjected to 0day Test Robot and -next testing, and are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git for-mingo for you to fetch changes up to 1dfa55e01987288d847220b8c027204871440ed1: Merge branches 'cond_resched.2017.12.04a', 'dyntick.2017.11.28a', 'fixes.2017.12.11a', 'srbd.2017.12.05a' and 'torture.2017.12.11a' into HEAD (2017-12-11 09:21:58 -0800) Davidlohr Bueso (2): locking/locktorture: Fix rwsem reader_delay locking/locktorture: Fix num reader/writer corner cases Paul E. 
McKenney (59): rcu: Avoid ->dynticks_nmi_nesting store tearing rcu: Reduce dyntick-idle state space rcu: Move rcu_nmi_{enter,exit}() to prepare for consolidation rcu: Clamp ->dynticks_nmi_nesting at eqs entry/exit rcu: Define rcu_irq_{enter,exit}() in terms of rcu_nmi_{enter,exit}() rcu: Make ->dynticks_nesting be a simple counter rcu: Eliminate rcu_irq_enter_disabled() rcu: Add tracing to irq/NMI dyntick-idle transitions rcu: Shrink ->dynticks_{nmi_,}nesting from long long to long rcu: Add ->dynticks field to rcu_dyntick trace event rcu: Stop duplicating lockdep checks in RCU's idle-entry code rcu: Avoid ->dynticks_nesting store tearing rcu: Fold rcu_eqs_enter_common() into rcu_eqs_enter() rcu: Fold rcu_eqs_exit_common() into rcu_eqs_exit() rcu: Simplify rcu_eqs_{enter,exit}() non-idle task debug code doc: Update dyntick-idle design documentation for NMI/irq consolidation srcu: Prohibit call_srcu() use under raw spinlocks torture: Suppress CPU stall warnings during shutdown ftrace dump torture: Prepare scripting for shift from %p to %pK sched: Stop resched_cpu() from sending IPIs to offline CPUs sched: Stop switched_to_rt() from sending IPIs to offline CPUs netfilter: Eliminate cond_resched_rcu_qs() in favor of cond_resched() mm: Eliminate cond_resched_rcu_qs() in favor of cond_resched() workqueue: Eliminate cond_resched_rcu_qs() in favor of cond_resched() trace: Eliminate cond_resched_rcu_qs() in favor of cond_resched() softirq: Eliminate cond_resched_rcu_qs() in favor of cond_resched() fs: Eliminate cond_resched_rcu_qs() in favor of cond_resched() doc: Eliminate cond_resched_rcu_qs() in favor of cond_resched() rcu: Account for rcu_all_qs() in cond_resched() doc: READ_ONCE() now implies smp_barrier_depends() mn10300: READ_ONCE() now implies smp_read_barrier_depends() drivers/net/ethernet/qlogic/qed: Fix __qed_spq_block() ordering fs/dcache: Use release-acquire for name/length update percpu: READ_ONCE() now implies smp_read_barrier_depends() rcu: Adjust read-side 
accessor comments for READ_ONCE() rtnetlink: Update now-misleading smp_read_barrier_depends() comment seqlock: Remove now-redundant smp_read_barrier_depends() uprobes: Remove now-redundant smp_read_barrier_depends() locking: Remove smp_read_barrier_depends() from queued_spin_lock_slowpath() tracepoint: Remove smp_read_barrier_depends() from comment lib/assoc_array: Remove smp_read_barrier_depends() mm/ksm: Remove now-redundant smp_read_barrier_depends() netfilter: Remove now-redundant smp_read_barrier_depends() keyring: Remove now-redundant smp_read_barrier_depends() drivers/infiniband: Remove now-redundant smp_read_barrier_depends() drivers/dma/ioat: Remove now-redundant smp_read_barrier_depends() doc: De-emphasize smp_read_barrier_depends genetlink: Remove smp_read_barrier_depends() from comment netlink: Remove smp_read_barrier_depends() from comment checkpatch: Add warnings for {smp_,}read_barrier_depends() drivers/vhost: Remove now-redundant read_barrier_depends() rcu: Add comment giving debug strategy for double call_rcu() torture: Reduce #ifdefs for
[GIT PULL rcu/next] RCU commits for 4.15
Hello, Ingo, and Happy New Year! This pull request contains the following changes: 1. Updates to use cond_resched() instead of cond_resched_rcu_qs() where feasible (currently everywhere except in kernel/rcu and in kernel/torture.c). Also a couple of fixes to avoid sending IPIs to offline CPUs. http://lkml.kernel.org/r/20171201192122.ga19...@linux.vnet.ibm.com 2. Updates to simplify RCU's dyntick-idle handling. http://lkml.kernel.org/r/20171201193625.ga20...@linux.vnet.ibm.com 3. Miscellaneous fixes. http://lkml.kernel.org/r/20171201194139.ga22...@linux.vnet.ibm.com 4. Updates to remove almost all uses of smp_read_barrier_depends() and read_barrier_depends(). http://lkml.kernel.org/r/20171201195053.ga23...@linux.vnet.ibm.com 5. Torture-test updates. http://lkml.kernel.org/r/20171201200819.ga25...@linux.vnet.ibm.com All of these changes have been subjected to 0day Test Robot and -next testing, and are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git for-mingo for you to fetch changes up to 1dfa55e01987288d847220b8c027204871440ed1: Merge branches 'cond_resched.2017.12.04a', 'dyntick.2017.11.28a', 'fixes.2017.12.11a', 'srbd.2017.12.05a' and 'torture.2017.12.11a' into HEAD (2017-12-11 09:21:58 -0800) Davidlohr Bueso (2): locking/locktorture: Fix rwsem reader_delay locking/locktorture: Fix num reader/writer corner cases Paul E. 
McKenney (59): rcu: Avoid ->dynticks_nmi_nesting store tearing rcu: Reduce dyntick-idle state space rcu: Move rcu_nmi_{enter,exit}() to prepare for consolidation rcu: Clamp ->dynticks_nmi_nesting at eqs entry/exit rcu: Define rcu_irq_{enter,exit}() in terms of rcu_nmi_{enter,exit}() rcu: Make ->dynticks_nesting be a simple counter rcu: Eliminate rcu_irq_enter_disabled() rcu: Add tracing to irq/NMI dyntick-idle transitions rcu: Shrink ->dynticks_{nmi_,}nesting from long long to long rcu: Add ->dynticks field to rcu_dyntick trace event rcu: Stop duplicating lockdep checks in RCU's idle-entry code rcu: Avoid ->dynticks_nesting store tearing rcu: Fold rcu_eqs_enter_common() into rcu_eqs_enter() rcu: Fold rcu_eqs_exit_common() into rcu_eqs_exit() rcu: Simplify rcu_eqs_{enter,exit}() non-idle task debug code doc: Update dyntick-idle design documentation for NMI/irq consolidation srcu: Prohibit call_srcu() use under raw spinlocks torture: Suppress CPU stall warnings during shutdown ftrace dump torture: Prepare scripting for shift from %p to %pK sched: Stop resched_cpu() from sending IPIs to offline CPUs sched: Stop switched_to_rt() from sending IPIs to offline CPUs netfilter: Eliminate cond_resched_rcu_qs() in favor of cond_resched() mm: Eliminate cond_resched_rcu_qs() in favor of cond_resched() workqueue: Eliminate cond_resched_rcu_qs() in favor of cond_resched() trace: Eliminate cond_resched_rcu_qs() in favor of cond_resched() softirq: Eliminate cond_resched_rcu_qs() in favor of cond_resched() fs: Eliminate cond_resched_rcu_qs() in favor of cond_resched() doc: Eliminate cond_resched_rcu_qs() in favor of cond_resched() rcu: Account for rcu_all_qs() in cond_resched() doc: READ_ONCE() now implies smp_barrier_depends() mn10300: READ_ONCE() now implies smp_read_barrier_depends() drivers/net/ethernet/qlogic/qed: Fix __qed_spq_block() ordering fs/dcache: Use release-acquire for name/length update percpu: READ_ONCE() now implies smp_read_barrier_depends() rcu: Adjust read-side 
accessor comments for READ_ONCE() rtnetlink: Update now-misleading smp_read_barrier_depends() comment seqlock: Remove now-redundant smp_read_barrier_depends() uprobes: Remove now-redundant smp_read_barrier_depends() locking: Remove smp_read_barrier_depends() from queued_spin_lock_slowpath() tracepoint: Remove smp_read_barrier_depends() from comment lib/assoc_array: Remove smp_read_barrier_depends() mm/ksm: Remove now-redundant smp_read_barrier_depends() netfilter: Remove now-redundant smp_read_barrier_depends() keyring: Remove now-redundant smp_read_barrier_depends() drivers/infiniband: Remove now-redundant smp_read_barrier_depends() drivers/dma/ioat: Remove now-redundant smp_read_barrier_depends() doc: De-emphasize smp_read_barrier_depends genetlink: Remove smp_read_barrier_depends() from comment netlink: Remove smp_read_barrier_depends() from comment checkpatch: Add warnings for {smp_,}read_barrier_depends() drivers/vhost: Remove now-redundant read_barrier_depends() rcu: Add comment giving debug strategy for double call_rcu() torture: Reduce #ifdefs for
Re: [PATCH 0/3] Ktest: add email support
Hi Steve, did your mailer find all the patches? I made sure they all reply to the same mail ID this time. Thanks, Tim On 12/15/2017 03:20 PM, Tim Tianyang Chen wrote: This patch set will let users define a mailer, an email address and when to receive notifications during automated testings. Users need to setup the specified mailer prior to using this feature. Tim Tianyang Chen (3): Ktest: add email support Ktest: use dodie for critical falures Ktest: add email options to sample.config ktest.pl| 131 +--- sample.conf | 10 + 2 files changed, 109 insertions(+), 32 deletions(-)
Re: [PATCH 0/3] Ktest: add email support
Hi Steve, did your mailer find all the patches? I made sure they all reply to the same mail ID this time. Thanks, Tim On 12/15/2017 03:20 PM, Tim Tianyang Chen wrote: This patch set will let users define a mailer, an email address and when to receive notifications during automated testings. Users need to setup the specified mailer prior to using this feature. Tim Tianyang Chen (3): Ktest: add email support Ktest: use dodie for critical falures Ktest: add email options to sample.config ktest.pl| 131 +--- sample.conf | 10 + 2 files changed, 109 insertions(+), 32 deletions(-)
Re: [PATCH v2 0/4] Address error and recovery for AER and DPC
On 1/2/2018 2:02 PM, Bjorn Helgaas wrote: > I read that as suggesting that we should enable DPC support in Linux > if and only if we also enable AER. But I don't see anything in DPC > that looks like that. Should there be something there? Should DPC be > restructured so it's enabled and handled inside the AER driver instead > of being a separate driver? I think Keith posted a patch to do this. If firmware first is enabled, DPC init is skipped after his patch. Oza was able to plumb the DPC handling into error recovery callbacks of the portdrv since the portdrv layer already provides these facilities such as reset_link and resume. The way DPC and AER work is almost identical from AER portdrv perspective. I really like his plumbing. Putting DPC code into AER makes it more convoluted in my opinion. -- Sinan Kaya Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
Re: [PATCH v2 0/4] Address error and recovery for AER and DPC
On 1/2/2018 2:02 PM, Bjorn Helgaas wrote: > I read that as suggesting that we should enable DPC support in Linux > if and only if we also enable AER. But I don't see anything in DPC > that looks like that. Should there be something there? Should DPC be > restructured so it's enabled and handled inside the AER driver instead > of being a separate driver? I think Keith posted a patch to do this. If firmware first is enabled, DPC init is skipped after his patch. Oza was able to plumb the DPC handling into error recovery callbacks of the portdrv since the portdrv layer already provides these facilities such as reset_link and resume. The way DPC and AER work is almost identical from AER portdrv perspective. I really like his plumbing. Putting DPC code into AER makes it more convoluted in my opinion. -- Sinan Kaya Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.
Re: [PATCH v2 0/4] Address error and recovery for AER and DPC
On Tue, Jan 02, 2018 at 01:02:15PM -0600, Bjorn Helgaas wrote: > On Fri, Dec 29, 2017 at 12:54:15PM +0530, Oza Pawandeep wrote: > > This patch set brings in support for DPC and AER to co-exist and not to > > race for recovery. > > > > The current implementation of AER and error message broadcasting to the > > EP driver is tightly coupled and limited to AER service driver. > > It is important to factor out broadcasting and other link handling > > callbacks. So that not only when AER gets triggered, but also when DPC get > > triggered, or both get triggered simultaneously (for e.g. ERR_FATAL), > > callbacks are handled appropriately. > > having modularized the code, the race between AER and DPC is handled > > gracefully. > > for e.g. when DPC is active and kicked in, AER should not attempt to do > > recovery, because DPC takes care of it. > > High-level question: > > We have some convoluted code in negotiate_os_control() and > aer_service_init() that (I think) essentially disables AER unless the > platform firmware grants us permission to use it. > > The last implementation note in PCIe r3.1, sec 6.2.10 says > > DPC may be controlled in some configurations by platform firmware > and in other configurations by the operating system. DPC > functionality is strongly linked with the functionality in Advanced > Error Reporting. To avoid conflicts over whether platform firmware > or the operating system have control of DPC, it is recommended that > platform firmware and operating systems always link the control of > DPC to the control of Advanced Error Reporting. > > I read that as suggesting that we should enable DPC support in Linux > if and only if we also enable AER. But I don't see anything in DPC > that looks like that. Should there be something there? Should DPC be > restructured so it's enabled and handled inside the AER driver instead > of being a separate driver? Yes, I agree the two should be linked. 
I submitted a patch for that here, though driver responsibilities are still separate in this series: https://marc.info/?l=linux-pci&m=151371742225111&w=2
Re: [PATCH v2 0/4] Address error and recovery for AER and DPC
On Tue, Jan 02, 2018 at 01:02:15PM -0600, Bjorn Helgaas wrote: > On Fri, Dec 29, 2017 at 12:54:15PM +0530, Oza Pawandeep wrote: > > This patch set brings in support for DPC and AER to co-exist and not to > > race for recovery. > > > > The current implementation of AER and error message broadcasting to the > > EP driver is tightly coupled and limited to AER service driver. > > It is important to factor out broadcasting and other link handling > > callbacks. So that not only when AER gets triggered, but also when DPC get > > triggered, or both get triggered simultaneously (for e.g. ERR_FATAL), > > callbacks are handled appropriately. > > having modularized the code, the race between AER and DPC is handled > > gracefully. > > for e.g. when DPC is active and kicked in, AER should not attempt to do > > recovery, because DPC takes care of it. > > High-level question: > > We have some convoluted code in negotiate_os_control() and > aer_service_init() that (I think) essentially disables AER unless the > platform firmware grants us permission to use it. > > The last implementation note in PCIe r3.1, sec 6.2.10 says > > DPC may be controlled in some configurations by platform firmware > and in other configurations by the operating system. DPC > functionality is strongly linked with the functionality in Advanced > Error Reporting. To avoid conflicts over whether platform firmware > or the operating system have control of DPC, it is recommended that > platform firmware and operating systems always link the control of > DPC to the control of Advanced Error Reporting. > > I read that as suggesting that we should enable DPC support in Linux > if and only if we also enable AER. But I don't see anything in DPC > that looks like that. Should there be something there? Should DPC be > restructured so it's enabled and handled inside the AER driver instead > of being a separate driver? Yes, I agree the two should be linked. 
I submitted a patch for that here, though driver responsibilities are still separate in this series: https://marc.info/?l=linux-pci&m=151371742225111&w=2
Re: [PATCH v4 16/19] fs: only set S_VERSION when updating times if necessary
On Tue, 2018-01-02 at 17:50 +0100, Jan Kara wrote: > On Fri 22-12-17 07:05:53, Jeff Layton wrote: > > From: Jeff Layton > > > > We only really need to update i_version if someone has queried for it > > since we last incremented it. By doing that, we can avoid having to > > update the inode if the times haven't changed. > > > > If the times have changed, then we go ahead and forcibly increment the > > counter, under the assumption that we'll be going to the storage > > anyway, and the increment itself is relatively cheap. > > > > Signed-off-by: Jeff Layton > > --- > > fs/inode.c | 10 +++--- > > 1 file changed, 7 insertions(+), 3 deletions(-) > > > > diff --git a/fs/inode.c b/fs/inode.c > > index 19e72f500f71..2fa920188759 100644 > > --- a/fs/inode.c > > +++ b/fs/inode.c > > @@ -1635,17 +1635,21 @@ static int relatime_need_update(const struct path > > *path, struct inode *inode, > > int generic_update_time(struct inode *inode, struct timespec *time, int > > flags) > > { > > int iflags = I_DIRTY_TIME; > > + bool dirty = false; > > > > if (flags & S_ATIME) > > inode->i_atime = *time; > > if (flags & S_VERSION) > > - inode_inc_iversion(inode); > > + dirty |= inode_maybe_inc_iversion(inode, dirty); > > if (flags & S_CTIME) > > inode->i_ctime = *time; > > if (flags & S_MTIME) > > inode->i_mtime = *time; > > + if ((flags & (S_ATIME | S_CTIME | S_MTIME)) && > > + !(inode->i_sb->s_flags & SB_LAZYTIME)) > > + dirty = true; > > When you pass 'dirty' to inode_maybe_inc_iversion(), it is always false. > Maybe this condition should be at the beginning of the function? Once you > fix that the patch looks good so you can add: > > Reviewed-by: Jan Kara > Thanks for the review! I've fixed it in my tree. I'll not re-post the set unless I have to make another significant change or someone requests it. I did make one other change, and that was to drop the "const" qualifiers on the integer arguments in the new API. 
David Howells pointed out that they don't really help anything, and the prototypes look cleaner without them. This set is now in linux-next as well, so I'm going to try to get this merged into v4.16, assuming no problems between now and the merge window. -- Jeff Layton
Re: [PATCH v4 16/19] fs: only set S_VERSION when updating times if necessary
On Tue, 2018-01-02 at 17:50 +0100, Jan Kara wrote: > On Fri 22-12-17 07:05:53, Jeff Layton wrote: > > From: Jeff Layton > > > > We only really need to update i_version if someone has queried for it > > since we last incremented it. By doing that, we can avoid having to > > update the inode if the times haven't changed. > > > > If the times have changed, then we go ahead and forcibly increment the > > counter, under the assumption that we'll be going to the storage > > anyway, and the increment itself is relatively cheap. > > > > Signed-off-by: Jeff Layton > > --- > > fs/inode.c | 10 +++--- > > 1 file changed, 7 insertions(+), 3 deletions(-) > > > > diff --git a/fs/inode.c b/fs/inode.c > > index 19e72f500f71..2fa920188759 100644 > > --- a/fs/inode.c > > +++ b/fs/inode.c > > @@ -1635,17 +1635,21 @@ static int relatime_need_update(const struct path > > *path, struct inode *inode, > > int generic_update_time(struct inode *inode, struct timespec *time, int > > flags) > > { > > int iflags = I_DIRTY_TIME; > > + bool dirty = false; > > > > if (flags & S_ATIME) > > inode->i_atime = *time; > > if (flags & S_VERSION) > > - inode_inc_iversion(inode); > > + dirty |= inode_maybe_inc_iversion(inode, dirty); > > if (flags & S_CTIME) > > inode->i_ctime = *time; > > if (flags & S_MTIME) > > inode->i_mtime = *time; > > + if ((flags & (S_ATIME | S_CTIME | S_MTIME)) && > > + !(inode->i_sb->s_flags & SB_LAZYTIME)) > > + dirty = true; > > When you pass 'dirty' to inode_maybe_inc_iversion(), it is always false. > Maybe this condition should be at the beginning of the function? Once you > fix that the patch looks good so you can add: > > Reviewed-by: Jan Kara > Thanks for the review! I've fixed it in my tree. I'll not re-post the set unless I have to make another significant change or someone requests it. I did make one other change, and that was to drop the "const" qualifiers on the integer arguments in the new API. 
David Howells pointed out that they don't really help anything, and the prototypes look cleaner without them. This set is now in linux-next as well, so I'm going to try to get this merged into v4.16, assuming no problems between now and the merge window. -- Jeff Layton
Re: [PATCH v2 0/4] Address error and recovery for AER and DPC
On Fri, Dec 29, 2017 at 12:54:15PM +0530, Oza Pawandeep wrote: > This patch set brings in support for DPC and AER to co-exist and not to > race for recovery. > > The current implementation of AER and error message broadcasting to the > EP driver is tightly coupled and limited to AER service driver. > It is important to factor out broadcasting and other link handling > callbacks. So that not only when AER gets triggered, but also when DPC get > triggered, or both get triggered simultaneously (for e.g. ERR_FATAL), > callbacks are handled appropriately. > having modularized the code, the race between AER and DPC is handled > gracefully. > for e.g. when DPC is active and kicked in, AER should not attempt to do > recovery, because DPC takes care of it. High-level question: We have some convoluted code in negotiate_os_control() and aer_service_init() that (I think) essentially disables AER unless the platform firmware grants us permission to use it. The last implementation note in PCIe r3.1, sec 6.2.10 says DPC may be controlled in some configurations by platform firmware and in other configurations by the operating system. DPC functionality is strongly linked with the functionality in Advanced Error Reporting. To avoid conflicts over whether platform firmware or the operating system have control of DPC, it is recommended that platform firmware and operating systems always link the control of DPC to the control of Advanced Error Reporting. I read that as suggesting that we should enable DPC support in Linux if and only if we also enable AER. But I don't see anything in DPC that looks like that. Should there be something there? Should DPC be restructured so it's enabled and handled inside the AER driver instead of being a separate driver? Bjorn
Re: [PATCH v2 0/4] Address error and recovery for AER and DPC
On Fri, Dec 29, 2017 at 12:54:15PM +0530, Oza Pawandeep wrote: > This patch set brings in support for DPC and AER to co-exist and not to > race for recovery. > > The current implementation of AER and error message broadcasting to the > EP driver is tightly coupled and limited to AER service driver. > It is important to factor out broadcasting and other link handling > callbacks. So that not only when AER gets triggered, but also when DPC get > triggered, or both get triggered simultaneously (for e.g. ERR_FATAL), > callbacks are handled appropriately. > having modularized the code, the race between AER and DPC is handled > gracefully. > for e.g. when DPC is active and kicked in, AER should not attempt to do > recovery, because DPC takes care of it. High-level question: We have some convoluted code in negotiate_os_control() and aer_service_init() that (I think) essentially disables AER unless the platform firmware grants us permission to use it. The last implementation note in PCIe r3.1, sec 6.2.10 says DPC may be controlled in some configurations by platform firmware and in other configurations by the operating system. DPC functionality is strongly linked with the functionality in Advanced Error Reporting. To avoid conflicts over whether platform firmware or the operating system have control of DPC, it is recommended that platform firmware and operating systems always link the control of DPC to the control of Advanced Error Reporting. I read that as suggesting that we should enable DPC support in Linux if and only if we also enable AER. But I don't see anything in DPC that looks like that. Should there be something there? Should DPC be restructured so it's enabled and handled inside the AER driver instead of being a separate driver? Bjorn
Re: [PATCH 01/33] clk_ops: change round_rate() to return unsigned long
On 12/31, Bryan O'Donoghue wrote: > On 30/12/17 16:36, Mikko Perttunen wrote: > >FWIW, we had this problem some years ago with the Tegra CPU clock > >- then it was determined that a simpler solution was to have the > >determine_rate callback support unsigned long rates - so clock > >drivers that need to return rates higher than 2^31 can instead > >implement the determine_rate callback. That is what's currently > >implemented. > > > >Mikko > > Granted we could work around it but, having both zero and less than > zero indicate error means you can't support larger than LONG_MAX > which is I think worth fixing. > Ok. But can you implement the determine_rate op instead of the round_rate op for your clk? It's not a work-around, it's the preferred solution. That would allow rates larger than 2^31 for the clk without pushing through a change to all the drivers to express zero as "error" and non-zero as the rounded rate. I'm not entirely opposed to this approach, because we probably don't care to pass the particular error value from a clk provider to a clk consumer about what the error is. It's actually what we proposed as the solution for clk_round_rate() to return values larger than LONG_MAX to consumers. But doing that consumer API change or this provider side change is going to require us to evaluate all the consumers of these clks to make sure they don't check for some error value that's less than zero. This series does half the work, by changing the provider side, while ignoring the consumer side and any potential fallout of the less than zero to zero return value change. -- Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
Re: [PATCH 01/33] clk_ops: change round_rate() to return unsigned long
On 12/31, Bryan O'Donoghue wrote: > On 30/12/17 16:36, Mikko Perttunen wrote: > >FWIW, we had this problem some years ago with the Tegra CPU clock > >- then it was determined that a simpler solution was to have the > >determine_rate callback support unsigned long rates - so clock > >drivers that need to return rates higher than 2^31 can instead > >implement the determine_rate callback. That is what's currently > >implemented. > > > >Mikko > > Granted we could work around it but, having both zero and less than > zero indicate error means you can't support larger than LONG_MAX > which is I think worth fixing. > Ok. But can you implement the determine_rate op instead of the round_rate op for your clk? It's not a work-around, it's the preferred solution. That would allow rates larger than 2^31 for the clk without pushing through a change to all the drivers to express zero as "error" and non-zero as the rounded rate. I'm not entirely opposed to this approach, because we probably don't care to pass the particular error value from a clk provider to a clk consumer about what the error is. It's actually what we proposed as the solution for clk_round_rate() to return values larger than LONG_MAX to consumers. But doing that consumer API change or this provider side change is going to require us to evaluate all the consumers of these clks to make sure they don't check for some error value that's less than zero. This series does half the work, by changing the provider side, while ignoring the consumer side and any potential fallout of the less than zero to zero return value change. -- Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
[PATCH] alpha: fix crash if pthread_create races with signal delivery
On alpha, a process will crash if it attempts to start a thread and a signal is delivered at the same time. The crash can be reproduced with this program: https://cygwin.com/ml/cygwin/2014-11/msg00473.html The reason for the crash is this: * we call the clone syscall * we go to the function copy_process * copy process calls copy_thread_tls, it is a wrapper around copy_thread * copy_thread sets the tls pointer: childti->pcb.unique = regs->r20 * copy_thread sets regs->r20 to zero * we go back to copy_process * copy process checks "if (signal_pending(current))" and returns -ERESTARTNOINTR * the clone syscall is restarted, but this time, regs->r20 is zero, so the new thread is created with zero tls pointer * the new thread crashes in start_thread when attempting to access tls The comment in the code says that setting the register r20 is some compatibility with OSF/1. But OSF/1 doesn't use the CLONE_SETTLS flag, so we don't have to zero r20 if CLONE_SETTLS is set. This patch fixes the bug by zeroing regs->r20 only if CLONE_SETTLS is not set. Signed-off-by: Mikulas Patocka Cc: sta...@vger.kernel.org --- arch/alpha/kernel/process.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) Index: linux-stable/arch/alpha/kernel/process.c === --- linux-stable.orig/arch/alpha/kernel/process.c 2017-12-31 17:42:12.0 +0100 +++ linux-stable/arch/alpha/kernel/process.c 2018-01-02 18:06:24.0 +0100 @@ -265,12 +265,13 @@ copy_thread(unsigned long clone_flags, u application calling fork. */ if (clone_flags & CLONE_SETTLS) childti->pcb.unique = regs->r20; + else + regs->r20 = 0; /* OSF/1 has some strange fork() semantics. */ childti->pcb.usp = usp ?: rdusp(); *childregs = *regs; childregs->r0 = 0; childregs->r19 = 0; childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. */ - regs->r20 = 0; stack = ((struct switch_stack *) regs) - 1; *childstack = *stack; childstack->r26 = (unsigned long) ret_from_fork;
[PATCH] alpha: fix crash if pthread_create races with signal delivery
On alpha, a process will crash if it attempts to start a thread and a signal is delivered at the same time. The crash can be reproduced with this program: https://cygwin.com/ml/cygwin/2014-11/msg00473.html The reason for the crash is this: * we call the clone syscall * we go to the function copy_process * copy_process calls copy_thread_tls, it is a wrapper around copy_thread * copy_thread sets the tls pointer: childti->pcb.unique = regs->r20 * copy_thread sets regs->r20 to zero * we go back to copy_process * copy_process checks "if (signal_pending(current))" and returns -ERESTARTNOINTR * the clone syscall is restarted, but this time, regs->r20 is zero, so the new thread is created with zero tls pointer * the new thread crashes in start_thread when attempting to access tls The comment in the code says that setting the register r20 is some compatibility with OSF/1. But OSF/1 doesn't use the CLONE_SETTLS flag, so we don't have to zero r20 if CLONE_SETTLS is set. This patch fixes the bug by zeroing regs->r20 only if CLONE_SETTLS is not set. Signed-off-by: Mikulas Patocka Cc: sta...@vger.kernel.org --- arch/alpha/kernel/process.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) Index: linux-stable/arch/alpha/kernel/process.c === --- linux-stable.orig/arch/alpha/kernel/process.c 2017-12-31 17:42:12.0 +0100 +++ linux-stable/arch/alpha/kernel/process.c 2018-01-02 18:06:24.0 +0100 @@ -265,12 +265,13 @@ copy_thread(unsigned long clone_flags, u application calling fork. */ if (clone_flags & CLONE_SETTLS) childti->pcb.unique = regs->r20; + else + regs->r20 = 0; /* OSF/1 has some strange fork() semantics. */ childti->pcb.usp = usp ?: rdusp(); *childregs = *regs; childregs->r0 = 0; childregs->r19 = 0; childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. */ - regs->r20 = 0; stack = ((struct switch_stack *) regs) - 1; *childstack = *stack; childstack->r26 = (unsigned long) ret_from_fork;
Re: pci driver loads right after unload
[+cc Greg, linux-kernel] Hi Max, Thanks for the report! On Tue, Jan 02, 2018 at 01:50:23AM +0200, Max Gurtovoy wrote: > hi all, > I encountered a strange phenomenon using 2 different pci drivers > (nvme and mlx5_core) since 4.15-rc1: > when I try to unload the modules using "modprobe -r" cmd it calls > the .probe function right after calling the .remove function and the > module is not really unloaded. > I think there is some race condition because when I added a > msleep(1000) after "pci_unregister_driver(&nvme_driver);" (in the > nvme module testing, it also worked in the mlx5_core), the issue > seems to disappear. You say "since 4.15-rc1". Does that mean it's a regression? If so, what's the most recent kernel that does not have this problem? Worst case, you could bisect to find where it broke. I don't see anything obvious in the drivers/pci changes between v4.14 and v4.15-rc1. Module loading and driver binding is mostly driven by the driver core and udev. Maybe you could learn something with "udevadm monitor" or by turning on some of the debug in lib/kobject_uevent.c? Bjorn
Re: pci driver loads right after unload
[+cc Greg, linux-kernel] Hi Max, Thanks for the report! On Tue, Jan 02, 2018 at 01:50:23AM +0200, Max Gurtovoy wrote: > hi all, > I encountered a strange phenomenon using 2 different pci drivers > (nvme and mlx5_core) since 4.15-rc1: > when I try to unload the modules using "modprobe -r" cmd it calls > the .probe function right after calling the .remove function and the > module is not really unloaded. > I think there is some race condition because when I added a > msleep(1000) after "pci_unregister_driver(&nvme_driver);" (in the > nvme module testing, it also worked in the mlx5_core), the issue > seems to disappear. You say "since 4.15-rc1". Does that mean it's a regression? If so, what's the most recent kernel that does not have this problem? Worst case, you could bisect to find where it broke. I don't see anything obvious in the drivers/pci changes between v4.14 and v4.15-rc1. Module loading and driver binding is mostly driven by the driver core and udev. Maybe you could learn something with "udevadm monitor" or by turning on some of the debug in lib/kobject_uevent.c? Bjorn
perf test BPF failing on 4.15.0-rc6
Hi Wang, I just updated my machine to Fedora 27 and 4.15.0-rc6 and the only test failing for me is: [root@jouet linux]# perf test bpf 39: BPF filter: 39.1: Basic BPF filtering : FAILED! 39.2: BPF pinning : Skip 39.3: BPF prologue generation : Skip 39.4: BPF relocation checker : Skip [root@jouet linux]# I haven't checked but perhaps the problem is that SyS_epoll_wait seems to now be inlined in three places and perhaps the eBPF proggie is being added to just one of them? Seemingly relevant excerpt: Open Debuginfo file: /lib/modules/4.15.0-rc6/build/vmlinux Try to find probe point from debuginfo. Matched function: SyS_epoll_wait [2f40eb7] found inline addr: 0x812b6ff1 Probe point found: compat_SyS_epoll_pwait+129 found inline addr: 0x812b6de7 Probe point found: SyS_epoll_pwait+135 found inline addr: 0x812b6c80 Probe point found: SyS_epoll_wait+0 Found 3 probe_trace_events. - Arnaldo P.S.: Full -v output: [root@jouet linux]# uname -a Linux jouet 4.15.0-rc6 #4 SMP Tue Jan 2 14:30:53 -03 2018 x86_64 x86_64 x86_64 GNU/Linux [root@jouet linux]# gcc -v Using built-in specs. 
COLLECT_GCC=/usr/bin/gcc COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/7/lto-wrapper OFFLOAD_TARGET_NAMES=nvptx-none OFFLOAD_TARGET_DEFAULT=1 Target: x86_64-redhat-linux Configured with: ../configure --enable-bootstrap --enable-languages=c,c++,objc,obj-c++,fortran,ada,go,lto --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-shared --enable-threads=posix --enable-checking=release --enable-multilib --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-gcc-major-version-only --with-linker-hash-style=gnu --enable-plugin --enable-initfini-array --with-isl --enable-libmpx --enable-offload-targets=nvptx-none --without-cuda-driver --enable-gnu-indirect-function --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux Thread model: posix gcc version 7.2.1 20170915 (Red Hat 7.2.1-2) (GCC) [root@jouet linux]# clang -v clang version 6.0.0 (http://llvm.org/git/clang.git 56cc8f8880db2ebc433eeb6b6a707c101467a186) (http://llvm.org/git/llvm.git 3656d83960a4f3fedf6d8f19043abf52379f78c3) Target: x86_64-unknown-linux-gnu Thread model: posix InstalledDir: /usr/local/bin Found candidate GCC installation: /usr/lib/gcc/x86_64-redhat-linux/7 Selected GCC installation: /usr/lib/gcc/x86_64-redhat-linux/7 Candidate multilib: .;@m64 Candidate multilib: 32;@m32 Selected multilib: .;@m64 [root@jouet linux]# perf test -v BPF 39: BPF filter: 39.1: Basic BPF filtering : --- start --- test child forked, pid 24304 Kernel build dir is set to /lib/modules/4.15.0-rc6/build set env: KBUILD_DIR=/lib/modules/4.15.0-rc6/build unset env: KBUILD_OPTS include option is set to -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi 
-I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h set env: NR_CPUS=4 set env: LINUX_VERSION_CODE=0x40f00 set env: CLANG_EXEC=/usr/local/bin/clang set env: CLANG_OPTIONS=-xc set env: KERNEL_INC_OPTIONS= -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h set env: WORKING_DIR=/lib/modules/4.15.0-rc6/build set env: CLANG_SOURCE=- llvm compiling command template: echo '/* * bpf-script-example.c * Test basic LLVM building */ #ifndef LINUX_VERSION_CODE # error Need LINUX_VERSION_CODE # error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' #endif #define BPF_ANY 0 #define BPF_MAP_TYPE_ARRAY 2 #define BPF_FUNC_map_lookup_elem 1 #define BPF_FUNC_map_update_elem 2 static void *(*bpf_map_lookup_elem)(void *map, void *key) = (void *) BPF_FUNC_map_lookup_elem; static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) = (void *) BPF_FUNC_map_update_elem; struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def SEC("maps") flip_table = {
perf test BPF failing on 4.15.0-rc6
Hi Wang, I just updated my machine to Fedora 27 and 4.15.0-rc6 and the only test failing for me is: [root@jouet linux]# perf test bpf 39: BPF filter: 39.1: Basic BPF filtering : FAILED! 39.2: BPF pinning : Skip 39.3: BPF prologue generation : Skip 39.4: BPF relocation checker : Skip [root@jouet linux]# I haven't checked but perhaps the problem is that SyS_epoll_wait seems to now be inlined in three places and perhaps the eBPF proggie is being added to just one of them? Seemingly relevant excerpt: Open Debuginfo file: /lib/modules/4.15.0-rc6/build/vmlinux Try to find probe point from debuginfo. Matched function: SyS_epoll_wait [2f40eb7] found inline addr: 0x812b6ff1 Probe point found: compat_SyS_epoll_pwait+129 found inline addr: 0x812b6de7 Probe point found: SyS_epoll_pwait+135 found inline addr: 0x812b6c80 Probe point found: SyS_epoll_wait+0 Found 3 probe_trace_events. - Arnaldo P.S.: Full -v output: [root@jouet linux]# uname -a Linux jouet 4.15.0-rc6 #4 SMP Tue Jan 2 14:30:53 -03 2018 x86_64 x86_64 x86_64 GNU/Linux [root@jouet linux]# gcc -v Using built-in specs. 
COLLECT_GCC=/usr/bin/gcc COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/7/lto-wrapper OFFLOAD_TARGET_NAMES=nvptx-none OFFLOAD_TARGET_DEFAULT=1 Target: x86_64-redhat-linux Configured with: ../configure --enable-bootstrap --enable-languages=c,c++,objc,obj-c++,fortran,ada,go,lto --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-shared --enable-threads=posix --enable-checking=release --enable-multilib --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-gnu-unique-object --enable-linker-build-id --with-gcc-major-version-only --with-linker-hash-style=gnu --enable-plugin --enable-initfini-array --with-isl --enable-libmpx --enable-offload-targets=nvptx-none --without-cuda-driver --enable-gnu-indirect-function --with-tune=generic --with-arch_32=i686 --build=x86_64-redhat-linux Thread model: posix gcc version 7.2.1 20170915 (Red Hat 7.2.1-2) (GCC) [root@jouet linux]# clang -v clang version 6.0.0 (http://llvm.org/git/clang.git 56cc8f8880db2ebc433eeb6b6a707c101467a186) (http://llvm.org/git/llvm.git 3656d83960a4f3fedf6d8f19043abf52379f78c3) Target: x86_64-unknown-linux-gnu Thread model: posix InstalledDir: /usr/local/bin Found candidate GCC installation: /usr/lib/gcc/x86_64-redhat-linux/7 Selected GCC installation: /usr/lib/gcc/x86_64-redhat-linux/7 Candidate multilib: .;@m64 Candidate multilib: 32;@m32 Selected multilib: .;@m64 [root@jouet linux]# perf test -v BPF 39: BPF filter: 39.1: Basic BPF filtering : --- start --- test child forked, pid 24304 Kernel build dir is set to /lib/modules/4.15.0-rc6/build set env: KBUILD_DIR=/lib/modules/4.15.0-rc6/build unset env: KBUILD_OPTS include option is set to -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi 
-I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h set env: NR_CPUS=4 set env: LINUX_VERSION_CODE=0x40f00 set env: CLANG_EXEC=/usr/local/bin/clang set env: CLANG_OPTIONS=-xc set env: KERNEL_INC_OPTIONS= -nostdinc -isystem /usr/lib/gcc/x86_64-redhat-linux/7/include -I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated -I/home/acme/git/linux/include -I./include -I/home/acme/git/linux/arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi -I./include/generated/uapi -include /home/acme/git/linux/include/linux/kconfig.h set env: WORKING_DIR=/lib/modules/4.15.0-rc6/build set env: CLANG_SOURCE=- llvm compiling command template: echo '/* * bpf-script-example.c * Test basic LLVM building */ #ifndef LINUX_VERSION_CODE # error Need LINUX_VERSION_CODE # error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' #endif #define BPF_ANY 0 #define BPF_MAP_TYPE_ARRAY 2 #define BPF_FUNC_map_lookup_elem 1 #define BPF_FUNC_map_update_elem 2 static void *(*bpf_map_lookup_elem)(void *map, void *key) = (void *) BPF_FUNC_map_lookup_elem; static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) = (void *) BPF_FUNC_map_update_elem; struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def SEC("maps") flip_table = {