[PATCH v7 05/10] PCI: Add support for relative addressing in quirk tables

2018-01-02 Thread Ard Biesheuvel
Allow the PCI quirk tables to be emitted in a way that avoids absolute
references to the hook functions. This reduces the size of the entries,
and, more importantly, makes them invariant under runtime relocation
(e.g., for KASLR)

Acked-by: Bjorn Helgaas 
Signed-off-by: Ard Biesheuvel 
---
 drivers/pci/quirks.c | 13 ++---
 include/linux/pci.h  | 20 
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 10684b17d0bd..b6d51b4d5ce1 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3556,9 +3556,16 @@ static void pci_do_fixups(struct pci_dev *dev, struct 
pci_fixup *f,
 f->vendor == (u16) PCI_ANY_ID) &&
(f->device == dev->device ||
 f->device == (u16) PCI_ANY_ID)) {
-   calltime = fixup_debug_start(dev, f->hook);
-   f->hook(dev);
-   fixup_debug_report(dev, calltime, f->hook);
+   void (*hook)(struct pci_dev *dev);
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+   hook = (void *)((unsigned long)&f->hook_offset +
+   f->hook_offset);
+#else
+   hook = f->hook;
+#endif
+   calltime = fixup_debug_start(dev, hook);
+   hook(dev);
+   fixup_debug_report(dev, calltime, hook);
}
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c170c9250c8b..086c3965710b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1792,7 +1792,11 @@ struct pci_fixup {
u16 device; /* You can use PCI_ANY_ID here of course */
u32 class;  /* You can use PCI_ANY_ID here too */
unsigned int class_shift;   /* should be 0, 8, 16 */
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+   int hook_offset;
+#else
void (*hook)(struct pci_dev *dev);
+#endif
 };
 
 enum pci_fixup_pass {
@@ -1806,12 +1810,28 @@ enum pci_fixup_pass {
pci_fixup_suspend_late, /* pci_device_suspend_late() */
 };
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,  \
+   class_shift, hook)  \
+   __ADDRESSABLE(hook) \
+   asm(".section " #sec ", \"a\"   \n" \
+   ".balign 16 \n" \
+   ".short "   #vendor ", " #device "  \n" \
+   ".long "#class ", " #class_shift "  \n" \
+   ".long "VMLINUX_SYMBOL_STR(hook) " - .  \n" \
+   ".previous  \n");
+#define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,\
+ class_shift, hook)\
+   __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,   \
+ class_shift, hook)
+#else
 /* Anonymous variables would be nice... */
 #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class,
\
  class_shift, hook)\
static const struct pci_fixup __PASTE(__pci_fixup_##name,__LINE__) 
__used   \
__attribute__((__section__(#section), aligned((sizeof(void *)))))\
= { vendor, device, class, class_shift, hook };
+#endif
 
 #define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class,   \
 class_shift, hook) \
-- 
2.11.0



[PATCH v7 05/10] PCI: Add support for relative addressing in quirk tables

2018-01-02 Thread Ard Biesheuvel
Allow the PCI quirk tables to be emitted in a way that avoids absolute
references to the hook functions. This reduces the size of the entries,
and, more importantly, makes them invariant under runtime relocation
(e.g., for KASLR)

Acked-by: Bjorn Helgaas 
Signed-off-by: Ard Biesheuvel 
---
 drivers/pci/quirks.c | 13 ++---
 include/linux/pci.h  | 20 
 2 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 10684b17d0bd..b6d51b4d5ce1 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3556,9 +3556,16 @@ static void pci_do_fixups(struct pci_dev *dev, struct 
pci_fixup *f,
 f->vendor == (u16) PCI_ANY_ID) &&
(f->device == dev->device ||
 f->device == (u16) PCI_ANY_ID)) {
-   calltime = fixup_debug_start(dev, f->hook);
-   f->hook(dev);
-   fixup_debug_report(dev, calltime, f->hook);
+   void (*hook)(struct pci_dev *dev);
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+   hook = (void *)((unsigned long)&f->hook_offset +
+   f->hook_offset);
+#else
+   hook = f->hook;
+#endif
+   calltime = fixup_debug_start(dev, hook);
+   hook(dev);
+   fixup_debug_report(dev, calltime, hook);
}
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c170c9250c8b..086c3965710b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1792,7 +1792,11 @@ struct pci_fixup {
u16 device; /* You can use PCI_ANY_ID here of course */
u32 class;  /* You can use PCI_ANY_ID here too */
unsigned int class_shift;   /* should be 0, 8, 16 */
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+   int hook_offset;
+#else
void (*hook)(struct pci_dev *dev);
+#endif
 };
 
 enum pci_fixup_pass {
@@ -1806,12 +1810,28 @@ enum pci_fixup_pass {
pci_fixup_suspend_late, /* pci_device_suspend_late() */
 };
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,  \
+   class_shift, hook)  \
+   __ADDRESSABLE(hook) \
+   asm(".section " #sec ", \"a\"   \n" \
+   ".balign 16 \n" \
+   ".short "   #vendor ", " #device "  \n" \
+   ".long "#class ", " #class_shift "  \n" \
+   ".long "VMLINUX_SYMBOL_STR(hook) " - .  \n" \
+   ".previous  \n");
+#define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,\
+ class_shift, hook)\
+   __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,   \
+ class_shift, hook)
+#else
 /* Anonymous variables would be nice... */
 #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class,
\
  class_shift, hook)\
static const struct pci_fixup __PASTE(__pci_fixup_##name,__LINE__) 
__used   \
__attribute__((__section__(#section), aligned((sizeof(void *)))))\
= { vendor, device, class, class_shift, hook };
+#endif
 
 #define DECLARE_PCI_FIXUP_CLASS_EARLY(vendor, device, class,   \
 class_shift, hook) \
-- 
2.11.0



Re: [RESEND PATCH v2 08/15] ASoC: qcom: q6asm: add support to audio stream apis

2018-01-02 Thread Bjorn Andersson
On Thu 14 Dec 09:33 PST 2017, srinivas.kandaga...@linaro.org wrote:

> From: Srinivas Kandagatla 
> 
> This patch adds support to open, write and media format commands
> in the q6asm module.
> 
> Signed-off-by: Srinivas Kandagatla 
> ---
>  sound/soc/qcom/qdsp6/q6asm.c | 530 
> ++-
>  sound/soc/qcom/qdsp6/q6asm.h |  42 
>  2 files changed, 571 insertions(+), 1 deletion(-)
> 
> diff --git a/sound/soc/qcom/qdsp6/q6asm.c b/sound/soc/qcom/qdsp6/q6asm.c
> index 4be92441f524..dabd6509ef99 100644
> --- a/sound/soc/qcom/qdsp6/q6asm.c
> +++ b/sound/soc/qcom/qdsp6/q6asm.c
> @@ -8,16 +8,34 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
>  #include "q6asm.h"
>  #include "common.h"
>  
> +#define ASM_STREAM_CMD_CLOSE 0x00010BCD
> +#define ASM_STREAM_CMD_FLUSH 0x00010BCE
> +#define ASM_SESSION_CMD_PAUSE 0x00010BD3
> +#define ASM_DATA_CMD_EOS 0x00010BDB
> +#define DEFAULT_POPP_TOPOLOGY 0x00010BE4
> +#define ASM_STREAM_CMD_FLUSH_READBUFS 0x00010C09
>  #define ASM_CMD_SHARED_MEM_MAP_REGIONS   0x00010D92
>  #define ASM_CMDRSP_SHARED_MEM_MAP_REGIONS 0x00010D93
>  #define ASM_CMD_SHARED_MEM_UNMAP_REGIONS 0x00010D94
> -
> +#define ASM_DATA_CMD_MEDIA_FMT_UPDATE_V2 0x00010D98
> +#define ASM_DATA_EVENT_WRITE_DONE_V2 0x00010D99
> +#define ASM_SESSION_CMD_RUN_V2   0x00010DAA
> +#define ASM_MEDIA_FMT_MULTI_CHANNEL_PCM_V2   0x00010DA5
> +#define ASM_DATA_CMD_WRITE_V2 0x00010DAB
> +#define ASM_SESSION_CMD_SUSPEND  0x00010DEC
> +#define ASM_STREAM_CMD_OPEN_WRITE_V3 0x00010DB3
> +
> +#define ASM_LEGACY_STREAM_SESSION 0
> +#define ASM_END_POINT_DEVICE_MATRIX  0
> +#define DEFAULT_APP_TYPE 0
> +#define TUN_WRITE_IO_MODE 0x0008  /* tunnel read write mode */
>  #define TUN_READ_IO_MODE 0x0004  /* tunnel read write mode */
>  #define SYNC_IO_MODE 0x0001
>  #define ASYNC_IO_MODE 0x0002

Probably prettier to reorder these and make them Q6ASM_IO_MODE_xyz

[..]
>  
> +static int32_t q6asm_callback(struct apr_device *adev,

This callback is an extracted part of q6asm_srvc_callback(), can it be
given a more descriptive name?

> +   struct apr_client_data *data, int session_id)
> +{
> + struct audio_client *ac;// = (struct audio_client *)priv;
> + uint32_t token;
> + uint32_t *payload;
> + uint32_t wakeup_flag = 1;
> + uint32_t client_event = 0;
> + struct q6asm *q6asm = dev_get_drvdata(&adev->dev);
> +
> + if (data == NULL)
> + return -EINVAL;
> +
> + ac = q6asm_get_audio_client(q6asm, session_id);
> + if (!q6asm_is_valid_audio_client(ac))
> + return -EINVAL;
> +
> + payload = data->payload;
> +
> + if (data->opcode == APR_BASIC_RSP_RESULT) {

Move this into the switch.

> + token = data->token;
> + switch (payload[0]) {

This is again that common response struct.

> + case ASM_SESSION_CMD_PAUSE:
> + client_event = ASM_CLIENT_EVENT_CMD_PAUSE_DONE;
> + break;
> + case ASM_SESSION_CMD_SUSPEND:
> + client_event = ASM_CLIENT_EVENT_CMD_SUSPEND_DONE;
> + break;
> + case ASM_DATA_CMD_EOS:
> + client_event = ASM_CLIENT_EVENT_CMD_EOS_DONE;
> + break;
> + break;
> + case ASM_STREAM_CMD_FLUSH:
> + client_event = ASM_CLIENT_EVENT_CMD_FLUSH_DONE;
> + break;
> + case ASM_SESSION_CMD_RUN_V2:
> + client_event = ASM_CLIENT_EVENT_CMD_RUN_DONE;
> + break;
> +
> + case ASM_STREAM_CMD_FLUSH_READBUFS:
> + if (token != ac->session) {
> + dev_err(ac->dev, "session invalid\n");
> + return -EINVAL;
> + }
> + case ASM_STREAM_CMD_CLOSE:
> + client_event = ASM_CLIENT_EVENT_CMD_CLOSE_DONE;
> + break;
> + case ASM_STREAM_CMD_OPEN_WRITE_V3:
> + case ASM_DATA_CMD_MEDIA_FMT_UPDATE_V2:
> + if (payload[1] != 0) {
> + dev_err(ac->dev,
> + "cmd = 0x%x returned error = 0x%x\n",
> + payload[0], payload[1]);
> + if (wakeup_flag) {
> + ac->cmd_state = payload[1];
> + wake_up(&ac->cmd_wait);
> + }
> + return 0;
> +   

Re: [RESEND PATCH v2 08/15] ASoC: qcom: q6asm: add support to audio stream apis

2018-01-02 Thread Bjorn Andersson
On Thu 14 Dec 09:33 PST 2017, srinivas.kandaga...@linaro.org wrote:

> From: Srinivas Kandagatla 
> 
> This patch adds support to open, write and media format commands
> in the q6asm module.
> 
> Signed-off-by: Srinivas Kandagatla 
> ---
>  sound/soc/qcom/qdsp6/q6asm.c | 530 
> ++-
>  sound/soc/qcom/qdsp6/q6asm.h |  42 
>  2 files changed, 571 insertions(+), 1 deletion(-)
> 
> diff --git a/sound/soc/qcom/qdsp6/q6asm.c b/sound/soc/qcom/qdsp6/q6asm.c
> index 4be92441f524..dabd6509ef99 100644
> --- a/sound/soc/qcom/qdsp6/q6asm.c
> +++ b/sound/soc/qcom/qdsp6/q6asm.c
> @@ -8,16 +8,34 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
>  #include "q6asm.h"
>  #include "common.h"
>  
> +#define ASM_STREAM_CMD_CLOSE 0x00010BCD
> +#define ASM_STREAM_CMD_FLUSH 0x00010BCE
> +#define ASM_SESSION_CMD_PAUSE 0x00010BD3
> +#define ASM_DATA_CMD_EOS 0x00010BDB
> +#define DEFAULT_POPP_TOPOLOGY 0x00010BE4
> +#define ASM_STREAM_CMD_FLUSH_READBUFS 0x00010C09
>  #define ASM_CMD_SHARED_MEM_MAP_REGIONS   0x00010D92
>  #define ASM_CMDRSP_SHARED_MEM_MAP_REGIONS 0x00010D93
>  #define ASM_CMD_SHARED_MEM_UNMAP_REGIONS 0x00010D94
> -
> +#define ASM_DATA_CMD_MEDIA_FMT_UPDATE_V2 0x00010D98
> +#define ASM_DATA_EVENT_WRITE_DONE_V2 0x00010D99
> +#define ASM_SESSION_CMD_RUN_V2   0x00010DAA
> +#define ASM_MEDIA_FMT_MULTI_CHANNEL_PCM_V2   0x00010DA5
> +#define ASM_DATA_CMD_WRITE_V2 0x00010DAB
> +#define ASM_SESSION_CMD_SUSPEND  0x00010DEC
> +#define ASM_STREAM_CMD_OPEN_WRITE_V3 0x00010DB3
> +
> +#define ASM_LEGACY_STREAM_SESSION 0
> +#define ASM_END_POINT_DEVICE_MATRIX  0
> +#define DEFAULT_APP_TYPE 0
> +#define TUN_WRITE_IO_MODE 0x0008  /* tunnel read write mode */
>  #define TUN_READ_IO_MODE 0x0004  /* tunnel read write mode */
>  #define SYNC_IO_MODE 0x0001
>  #define ASYNC_IO_MODE 0x0002

Probably prettier to reorder these and make them Q6ASM_IO_MODE_xyz

[..]
>  
> +static int32_t q6asm_callback(struct apr_device *adev,

This callback is an extracted part of q6asm_srvc_callback(), can it be
given a more descriptive name?

> +   struct apr_client_data *data, int session_id)
> +{
> + struct audio_client *ac;// = (struct audio_client *)priv;
> + uint32_t token;
> + uint32_t *payload;
> + uint32_t wakeup_flag = 1;
> + uint32_t client_event = 0;
> + struct q6asm *q6asm = dev_get_drvdata(&adev->dev);
> +
> + if (data == NULL)
> + return -EINVAL;
> +
> + ac = q6asm_get_audio_client(q6asm, session_id);
> + if (!q6asm_is_valid_audio_client(ac))
> + return -EINVAL;
> +
> + payload = data->payload;
> +
> + if (data->opcode == APR_BASIC_RSP_RESULT) {

Move this into the switch.

> + token = data->token;
> + switch (payload[0]) {

This is again that common response struct.

> + case ASM_SESSION_CMD_PAUSE:
> + client_event = ASM_CLIENT_EVENT_CMD_PAUSE_DONE;
> + break;
> + case ASM_SESSION_CMD_SUSPEND:
> + client_event = ASM_CLIENT_EVENT_CMD_SUSPEND_DONE;
> + break;
> + case ASM_DATA_CMD_EOS:
> + client_event = ASM_CLIENT_EVENT_CMD_EOS_DONE;
> + break;
> + break;
> + case ASM_STREAM_CMD_FLUSH:
> + client_event = ASM_CLIENT_EVENT_CMD_FLUSH_DONE;
> + break;
> + case ASM_SESSION_CMD_RUN_V2:
> + client_event = ASM_CLIENT_EVENT_CMD_RUN_DONE;
> + break;
> +
> + case ASM_STREAM_CMD_FLUSH_READBUFS:
> + if (token != ac->session) {
> + dev_err(ac->dev, "session invalid\n");
> + return -EINVAL;
> + }
> + case ASM_STREAM_CMD_CLOSE:
> + client_event = ASM_CLIENT_EVENT_CMD_CLOSE_DONE;
> + break;
> + case ASM_STREAM_CMD_OPEN_WRITE_V3:
> + case ASM_DATA_CMD_MEDIA_FMT_UPDATE_V2:
> + if (payload[1] != 0) {
> + dev_err(ac->dev,
> + "cmd = 0x%x returned error = 0x%x\n",
> + payload[0], payload[1]);
> + if (wakeup_flag) {
> + ac->cmd_state = payload[1];
> + wake_up(&ac->cmd_wait);
> + }
> + return 0;
> + }
> + break;
> + 

Re: [PATCH 4.9 00/75] 4.9.74-stable review

2018-01-02 Thread Greg KH
On Tue, Jan 02, 2018 at 02:11:25PM -0500, Neal Cardwell wrote:
> On Tue, Jan 2, 2018 at 1:32 PM, David Miller  wrote:
> > From: Neal Cardwell 
> > Date: Tue, 2 Jan 2018 11:57:59 -0500
> >
> >> On Mon, Jan 1, 2018 at 9:31 AM, Greg Kroah-Hartman
> >>  wrote:
> >>> This is the start of the stable review cycle for the 4.9.74 release.
> >>> There are 75 patches in this series, all will be posted as a response
> >>> to this one.  If anyone has any issues with these being applied, please
> >>> let me know.
> >>>
> >>> Responses should be made by Wed Jan  3 14:00:03 UTC 2018.
> >>> Anything received after that time might be too late.
> >>>
> >>> The whole patch series can be found in one patch at:
> >>> kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.74-rc1.gz
> >>> or in the git tree and branch at:
> >>>   
> >>> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> >>> linux-4.9.y
> >>> and the diffstat can be found below.
> >>
> >> Hi Greg,
> >>
> >> In looking at the 4.9 and 4.14 patches yesterday, I noticed there were
> >> two TCP BBR fixes that made it into 4.14 but not 4.9. Doing an
> >> inventory of the TCP BBR fixes, AFAICT we have:
> >>
> >> c589e69b508d tcp_bbr: record "full bw reached" decision in new
> >> full_bw_reached bit
> >>  - in 4.9 and 4.14 (great)
> >>
> >> 2f6c498e4f15 tcp_bbr: reset full pipe detection on loss recovery undo
> >>   - in 4.14 (but not 4.9)
> >>
> >> 600647d467c6 tcp_bbr: reset long-term bandwidth sampling on loss recovery 
> >> undo
> >>   - in 4.14 (but not 4.9)
> >>
> >> Lacking the second and third patches in 4.9 will not cause any new
> >> problems, but it will miss out on some nice fixes. If it's possible to
> >> get  2f6c498e4f15 and 600647d467c6 either into 4.9.74 or 4.9.75, I
> >> would be very grateful.
> >
> > These were not straight-forward to backport and I felt the risk outweighed
> > the gains.
> >
> > If you want to do the backport yourself and you feel confident in it,
> > feel free.
> 
> Thanks, Greg and David. Looks like these 2 patches will cherry-pick
> cleanly if cherry-picked in the following sequence, on top of
> 4.9.74-rc1, which already has 6c9e73ef9aa7 ("tcp_bbr: record "full bw
> reached" decision in new full_bw_reached bit"):
> 
> $ git checkout linux-stable-rc/linux-4.9.y
> 
> $ git cherry-pick 2f6c498e4f15
> Performing inexact rename detection: 100% (17803152/17803152), done.
> [detached HEAD 0982234c57e1] tcp_bbr: reset full pipe detection on
> loss recovery undo
>  Date: Thu Dec 7 12:43:31 2017 -0500
>  1 file changed, 4 insertions(+)
> 
> $ git cherry-pick 600647d467c6
> Performing inexact rename detection: 100% (17803152/17803152), done.
> [detached HEAD 7e866eccd083] tcp_bbr: reset long-term bandwidth
> sampling on loss recovery undo
>  Date: Thu Dec 7 12:43:32 2017 -0500
>  1 file changed, 1 insertion(+)
> 
> $ git log --oneline --decorate | head -3
> 7e866eccd083 (HEAD) tcp_bbr: reset long-term bandwidth sampling on
> loss recovery undo
> 0982234c57e1 tcp_bbr: reset full pipe detection on loss recovery undo
> 79070be7f1ae (linux-stable-rc/linux-4.9.y) Linux 4.9.74-rc1
> 
> I verified that this compiles without warnings, and boots, and BBR works.
> 
> Shall I prepare another version of these 2 patches, or do we think
> this recipe will be sufficient? (Sorry I am not more familiar with the
> backport-to-stable process.)

That works, those two patches are now queued up for the next stable
release, thanks!

greg k-h


Re: [PATCH 4.9 00/75] 4.9.74-stable review

2018-01-02 Thread Greg KH
On Tue, Jan 02, 2018 at 02:11:25PM -0500, Neal Cardwell wrote:
> On Tue, Jan 2, 2018 at 1:32 PM, David Miller  wrote:
> > From: Neal Cardwell 
> > Date: Tue, 2 Jan 2018 11:57:59 -0500
> >
> >> On Mon, Jan 1, 2018 at 9:31 AM, Greg Kroah-Hartman
> >>  wrote:
> >>> This is the start of the stable review cycle for the 4.9.74 release.
> >>> There are 75 patches in this series, all will be posted as a response
> >>> to this one.  If anyone has any issues with these being applied, please
> >>> let me know.
> >>>
> >>> Responses should be made by Wed Jan  3 14:00:03 UTC 2018.
> >>> Anything received after that time might be too late.
> >>>
> >>> The whole patch series can be found in one patch at:
> >>> kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.74-rc1.gz
> >>> or in the git tree and branch at:
> >>>   
> >>> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> >>> linux-4.9.y
> >>> and the diffstat can be found below.
> >>
> >> Hi Greg,
> >>
> >> In looking at the 4.9 and 4.14 patches yesterday, I noticed there were
> >> two TCP BBR fixes that made it into 4.14 but not 4.9. Doing an
> >> inventory of the TCP BBR fixes, AFAICT we have:
> >>
> >> c589e69b508d tcp_bbr: record "full bw reached" decision in new
> >> full_bw_reached bit
> >>  - in 4.9 and 4.14 (great)
> >>
> >> 2f6c498e4f15 tcp_bbr: reset full pipe detection on loss recovery undo
> >>   - in 4.14 (but not 4.9)
> >>
> >> 600647d467c6 tcp_bbr: reset long-term bandwidth sampling on loss recovery 
> >> undo
> >>   - in 4.14 (but not 4.9)
> >>
> >> Lacking the second and third patches in 4.9 will not cause any new
> >> problems, but it will miss out on some nice fixes. If it's possible to
> >> get  2f6c498e4f15 and 600647d467c6 either into 4.9.74 or 4.9.75, I
> >> would be very grateful.
> >
> > These were not straight-forward to backport and I felt the risk outweighed
> > the gains.
> >
> > If you want to do the backport yourself and you feel confident in it,
> > feel free.
> 
> Thanks, Greg and David. Looks like these 2 patches will cherry-pick
> cleanly if cherry-picked in the following sequence, on top of
> 4.9.74-rc1, which already has 6c9e73ef9aa7 ("tcp_bbr: record "full bw
> reached" decision in new full_bw_reached bit"):
> 
> $ git checkout linux-stable-rc/linux-4.9.y
> 
> $ git cherry-pick 2f6c498e4f15
> Performing inexact rename detection: 100% (17803152/17803152), done.
> [detached HEAD 0982234c57e1] tcp_bbr: reset full pipe detection on
> loss recovery undo
>  Date: Thu Dec 7 12:43:31 2017 -0500
>  1 file changed, 4 insertions(+)
> 
> $ git cherry-pick 600647d467c6
> Performing inexact rename detection: 100% (17803152/17803152), done.
> [detached HEAD 7e866eccd083] tcp_bbr: reset long-term bandwidth
> sampling on loss recovery undo
>  Date: Thu Dec 7 12:43:32 2017 -0500
>  1 file changed, 1 insertion(+)
> 
> $ git log --oneline --decorate | head -3
> 7e866eccd083 (HEAD) tcp_bbr: reset long-term bandwidth sampling on
> loss recovery undo
> 0982234c57e1 tcp_bbr: reset full pipe detection on loss recovery undo
> 79070be7f1ae (linux-stable-rc/linux-4.9.y) Linux 4.9.74-rc1
> 
> I verified that this compiles without warnings, and boots, and BBR works.
> 
> Shall I prepare another version of these 2 patches, or do we think
> this recipe will be sufficient? (Sorry I am not more familiar with the
> backport-to-stable process.)

That works, those two patches are now queued up for the next stable
release, thanks!

greg k-h


[PATCH v7 08/10] arm64/kernel: jump_label: use relative references

2018-01-02 Thread Ard Biesheuvel
On a randomly chosen distro kernel build for arm64, vmlinux.o shows the
following sections, containing jump label entries, and the associated
RELA relocation records, respectively:

  ...
  [38088] __jump_table  PROGBITS   00e19f30
   0002ea10    WA   0 0 8
  [38089] .rela__jump_table RELA   01fd8bb0
   0008be30  0018   I  38178   38088 8
  ...

In other words, we have 190 KB worth of 'struct jump_entry' instances,
and 573 KB worth of RELA entries to relocate each entry's code, target
and key members. This means the RELA section occupies 10% of the .init
segment, and the two sections combined represent 5% of vmlinux's entire
memory footprint.

So let's switch from 64-bit absolute references to 32-bit relative
references: this reduces the size of the __jump_table by 50%, and gets
rid of the RELA section entirely.

Note that this requires some extra care in the sorting routine, given
that the offsets change when entries are moved around in the jump_entry
table.

Signed-off-by: Ard Biesheuvel 
---
 arch/arm64/include/asm/jump_label.h | 27 
 arch/arm64/kernel/jump_label.c  | 22 +---
 2 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/jump_label.h 
b/arch/arm64/include/asm/jump_label.h
index 9d6e46355c89..8f82adeb7b0b 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -30,8 +30,8 @@ static __always_inline bool arch_static_branch(struct 
static_key *key, bool bran
 {
asm goto("1: nop\n\t"
 ".pushsection __jump_table,  \"aw\"\n\t"
-".align 3\n\t"
-".quad 1b, %l[l_yes], %c0\n\t"
+".align 2\n\t"
+".long 1b - ., %l[l_yes] - ., %c0 - .\n\t"
 ".popsection\n\t"
 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
 
@@ -44,8 +44,8 @@ static __always_inline bool arch_static_branch_jump(struct 
static_key *key, bool
 {
asm goto("1: b %l[l_yes]\n\t"
 ".pushsection __jump_table,  \"aw\"\n\t"
-".align 3\n\t"
-".quad 1b, %l[l_yes], %c0\n\t"
+".align 2\n\t"
+".long 1b - ., %l[l_yes] - ., %c0 - .\n\t"
 ".popsection\n\t"
 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
 
@@ -57,19 +57,26 @@ static __always_inline bool arch_static_branch_jump(struct 
static_key *key, bool
 typedef u64 jump_label_t;
 
 struct jump_entry {
-   jump_label_t code;
-   jump_label_t target;
-   jump_label_t key;
+   s32 code;
+   s32 target;
+   s32 key;
 };
 
 static inline jump_label_t jump_entry_code(const struct jump_entry *entry)
 {
-   return entry->code;
+   return (unsigned long)&entry->code + entry->code;
+}
+
+static inline jump_label_t jump_entry_target(const struct jump_entry *entry)
+{
+   return (unsigned long)&entry->target + entry->target;
 }
 
 static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
 {
-   return (struct static_key *)((unsigned long)entry->key & ~1UL);
+   unsigned long key = (unsigned long)&entry->key + entry->key;
+
+   return (struct static_key *)(key & ~1UL);
 }
 
 static inline bool jump_entry_is_branch(const struct jump_entry *entry)
@@ -87,7 +94,7 @@ static inline void jump_entry_set_module_init(struct 
jump_entry *entry)
entry->code = 0;
 }
 
-#define jump_label_swap NULL
+void jump_label_swap(void *a, void *b, int size);
 
 #endif  /* __ASSEMBLY__ */
 #endif /* __ASM_JUMP_LABEL_H */
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index c2dd1ad3e648..2b8e459e91f7 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -25,12 +25,12 @@
 void arch_jump_label_transform(struct jump_entry *entry,
   enum jump_label_type type)
 {
-   void *addr = (void *)entry->code;
+   void *addr = (void *)jump_entry_code(entry);
u32 insn;
 
if (type == JUMP_LABEL_JMP) {
-   insn = aarch64_insn_gen_branch_imm(entry->code,
-  entry->target,
+   insn = aarch64_insn_gen_branch_imm(jump_entry_code(entry),
+  jump_entry_target(entry),
   AARCH64_INSN_BRANCH_NOLINK);
} else {
insn = aarch64_insn_gen_nop();
@@ -50,4 +50,20 @@ void arch_jump_label_transform_static(struct jump_entry 
*entry,
 */
 }
 
+void jump_label_swap(void *a, void *b, int size)
+{
+   long delta = (unsigned long)a - (unsigned long)b;
+   struct jump_entry *jea = a;
+   struct jump_entry *jeb = b;
+   struct jump_entry tmp = *jea;
+
+   jea->code   = jeb->code - delta;
+ 

[PATCH v7 08/10] arm64/kernel: jump_label: use relative references

2018-01-02 Thread Ard Biesheuvel
On a randomly chosen distro kernel build for arm64, vmlinux.o shows the
following sections, containing jump label entries, and the associated
RELA relocation records, respectively:

  ...
  [38088] __jump_table  PROGBITS   00e19f30
   0002ea10    WA   0 0 8
  [38089] .rela__jump_table RELA   01fd8bb0
   0008be30  0018   I  38178   38088 8
  ...

In other words, we have 190 KB worth of 'struct jump_entry' instances,
and 573 KB worth of RELA entries to relocate each entry's code, target
and key members. This means the RELA section occupies 10% of the .init
segment, and the two sections combined represent 5% of vmlinux's entire
memory footprint.

So let's switch from 64-bit absolute references to 32-bit relative
references: this reduces the size of the __jump_table by 50%, and gets
rid of the RELA section entirely.

Note that this requires some extra care in the sorting routine, given
that the offsets change when entries are moved around in the jump_entry
table.

Signed-off-by: Ard Biesheuvel 
---
 arch/arm64/include/asm/jump_label.h | 27 
 arch/arm64/kernel/jump_label.c  | 22 +---
 2 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/jump_label.h 
b/arch/arm64/include/asm/jump_label.h
index 9d6e46355c89..8f82adeb7b0b 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -30,8 +30,8 @@ static __always_inline bool arch_static_branch(struct 
static_key *key, bool bran
 {
asm goto("1: nop\n\t"
 ".pushsection __jump_table,  \"aw\"\n\t"
-".align 3\n\t"
-".quad 1b, %l[l_yes], %c0\n\t"
+".align 2\n\t"
+".long 1b - ., %l[l_yes] - ., %c0 - .\n\t"
 ".popsection\n\t"
 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
 
@@ -44,8 +44,8 @@ static __always_inline bool arch_static_branch_jump(struct 
static_key *key, bool
 {
asm goto("1: b %l[l_yes]\n\t"
 ".pushsection __jump_table,  \"aw\"\n\t"
-".align 3\n\t"
-".quad 1b, %l[l_yes], %c0\n\t"
+".align 2\n\t"
+".long 1b - ., %l[l_yes] - ., %c0 - .\n\t"
 ".popsection\n\t"
 :  :  "i"(&((char *)key)[branch]) :  : l_yes);
 
@@ -57,19 +57,26 @@ static __always_inline bool arch_static_branch_jump(struct 
static_key *key, bool
 typedef u64 jump_label_t;
 
 struct jump_entry {
-   jump_label_t code;
-   jump_label_t target;
-   jump_label_t key;
+   s32 code;
+   s32 target;
+   s32 key;
 };
 
 static inline jump_label_t jump_entry_code(const struct jump_entry *entry)
 {
-   return entry->code;
+   return (unsigned long)&entry->code + entry->code;
+}
+
+static inline jump_label_t jump_entry_target(const struct jump_entry *entry)
+{
+   return (unsigned long)&entry->target + entry->target;
 }
 
 static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
 {
-   return (struct static_key *)((unsigned long)entry->key & ~1UL);
+   unsigned long key = (unsigned long)&entry->key + entry->key;
+
+   return (struct static_key *)(key & ~1UL);
 }
 
 static inline bool jump_entry_is_branch(const struct jump_entry *entry)
@@ -87,7 +94,7 @@ static inline void jump_entry_set_module_init(struct 
jump_entry *entry)
entry->code = 0;
 }
 
-#define jump_label_swap NULL
+void jump_label_swap(void *a, void *b, int size);
 
 #endif  /* __ASSEMBLY__ */
 #endif /* __ASM_JUMP_LABEL_H */
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index c2dd1ad3e648..2b8e459e91f7 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -25,12 +25,12 @@
 void arch_jump_label_transform(struct jump_entry *entry,
   enum jump_label_type type)
 {
-   void *addr = (void *)entry->code;
+   void *addr = (void *)jump_entry_code(entry);
u32 insn;
 
if (type == JUMP_LABEL_JMP) {
-   insn = aarch64_insn_gen_branch_imm(entry->code,
-  entry->target,
+   insn = aarch64_insn_gen_branch_imm(jump_entry_code(entry),
+  jump_entry_target(entry),
   AARCH64_INSN_BRANCH_NOLINK);
} else {
insn = aarch64_insn_gen_nop();
@@ -50,4 +50,20 @@ void arch_jump_label_transform_static(struct jump_entry 
*entry,
 */
 }
 
+void jump_label_swap(void *a, void *b, int size)
+{
+   long delta = (unsigned long)a - (unsigned long)b;
+   struct jump_entry *jea = a;
+   struct jump_entry *jeb = b;
+   struct jump_entry tmp = *jea;
+
+   jea->code   = jeb->code - delta;
+   jea->target = 

[PATCH v7 09/10] x86: jump_label: switch to jump_entry accessors

2018-01-02 Thread Ard Biesheuvel
In preparation of switching x86 to use place-relative references for
the code, target and key members of struct jump_entry, replace direct
references to the struct member with invocations of the new accessors.
This will allow us to make the switch by modifying the accessors only.

Signed-off-by: Ard Biesheuvel 
---
 arch/x86/kernel/jump_label.c | 43 
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e56c95be2808..d64296092ef5 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -52,22 +52,24 @@ static void __jump_label_transform(struct jump_entry *entry,
 * Jump label is enabled for the first time.
 * So we expect a default_nop...
 */
-   if (unlikely(memcmp((void *)entry->code, default_nop, 5)
-!= 0))
-   bug_at((void *)entry->code, __LINE__);
+   if (unlikely(memcmp((void *)jump_entry_code(entry),
+   default_nop, 5) != 0))
+   bug_at((void *)jump_entry_code(entry),
+  __LINE__);
} else {
/*
 * ...otherwise expect an ideal_nop. Otherwise
 * something went horribly wrong.
 */
-   if (unlikely(memcmp((void *)entry->code, ideal_nop, 5)
-!= 0))
-   bug_at((void *)entry->code, __LINE__);
+   if (unlikely(memcmp((void *)jump_entry_code(entry),
+   ideal_nop, 5) != 0))
+   bug_at((void *)jump_entry_code(entry),
+  __LINE__);
}
 
code.jump = 0xe9;
-   code.offset = entry->target -
-   (entry->code + JUMP_LABEL_NOP_SIZE);
+   code.offset = jump_entry_target(entry) -
+ (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
} else {
/*
 * We are disabling this jump label. If it is not what
@@ -76,14 +78,18 @@ static void __jump_label_transform(struct jump_entry *entry,
 * are converting the default nop to the ideal nop.
 */
if (init) {
-   if (unlikely(memcmp((void *)entry->code, default_nop, 
5) != 0))
-   bug_at((void *)entry->code, __LINE__);
+   if (unlikely(memcmp((void *)jump_entry_code(entry),
+   default_nop, 5) != 0))
+   bug_at((void *)jump_entry_code(entry),
+  __LINE__);
} else {
code.jump = 0xe9;
-   code.offset = entry->target -
-   (entry->code + JUMP_LABEL_NOP_SIZE);
-   if (unlikely(memcmp((void *)entry->code, &code, 5) != 
0))
-   bug_at((void *)entry->code, __LINE__);
+   code.offset = jump_entry_target(entry) -
+   (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+   if (unlikely(memcmp((void *)jump_entry_code(entry),
+   &code, 5) != 0))
+   bug_at((void *)jump_entry_code(entry),
+  __LINE__);
}
memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
}
@@ -97,10 +103,13 @@ static void __jump_label_transform(struct jump_entry 
*entry,
 *
 */
if (poker)
-   (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+   (*poker)((void *)jump_entry_code(entry), &code,
+JUMP_LABEL_NOP_SIZE);
else
-   text_poke_bp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE,
-(void *)entry->code + JUMP_LABEL_NOP_SIZE);
+   text_poke_bp((void *)jump_entry_code(entry), &code,
+JUMP_LABEL_NOP_SIZE,
+(void *)jump_entry_code(entry) +
+JUMP_LABEL_NOP_SIZE);
 }
 
 void arch_jump_label_transform(struct jump_entry *entry,
-- 
2.11.0



[PATCH v7 10/10] x86/kernel: jump_table: use relative references

2018-01-02 Thread Ard Biesheuvel
Similar to the arm64 case, 64-bit x86 can benefit from using 32-bit
relative references rather than 64-bit absolute ones when emitting
struct jump_entry instances. Not only does this reduce the memory
footprint of the entries themselves by 50%, it also removes the need
for carrying relocation metadata on relocatable builds (i.e., for KASLR)
which saves a fair chunk of .init space as well (although the savings
are not as dramatic as on arm64)

Signed-off-by: Ard Biesheuvel 
---
 arch/x86/include/asm/jump_label.h | 35 
 arch/x86/kernel/jump_label.c  | 16 +
 tools/objtool/special.c   |  4 +--
 3 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/jump_label.h 
b/arch/x86/include/asm/jump_label.h
index 009ff2699d07..35fc2c5ec846 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -36,8 +36,8 @@ static __always_inline bool arch_static_branch(struct 
static_key *key, bool bran
asm_volatile_goto("1:"
".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
".pushsection __jump_table,  \"aw\" \n\t"
-   _ASM_ALIGN "\n\t"
-   _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
+   ".balign 4\n\t"
+   ".long 1b - ., %l[l_yes] - ., %c0 + %c1 - .\n\t"
".popsection \n\t"
: :  "i" (key), "i" (branch) : : l_yes);
 
@@ -52,8 +52,8 @@ static __always_inline bool arch_static_branch_jump(struct 
static_key *key, bool
".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
"2:\n\t"
".pushsection __jump_table,  \"aw\" \n\t"
-   _ASM_ALIGN "\n\t"
-   _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
+   ".balign 4\n\t"
+   ".long 1b - ., %l[l_yes] - ., %c0 + %c1 - .\n\t"
".popsection \n\t"
: :  "i" (key), "i" (branch) : : l_yes);
 
@@ -69,19 +69,26 @@ typedef u32 jump_label_t;
 #endif
 
 struct jump_entry {
-   jump_label_t code;
-   jump_label_t target;
-   jump_label_t key;
+   s32 code;
+   s32 target;
+   s32 key;
 };
 
 static inline jump_label_t jump_entry_code(const struct jump_entry *entry)
 {
-   return entry->code;
+   return (unsigned long)&entry->code + entry->code;
+}
+
+static inline jump_label_t jump_entry_target(const struct jump_entry *entry)
+{
+   return (unsigned long)&entry->target + entry->target;
 }
 
 static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
 {
-   return (struct static_key *)((unsigned long)entry->key & ~1UL);
+   unsigned long key = (unsigned long)&entry->key + entry->key;
+
+   return (struct static_key *)(key & ~1UL);
 }
 
 static inline bool jump_entry_is_branch(const struct jump_entry *entry)
@@ -99,7 +106,7 @@ static inline void jump_entry_set_module_init(struct 
jump_entry *entry)
entry->code = 0;
 }
 
-#define jump_label_swap NULL
+void jump_label_swap(void *a, void *b, int size);
 
 #else  /* __ASSEMBLY__ */
 
@@ -114,8 +121,8 @@ static inline void jump_entry_set_module_init(struct 
jump_entry *entry)
.byte   STATIC_KEY_INIT_NOP
.endif
.pushsection __jump_table, "aw"
-   _ASM_ALIGN
-   _ASM_PTR .Lstatic_jump_\@, \target, \key
+   .balign 4
+   .long   .Lstatic_jump_\@ - ., \target - ., \key - .
.popsection
 .endm
 
@@ -130,8 +137,8 @@ static inline void jump_entry_set_module_init(struct 
jump_entry *entry)
 .Lstatic_jump_after_\@:
.endif
.pushsection __jump_table, "aw"
-   _ASM_ALIGN
-   _ASM_PTR .Lstatic_jump_\@, \target, \key + 1
+   .balign 4
+   .long   .Lstatic_jump_\@ - ., \target - ., \key + 1 - .
.popsection
 .endm
 
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index d64296092ef5..cc5034b42335 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -149,4 +149,20 @@ __init_or_module void 
arch_jump_label_transform_static(struct jump_entry *entry,
__jump_label_transform(entry, type, text_poke_early, 1);
 }
 
+void jump_label_swap(void *a, void *b, int size)
+{
+   long delta = (unsigned long)a - (unsigned long)b;
+   struct jump_entry *jea = a;
+   struct jump_entry *jeb = b;
+   struct jump_entry tmp = *jea;
+
+   jea->code   = jeb->code - delta;
+   jea->target = jeb->target - delta;
+   jea->key= jeb->key - delta;
+
+   jeb->code   = tmp.code + delta;
+   jeb->target = tmp.target + delta;
+   jeb->key= tmp.key + delta;
+}
+
 #endif
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 84f001d52322..98ae55b39037 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -30,9 +30,9 @@
 #define EX_ORIG_OFFSET 0
 #define EX_NEW_OFFSET  4
 

[PATCH v7 09/10] x86: jump_label: switch to jump_entry accessors

2018-01-02 Thread Ard Biesheuvel
In preparation of switching x86 to use place-relative references for
the code, target and key members of struct jump_entry, replace direct
references to the struct member with invocations of the new accessors.
This will allow us to make the switch by modifying the accessors only.

Signed-off-by: Ard Biesheuvel 
---
 arch/x86/kernel/jump_label.c | 43 
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e56c95be2808..d64296092ef5 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -52,22 +52,24 @@ static void __jump_label_transform(struct jump_entry *entry,
 * Jump label is enabled for the first time.
 * So we expect a default_nop...
 */
-   if (unlikely(memcmp((void *)entry->code, default_nop, 5)
-!= 0))
-   bug_at((void *)entry->code, __LINE__);
+   if (unlikely(memcmp((void *)jump_entry_code(entry),
+   default_nop, 5) != 0))
+   bug_at((void *)jump_entry_code(entry),
+  __LINE__);
} else {
/*
 * ...otherwise expect an ideal_nop. Otherwise
 * something went horribly wrong.
 */
-   if (unlikely(memcmp((void *)entry->code, ideal_nop, 5)
-!= 0))
-   bug_at((void *)entry->code, __LINE__);
+   if (unlikely(memcmp((void *)jump_entry_code(entry),
+   ideal_nop, 5) != 0))
+   bug_at((void *)jump_entry_code(entry),
+  __LINE__);
}
 
code.jump = 0xe9;
-   code.offset = entry->target -
-   (entry->code + JUMP_LABEL_NOP_SIZE);
+   code.offset = jump_entry_target(entry) -
+ (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
} else {
/*
 * We are disabling this jump label. If it is not what
@@ -76,14 +78,18 @@ static void __jump_label_transform(struct jump_entry *entry,
 * are converting the default nop to the ideal nop.
 */
if (init) {
-   if (unlikely(memcmp((void *)entry->code, default_nop, 
5) != 0))
-   bug_at((void *)entry->code, __LINE__);
+   if (unlikely(memcmp((void *)jump_entry_code(entry),
+   default_nop, 5) != 0))
+   bug_at((void *)jump_entry_code(entry),
+  __LINE__);
} else {
code.jump = 0xe9;
-   code.offset = entry->target -
-   (entry->code + JUMP_LABEL_NOP_SIZE);
-   if (unlikely(memcmp((void *)entry->code, &code, 5) != 0))
-   bug_at((void *)entry->code, __LINE__);
+   code.offset = jump_entry_target(entry) -
+   (jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+   if (unlikely(memcmp((void *)jump_entry_code(entry),
+   &code, 5) != 0))
+   bug_at((void *)jump_entry_code(entry),
+  __LINE__);
}
memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
}
@@ -97,10 +103,13 @@ static void __jump_label_transform(struct jump_entry 
*entry,
 *
 */
if (poker)
-   (*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+   (*poker)((void *)jump_entry_code(entry), &code,
+JUMP_LABEL_NOP_SIZE);
else
-   text_poke_bp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE,
-(void *)entry->code + JUMP_LABEL_NOP_SIZE);
+   text_poke_bp((void *)jump_entry_code(entry), &code,
+JUMP_LABEL_NOP_SIZE,
+(void *)jump_entry_code(entry) +
+JUMP_LABEL_NOP_SIZE);
 }
 
 void arch_jump_label_transform(struct jump_entry *entry,
-- 
2.11.0



[PATCH v7 10/10] x86/kernel: jump_table: use relative references

2018-01-02 Thread Ard Biesheuvel
Similar to the arm64 case, 64-bit x86 can benefit from using 32-bit
relative references rather than 64-bit absolute ones when emitting
struct jump_entry instances. Not only does this reduce the memory
footprint of the entries themselves by 50%, it also removes the need
for carrying relocation metadata on relocatable builds (i.e., for KASLR)
which saves a fair chunk of .init space as well (although the savings
are not as dramatic as on arm64)

Signed-off-by: Ard Biesheuvel 
---
 arch/x86/include/asm/jump_label.h | 35 
 arch/x86/kernel/jump_label.c  | 16 +
 tools/objtool/special.c   |  4 +--
 3 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/jump_label.h 
b/arch/x86/include/asm/jump_label.h
index 009ff2699d07..35fc2c5ec846 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -36,8 +36,8 @@ static __always_inline bool arch_static_branch(struct 
static_key *key, bool bran
asm_volatile_goto("1:"
".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
".pushsection __jump_table,  \"aw\" \n\t"
-   _ASM_ALIGN "\n\t"
-   _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
+   ".balign 4\n\t"
+   ".long 1b - ., %l[l_yes] - ., %c0 + %c1 - .\n\t"
".popsection \n\t"
: :  "i" (key), "i" (branch) : : l_yes);
 
@@ -52,8 +52,8 @@ static __always_inline bool arch_static_branch_jump(struct 
static_key *key, bool
".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
"2:\n\t"
".pushsection __jump_table,  \"aw\" \n\t"
-   _ASM_ALIGN "\n\t"
-   _ASM_PTR "1b, %l[l_yes], %c0 + %c1 \n\t"
+   ".balign 4\n\t"
+   ".long 1b - ., %l[l_yes] - ., %c0 + %c1 - .\n\t"
".popsection \n\t"
: :  "i" (key), "i" (branch) : : l_yes);
 
@@ -69,19 +69,26 @@ typedef u32 jump_label_t;
 #endif
 
 struct jump_entry {
-   jump_label_t code;
-   jump_label_t target;
-   jump_label_t key;
+   s32 code;
+   s32 target;
+   s32 key;
 };
 
 static inline jump_label_t jump_entry_code(const struct jump_entry *entry)
 {
-   return entry->code;
+   return (unsigned long)&entry->code + entry->code;
+}
+
+static inline jump_label_t jump_entry_target(const struct jump_entry *entry)
+{
+   return (unsigned long)&entry->target + entry->target;
 }
 
 static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
 {
-   return (struct static_key *)((unsigned long)entry->key & ~1UL);
+   unsigned long key = (unsigned long)&entry->key + entry->key;
+
+   return (struct static_key *)(key & ~1UL);
 }
 
 static inline bool jump_entry_is_branch(const struct jump_entry *entry)
@@ -99,7 +106,7 @@ static inline void jump_entry_set_module_init(struct 
jump_entry *entry)
entry->code = 0;
 }
 
-#define jump_label_swap NULL
+void jump_label_swap(void *a, void *b, int size);
 
 #else  /* __ASSEMBLY__ */
 
@@ -114,8 +121,8 @@ static inline void jump_entry_set_module_init(struct 
jump_entry *entry)
.byte   STATIC_KEY_INIT_NOP
.endif
.pushsection __jump_table, "aw"
-   _ASM_ALIGN
-   _ASM_PTR .Lstatic_jump_\@, \target, \key
+   .balign 4
+   .long   .Lstatic_jump_\@ - ., \target - ., \key - .
.popsection
 .endm
 
@@ -130,8 +137,8 @@ static inline void jump_entry_set_module_init(struct 
jump_entry *entry)
 .Lstatic_jump_after_\@:
.endif
.pushsection __jump_table, "aw"
-   _ASM_ALIGN
-   _ASM_PTR .Lstatic_jump_\@, \target, \key + 1
+   .balign 4
+   .long   .Lstatic_jump_\@ - ., \target - ., \key + 1 - .
.popsection
 .endm
 
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index d64296092ef5..cc5034b42335 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -149,4 +149,20 @@ __init_or_module void 
arch_jump_label_transform_static(struct jump_entry *entry,
__jump_label_transform(entry, type, text_poke_early, 1);
 }
 
+void jump_label_swap(void *a, void *b, int size)
+{
+   long delta = (unsigned long)a - (unsigned long)b;
+   struct jump_entry *jea = a;
+   struct jump_entry *jeb = b;
+   struct jump_entry tmp = *jea;
+
+   jea->code   = jeb->code - delta;
+   jea->target = jeb->target - delta;
+   jea->key= jeb->key - delta;
+
+   jeb->code   = tmp.code + delta;
+   jeb->target = tmp.target + delta;
+   jeb->key= tmp.key + delta;
+}
+
 #endif
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 84f001d52322..98ae55b39037 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -30,9 +30,9 @@
 #define EX_ORIG_OFFSET 0
 #define EX_NEW_OFFSET  4
 
-#define JUMP_ENTRY_SIZE  

[PATCH v7 04/10] init: allow initcall tables to be emitted using relative references

2018-01-02 Thread Ard Biesheuvel
Allow the initcall tables to be emitted using relative references that
are only half the size on 64-bit architectures and don't require fixups
at runtime on relocatable kernels.

Cc: Petr Mladek 
Cc: Sergey Senozhatsky 
Cc: Steven Rostedt 
Cc: James Morris 
Cc: "Serge E. Hallyn" 
Signed-off-by: Ard Biesheuvel 
---
 include/linux/init.h   | 44 +++-
 init/main.c| 32 +++---
 kernel/printk/printk.c |  4 +-
 security/security.c|  4 +-
 4 files changed, 53 insertions(+), 31 deletions(-)

diff --git a/include/linux/init.h b/include/linux/init.h
index ea1b31101d9e..cef8e817e5a5 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -109,8 +109,24 @@
 typedef int (*initcall_t)(void);
 typedef void (*exitcall_t)(void);
 
-extern initcall_t __con_initcall_start[], __con_initcall_end[];
-extern initcall_t __security_initcall_start[], __security_initcall_end[];
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+typedef int initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+   return (initcall_t)((unsigned long)entry + *entry);
+}
+#else
+typedef initcall_t initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+   return *entry;
+}
+#endif
+
+extern initcall_entry_t __con_initcall_start[], __con_initcall_end[];
+extern initcall_entry_t __security_initcall_start[], __security_initcall_end[];
 
 /* Used for contructor calls. */
 typedef void (*ctor_fn_t)(void);
@@ -160,9 +176,20 @@ extern bool initcall_debug;
  * as KEEP() in the linker script.
  */
 
-#define __define_initcall(fn, id) \
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define ___define_initcall(fn, id, __sec)  \
+   __ADDRESSABLE(fn)   \
+   asm(".section   \"" #__sec ".init\", \"a\"  \n" \
+   "__initcall_" #fn #id ":\n" \
+   ".long "VMLINUX_SYMBOL_STR(fn) " - .\n" \
+   ".previous  \n");
+#else
+#define ___define_initcall(fn, id, __sec) \
static initcall_t __initcall_##fn##id __used \
-   __attribute__((__section__(".initcall" #id ".init"))) = fn;
+   __attribute__((__section__(#__sec ".init"))) = fn;
+#endif
+
+#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id)
 
 /*
  * Early initcalls run before initializing SMP.
@@ -201,13 +228,8 @@ extern bool initcall_debug;
 #define __exitcall(fn) \
static exitcall_t __exitcall_##fn __exit_call = fn
 
-#define console_initcall(fn)   \
-   static initcall_t __initcall_##fn   \
-   __used __section(.con_initcall.init) = fn
-
-#define security_initcall(fn)  \
-   static initcall_t __initcall_##fn   \
-   __used __section(.security_initcall.init) = fn
+#define console_initcall(fn)   ___define_initcall(fn,, .con_initcall)
+#define security_initcall(fn)  ___define_initcall(fn,, .security_initcall)
 
 struct obs_kernel_param {
const char *str;
diff --git a/init/main.c b/init/main.c
index a8100b954839..d81487cc126d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -848,18 +848,18 @@ int __init_or_module do_one_initcall(initcall_t fn)
 }
 
 
-extern initcall_t __initcall_start[];
-extern initcall_t __initcall0_start[];
-extern initcall_t __initcall1_start[];
-extern initcall_t __initcall2_start[];
-extern initcall_t __initcall3_start[];
-extern initcall_t __initcall4_start[];
-extern initcall_t __initcall5_start[];
-extern initcall_t __initcall6_start[];
-extern initcall_t __initcall7_start[];
-extern initcall_t __initcall_end[];
-
-static initcall_t *initcall_levels[] __initdata = {
+extern initcall_entry_t __initcall_start[];
+extern initcall_entry_t __initcall0_start[];
+extern initcall_entry_t __initcall1_start[];
+extern initcall_entry_t __initcall2_start[];
+extern initcall_entry_t __initcall3_start[];
+extern initcall_entry_t __initcall4_start[];
+extern initcall_entry_t __initcall5_start[];
+extern initcall_entry_t __initcall6_start[];
+extern initcall_entry_t __initcall7_start[];
+extern initcall_entry_t __initcall_end[];
+
+static initcall_entry_t *initcall_levels[] __initdata = {
__initcall0_start,
__initcall1_start,
__initcall2_start,
@@ -885,7 +885,7 @@ static char *initcall_level_names[] __initdata = {
 
 static void __init do_initcall_level(int level)
 {
-   initcall_t *fn;
+   initcall_entry_t *fn;
 
strcpy(initcall_command_line, saved_command_line);
parse_args(initcall_level_names[level],
@@ -895,7 +895,7 @@ static void __init do_initcall_level(int level)
   NULL, &repair_env_string);
 
for (fn 

[PATCH v7 04/10] init: allow initcall tables to be emitted using relative references

2018-01-02 Thread Ard Biesheuvel
Allow the initcall tables to be emitted using relative references that
are only half the size on 64-bit architectures and don't require fixups
at runtime on relocatable kernels.

Cc: Petr Mladek 
Cc: Sergey Senozhatsky 
Cc: Steven Rostedt 
Cc: James Morris 
Cc: "Serge E. Hallyn" 
Signed-off-by: Ard Biesheuvel 
---
 include/linux/init.h   | 44 +++-
 init/main.c| 32 +++---
 kernel/printk/printk.c |  4 +-
 security/security.c|  4 +-
 4 files changed, 53 insertions(+), 31 deletions(-)

diff --git a/include/linux/init.h b/include/linux/init.h
index ea1b31101d9e..cef8e817e5a5 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -109,8 +109,24 @@
 typedef int (*initcall_t)(void);
 typedef void (*exitcall_t)(void);
 
-extern initcall_t __con_initcall_start[], __con_initcall_end[];
-extern initcall_t __security_initcall_start[], __security_initcall_end[];
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+typedef int initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+   return (initcall_t)((unsigned long)entry + *entry);
+}
+#else
+typedef initcall_t initcall_entry_t;
+
+static inline initcall_t initcall_from_entry(initcall_entry_t *entry)
+{
+   return *entry;
+}
+#endif
+
+extern initcall_entry_t __con_initcall_start[], __con_initcall_end[];
+extern initcall_entry_t __security_initcall_start[], __security_initcall_end[];
 
 /* Used for contructor calls. */
 typedef void (*ctor_fn_t)(void);
@@ -160,9 +176,20 @@ extern bool initcall_debug;
  * as KEEP() in the linker script.
  */
 
-#define __define_initcall(fn, id) \
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define ___define_initcall(fn, id, __sec)  \
+   __ADDRESSABLE(fn)   \
+   asm(".section   \"" #__sec ".init\", \"a\"  \n" \
+   "__initcall_" #fn #id ":\n" \
+   ".long "VMLINUX_SYMBOL_STR(fn) " - .\n" \
+   ".previous  \n");
+#else
+#define ___define_initcall(fn, id, __sec) \
static initcall_t __initcall_##fn##id __used \
-   __attribute__((__section__(".initcall" #id ".init"))) = fn;
+   __attribute__((__section__(#__sec ".init"))) = fn;
+#endif
+
+#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id)
 
 /*
  * Early initcalls run before initializing SMP.
@@ -201,13 +228,8 @@ extern bool initcall_debug;
 #define __exitcall(fn) \
static exitcall_t __exitcall_##fn __exit_call = fn
 
-#define console_initcall(fn)   \
-   static initcall_t __initcall_##fn   \
-   __used __section(.con_initcall.init) = fn
-
-#define security_initcall(fn)  \
-   static initcall_t __initcall_##fn   \
-   __used __section(.security_initcall.init) = fn
+#define console_initcall(fn)   ___define_initcall(fn,, .con_initcall)
+#define security_initcall(fn)  ___define_initcall(fn,, .security_initcall)
 
 struct obs_kernel_param {
const char *str;
diff --git a/init/main.c b/init/main.c
index a8100b954839..d81487cc126d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -848,18 +848,18 @@ int __init_or_module do_one_initcall(initcall_t fn)
 }
 
 
-extern initcall_t __initcall_start[];
-extern initcall_t __initcall0_start[];
-extern initcall_t __initcall1_start[];
-extern initcall_t __initcall2_start[];
-extern initcall_t __initcall3_start[];
-extern initcall_t __initcall4_start[];
-extern initcall_t __initcall5_start[];
-extern initcall_t __initcall6_start[];
-extern initcall_t __initcall7_start[];
-extern initcall_t __initcall_end[];
-
-static initcall_t *initcall_levels[] __initdata = {
+extern initcall_entry_t __initcall_start[];
+extern initcall_entry_t __initcall0_start[];
+extern initcall_entry_t __initcall1_start[];
+extern initcall_entry_t __initcall2_start[];
+extern initcall_entry_t __initcall3_start[];
+extern initcall_entry_t __initcall4_start[];
+extern initcall_entry_t __initcall5_start[];
+extern initcall_entry_t __initcall6_start[];
+extern initcall_entry_t __initcall7_start[];
+extern initcall_entry_t __initcall_end[];
+
+static initcall_entry_t *initcall_levels[] __initdata = {
__initcall0_start,
__initcall1_start,
__initcall2_start,
@@ -885,7 +885,7 @@ static char *initcall_level_names[] __initdata = {
 
 static void __init do_initcall_level(int level)
 {
-   initcall_t *fn;
+   initcall_entry_t *fn;
 
strcpy(initcall_command_line, saved_command_line);
parse_args(initcall_level_names[level],
@@ -895,7 +895,7 @@ static void __init do_initcall_level(int level)
   NULL, &repair_env_string);
 
for (fn = initcall_levels[level]; fn < initcall_levels[level+1]; fn++)
-   do_one_initcall(*fn);
+   

[PATCH v7 07/10] kernel/jump_label: abstract jump_entry member accessors

2018-01-02 Thread Ard Biesheuvel
In preparation of allowing architectures to use relative references
in jump_label entries [which can dramatically reduce the memory
footprint], introduce abstractions for references to the 'code' and
'key' members of struct jump_entry.

Signed-off-by: Ard Biesheuvel 
---
 arch/arm/include/asm/jump_label.h | 27 ++
 arch/arm64/include/asm/jump_label.h   | 27 ++
 arch/mips/include/asm/jump_label.h| 27 ++
 arch/powerpc/include/asm/jump_label.h | 27 ++
 arch/s390/include/asm/jump_label.h| 27 ++
 arch/sparc/include/asm/jump_label.h   | 27 ++
 arch/tile/include/asm/jump_label.h| 27 ++
 arch/x86/include/asm/jump_label.h | 27 ++
 kernel/jump_label.c   | 38 +---
 9 files changed, 232 insertions(+), 22 deletions(-)

diff --git a/arch/arm/include/asm/jump_label.h 
b/arch/arm/include/asm/jump_label.h
index e12d7d096fc0..7b05b404063a 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -45,5 +45,32 @@ struct jump_entry {
jump_label_t key;
 };
 
+static inline jump_label_t jump_entry_code(const struct jump_entry *entry)
+{
+   return entry->code;
+}
+
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
+{
+   return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static inline bool jump_entry_is_branch(const struct jump_entry *entry)
+{
+   return (unsigned long)entry->key & 1UL;
+}
+
+static inline bool jump_entry_is_module_init(const struct jump_entry *entry)
+{
+   return entry->code == 0;
+}
+
+static inline void jump_entry_set_module_init(struct jump_entry *entry)
+{
+   entry->code = 0;
+}
+
+#define jump_label_swap NULL
+
 #endif  /* __ASSEMBLY__ */
 #endif
diff --git a/arch/arm64/include/asm/jump_label.h 
b/arch/arm64/include/asm/jump_label.h
index 1b5e0e843c3a..9d6e46355c89 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -62,5 +62,32 @@ struct jump_entry {
jump_label_t key;
 };
 
+static inline jump_label_t jump_entry_code(const struct jump_entry *entry)
+{
+   return entry->code;
+}
+
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
+{
+   return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static inline bool jump_entry_is_branch(const struct jump_entry *entry)
+{
+   return (unsigned long)entry->key & 1UL;
+}
+
+static inline bool jump_entry_is_module_init(const struct jump_entry *entry)
+{
+   return entry->code == 0;
+}
+
+static inline void jump_entry_set_module_init(struct jump_entry *entry)
+{
+   entry->code = 0;
+}
+
+#define jump_label_swap NULL
+
 #endif  /* __ASSEMBLY__ */
 #endif /* __ASM_JUMP_LABEL_H */
diff --git a/arch/mips/include/asm/jump_label.h 
b/arch/mips/include/asm/jump_label.h
index e77672539e8e..70df9293dc49 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -66,5 +66,32 @@ struct jump_entry {
jump_label_t key;
 };
 
+static inline jump_label_t jump_entry_code(const struct jump_entry *entry)
+{
+   return entry->code;
+}
+
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
+{
+   return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static inline bool jump_entry_is_branch(const struct jump_entry *entry)
+{
+   return (unsigned long)entry->key & 1UL;
+}
+
+static inline bool jump_entry_is_module_init(const struct jump_entry *entry)
+{
+   return entry->code == 0;
+}
+
+static inline void jump_entry_set_module_init(struct jump_entry *entry)
+{
+   entry->code = 0;
+}
+
+#define jump_label_swap NULL
+
 #endif  /* __ASSEMBLY__ */
 #endif /* _ASM_MIPS_JUMP_LABEL_H */
diff --git a/arch/powerpc/include/asm/jump_label.h 
b/arch/powerpc/include/asm/jump_label.h
index 9a287e0ac8b1..412b2699c9f6 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -59,6 +59,33 @@ struct jump_entry {
jump_label_t key;
 };
 
+static inline jump_label_t jump_entry_code(const struct jump_entry *entry)
+{
+   return entry->code;
+}
+
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
+{
+   return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static inline bool jump_entry_is_branch(const struct jump_entry *entry)
+{
+   return (unsigned long)entry->key & 1UL;
+}
+
+static inline bool jump_entry_is_module_init(const struct jump_entry *entry)
+{
+   return entry->code == 0;
+}
+
+static inline void jump_entry_set_module_init(struct jump_entry *entry)
+{
+   entry->code = 0;
+}
+
+#define jump_label_swap NULL
+
 #else
 #define ARCH_STATIC_BRANCH(LABEL, KEY) \
 1098:  nop;\
diff --git 

[PATCH v7 06/10] kernel: tracepoints: add support for relative references

2018-01-02 Thread Ard Biesheuvel
To avoid the need for relocating absolute references to tracepoint
structures at boot time when running relocatable kernels (which may
take a disproportionate amount of space), add the option to emit
these tables as relative references instead.

Cc: Ingo Molnar 
Acked-by: Steven Rostedt (VMware) 
Signed-off-by: Ard Biesheuvel 
---
 include/linux/tracepoint.h | 19 ++--
 kernel/tracepoint.c| 50 +++-
 2 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index a26ffbe09e71..d02bf1a695e8 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -228,6 +228,19 @@ extern void syscall_unregfunc(void);
return static_key_false(&__tracepoint_##name.key);  \
}
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __TRACEPOINT_ENTRY(name)\
+   asm("   .section \"__tracepoints_ptrs\", \"a\"   \n" \
+   "   .balign 4\n" \
+   "   .long " VMLINUX_SYMBOL_STR(__tracepoint_##name) " - .\n" \
+   "   .previous\n")
+#else
+#define __TRACEPOINT_ENTRY(name)\
+   static struct tracepoint * const __tracepoint_ptr_##name __used  \
+   __attribute__((section("__tracepoints_ptrs"))) = \
+   &__tracepoint_##name
+#endif
+
 /*
  * We have no guarantee that gcc and the linker won't up-align the tracepoint
  * structures, so we create an array of pointers that will be used for 
iteration
@@ -237,11 +250,9 @@ extern void syscall_unregfunc(void);
static const char __tpstrtab_##name[]\
__attribute__((section("__tracepoints_strings"))) = #name;   \
struct tracepoint __tracepoint_##name\
-   __attribute__((section("__tracepoints"))) =  \
+   __attribute__((section("__tracepoints"), used)) =\
{ __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
-   static struct tracepoint * const __tracepoint_ptr_##name __used  \
-   __attribute__((section("__tracepoints_ptrs"))) = \
-   &__tracepoint_##name;
+   __TRACEPOINT_ENTRY(name);
 
 #define DEFINE_TRACE(name) \
DEFINE_TRACE_FN(name, NULL, NULL);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 685c50ae6300..05649fef106c 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -327,6 +327,28 @@ int tracepoint_probe_unregister(struct tracepoint *tp, 
void *probe, void *data)
 }
 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 
+static void for_each_tracepoint_range(struct tracepoint * const *begin,
+   struct tracepoint * const *end,
+   void (*fct)(struct tracepoint *tp, void *priv),
+   void *priv)
+{
+   if (!begin)
+   return;
+
+   if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) {
+   const int *iter;
+
+   for (iter = (const int *)begin; iter < (const int *)end; iter++)
+   fct((struct tracepoint *)((unsigned long)iter + *iter),
+   priv);
+   } else {
+   struct tracepoint * const *iter;
+
+   for (iter = begin; iter < end; iter++)
+   fct(*iter, priv);
+   }
+}
+
 #ifdef CONFIG_MODULES
 bool trace_module_has_bad_taint(struct module *mod)
 {
@@ -391,15 +413,9 @@ EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier);
  * Ensure the tracer unregistered the module's probes before the module
  * teardown is performed. Prevents leaks of probe and data pointers.
  */
-static void tp_module_going_check_quiescent(struct tracepoint * const *begin,
-   struct tracepoint * const *end)
+static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
 {
-   struct tracepoint * const *iter;
-
-   if (!begin)
-   return;
-   for (iter = begin; iter < end; iter++)
-   WARN_ON_ONCE((*iter)->funcs);
+   WARN_ON_ONCE(tp->funcs);
 }
 
 static int tracepoint_module_coming(struct module *mod)
@@ -450,8 +466,9 @@ static void tracepoint_module_going(struct module *mod)
 * Called the going notifier before checking for
 * quiescence.
 */
-   tp_module_going_check_quiescent(mod->tracepoints_ptrs,
-   mod->tracepoints_ptrs + mod->num_tracepoints);
+   for_each_tracepoint_range(mod->tracepoints_ptrs,
+   mod->tracepoints_ptrs + mod->num_tracepoints,
+   

[PATCH v7 07/10] kernel/jump_label: abstract jump_entry member accessors

2018-01-02 Thread Ard Biesheuvel
In preparation of allowing architectures to use relative references
in jump_label entries [which can dramatically reduce the memory
footprint], introduce abstractions for references to the 'code' and
'key' members of struct jump_entry.

Signed-off-by: Ard Biesheuvel 
---
 arch/arm/include/asm/jump_label.h | 27 ++
 arch/arm64/include/asm/jump_label.h   | 27 ++
 arch/mips/include/asm/jump_label.h| 27 ++
 arch/powerpc/include/asm/jump_label.h | 27 ++
 arch/s390/include/asm/jump_label.h| 27 ++
 arch/sparc/include/asm/jump_label.h   | 27 ++
 arch/tile/include/asm/jump_label.h| 27 ++
 arch/x86/include/asm/jump_label.h | 27 ++
 kernel/jump_label.c   | 38 +---
 9 files changed, 232 insertions(+), 22 deletions(-)

diff --git a/arch/arm/include/asm/jump_label.h 
b/arch/arm/include/asm/jump_label.h
index e12d7d096fc0..7b05b404063a 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -45,5 +45,32 @@ struct jump_entry {
jump_label_t key;
 };
 
+static inline jump_label_t jump_entry_code(const struct jump_entry *entry)
+{
+   return entry->code;
+}
+
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
+{
+   return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static inline bool jump_entry_is_branch(const struct jump_entry *entry)
+{
+   return (unsigned long)entry->key & 1UL;
+}
+
+static inline bool jump_entry_is_module_init(const struct jump_entry *entry)
+{
+   return entry->code == 0;
+}
+
+static inline void jump_entry_set_module_init(struct jump_entry *entry)
+{
+   entry->code = 0;
+}
+
+#define jump_label_swap NULL
+
 #endif  /* __ASSEMBLY__ */
 #endif
diff --git a/arch/arm64/include/asm/jump_label.h 
b/arch/arm64/include/asm/jump_label.h
index 1b5e0e843c3a..9d6e46355c89 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -62,5 +62,32 @@ struct jump_entry {
jump_label_t key;
 };
 
+static inline jump_label_t jump_entry_code(const struct jump_entry *entry)
+{
+   return entry->code;
+}
+
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
+{
+   return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static inline bool jump_entry_is_branch(const struct jump_entry *entry)
+{
+   return (unsigned long)entry->key & 1UL;
+}
+
+static inline bool jump_entry_is_module_init(const struct jump_entry *entry)
+{
+   return entry->code == 0;
+}
+
+static inline void jump_entry_set_module_init(struct jump_entry *entry)
+{
+   entry->code = 0;
+}
+
+#define jump_label_swap NULL
+
 #endif  /* __ASSEMBLY__ */
 #endif /* __ASM_JUMP_LABEL_H */
diff --git a/arch/mips/include/asm/jump_label.h 
b/arch/mips/include/asm/jump_label.h
index e77672539e8e..70df9293dc49 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -66,5 +66,32 @@ struct jump_entry {
jump_label_t key;
 };
 
+static inline jump_label_t jump_entry_code(const struct jump_entry *entry)
+{
+   return entry->code;
+}
+
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
+{
+   return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static inline bool jump_entry_is_branch(const struct jump_entry *entry)
+{
+   return (unsigned long)entry->key & 1UL;
+}
+
+static inline bool jump_entry_is_module_init(const struct jump_entry *entry)
+{
+   return entry->code == 0;
+}
+
+static inline void jump_entry_set_module_init(struct jump_entry *entry)
+{
+   entry->code = 0;
+}
+
+#define jump_label_swap NULL
+
 #endif  /* __ASSEMBLY__ */
 #endif /* _ASM_MIPS_JUMP_LABEL_H */
diff --git a/arch/powerpc/include/asm/jump_label.h 
b/arch/powerpc/include/asm/jump_label.h
index 9a287e0ac8b1..412b2699c9f6 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -59,6 +59,33 @@ struct jump_entry {
jump_label_t key;
 };
 
+static inline jump_label_t jump_entry_code(const struct jump_entry *entry)
+{
+   return entry->code;
+}
+
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
+{
+   return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static inline bool jump_entry_is_branch(const struct jump_entry *entry)
+{
+   return (unsigned long)entry->key & 1UL;
+}
+
+static inline bool jump_entry_is_module_init(const struct jump_entry *entry)
+{
+   return entry->code == 0;
+}
+
+static inline void jump_entry_set_module_init(struct jump_entry *entry)
+{
+   entry->code = 0;
+}
+
+#define jump_label_swap NULL
+
 #else
 #define ARCH_STATIC_BRANCH(LABEL, KEY) \
 1098:  nop;\
diff --git 

[PATCH v7 06/10] kernel: tracepoints: add support for relative references

2018-01-02 Thread Ard Biesheuvel
To avoid the need for relocating absolute references to tracepoint
structures at boot time when running relocatable kernels (which may
take a disproportionate amount of space), add the option to emit
these tables as relative references instead.

Cc: Ingo Molnar 
Acked-by: Steven Rostedt (VMware) 
Signed-off-by: Ard Biesheuvel 
---
 include/linux/tracepoint.h | 19 ++--
 kernel/tracepoint.c| 50 +++-
 2 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index a26ffbe09e71..d02bf1a695e8 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -228,6 +228,19 @@ extern void syscall_unregfunc(void);
return static_key_false(&__tracepoint_##name.key);  \
}
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#define __TRACEPOINT_ENTRY(name)\
+   asm("   .section \"__tracepoints_ptrs\", \"a\"   \n" \
+   "   .balign 4\n" \
+   "   .long " VMLINUX_SYMBOL_STR(__tracepoint_##name) " - .\n" \
+   "   .previous\n")
+#else
+#define __TRACEPOINT_ENTRY(name)\
+   static struct tracepoint * const __tracepoint_ptr_##name __used  \
+   __attribute__((section("__tracepoints_ptrs"))) = \
+   &__tracepoint_##name
+#endif
+
 /*
  * We have no guarantee that gcc and the linker won't up-align the tracepoint
  * structures, so we create an array of pointers that will be used for iteration
@@ -237,11 +250,9 @@ extern void syscall_unregfunc(void);
static const char __tpstrtab_##name[]\
__attribute__((section("__tracepoints_strings"))) = #name;   \
struct tracepoint __tracepoint_##name\
-   __attribute__((section("__tracepoints"))) =  \
+   __attribute__((section("__tracepoints"), used)) =\
{ __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\
-   static struct tracepoint * const __tracepoint_ptr_##name __used  \
-   __attribute__((section("__tracepoints_ptrs"))) = \
-   &__tracepoint_##name;
+   __TRACEPOINT_ENTRY(name);
 
 #define DEFINE_TRACE(name) \
DEFINE_TRACE_FN(name, NULL, NULL);
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 685c50ae6300..05649fef106c 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -327,6 +327,28 @@ int tracepoint_probe_unregister(struct tracepoint *tp, 
void *probe, void *data)
 }
 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 
+static void for_each_tracepoint_range(struct tracepoint * const *begin,
+   struct tracepoint * const *end,
+   void (*fct)(struct tracepoint *tp, void *priv),
+   void *priv)
+{
+   if (!begin)
+   return;
+
+   if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) {
+   const int *iter;
+
+   for (iter = (const int *)begin; iter < (const int *)end; iter++)
+   fct((struct tracepoint *)((unsigned long)iter + *iter),
+   priv);
+   } else {
+   struct tracepoint * const *iter;
+
+   for (iter = begin; iter < end; iter++)
+   fct(*iter, priv);
+   }
+}
+
 #ifdef CONFIG_MODULES
 bool trace_module_has_bad_taint(struct module *mod)
 {
@@ -391,15 +413,9 @@ EXPORT_SYMBOL_GPL(unregister_tracepoint_module_notifier);
  * Ensure the tracer unregistered the module's probes before the module
  * teardown is performed. Prevents leaks of probe and data pointers.
  */
-static void tp_module_going_check_quiescent(struct tracepoint * const *begin,
-   struct tracepoint * const *end)
+static void tp_module_going_check_quiescent(struct tracepoint *tp, void *priv)
 {
-   struct tracepoint * const *iter;
-
-   if (!begin)
-   return;
-   for (iter = begin; iter < end; iter++)
-   WARN_ON_ONCE((*iter)->funcs);
+   WARN_ON_ONCE(tp->funcs);
 }
 
 static int tracepoint_module_coming(struct module *mod)
@@ -450,8 +466,9 @@ static void tracepoint_module_going(struct module *mod)
 * Called the going notifier before checking for
 * quiescence.
 */
-   tp_module_going_check_quiescent(mod->tracepoints_ptrs,
-   mod->tracepoints_ptrs + mod->num_tracepoints);
+   for_each_tracepoint_range(mod->tracepoints_ptrs,
+   mod->tracepoints_ptrs + mod->num_tracepoints,
+   tp_module_going_check_quiescent, NULL);
break;
}

[PATCH v7 02/10] module: allow symbol exports to be disabled

2018-01-02 Thread Ard Biesheuvel
To allow existing C code to be incorporated into the decompressor or
the UEFI stub, introduce a CPP macro that turns all EXPORT_SYMBOL_xxx
declarations into nops, and #define it in places where such exports
are undesirable. Note that this gets rid of a rather dodgy redefine
of linux/export.h's header guard.

Cc: m...@codeblueprint.co.uk
Cc: keesc...@chromium.org
Cc: j...@kernel.org
Signed-off-by: Ard Biesheuvel 
---
 arch/x86/boot/compressed/kaslr.c  | 5 +
 drivers/firmware/efi/libstub/Makefile | 3 ++-
 include/linux/export.h| 9 +
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 8199a6187251..3a2a6d7049e4 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -23,11 +23,8 @@
  * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
  * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
  * which is meaningless and will cause compiling error in some cases.
- * So do not include linux/export.h and define EXPORT_SYMBOL(sym)
- * as empty.
  */
-#define _LINUX_EXPORT_H
-#define EXPORT_SYMBOL(sym)
+#define __DISABLE_EXPORTS
 
 #include "misc.h"
 #include "error.h"
diff --git a/drivers/firmware/efi/libstub/Makefile 
b/drivers/firmware/efi/libstub/Makefile
index adaa4a964f0c..312bd0b64a61 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -20,7 +20,8 @@ cflags-$(CONFIG_EFI_ARMSTUB)  += 
-I$(srctree)/scripts/dtc/libfdt
 KBUILD_CFLAGS  := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
   -D__NO_FORTIFY \
   $(call cc-option,-ffreestanding) \
-  $(call cc-option,-fno-stack-protector)
+  $(call cc-option,-fno-stack-protector) \
+  -D__DISABLE_EXPORTS
 
 GCOV_PROFILE   := n
 KASAN_SANITIZE := n
diff --git a/include/linux/export.h b/include/linux/export.h
index 1a1dfdb2a5c6..6dba2fb08f77 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -83,6 +83,15 @@ extern struct module __this_module;
  */
 #define __EXPORT_SYMBOL(sym, sec)  === __KSYM_##sym ===
 
+#elif defined(__DISABLE_EXPORTS)
+
+/*
+ * Allow symbol exports to be disabled completely so that C code may
+ * be reused in other execution contexts such as the UEFI stub or the
+ * decompressor.
+ */
+#define __EXPORT_SYMBOL(sym, sec)
+
 #elif defined(CONFIG_TRIM_UNUSED_KSYMS)
 
 #include <generated/autoksyms.h>
-- 
2.11.0



[PATCH v7 02/10] module: allow symbol exports to be disabled

2018-01-02 Thread Ard Biesheuvel
To allow existing C code to be incorporated into the decompressor or
the UEFI stub, introduce a CPP macro that turns all EXPORT_SYMBOL_xxx
declarations into nops, and #define it in places where such exports
are undesirable. Note that this gets rid of a rather dodgy redefine
of linux/export.h's header guard.

Cc: m...@codeblueprint.co.uk
Cc: keesc...@chromium.org
Cc: j...@kernel.org
Signed-off-by: Ard Biesheuvel 
---
 arch/x86/boot/compressed/kaslr.c  | 5 +
 drivers/firmware/efi/libstub/Makefile | 3 ++-
 include/linux/export.h| 9 +
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 8199a6187251..3a2a6d7049e4 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -23,11 +23,8 @@
  * _ctype[] in lib/ctype.c is needed by isspace() of linux/ctype.h.
  * While both lib/ctype.c and lib/cmdline.c will bring EXPORT_SYMBOL
  * which is meaningless and will cause compiling error in some cases.
- * So do not include linux/export.h and define EXPORT_SYMBOL(sym)
- * as empty.
  */
-#define _LINUX_EXPORT_H
-#define EXPORT_SYMBOL(sym)
+#define __DISABLE_EXPORTS
 
 #include "misc.h"
 #include "error.h"
diff --git a/drivers/firmware/efi/libstub/Makefile 
b/drivers/firmware/efi/libstub/Makefile
index adaa4a964f0c..312bd0b64a61 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -20,7 +20,8 @@ cflags-$(CONFIG_EFI_ARMSTUB)  += 
-I$(srctree)/scripts/dtc/libfdt
 KBUILD_CFLAGS  := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
   -D__NO_FORTIFY \
   $(call cc-option,-ffreestanding) \
-  $(call cc-option,-fno-stack-protector)
+  $(call cc-option,-fno-stack-protector) \
+  -D__DISABLE_EXPORTS
 
 GCOV_PROFILE   := n
 KASAN_SANITIZE := n
diff --git a/include/linux/export.h b/include/linux/export.h
index 1a1dfdb2a5c6..6dba2fb08f77 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -83,6 +83,15 @@ extern struct module __this_module;
  */
 #define __EXPORT_SYMBOL(sym, sec)  === __KSYM_##sym ===
 
+#elif defined(__DISABLE_EXPORTS)
+
+/*
+ * Allow symbol exports to be disabled completely so that C code may
+ * be reused in other execution contexts such as the UEFI stub or the
+ * decompressor.
+ */
+#define __EXPORT_SYMBOL(sym, sec)
+
 #elif defined(CONFIG_TRIM_UNUSED_KSYMS)
 
 #include <generated/autoksyms.h>
-- 
2.11.0



[PATCH v7 03/10] module: use relative references for __ksymtab entries

2018-01-02 Thread Ard Biesheuvel
An ordinary arm64 defconfig build has ~64 KB worth of __ksymtab
entries, each consisting of two 64-bit fields containing absolute
references, to the symbol itself and to a char array containing
its name, respectively.

When we build the same configuration with KASLR enabled, we end
up with an additional ~192 KB of relocations in the .init section,
i.e., one 24 byte entry for each absolute reference, which all need
to be processed at boot time.

Given how the struct kernel_symbol that describes each entry is
completely local to module.c (except for the references emitted
by EXPORT_SYMBOL() itself), we can easily modify it to contain
two 32-bit relative references instead. This reduces the size of
the __ksymtab section by 50% for all 64-bit architectures, and
gets rid of the runtime relocations entirely for architectures
implementing KASLR, either via standard PIE linking (arm64) or
using custom host tools (x86).

Note that the binary search involving __ksymtab contents relies
on each section being sorted by symbol name. This is implemented
based on the input section names, not the names in the ksymtab
entries, so this patch does not interfere with that.

Given that the use of place-relative relocations requires support
both in the toolchain and in the module loader, we cannot enable
this feature for all architectures. So make it dependent on whether
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS is defined.

Cc: Arnd Bergmann 
Cc: Andrew Morton 
Cc: Ingo Molnar 
Cc: Kees Cook 
Cc: Thomas Garnier 
Cc: Nicolas Pitre 
Acked-by: Jessica Yu 
Signed-off-by: Ard Biesheuvel 
---
 arch/x86/include/asm/Kbuild   |  1 +
 arch/x86/include/asm/export.h |  5 ---
 include/asm-generic/export.h  | 12 -
 include/linux/compiler.h  | 10 +
 include/linux/export.h| 46 +++-
 kernel/module.c   | 33 +++---
 6 files changed, 83 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 5d6a53fd7521..3e8a88dcaa1d 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -9,5 +9,6 @@ generated-y += xen-hypercalls.h
 generic-y += clkdev.h
 generic-y += dma-contiguous.h
 generic-y += early_ioremap.h
+generic-y += export.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h
deleted file mode 100644
index 2a51d66689c5..
--- a/arch/x86/include/asm/export.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_64BIT
-#define KSYM_ALIGN 16
-#endif
-#include <asm-generic/export.h>
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index 719db1968d81..97ce606459ae 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -5,12 +5,10 @@
 #define KSYM_FUNC(x) x
 #endif
 #ifdef CONFIG_64BIT
-#define __put .quad
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 8
 #endif
 #else
-#define __put .long
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 4
 #endif
@@ -25,6 +23,16 @@
 #define KSYM(name) name
 #endif
 
+.macro __put, val, name
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+   .long   \val - ., \name - .
+#elif defined(CONFIG_64BIT)
+   .quad   \val, \name
+#else
+   .long   \val, \name
+#endif
+.endm
+
 /*
  * note on .section use: @progbits vs %progbits nastiness doesn't matter,
  * since we immediately emit into those sections anyway.
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 52e611ab9a6c..79db4aa87d75 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -327,4 +327,14 @@ static __always_inline void __write_once_size(volatile 
void *p, void *res, int s
compiletime_assert(__native_word(t),\
"Need native word sized stores/loads for atomicity.")
 
+/*
+ * Force the compiler to emit 'sym' as a symbol, so that we can reference
+ * it from inline assembler. Necessary in case 'sym' could be inlined
+ * otherwise, or eliminated entirely due to lack of references that are
+ * visible to the compiler.
+ */
+#define __ADDRESSABLE(sym) \
+   static void * const __attribute__((section(".discard"), used))  \
+   __PASTE(__addressable_##sym, __LINE__) = (void *)
+
 #endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/export.h b/include/linux/export.h
index 6dba2fb08f77..4744cf4736b0 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -24,12 +24,6 @@
 #define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x)
 
 #ifndef __ASSEMBLY__
-struct kernel_symbol
-{
-   unsigned long value;
-   const char *name;
-};
-
 #ifdef MODULE
 extern struct module __this_module;
 #define THIS_MODULE (&__this_module)
@@ -60,17 +54,47 @@ extern struct module __this_module;
 #define __CRC_SYMBOL(sym, sec)
 #endif
 
+#ifdef 

[PATCH v7 03/10] module: use relative references for __ksymtab entries

2018-01-02 Thread Ard Biesheuvel
An ordinary arm64 defconfig build has ~64 KB worth of __ksymtab
entries, each consisting of two 64-bit fields containing absolute
references, to the symbol itself and to a char array containing
its name, respectively.

When we build the same configuration with KASLR enabled, we end
up with an additional ~192 KB of relocations in the .init section,
i.e., one 24 byte entry for each absolute reference, which all need
to be processed at boot time.

Given how the struct kernel_symbol that describes each entry is
completely local to module.c (except for the references emitted
by EXPORT_SYMBOL() itself), we can easily modify it to contain
two 32-bit relative references instead. This reduces the size of
the __ksymtab section by 50% for all 64-bit architectures, and
gets rid of the runtime relocations entirely for architectures
implementing KASLR, either via standard PIE linking (arm64) or
using custom host tools (x86).

Note that the binary search involving __ksymtab contents relies
on each section being sorted by symbol name. This is implemented
based on the input section names, not the names in the ksymtab
entries, so this patch does not interfere with that.

Given that the use of place-relative relocations requires support
both in the toolchain and in the module loader, we cannot enable
this feature for all architectures. So make it dependent on whether
CONFIG_HAVE_ARCH_PREL32_RELOCATIONS is defined.

Cc: Arnd Bergmann 
Cc: Andrew Morton 
Cc: Ingo Molnar 
Cc: Kees Cook 
Cc: Thomas Garnier 
Cc: Nicolas Pitre 
Acked-by: Jessica Yu 
Signed-off-by: Ard Biesheuvel 
---
 arch/x86/include/asm/Kbuild   |  1 +
 arch/x86/include/asm/export.h |  5 ---
 include/asm-generic/export.h  | 12 -
 include/linux/compiler.h  | 10 +
 include/linux/export.h| 46 +++-
 kernel/module.c   | 33 +++---
 6 files changed, 83 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 5d6a53fd7521..3e8a88dcaa1d 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -9,5 +9,6 @@ generated-y += xen-hypercalls.h
 generic-y += clkdev.h
 generic-y += dma-contiguous.h
 generic-y += early_ioremap.h
+generic-y += export.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/x86/include/asm/export.h b/arch/x86/include/asm/export.h
deleted file mode 100644
index 2a51d66689c5..
--- a/arch/x86/include/asm/export.h
+++ /dev/null
@@ -1,5 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_64BIT
-#define KSYM_ALIGN 16
-#endif
-#include <asm-generic/export.h>
diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h
index 719db1968d81..97ce606459ae 100644
--- a/include/asm-generic/export.h
+++ b/include/asm-generic/export.h
@@ -5,12 +5,10 @@
 #define KSYM_FUNC(x) x
 #endif
 #ifdef CONFIG_64BIT
-#define __put .quad
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 8
 #endif
 #else
-#define __put .long
 #ifndef KSYM_ALIGN
 #define KSYM_ALIGN 4
 #endif
@@ -25,6 +23,16 @@
 #define KSYM(name) name
 #endif
 
+.macro __put, val, name
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+   .long   \val - ., \name - .
+#elif defined(CONFIG_64BIT)
+   .quad   \val, \name
+#else
+   .long   \val, \name
+#endif
+.endm
+
 /*
  * note on .section use: @progbits vs %progbits nastiness doesn't matter,
  * since we immediately emit into those sections anyway.
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 52e611ab9a6c..79db4aa87d75 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -327,4 +327,14 @@ static __always_inline void __write_once_size(volatile 
void *p, void *res, int s
compiletime_assert(__native_word(t),\
"Need native word sized stores/loads for atomicity.")
 
+/*
+ * Force the compiler to emit 'sym' as a symbol, so that we can reference
+ * it from inline assembler. Necessary in case 'sym' could be inlined
+ * otherwise, or eliminated entirely due to lack of references that are
+ * visible to the compiler.
+ */
+#define __ADDRESSABLE(sym) \
+   static void * const __attribute__((section(".discard"), used))  \
+   __PASTE(__addressable_##sym, __LINE__) = (void *)
+
 #endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/export.h b/include/linux/export.h
index 6dba2fb08f77..4744cf4736b0 100644
--- a/include/linux/export.h
+++ b/include/linux/export.h
@@ -24,12 +24,6 @@
 #define VMLINUX_SYMBOL_STR(x) __VMLINUX_SYMBOL_STR(x)
 
 #ifndef __ASSEMBLY__
-struct kernel_symbol
-{
-   unsigned long value;
-   const char *name;
-};
-
 #ifdef MODULE
 extern struct module __this_module;
 #define THIS_MODULE (&__this_module)
@@ -60,17 +54,47 @@ extern struct module __this_module;
 #define __CRC_SYMBOL(sym, sec)
 #endif
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+#include <linux/compiler.h>
+/*
+ * Emit the ksymtab entry as a pair of relative references: this reduces
+ * the size by half on 64-bit 

[PATCH v7 00/10] add support for relative references in special sections

2018-01-02 Thread Ard Biesheuvel
This adds support for emitting special sections such as initcall arrays,
PCI fixups and tracepoints as relative references rather than absolute
references. This reduces the size by 50% on 64-bit architectures, but
more importantly, it removes the need for carrying relocation metadata
for these sections in relocatable kernels (e.g., for KASLR) that need
to fix up these absolute references at boot time. On arm64, this reduces
the vmlinux footprint of such a reference by 8x (8 byte absolute reference
+ 24 byte RELA entry vs 4 byte relative reference)

Patch #3 was sent out before as a single patch. This series supersedes
the previous submission. This version makes relative ksymtab entries
dependent on the new Kconfig symbol HAVE_ARCH_PREL32_RELOCATIONS rather
than trying to infer from kbuild test robot replies for which architectures
it should be blacklisted.

Patch #1 introduces the new Kconfig symbol HAVE_ARCH_PREL32_RELOCATIONS,
and sets it for the main architectures that are expected to benefit the
most from this feature, i.e., 64-bit architectures or ones that use
runtime relocations.

Patches #4 - #6 implement relative references for initcalls, PCI fixups
and tracepoints, respectively, all of which produce sections with order
~1000 entries on an arm64 defconfig kernel with tracing enabled. This
means we save about 28 KB of vmlinux space for each of these patches.

Patches #7 - #10 have been added in v5, and implement relative references
in jump tables for arm64 and x86. On arm64, this results in significant
space savings (650+ KB on a typical distro kernel). On x86, the savings
are not as impressive, but still worthwhile. (Note that these patches
do not rely on CONFIG_HAVE_ARCH_PREL32_RELOCATIONS, given that the
inline asm that is emitted is already per-arch)

For the arm64 kernel, all patches combined reduce the memory footprint of
vmlinux by about 1.3 MB (using a config copied from Ubuntu that has KASLR
enabled), of which ~1 MB is the size reduction of the RELA section in .init,
and the remaining 300 KB is reduction of .text/.data.

Branch:
git://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git 
relative-special-sections-v7

Changes since v6:
- drop S390 from patch #1 introducing HAVE_ARCH_PREL32_RELOCATIONS: kbuild
  robot threw me some s390 curveballs, and given that s390 does not define
  CONFIG_RELOCATABLE in the first place, it does not benefit as much from
  relative references as arm64, x86 and power do
- add patch to allow symbol exports to be disabled at compilation unit
  granularity (#2)
- get rid of arm64 vmlinux.lds.S hunk to ensure code generated by __ADDRESSABLE
  gets discarded from the EFI stub - it is no longer needed after adding #2 (#1)
- change __ADDRESSABLE() to emit a data reference, not a code reference - this
  is another simplification made possible by patch #2 (#3)
- add Steven's ack to #6
- split x86 jump_label patch into two (#9, #10)

Changes since v5:
- add missing jump_label prototypes to s390 jump_label.h (#6)
- fix inverted condition in call to jump_entry_is_module_init() (#6)

Changes since v4:
- add patches to convert x86 and arm64 to use relative references for jump
  tables (#6 - #8)
- rename PCI patch and add Bjorn's ack (#4)
- rebase onto v4.15-rc5

Changes since v3:
- fix module unload issue in patch #5 reported by Jessica, by reusing the
  updated routine for_each_tracepoint_range() for the quiescent check at
  module unload time; this requires this routine to be moved before
  tracepoint_module_going() in kernel/tracepoint.c
- add Jessica's ack to #2
- rebase onto v4.14-rc1

Changes since v2:
- Revert my slightly misguided attempt to appease checkpatch, which resulted
  in needless churn and worse code. This v3 is based on v1 with a few tweaks
  that were actually reasonable checkpatch warnings: unnecessary braces (as
  pointed out by Ingo) and other minor whitespace misdemeanors.

Changes since v1:
- Remove checkpatch errors to the extent feasible: in some cases, this
  involves moving extern declarations into C files, and switching to
  struct definitions rather than typedefs. Some errors are impossible
  to fix: please find the remaining ones after the diffstat.
- Used 'int' instead of 'signed int' for the various offset fields: there
  is no ambiguity between architectures regarding its signedness (unlike
  'char')
- Refactor the different patches to be more uniform in the way they define
  the section entry type and accessors in the .h file, and avoid the need to
  add #ifdefs to the C code.

Cc: "H. Peter Anvin" 
Cc: Ralf Baechle 
Cc: Arnd Bergmann 
Cc: Heiko Carstens 
Cc: Kees Cook 
Cc: Will Deacon 
Cc: Michael Ellerman 
Cc: Thomas Garnier 
Cc: Thomas Gleixner 
Cc: "Serge E. Hallyn" 
Cc: Bjorn Helgaas 
Cc: Benjamin 

[PATCH v7 01/10] arch: enable relative relocations for arm64, power and x86

2018-01-02 Thread Ard Biesheuvel
Before updating certain subsystems to use place-relative 32-bit
relocations in special sections, to save space and reduce the
number of absolute relocations that need to be processed at runtime
by relocatable kernels, introduce the Kconfig symbol and define it
for some architectures that should be able to support and benefit
from it.

Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: "H. Peter Anvin" 
Cc: x...@kernel.org
Signed-off-by: Ard Biesheuvel 
---
 arch/Kconfig | 10 ++
 arch/arm64/Kconfig   |  1 +
 arch/powerpc/Kconfig |  1 +
 arch/x86/Kconfig |  1 +
 4 files changed, 13 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 400b9e1b2f27..dbc036a7bd1b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -959,4 +959,14 @@ config REFCOUNT_FULL
  against various use-after-free conditions that can be used in
  security flaw exploits.
 
+config HAVE_ARCH_PREL32_RELOCATIONS
+   bool
+   help
+ May be selected by an architecture if it supports place-relative
+ 32-bit relocations, both in the toolchain and in the module loader,
+ in which case relative references can be used in special sections
+ for PCI fixup, initcalls etc which are only half the size on 64 bit
+ architectures, and don't require runtime relocation on relocatable
+ kernels.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c9a7e9e1414f..66c7b9ab2a3d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -89,6 +89,7 @@ config ARM64
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+   select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c51e6ce42e7a..e172478e2ae7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -177,6 +177,7 @@ config PPC
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if COMPAT
+   select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select ARCH_HAS_STRICT_KERNEL_RWX   if ((PPC_BOOK3S_64 || PPC32) && 
!RELOCATABLE && !HIBERNATION)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d4fc98c50378..9f2bb853aedb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -115,6 +115,7 @@ config X86
select HAVE_ARCH_MMAP_RND_BITS  if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if MMU && COMPAT
select HAVE_ARCH_COMPAT_MMAP_BASES  if MMU && COMPAT
+   select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
-- 
2.11.0



[PATCH v7 00/10] add support for relative references in special sections

2018-01-02 Thread Ard Biesheuvel
This adds support for emitting special sections such as initcall arrays,
PCI fixups and tracepoints as relative references rather than absolute
references. This reduces the size by 50% on 64-bit architectures, but
more importantly, it removes the need for carrying relocation metadata
for these sections in relocatable kernels (e.g., for KASLR) that need
to fix up these absolute references at boot time. On arm64, this reduces
the vmlinux footprint of such a reference by 8x (8 byte absolute reference
+ 24 byte RELA entry vs 4 byte relative reference)

Patch #3 was sent out before as a single patch. This series supersedes
the previous submission. This version makes relative ksymtab entries
dependent on the new Kconfig symbol HAVE_ARCH_PREL32_RELOCATIONS rather
than trying to infer from kbuild test robot replies for which architectures
it should be blacklisted.

Patch #1 introduces the new Kconfig symbol HAVE_ARCH_PREL32_RELOCATIONS,
and sets it for the main architectures that are expected to benefit the
most from this feature, i.e., 64-bit architectures or ones that use
runtime relocations.

Patches #4 - #6 implement relative references for initcalls, PCI fixups
and tracepoints, respectively, all of which produce sections with order
~1000 entries on an arm64 defconfig kernel with tracing enabled. This
means we save about 28 KB of vmlinux space for each of these patches.

Patches #7 - #10 have been added in v5, and implement relative references
in jump tables for arm64 and x86. On arm64, this results in significant
space savings (650+ KB on a typical distro kernel). On x86, the savings
are not as impressive, but still worthwhile. (Note that these patches
do not rely on CONFIG_HAVE_ARCH_PREL32_RELOCATIONS, given that the
inline asm that is emitted is already per-arch)

For the arm64 kernel, all patches combined reduce the memory footprint of
vmlinux by about 1.3 MB (using a config copied from Ubuntu that has KASLR
enabled), of which ~1 MB is the size reduction of the RELA section in .init,
and the remaining 300 KB is reduction of .text/.data.

Branch:
git://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git 
relative-special-sections-v7

Changes since v6:
- drop S390 from patch #1 introducing HAVE_ARCH_PREL32_RELOCATIONS: kbuild
  robot threw me some s390 curveballs, and given that s390 does not define
  CONFIG_RELOCATABLE in the first place, it does not benefit as much from
  relative references as arm64, x86 and power do
- add patch to allow symbol exports to be disabled at compilation unit
  granularity (#2)
- get rid of arm64 vmlinux.lds.S hunk to ensure code generated by __ADDRESSABLE
  gets discarded from the EFI stub - it is no longer needed after adding #2 (#1)
- change _ADDRESSABLE() to emit a data reference, not a code reference - this
  is another simplification made possible by patch #2 (#3)
- add Steven's ack to #6
- split x86 jump_label patch into two (#9, #10)

Changes since v5:
- add missing jump_label prototypes to s390 jump_label.h (#6)
- fix inverted condition in call to jump_entry_is_module_init() (#6)

Changes since v4:
- add patches to convert x86 and arm64 to use relative references for jump
  tables (#6 - #8)
- rename PCI patch and add Bjorn's ack (#4)
- rebase onto v4.15-rc5

Changes since v3:
- fix module unload issue in patch #5 reported by Jessica, by reusing the
  updated routine for_each_tracepoint_range() for the quiescent check at
  module unload time; this requires this routine to be moved before
  tracepoint_module_going() in kernel/tracepoint.c
- add Jessica's ack to #2
- rebase onto v4.14-rc1

Changes since v2:
- Revert my slightly misguided attempt to appease checkpatch, which resulted
  in needless churn and worse code. This v3 is based on v1 with a few tweaks
  that were actually reasonable checkpatch warnings: unnecessary braces (as
  pointed out by Ingo) and other minor whitespace misdemeanors.

Changes since v1:
- Remove checkpatch errors to the extent feasible: in some cases, this
  involves moving extern declarations into C files, and switching to
  struct definitions rather than typedefs. Some errors are impossible
  to fix: please find the remaining ones after the diffstat.
- Used 'int' instead of 'signed int' for the various offset fields: there
  is no ambiguity between architectures regarding its signedness (unlike
  'char')
- Refactor the different patches to be more uniform in the way they define
  the section entry type and accessors in the .h file, and avoid the need to
  add #ifdefs to the C code.

Cc: "H. Peter Anvin" 
Cc: Ralf Baechle 
Cc: Arnd Bergmann 
Cc: Heiko Carstens 
Cc: Kees Cook 
Cc: Will Deacon 
Cc: Michael Ellerman 
Cc: Thomas Garnier 
Cc: Thomas Gleixner 
Cc: "Serge E. Hallyn" 
Cc: Bjorn Helgaas 
Cc: Benjamin Herrenschmidt 
Cc: Russell King 
Cc: Paul Mackerras 
Cc: Catalin Marinas 
Cc: "David S. Miller" 
Cc: Petr Mladek 
Cc: Ingo Molnar 
Cc: James Morris 
Cc: Andrew Morton 
Cc: Nicolas Pitre 
Cc: Josh Poimboeuf 
Cc: Steven Rostedt 
Cc: 

[PATCH v7 01/10] arch: enable relative relocations for arm64, power and x86

2018-01-02 Thread Ard Biesheuvel
Before updating certain subsystems to use place relative 32-bit
relocations in special sections, to save space and reduce the
number of absolute relocations that need to be processed at runtime
by relocatable kernels, introduce the Kconfig symbol and define it
for some architectures that should be able to support and benefit
from it.

Cc: Catalin Marinas 
Cc: Will Deacon 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Michael Ellerman 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: "H. Peter Anvin" 
Cc: x...@kernel.org
Signed-off-by: Ard Biesheuvel 
---
 arch/Kconfig | 10 ++
 arch/arm64/Kconfig   |  1 +
 arch/powerpc/Kconfig |  1 +
 arch/x86/Kconfig |  1 +
 4 files changed, 13 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 400b9e1b2f27..dbc036a7bd1b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -959,4 +959,14 @@ config REFCOUNT_FULL
  against various use-after-free conditions that can be used in
  security flaw exploits.
 
+config HAVE_ARCH_PREL32_RELOCATIONS
+   bool
+   help
+ May be selected by an architecture if it supports place-relative
+ 32-bit relocations, both in the toolchain and in the module loader,
+ in which case relative references can be used in special sections
+ for PCI fixup, initcalls etc which are only half the size on 64 bit
+ architectures, and don't require runtime relocation on relocatable
+ kernels.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c9a7e9e1414f..66c7b9ab2a3d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -89,6 +89,7 @@ config ARM64
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+   select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c51e6ce42e7a..e172478e2ae7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -177,6 +177,7 @@ config PPC
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if COMPAT
+   select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select ARCH_HAS_STRICT_KERNEL_RWX   if ((PPC_BOOK3S_64 || PPC32) && 
!RELOCATABLE && !HIBERNATION)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d4fc98c50378..9f2bb853aedb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -115,6 +115,7 @@ config X86
select HAVE_ARCH_MMAP_RND_BITS  if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if MMU && COMPAT
select HAVE_ARCH_COMPAT_MMAP_BASES  if MMU && COMPAT
+   select HAVE_ARCH_PREL32_RELOCATIONS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
-- 
2.11.0



Re: [alsa-devel] [PATCH -next] soundwire: Fix typo in return value check of sdw_read()

2018-01-02 Thread Pierre-Louis Bossart

On 1/1/18 10:08 PM, Wei Yongjun wrote:

Fix the typo: 'status2' should be used instead of 'status'.


well spotted, thanks!

Acked-by: Pierre-Louis Bossart 



Fixes: b0a9c37b0178 ("soundwire: Add slave status handling")
Signed-off-by: Wei Yongjun 
---
  drivers/soundwire/bus.c | 10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
index 4c34519..266d2b3 100644
--- a/drivers/soundwire/bus.c
+++ b/drivers/soundwire/bus.c
@@ -671,8 +671,8 @@ static int sdw_handle_dp0_interrupt(struct sdw_slave 
*slave, u8 *slave_status)
status2 = sdw_read(slave, SDW_DP0_INT);
if (status2 < 0) {
dev_err(slave->bus->dev,
-   "SDW_DP0_INT read failed:%d", status);
-   return status;
+   "SDW_DP0_INT read failed:%d", status2);
+   return status2;
}
status &= status2;
  
@@ -741,10 +741,10 @@ static int sdw_handle_port_interrupt(struct sdw_slave *slave,
  
  		/* Read DPN interrupt again */

status2 = sdw_read(slave, addr);
-   if (status < 0) {
+   if (status2 < 0) {
dev_err(slave->bus->dev,
-   "SDW_DPN_INT read failed:%d", status);
-   return status;
+   "SDW_DPN_INT read failed:%d", status2);
+   return status2;
}
status &= status2;

___
Alsa-devel mailing list
alsa-de...@alsa-project.org
http://mailman.alsa-project.org/mailman/listinfo/alsa-devel





Re: [alsa-devel] [PATCH -next] soundwire: Fix typo in return value check of sdw_read()

2018-01-02 Thread Pierre-Louis Bossart

On 1/1/18 10:08 PM, Wei Yongjun wrote:

Fix the typo: 'status2' should be used instead of 'status'.


well spotted, thanks!

Acked-by: Pierre-Louis Bossart 



Fixes: b0a9c37b0178 ("soundwire: Add slave status handling")
Signed-off-by: Wei Yongjun 
---
  drivers/soundwire/bus.c | 10 +-
  1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
index 4c34519..266d2b3 100644
--- a/drivers/soundwire/bus.c
+++ b/drivers/soundwire/bus.c
@@ -671,8 +671,8 @@ static int sdw_handle_dp0_interrupt(struct sdw_slave 
*slave, u8 *slave_status)
status2 = sdw_read(slave, SDW_DP0_INT);
if (status2 < 0) {
dev_err(slave->bus->dev,
-   "SDW_DP0_INT read failed:%d", status);
-   return status;
+   "SDW_DP0_INT read failed:%d", status2);
+   return status2;
}
status &= status2;
  
@@ -741,10 +741,10 @@ static int sdw_handle_port_interrupt(struct sdw_slave *slave,
  
  		/* Read DPN interrupt again */

status2 = sdw_read(slave, addr);
-   if (status < 0) {
+   if (status2 < 0) {
dev_err(slave->bus->dev,
-   "SDW_DPN_INT read failed:%d", status);
-   return status;
+   "SDW_DPN_INT read failed:%d", status2);
+   return status2;
}
status &= status2;

___
Alsa-devel mailing list
alsa-de...@alsa-project.org
http://mailman.alsa-project.org/mailman/listinfo/alsa-devel





Re: linux-next: build failure after merge of the clk tree

2018-01-02 Thread Stephen Boyd
On 01/02, Stephen Rothwell wrote:
> Hi all,
> 
> After merging the clk tree, today's linux-next build (x86_64 allmodconfig)
> failed like this:
> 
> ERROR: "clk_regmap_mux_div_ops" [drivers/clk/qcom/apcs-msm8916.ko] undefined!
> ERROR: "__mux_div_set_src_div" [drivers/clk/qcom/apcs-msm8916.ko] undefined!
> 
> Caused by commit
> 
>   8a77f61118a2 ("clk: qcom: Add APCS clock controller support")
> 
> I have used the clk tree from next-20171222 for today.
> 

Thanks for the report. I've fixed it up and I'm going to roll
more build coverage into my scripts now.

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: linux-next: build failure after merge of the clk tree

2018-01-02 Thread Stephen Boyd
On 01/02, Stephen Rothwell wrote:
> Hi all,
> 
> After merging the clk tree, today's linux-next build (x86_64 allmodconfig)
> failed like this:
> 
> ERROR: "clk_regmap_mux_div_ops" [drivers/clk/qcom/apcs-msm8916.ko] undefined!
> ERROR: "__mux_div_set_src_div" [drivers/clk/qcom/apcs-msm8916.ko] undefined!
> 
> Caused by commit
> 
>   8a77f61118a2 ("clk: qcom: Add APCS clock controller support")
> 
> I have used the clk tree from next-20171222 for today.
> 

Thanks for the report. I've fixed it up and I'm going to roll
more build coverage into my scripts now.

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [Intel-wired-lan] [PATCH] i40e: Delete an error message for a failed memory allocation in i40e_init_interrupt_scheme()

2018-01-02 Thread Jesse Brandeburg
On Mon, 1 Jan 2018 20:43:35 +0100
SF Markus Elfring  wrote:

> From: Markus Elfring 
> Date: Mon, 1 Jan 2018 20:38:14 +0100
> 
> Omit an extra message for a memory allocation failure in this function.
> 
> This issue was detected by using the Coccinelle software.
> 
> Signed-off-by: Markus Elfring 

Thanks for the patch.

Acked-by: Jesse Brandeburg 


Re: [Intel-wired-lan] [PATCH] i40e: Delete an error message for a failed memory allocation in i40e_init_interrupt_scheme()

2018-01-02 Thread Jesse Brandeburg
On Mon, 1 Jan 2018 20:43:35 +0100
SF Markus Elfring  wrote:

> From: Markus Elfring 
> Date: Mon, 1 Jan 2018 20:38:14 +0100
> 
> Omit an extra message for a memory allocation failure in this function.
> 
> This issue was detected by using the Coccinelle software.
> 
> Signed-off-by: Markus Elfring 

Thanks for the patch.

Acked-by: Jesse Brandeburg 


Re: [alsa-devel] [PATCH 15/27] ALSA: hda - Use timecounter_initialize interface

2018-01-02 Thread Pierre-Louis Bossart

On 1/2/18 12:21 PM, Richard Cochran wrote:

On Tue, Jan 02, 2018 at 11:15:45AM -0600, Pierre-Louis Bossart wrote:

I wrote the code for HDaudio and I remember wasting time trying to figure
out the gory details of the cycle counter stuff when all I wanted was a
conversion from a 24MHz counter to ns values using a 125/3 operation in the
right order - as explained in the comments


Would using clocks_calc_mult_shift() work for you?


In theory yes, but I'd need to re-check what the results would be.
I remember applying the 1/3 factor separately to avoid wrap-around after 
4 hours [1], but I can't remember the details on the analysis. I can't 
figure out what the 'maxsec' argument should be either.


[1] 
http://elixir.free-electrons.com/linux/latest/source/sound/hda/hdac_stream.c#L486


Re: [alsa-devel] [PATCH 15/27] ALSA: hda - Use timecounter_initialize interface

2018-01-02 Thread Pierre-Louis Bossart

On 1/2/18 12:21 PM, Richard Cochran wrote:

On Tue, Jan 02, 2018 at 11:15:45AM -0600, Pierre-Louis Bossart wrote:

I wrote the code for HDaudio and I remember wasting time trying to figure
out the gory details of the cycle counter stuff when all I wanted was a
conversion from a 24MHz counter to ns values using a 125/3 operation in the
right order - as explained in the comments


Would using clocks_calc_mult_shift() work for you?


In theory yes, but I'd need to re-check what the results would be.
I remember applying the 1/3 factor separately to avoid wrap-around after 
4 hours [1], but I can't remember the details on the analysis. I can't 
figure out what the 'maxsec' argument should be either.


[1] 
http://elixir.free-electrons.com/linux/latest/source/sound/hda/hdac_stream.c#L486


Re: [PATCH v3 net-next 2/5] net: tracepoint: replace tcp_set_state tracepoint with inet_sock_set_state tracepoint

2018-01-02 Thread David Miller
From: Brendan Gregg 
Date: Tue, 2 Jan 2018 11:46:26 -0800

> If I'm to use sock:inet_sock_set_state for TCP tracing, I'd like
> sk->sk_protocol exposed as a tracepoint argument so I can match on
> IPPROTO_TCP.

Agreed.


Re: [PATCH v3 net-next 2/5] net: tracepoint: replace tcp_set_state tracepoint with inet_sock_set_state tracepoint

2018-01-02 Thread David Miller
From: Brendan Gregg 
Date: Tue, 2 Jan 2018 11:46:26 -0800

> If I'm to use sock:inet_sock_set_state for TCP tracing, I'd like
> sk->sk_protocol exposed as a tracepoint argument so I can match on
> IPPROTO_TCP.

Agreed.


Re: [PATCH] NET: usb: qmi_wwan: add support for YUGA CLM920-NC5 PID 0x9625

2018-01-02 Thread David Miller
From: SZ Lin (林上智) 
Date: Fri, 29 Dec 2017 17:02:17 +0800

> This patch adds support for PID 0x9625 of YUGA CLM920-NC5.
> 
> YUGA CLM920-NC5 needs to enable QMI_WWAN_QUIRK_DTR before QMI operation.
> 
> qmicli -d /dev/cdc-wdm0 -p --dms-get-revision
> [/dev/cdc-wdm0] Device revision retrieved:
> Revision: 'CLM920_NC5-V1  1  [Oct 23 2016 19:00:00]'
> 
> Signed-off-by: SZ Lin (林上智) 

Applied, thank you.


Re: [PATCH] NET: usb: qmi_wwan: add support for YUGA CLM920-NC5 PID 0x9625

2018-01-02 Thread David Miller
From: SZ Lin (林上智) 
Date: Fri, 29 Dec 2017 17:02:17 +0800

> This patch adds support for PID 0x9625 of YUGA CLM920-NC5.
> 
> YUGA CLM920-NC5 needs to enable QMI_WWAN_QUIRK_DTR before QMI operation.
> 
> qmicli -d /dev/cdc-wdm0 -p --dms-get-revision
> [/dev/cdc-wdm0] Device revision retrieved:
> Revision: 'CLM920_NC5-V1  1  [Oct 23 2016 19:00:00]'
> 
> Signed-off-by: SZ Lin (林上智) 

Applied, thank you.


Re: [PATCH v3 net-next 2/5] net: tracepoint: replace tcp_set_state tracepoint with inet_sock_set_state tracepoint

2018-01-02 Thread Brendan Gregg
On Sat, Dec 30, 2017 at 7:06 PM, Yafang Shao  wrote:
> On Sun, Dec 31, 2017 at 6:33 AM, Brendan Gregg
>  wrote:
>> On Tue, Dec 19, 2017 at 7:12 PM, Yafang Shao  wrote:
>>> As sk_state is a common field for struct sock, so the state
>>> transition tracepoint should not be a TCP specific feature.
>>> Currently it traces all AF_INET state transition, so I rename this
>>> tracepoint to inet_sock_set_state tracepoint with some minor changes and 
>>> move it
>>> into trace/events/sock.h.
>>
>> The tcp:tcp_set_state probe is tcp_set_state(), so it's only going to
>> fire for TCP sessions. It's not broken, and we could add a
>> sctp:sctp_set_state as well. Replacing tcp:tcp_set_state with
>> inet_sk_set_state is feeling like we might be baking too much
>> implementation detail into the tracepoint API.
>>
>> If we must have inet_sk_set_state, then must we also delete 
>> tcp:tcp_set_state?
>>
>
> Hi Brendan,
>
> The reason we have to make this change could be got from this mail
> thread, https://patchwork.kernel.org/patch/10099243/ .
>
> The original tcp:tcp_set_state probe doesn't trace all TCP state transitions.
> Some state transitions in inet_connection_sock.c and
> inet_hashtables.c are missed.
> So we have to place this probe into these two files to fix the issue.
> But as inet_connection_sock.c and inet_hashtables.c are common files
> for all IPv4 protocols, not only for TCP, so it is not proper to place
> a tcp_ function in these two files.
> That's why we decide to rename tcp:tcp_set_state probe to
> sock:inet_sock_set_state.

It kinda feels like we are fixing one exposing-implementation problem
(the missing state changes, which I'm happy to see fixed), by exposing
another (there's no tcp:tcp_set_state because we don't want to put tcp
functions in inet*.c files). Anyway...

If I'm to use sock:inet_sock_set_state for TCP tracing, I'd like
sk->sk_protocol exposed as a tracepoint argument so I can match on
IPPROTO_TCP. Otherwise I'll have to keep digging it out of (void
*)skaddr. (And if we're adding arguments, maybe consider sk_family as
well, to make it easier to see which address arguments to use).

Brendan


Re: [PATCH v3 net-next 2/5] net: tracepoint: replace tcp_set_state tracepoint with inet_sock_set_state tracepoint

2018-01-02 Thread Brendan Gregg
On Sat, Dec 30, 2017 at 7:06 PM, Yafang Shao  wrote:
> On Sun, Dec 31, 2017 at 6:33 AM, Brendan Gregg
>  wrote:
>> On Tue, Dec 19, 2017 at 7:12 PM, Yafang Shao  wrote:
>>> As sk_state is a common field for struct sock, so the state
>>> transition tracepoint should not be a TCP specific feature.
>>> Currently it traces all AF_INET state transition, so I rename this
>>> tracepoint to inet_sock_set_state tracepoint with some minor changes and 
>>> move it
>>> into trace/events/sock.h.
>>
>> The tcp:tcp_set_state probe is tcp_set_state(), so it's only going to
>> fire for TCP sessions. It's not broken, and we could add a
>> sctp:sctp_set_state as well. Replacing tcp:tcp_set_state with
>> inet_sk_set_state is feeling like we might be baking too much
>> implementation detail into the tracepoint API.
>>
>> If we must have inet_sk_set_state, then must we also delete 
>> tcp:tcp_set_state?
>>
>
> Hi Brendan,
>
> The reason we have to make this change could be got from this mail
> thread, https://patchwork.kernel.org/patch/10099243/ .
>
> The original tcp:tcp_set_state probe doesn't trace all TCP state transitions.
> Some state transitions in inet_connection_sock.c and
> inet_hashtables.c are missed.
> So we have to place this probe into these two files to fix the issue.
> But as inet_connection_sock.c and inet_hashtables.c are common files
> for all IPv4 protocols, not only for TCP, so it is not proper to place
> a tcp_ function in these two files.
> That's why we decide to rename tcp:tcp_set_state probe to
> sock:inet_sock_set_state.

It kinda feels like we are fixing one exposing-implementation problem
(the missing state changes, which I'm happy to see fixed), by exposing
another (there's no tcp:tcp_set_state because we don't want to put tcp
functions in inet*.c files). Anyway...

If I'm to use sock:inet_sock_set_state for TCP tracing, I'd like
sk->sk_protocol exposed as a tracepoint argument so I can match on
IPPROTO_TCP. Otherwise I'll have to keep digging it out of (void
*)skaddr. (And if we're adding arguments, maybe consider sk_family as
well, to make it easier to see which address arguments to use).

Brendan


Re: pci driver loads right after unload

2018-01-02 Thread Greg Kroah-Hartman
On Tue, Jan 02, 2018 at 01:00:03PM -0600, Bjorn Helgaas wrote:
> [+cc Greg, linux-kernel]
> 
> Hi Max,
> 
> Thanks for the report!
> 
> On Tue, Jan 02, 2018 at 01:50:23AM +0200, Max Gurtovoy wrote:
> > hi all,
> > I encountered a strange phenomenon using 2 different pci drivers
> > (nvme and mlx5_core) since 4.15-rc1:
> > when I try to unload the modules using "modprobe -r" cmd it calls
> > the .probe function right after calling the .remove function and the
> > module is not really unloaded.
> > I think there is some race condition because when I added a
> > msleep(1000) after "pci_unregister_driver(&nvme_driver);" (in the
> > nvme module testing, it also worked in the mlx5_core), the issue
> > seems to disappear.
> 
> You say "since 4.15-rc1".  Does that mean it's a regression?  If so,
> what's the most recent kernel that does not have this problem?  Worst
> case, you could bisect to find where it broke.
> 
> I don't see anything obvious in the drivers/pci changes between v4.14
> and v4.15-rc1.  Module loading and driver binding is mostly driven by
> the driver core and udev.  Maybe you could learn something with
> "udevadm monitor" or by turning on some of the debug in
> lib/kobject_uevent.c?


This should be resolved in 4.15-rc6, there was a regression in -rc1 in
this area when dealing with uevents over netlink.

Max, can you test -rc6 to verify if this is really fixed or not?

thanks,

greg k-h


Re: [PATCH] Nokia N9: add support for up/down keys in the dts

2018-01-02 Thread Sebastian Reichel
Hi,

On Tue, Jan 02, 2018 at 01:59:48PM +0100, Pavel Machek wrote:
> 
> This adds support for volume up/down keys in the dts.
> 
> Signed-off-by: Pavel Machek 

Reviewed-by: Sebastian Reichel 

-- Sebastian

> 
> diff --git a/arch/arm/boot/dts/omap3-n9.dts b/arch/arm/boot/dts/omap3-n9.dts
> index 39e35f8..57a6679 100644
> --- a/arch/arm/boot/dts/omap3-n9.dts
> +++ b/arch/arm/boot/dts/omap3-n9.dts
> @@ -11,9 +11,10 @@
>  /dts-v1/;
>  
>  #include "omap3-n950-n9.dtsi"
> +#include <dt-bindings/input/input.h>
>  
>  / {
>   model = "Nokia N9";
>   compatible = "nokia,omap3-n9", "ti,omap36xx", "ti,omap3";
>  };
>  
> @@ -72,3 +133,9 @@
>   st,max-limit-y = <46>;
>   st,max-limit-z = <46>;
>  };
> +
> +&twl_keypad {
> + linux,keymap = < MATRIX_KEY(6, 8, KEY_VOLUMEUP)
> +  MATRIX_KEY(7, 8, KEY_VOLUMEDOWN)
> +  >;
> +};
> 
> 
> -- 
> (english) http://www.livejournal.com/~pavelmachek
> (cesky, pictures) 
> http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html




signature.asc
Description: PGP signature


Re: pci driver loads right after unload

2018-01-02 Thread Greg Kroah-Hartman
On Tue, Jan 02, 2018 at 01:00:03PM -0600, Bjorn Helgaas wrote:
> [+cc Greg, linux-kernel]
> 
> Hi Max,
> 
> Thanks for the report!
> 
> On Tue, Jan 02, 2018 at 01:50:23AM +0200, Max Gurtovoy wrote:
> > hi all,
> > I encountered a strange phenomenon using 2 different pci drivers
> > (nvme and mlx5_core) since 4.15-rc1:
> > when I try to unload the modules using "modprobe -r" cmd it calls
> > the .probe function right after calling the .remove function and the
> > module is not really unloaded.
> > I think there is some race condition because when I added a
> > msleep(1000) after "pci_unregister_driver(&nvme_driver);" (in the
> > nvme module testing, it also worked in the mlx5_core), the issue
> > seems to disappear.
> 
> You say "since 4.15-rc1".  Does that mean it's a regression?  If so,
> what's the most recent kernel that does not have this problem?  Worst
> case, you could bisect to find where it broke.
> 
> I don't see anything obvious in the drivers/pci changes between v4.14
> and v4.15-rc1.  Module loading and driver binding is mostly driven by
> the driver core and udev.  Maybe you could learn something with
> "udevadm monitor" or by turning on some of the debug in
> lib/kobject_uevent.c?


This should be resolved in 4.15-rc6, there was a regression in -rc1 in
this area when dealing with uevents over netlink.

Max, can you test -rc6 to verify if this is really fixed or not?

thanks,

greg k-h


Re: [PATCH] Nokia N9: add support for up/down keys in the dts

2018-01-02 Thread Sebastian Reichel
Hi,

On Tue, Jan 02, 2018 at 01:59:48PM +0100, Pavel Machek wrote:
> 
> This adds support for volume up/down keys in the dts.
> 
> Signed-off-by: Pavel Machek 

Reviewed-by: Sebastian Reichel 

-- Sebastian

> 
> diff --git a/arch/arm/boot/dts/omap3-n9.dts b/arch/arm/boot/dts/omap3-n9.dts
> index 39e35f8..57a6679 100644
> --- a/arch/arm/boot/dts/omap3-n9.dts
> +++ b/arch/arm/boot/dts/omap3-n9.dts
> @@ -11,9 +11,10 @@
>  /dts-v1/;
>  
>  #include "omap3-n950-n9.dtsi"
> +#include <dt-bindings/input/input.h>
>  
>  / {
>   model = "Nokia N9";
>   compatible = "nokia,omap3-n9", "ti,omap36xx", "ti,omap3";
>  };
>  
> @@ -72,3 +133,9 @@
>   st,max-limit-y = <46>;
>   st,max-limit-z = <46>;
>  };
> +
> +&twl_keypad {
> + linux,keymap = < MATRIX_KEY(6, 8, KEY_VOLUMEUP)
> +  MATRIX_KEY(7, 8, KEY_VOLUMEDOWN)
> +  >;
> +};
> 
> 
> -- 
> (english) http://www.livejournal.com/~pavelmachek
> (cesky, pictures) 
> http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html




signature.asc
Description: PGP signature


Re: [Intel-gfx] Graphics on thinkpad x270 after dock/undock works only for the first time (CPU pipe B FIFO underrun)

2018-01-02 Thread Chris Wilson
Quoting Rodrigo Vivi (2018-01-02 19:21:08)
> On Sat, Dec 30, 2017 at 12:53:58PM +, Jiri Kosina wrote:
> > On Sat, 30 Dec 2017, Jiri Kosina wrote:
> > 
> > > Seems like disabling RC6 on the kernel command line works this around, 
> > > and 
> > > I can dock / undock several times in a row with the image always coming 
> > > up properly on the external display.
> > > 
> > > On the first undock, the WARN_ONCE() below triggers, so I believe each 
> > > undock leaks memory.
> > > 
> > > [   38.755084] Failed to release pages: bind_count=1, pages_pin_count=1, 
> > > pin_global=0
> > > [   38.755138] WARNING: CPU: 3 PID: 96 at 
> > > ../drivers/gpu/drm/i915/i915_gem_userptr.c:89 cancel_userptr+0xe5/0xf0 
> > > [i915]

Not a leak, just a warning the shadow pte are stale due to someone
pinning a (gfx) vma.

> > OK, I am seeing this warning with current Linus' tree (5aa90a845) even 
> > without any attempt to dock/undock, so it's probably unrelated to external 
> > outputs and it only by coincidence appeared originally at the same time I 
> > docked the machine.
> > 
> > So there are two separate issues on this machine with latest kernel 
> > (neither of them probably being regression):
> > 
> > - I have to disable i915 RC6 at the kernel cmdline, otherwise external 
> >   (dock) display gets output only randomly (seems like always only on 
> >   first dock)
> 
> Joonas, Chris, time to bring rc6_enable back on next-fixes before we
> remove this support entirely?

No. It's precisely because of this mistake (thinking that rc6 has
anything to do with the issue when it's the consequential shotgun
disabling of rpm) that the modparam is not coming back.
-Chris


Re: [Intel-gfx] Graphics on thinkpad x270 after dock/undock works only for the first time (CPU pipe B FIFO underrun)

2018-01-02 Thread Chris Wilson
Quoting Rodrigo Vivi (2018-01-02 19:21:08)
> On Sat, Dec 30, 2017 at 12:53:58PM +, Jiri Kosina wrote:
> > On Sat, 30 Dec 2017, Jiri Kosina wrote:
> > 
> > > Seems like disabling RC6 on the kernel command line works this around, 
> > > and 
> > > I can dock / undock several times in a row with the image always coming 
> > > up properly on the external display.
> > > 
> > > On the first undock, the WARN_ONCE() below triggers, so I believe each 
> > > undock leaks memory.
> > > 
> > > [   38.755084] Failed to release pages: bind_count=1, pages_pin_count=1, 
> > > pin_global=0
> > > [   38.755138] WARNING: CPU: 3 PID: 96 at 
> > > ../drivers/gpu/drm/i915/i915_gem_userptr.c:89 cancel_userptr+0xe5/0xf0 
> > > [i915]

Not a leak, just a warning the shadow pte are stale due to someone
pinning a (gfx) vma.

> > OK, I am seeing this warning with current Linus' tree (5aa90a845) even 
> > without any attempt to dock/undock, so it's probably unrelated to external 
> > outputs and it only by coincidence appeared originally at the same time I 
> > docked the machine.
> > 
> > So there are two separate issues on this machine with latest kernel 
> > (neither of them probably being regression):
> > 
> > - I have to disable i915 RC6 at the kernel cmdline, otherwise external 
> >   (dock) display gets output only randomly (seems like always only on 
> >   first dock)
> 
> Joonas, Chris, time to bring rc6_enable back on next-fixes before we
> remove this support entirely?

No. It's precisely because of this mistake (thinking that rc6 has
anything to do with the issue when it's the consequential shotgun
disabling of rpm) that the modparam is not coming back.
-Chris


Re: [PATCH 2/2] serial: imx: fix endless loop during suspend

2018-01-02 Thread Fabio Estevam
Hi Martin,

On Tue, Jan 2, 2018 at 2:15 PM, Martin Kaiser  wrote:

> Fabio, could you post the output of
>
> cat /sys/kernel/debug/suspend_stats
>
> after suspend failed, to confirm that we're failing below
> device_suspend_noirq()?

Here it goes:

# cat /sys/kernel/debug/suspend_stats
success: 0
fail: 1
failed_freeze: 0
failed_prepare: 0
failed_suspend: 0
failed_suspend_late: 0
failed_suspend_noirq: 1
failed_resume: 0
failed_resume_early: 0
failed_resume_noirq: 0
failures:
  last_failed_dev:

  last_failed_errno:-16
0
  last_failed_step: suspend_noirq


Re: [PATCH 2/2] serial: imx: fix endless loop during suspend

2018-01-02 Thread Fabio Estevam
Hi Martin,

On Tue, Jan 2, 2018 at 2:15 PM, Martin Kaiser  wrote:

> Fabio, could you post the output of
>
> cat /sys/kernel/debug/suspend_stats
>
> after suspend failed, to confirm that we're failing below
> device_suspend_noirq()?

Here it goes:

# cat /sys/kernel/debug/suspend_stats
success: 0
fail: 1
failed_freeze: 0
failed_prepare: 0
failed_suspend: 0
failed_suspend_late: 0
failed_suspend_noirq: 1
failed_resume: 0
failed_resume_early: 0
failed_resume_noirq: 0
failures:
  last_failed_dev:

  last_failed_errno:-16
0
  last_failed_step: suspend_noirq


Re: [PATCH 0/3] Ktest: add email support

2018-01-02 Thread Steven Rostedt
On Tue, 2 Jan 2018 11:08:00 -0800
Tim Tianyang Chen  wrote:

> Hi Steve, did your mailer find all the patches? I made sure they all 
> reply to the same mail ID this time.
> 

Yes, sorry due to end of year work, these were put on the back burner.

I'll see if I can get to them sometime this week.

-- Steve


Re: [PATCH 0/3] Ktest: add email support

2018-01-02 Thread Steven Rostedt
On Tue, 2 Jan 2018 11:08:00 -0800
Tim Tianyang Chen  wrote:

> Hi Steve, did your mailer find all the patches? I made sure they all 
> reply to the same mail ID this time.
> 

Yes, sorry due to end of year work, these were put on the back burner.

I'll see if I can get to them sometime this week.

-- Steve


Re: [PATCH net-next v7 0/6] net: tcp: sctp: dccp: Replace jprobe usage with trace events

2018-01-02 Thread David Miller
From: Masami Hiramatsu 
Date: Fri, 29 Dec 2017 11:45:20 +0900

> This series is v7 of the replacement of jprobe usage with trace
> events. This version fixes net/dccp/trace.h to avoid sparse
> warning. Since the TP_STORE_ADDR_PORTS macro can be shared
> with trace/events/tcp.h, it also introduces a new common header
> file and move the definition of that macro.
> 
> Previous version is here;
>  https://lkml.org/lkml/2017/12/28/7
> 
> Changes from v6:
>   [5/6]: Avoid preprocessor directives in tracepoint macro args

Series applied, thank you.


Re: [PATCH net-next v7 0/6] net: tcp: sctp: dccp: Replace jprobe usage with trace events

2018-01-02 Thread David Miller
From: Masami Hiramatsu 
Date: Fri, 29 Dec 2017 11:45:20 +0900

> This series is v7 of the replacement of jprobe usage with trace
> events. This version fixes net/dccp/trace.h to avoid sparse
> warning. Since the TP_STORE_ADDR_PORTS macro can be shared
> with trace/events/tcp.h, it also introduces a new common header
> file and move the definition of that macro.
> 
> Previous version is here;
>  https://lkml.org/lkml/2017/12/28/7
> 
> Changes from v6:
>   [5/6]: Avoid preprocessor directives in tracepoint macro args

Series applied, thank you.


Re: [PATCH v4] f2fs: add reserved blocks for root user

2018-01-02 Thread Jaegeuk Kim
This patch allows root to reserve some blocks via mount option.

"-o reserve_root=N" means N x 4KB-sized blocks for root only.

Signed-off-by: Jaegeuk Kim 
---

Change log from v3:
 - fix 0.2% calculation
 - preserve reserve_root=%u from remount_fs

 fs/f2fs/f2fs.h  | 26 ++
 fs/f2fs/super.c | 35 ++-
 fs/f2fs/sysfs.c |  3 ++-
 3 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5f7f42267221..9dde05c62b1c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -95,6 +95,7 @@ extern char *fault_name[FAULT_MAX];
 #define F2FS_MOUNT_PRJQUOTA0x0020
 #define F2FS_MOUNT_QUOTA   0x0040
 #define F2FS_MOUNT_INLINE_XATTR_SIZE   0x0080
+#define F2FS_MOUNT_RESERVE_ROOT0x0100
 
 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
 #define set_opt(sbi, option)   ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -1110,6 +1111,7 @@ struct f2fs_sb_info {
block_t last_valid_block_count; /* for recovery */
block_t reserved_blocks;/* configurable reserved blocks 
*/
block_t current_reserved_blocks;/* current reserved blocks */
+   block_t root_reserved_blocks;   /* root reserved blocks */
 
unsigned int nquota_files;  /* # of quota sysfile */
 
@@ -1562,6 +1564,12 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
return ofs == XATTR_NODE_OFFSET;
 }
 
+static inline block_t reserve_root_limit(struct f2fs_sb_info *sbi)
+{
+   /* limit is 0.2% */
+   return (sbi->user_block_count << 1) / 1000;
+}
+
 static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
 static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 struct inode *inode, blkcnt_t *count)
@@ -1591,11 +1599,17 @@ static inline int inc_valid_block_count(struct 
f2fs_sb_info *sbi,
sbi->total_valid_block_count += (block_t)(*count);
avail_user_block_count = sbi->user_block_count -
sbi->current_reserved_blocks;
+
+   if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE)))
+   avail_user_block_count -= sbi->root_reserved_blocks;
+
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
diff = sbi->total_valid_block_count - avail_user_block_count;
+   if (diff > *count)
+   diff = *count;
*count -= diff;
release = diff;
-   sbi->total_valid_block_count = avail_user_block_count;
+   sbi->total_valid_block_count -= diff;
if (!*count) {
spin_unlock(&sbi->stat_lock);
percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
@@ -1784,9 +1798,13 @@ static inline int inc_valid_node_count(struct 
f2fs_sb_info *sbi,
 
spin_lock(&sbi->stat_lock);
 
-   valid_block_count = sbi->total_valid_block_count + 1;
-   if (unlikely(valid_block_count + sbi->current_reserved_blocks >
-   sbi->user_block_count)) {
+   valid_block_count = sbi->total_valid_block_count +
+   sbi->current_reserved_blocks + 1;
+
+   if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE)))
+   valid_block_count += sbi->root_reserved_blocks;
+
+   if (unlikely(valid_block_count > sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index cb876d905ca5..3c62492b6a0d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -107,6 +107,7 @@ enum {
Opt_noextent_cache,
Opt_noinline_data,
Opt_data_flush,
+   Opt_reserve_root,
Opt_mode,
Opt_io_size_bits,
Opt_fault_injection,
@@ -157,6 +158,7 @@ static match_table_t f2fs_tokens = {
{Opt_noextent_cache, "noextent_cache"},
{Opt_noinline_data, "noinline_data"},
{Opt_data_flush, "data_flush"},
+   {Opt_reserve_root, "reserve_root=%u"},
{Opt_mode, "mode=%s"},
{Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
@@ -488,6 +490,18 @@ static int parse_options(struct super_block *sb, char 
*options)
case Opt_data_flush:
set_opt(sbi, DATA_FLUSH);
break;
+   case Opt_reserve_root:
+   if (args->from && match_int(args, &arg))
+   return -EINVAL;
+   if (test_opt(sbi, RESERVE_ROOT)) {
+   f2fs_msg(sb, KERN_INFO,
+   "Preserve previous reserve_root=%u",
+   sbi->root_reserved_blocks);
+  

Re: [PATCH v4] f2fs: add reserved blocks for root user

2018-01-02 Thread Jaegeuk Kim
This patch allows root to reserve some blocks via mount option.

"-o reserve_root=N" means N x 4KB-sized blocks for root only.

Signed-off-by: Jaegeuk Kim 
---

Change log from v3:
 - fix 0.2% calculation
 - preserve reserve_root=%u from remount_fs

 fs/f2fs/f2fs.h  | 26 ++
 fs/f2fs/super.c | 35 ++-
 fs/f2fs/sysfs.c |  3 ++-
 3 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5f7f42267221..9dde05c62b1c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -95,6 +95,7 @@ extern char *fault_name[FAULT_MAX];
 #define F2FS_MOUNT_PRJQUOTA0x0020
 #define F2FS_MOUNT_QUOTA   0x0040
 #define F2FS_MOUNT_INLINE_XATTR_SIZE   0x0080
+#define F2FS_MOUNT_RESERVE_ROOT0x0100
 
 #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option)
 #define set_opt(sbi, option)   ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option)
@@ -1110,6 +1111,7 @@ struct f2fs_sb_info {
block_t last_valid_block_count; /* for recovery */
block_t reserved_blocks;/* configurable reserved blocks 
*/
block_t current_reserved_blocks;/* current reserved blocks */
+   block_t root_reserved_blocks;   /* root reserved blocks */
 
unsigned int nquota_files;  /* # of quota sysfile */
 
@@ -1562,6 +1564,12 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs)
return ofs == XATTR_NODE_OFFSET;
 }
 
+static inline block_t reserve_root_limit(struct f2fs_sb_info *sbi)
+{
+   /* limit is 0.2% */
+   return (sbi->user_block_count << 1) / 1000;
+}
+
 static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool);
 static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 struct inode *inode, blkcnt_t *count)
@@ -1591,11 +1599,17 @@ static inline int inc_valid_block_count(struct 
f2fs_sb_info *sbi,
sbi->total_valid_block_count += (block_t)(*count);
avail_user_block_count = sbi->user_block_count -
sbi->current_reserved_blocks;
+
+   if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE)))
+   avail_user_block_count -= sbi->root_reserved_blocks;
+
if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
diff = sbi->total_valid_block_count - avail_user_block_count;
+   if (diff > *count)
+   diff = *count;
*count -= diff;
release = diff;
-   sbi->total_valid_block_count = avail_user_block_count;
+   sbi->total_valid_block_count -= diff;
if (!*count) {
spin_unlock(&sbi->stat_lock);
percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
@@ -1784,9 +1798,13 @@ static inline int inc_valid_node_count(struct 
f2fs_sb_info *sbi,
 
spin_lock(&sbi->stat_lock);
 
-   valid_block_count = sbi->total_valid_block_count + 1;
-   if (unlikely(valid_block_count + sbi->current_reserved_blocks >
-   sbi->user_block_count)) {
+   valid_block_count = sbi->total_valid_block_count +
+   sbi->current_reserved_blocks + 1;
+
+   if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE)))
+   valid_block_count += sbi->root_reserved_blocks;
+
+   if (unlikely(valid_block_count > sbi->user_block_count)) {
spin_unlock(&sbi->stat_lock);
goto enospc;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index cb876d905ca5..3c62492b6a0d 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -107,6 +107,7 @@ enum {
Opt_noextent_cache,
Opt_noinline_data,
Opt_data_flush,
+   Opt_reserve_root,
Opt_mode,
Opt_io_size_bits,
Opt_fault_injection,
@@ -157,6 +158,7 @@ static match_table_t f2fs_tokens = {
{Opt_noextent_cache, "noextent_cache"},
{Opt_noinline_data, "noinline_data"},
{Opt_data_flush, "data_flush"},
+   {Opt_reserve_root, "reserve_root=%u"},
{Opt_mode, "mode=%s"},
{Opt_io_size_bits, "io_bits=%u"},
{Opt_fault_injection, "fault_injection=%u"},
@@ -488,6 +490,18 @@ static int parse_options(struct super_block *sb, char 
*options)
case Opt_data_flush:
set_opt(sbi, DATA_FLUSH);
break;
+   case Opt_reserve_root:
+   if (args->from && match_int(args, &arg))
+   return -EINVAL;
+   if (test_opt(sbi, RESERVE_ROOT)) {
+   f2fs_msg(sb, KERN_INFO,
+   "Preserve previous reserve_root=%u",
+   sbi->root_reserved_blocks);
+  

Re: [f2fs-dev] [PATCH v3] f2fs: add reserved blocks for root user

2018-01-02 Thread Jaegeuk Kim
On 01/02, Chao Yu wrote:
> On 2018/1/1 9:29, Jaegeuk Kim wrote:
> > This patch allows root to reserve some blocks via mount option.
> > 
> > "-o reserve_root=N" means N x 4KB-sized blocks for root only.
> > 
> > Signed-off-by: Jaegeuk Kim 
> > ---
> > 
> > Change log from v2:
> >  - wrong submission. :P
> > 
> >  fs/f2fs/f2fs.h  | 26 ++
> >  fs/f2fs/super.c | 26 +++---
> >  fs/f2fs/sysfs.c |  3 ++-
> >  3 files changed, 47 insertions(+), 8 deletions(-)
> > 
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 5f7f42267221..123d875f7293 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -95,6 +95,7 @@ extern char *fault_name[FAULT_MAX];
> >  #define F2FS_MOUNT_PRJQUOTA0x0020
> >  #define F2FS_MOUNT_QUOTA   0x0040
> >  #define F2FS_MOUNT_INLINE_XATTR_SIZE   0x0080
> > +#define F2FS_MOUNT_RESERVE_ROOT0x0100
> >  
> >  #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= 
> > ~F2FS_MOUNT_##option)
> >  #define set_opt(sbi, option)   ((sbi)->mount_opt.opt |= 
> > F2FS_MOUNT_##option)
> > @@ -1110,6 +1111,7 @@ struct f2fs_sb_info {
> > block_t last_valid_block_count; /* for recovery */
> > block_t reserved_blocks;/* configurable reserved blocks 
> > */
> > block_t current_reserved_blocks;/* current reserved blocks */
> > +   block_t root_reserved_blocks;   /* root reserved blocks */
> >  
> > unsigned int nquota_files;  /* # of quota sysfile */
> >  
> > @@ -1562,6 +1564,12 @@ static inline bool f2fs_has_xattr_block(unsigned int 
> > ofs)
> > return ofs == XATTR_NODE_OFFSET;
> >  }
> >  
> > +static inline block_t reserve_root_limit(struct f2fs_sb_info *sbi)
> > +{
> > +   /* limit is 0.2% */
> 
> Should be 2% according to below calculation?

Oh, right. Fixed.

> 
> > +   return (sbi->user_block_count << 1) / 100;
> > +}
> > +
> >  static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, 
> > bool);
> >  static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
> >  struct inode *inode, blkcnt_t *count)
> > @@ -1591,11 +1599,17 @@ static inline int inc_valid_block_count(struct 
> > f2fs_sb_info *sbi,
> > sbi->total_valid_block_count += (block_t)(*count);
> > avail_user_block_count = sbi->user_block_count -
> > sbi->current_reserved_blocks;
> > +
> > +   if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE)))
> 
> How about adding uid & gid verification also like ext4?

Again, that's another feature which requires a mount option. I think it'd be
better to add that, once we have a use-case.

> As this is a mount option, in ->remount_fs, we should consider to recover
> original reserved block number if we encounter some error during remount.

Yup, agreed.

Thanks,

> 
> Thanks,
> 
> > +   avail_user_block_count -= sbi->root_reserved_blocks;
> > +
> > if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
> > diff = sbi->total_valid_block_count - avail_user_block_count;
> > +   if (diff > *count)
> > +   diff = *count;
> > *count -= diff;
> > release = diff;
> > -   sbi->total_valid_block_count = avail_user_block_count;
> > +   sbi->total_valid_block_count -= diff;
> > if (!*count) {
> > > spin_unlock(&sbi->stat_lock);
> > > percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
> > @@ -1784,9 +1798,13 @@ static inline int inc_valid_node_count(struct 
> > f2fs_sb_info *sbi,
> >  
> > > spin_lock(&sbi->stat_lock);
> >  
> > -   valid_block_count = sbi->total_valid_block_count + 1;
> > -   if (unlikely(valid_block_count + sbi->current_reserved_blocks >
> > -   sbi->user_block_count)) {
> > +   valid_block_count = sbi->total_valid_block_count +
> > +   sbi->current_reserved_blocks + 1;
> > +
> > +   if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE)))
> > +   valid_block_count += sbi->root_reserved_blocks;
> > +
> > +   if (unlikely(valid_block_count > sbi->user_block_count)) {
> > > spin_unlock(&sbi->stat_lock);
> > goto enospc;
> > }
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index cb876d905ca5..9221b013db98 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -107,6 +107,7 @@ enum {
> > Opt_noextent_cache,
> > Opt_noinline_data,
> > Opt_data_flush,
> > +   Opt_reserve_root,
> > Opt_mode,
> > Opt_io_size_bits,
> > Opt_fault_injection,
> > @@ -157,6 +158,7 @@ static match_table_t f2fs_tokens = {
> > {Opt_noextent_cache, "noextent_cache"},
> > {Opt_noinline_data, "noinline_data"},
> > {Opt_data_flush, "data_flush"},
> > +   {Opt_reserve_root, "reserve_root=%u"},
> > {Opt_mode, 

Re: [f2fs-dev] [PATCH v3] f2fs: add reserved blocks for root user

2018-01-02 Thread Jaegeuk Kim
On 01/02, Chao Yu wrote:
> On 2018/1/1 9:29, Jaegeuk Kim wrote:
> > This patch allows root to reserve some blocks via mount option.
> > 
> > "-o reserve_root=N" means N x 4KB-sized blocks for root only.
> > 
> > Signed-off-by: Jaegeuk Kim 
> > ---
> > 
> > Change log from v2:
> >  - wrong submission. :P
> > 
> >  fs/f2fs/f2fs.h  | 26 ++
> >  fs/f2fs/super.c | 26 +++---
> >  fs/f2fs/sysfs.c |  3 ++-
> >  3 files changed, 47 insertions(+), 8 deletions(-)
> > 
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index 5f7f42267221..123d875f7293 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -95,6 +95,7 @@ extern char *fault_name[FAULT_MAX];
> >  #define F2FS_MOUNT_PRJQUOTA0x0020
> >  #define F2FS_MOUNT_QUOTA   0x0040
> >  #define F2FS_MOUNT_INLINE_XATTR_SIZE   0x0080
> > +#define F2FS_MOUNT_RESERVE_ROOT0x0100
> >  
> >  #define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= 
> > ~F2FS_MOUNT_##option)
> >  #define set_opt(sbi, option)   ((sbi)->mount_opt.opt |= 
> > F2FS_MOUNT_##option)
> > @@ -1110,6 +1111,7 @@ struct f2fs_sb_info {
> > block_t last_valid_block_count; /* for recovery */
> > block_t reserved_blocks;/* configurable reserved blocks 
> > */
> > block_t current_reserved_blocks;/* current reserved blocks */
> > +   block_t root_reserved_blocks;   /* root reserved blocks */
> >  
> > unsigned int nquota_files;  /* # of quota sysfile */
> >  
> > @@ -1562,6 +1564,12 @@ static inline bool f2fs_has_xattr_block(unsigned int 
> > ofs)
> > return ofs == XATTR_NODE_OFFSET;
> >  }
> >  
> > +static inline block_t reserve_root_limit(struct f2fs_sb_info *sbi)
> > +{
> > +   /* limit is 0.2% */
> 
> Should be 2% according to below calculation?

Oh, right. Fixed.

> 
> > +   return (sbi->user_block_count << 1) / 100;
> > +}
> > +
> >  static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, 
> > bool);
> >  static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
> >  struct inode *inode, blkcnt_t *count)
> > @@ -1591,11 +1599,17 @@ static inline int inc_valid_block_count(struct 
> > f2fs_sb_info *sbi,
> > sbi->total_valid_block_count += (block_t)(*count);
> > avail_user_block_count = sbi->user_block_count -
> > sbi->current_reserved_blocks;
> > +
> > +   if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE)))
> 
> How about adding uid & gid verification also like ext4?

Again, that's another feature which requires a mount option. I think it'd be
better to add that, once we have a use-case.

> As this is a mount option, in ->remount_fs, we should consider to recover
> original reserved block number if we encounter some error during remount.

Yup, agreed.

Thanks,

> 
> Thanks,
> 
> > +   avail_user_block_count -= sbi->root_reserved_blocks;
> > +
> > if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
> > diff = sbi->total_valid_block_count - avail_user_block_count;
> > +   if (diff > *count)
> > +   diff = *count;
> > *count -= diff;
> > release = diff;
> > -   sbi->total_valid_block_count = avail_user_block_count;
> > +   sbi->total_valid_block_count -= diff;
> > if (!*count) {
> > > spin_unlock(&sbi->stat_lock);
> > > percpu_counter_sub(&sbi->alloc_valid_block_count, diff);
> > @@ -1784,9 +1798,13 @@ static inline int inc_valid_node_count(struct 
> > f2fs_sb_info *sbi,
> >  
> > > spin_lock(&sbi->stat_lock);
> >  
> > -   valid_block_count = sbi->total_valid_block_count + 1;
> > -   if (unlikely(valid_block_count + sbi->current_reserved_blocks >
> > -   sbi->user_block_count)) {
> > +   valid_block_count = sbi->total_valid_block_count +
> > +   sbi->current_reserved_blocks + 1;
> > +
> > +   if (!(test_opt(sbi, RESERVE_ROOT) && capable(CAP_SYS_RESOURCE)))
> > +   valid_block_count += sbi->root_reserved_blocks;
> > +
> > +   if (unlikely(valid_block_count > sbi->user_block_count)) {
> > > spin_unlock(&sbi->stat_lock);
> > goto enospc;
> > }
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index cb876d905ca5..9221b013db98 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -107,6 +107,7 @@ enum {
> > Opt_noextent_cache,
> > Opt_noinline_data,
> > Opt_data_flush,
> > +   Opt_reserve_root,
> > Opt_mode,
> > Opt_io_size_bits,
> > Opt_fault_injection,
> > @@ -157,6 +158,7 @@ static match_table_t f2fs_tokens = {
> > {Opt_noextent_cache, "noextent_cache"},
> > {Opt_noinline_data, "noinline_data"},
> > {Opt_data_flush, "data_flush"},
> > +   {Opt_reserve_root, "reserve_root=%u"},
> > {Opt_mode, "mode=%s"},
> > 

Re: [Intel-gfx] [PATCH v2] drm/i915: Try EDID bitbanging on HDMI after failed read

2018-01-02 Thread Chris Wilson
Quoting Rodrigo Vivi (2018-01-02 19:12:18)
> On Sun, Dec 31, 2017 at 10:34:54PM +0000, Stefan Brüns wrote:
> > + edid = drm_get_edid(connector, i2c);
> > +
> > + if (!edid && !intel_gmbus_is_forced_bit(i2c)) {
> > + DRM_DEBUG_KMS("HDMI GMBUS EDID read failed, retry using GPIO 
> > bit-banging\n");
> > + intel_gmbus_force_bit(i2c, true);
> > + edid = drm_get_edid(connector, i2c);
> > + intel_gmbus_force_bit(i2c, false);
> > + }
> 
> Approach seems fine for this case.
> I just wonder what would be the risks of forcing this bit and edid read when 
> nothing is present on the other end?

Should be no more risky than using GMBUS as the bit-banging is the
underlying HW protocol; it should just be adding an extra delay to
the disconnected probe. Offset against the chance that it fixes
detection of borderline devices.

I would say that given the explanation above, the question is why not
apply it universally? (Bonus points for including the explanation as
comments.)
-Chris


Re: [Intel-gfx] [PATCH v2] drm/i915: Try EDID bitbanging on HDMI after failed read

2018-01-02 Thread Chris Wilson
Quoting Rodrigo Vivi (2018-01-02 19:12:18)
> On Sun, Dec 31, 2017 at 10:34:54PM +0000, Stefan Brüns wrote:
> > + edid = drm_get_edid(connector, i2c);
> > +
> > + if (!edid && !intel_gmbus_is_forced_bit(i2c)) {
> > + DRM_DEBUG_KMS("HDMI GMBUS EDID read failed, retry using GPIO 
> > bit-banging\n");
> > + intel_gmbus_force_bit(i2c, true);
> > + edid = drm_get_edid(connector, i2c);
> > + intel_gmbus_force_bit(i2c, false);
> > + }
> 
> Approach seems fine for this case.
> I just wonder what would be the risks of forcing this bit and edid read when 
> nothing is present on the other end?

Should be no more risky than using GMBUS as the bit-banging is the
underlying HW protocol; it should just be adding an extra delay to
the disconnected probe. Offset against the chance that it fixes
detection of borderline devices.

I would say that given the explanation above, the question is why not
apply it universally? (Bonus points for including the explanation as
comments.)
-Chris


Re: [PATCH v5] x86/microcode/intel: Blacklist the specific BDW-EP for late loading

2018-01-02 Thread Luck, Tony
On Mon, Jan 01, 2018 at 11:10:56PM +0100, Borislav Petkov wrote:
> On Mon, Jan 01, 2018 at 10:04:47AM +0800, Jia Zhang wrote:
> 
> Ok, I went and massaged your version, here's what I committed:
> 
> Signed-off-by: Jia Zhang 
> Acked-by: Tony Luck 
> Cc: x86-ml 
> Link: 
> http://lkml.kernel.org/r/1514772287-92959-1-git-send-email-qianyue...@alibaba-inc.com
> [ Heavily massage commit message and pr_* statements. ]
> Signed-off-by: Borislav Petkov 

Should there be a "Cc: stable ..." in there?

The original patch that disables for all broadwell
seems to be getting applied to a bunch of older trees,
I think we want this one to chase after it.

Perhaps a

Fixes: 723f2828a98c ("x86/microcode/intel: Disable late loading on model 79")

too?

-Tony


Re: [PATCH v5] x86/microcode/intel: Blacklist the specific BDW-EP for late loading

2018-01-02 Thread Luck, Tony
On Mon, Jan 01, 2018 at 11:10:56PM +0100, Borislav Petkov wrote:
> On Mon, Jan 01, 2018 at 10:04:47AM +0800, Jia Zhang wrote:
> 
> Ok, I went and massaged your version, here's what I committed:
> 
> Signed-off-by: Jia Zhang 
> Acked-by: Tony Luck 
> Cc: x86-ml 
> Link: 
> http://lkml.kernel.org/r/1514772287-92959-1-git-send-email-qianyue...@alibaba-inc.com
> [ Heavily massage commit message and pr_* statements. ]
> Signed-off-by: Borislav Petkov 

Should there be a "Cc: stable ..." in there?

The original patch that disables for all broadwell
seems to be getting applied to a bunch of older trees,
I think we want this one to chase after it.

Perhaps a

Fixes: 723f2828a98c ("x86/microcode/intel: Disable late loading on model 79")

too?

-Tony


Re: [PATCH v2 0/6] wl1251: Fix MAC address for Nokia N900

2018-01-02 Thread Pali Rohár
On Friday 10 November 2017 00:38:22 Pali Rohár wrote:
> This patch series fix processing MAC address for wl1251 chip found in Nokia 
> N900.
> 
> Changes since v1:
> * Added Acked-by for Pavel Machek
> * Fixed grammar
> * Magic numbers for NVS offsets are replaced by defines
> * Check for validity of mac address NVS data is moved into function
> * Changed order of patches as Pavel requested
> 
> Pali Rohár (6):
>   wl1251: Update wl->nvs_len after wl->nvs is valid
>   wl1251: Generate random MAC address only if driver does not have
> valid
>   wl1251: Parse and use MAC address from supplied NVS data
>   wl1251: Set generated MAC address back to NVS data
>   firmware: Add request_firmware_prefer_user() function
>   wl1251: Use request_firmware_prefer_user() for loading NVS
> calibration data
> 
>  drivers/base/firmware_class.c  |   45 +-
>  drivers/net/wireless/ti/wl1251/Kconfig |1 +
>  drivers/net/wireless/ti/wl1251/main.c  |  104 
> ++--
>  include/linux/firmware.h   |9 +++
>  4 files changed, 138 insertions(+), 21 deletions(-)

Hi! Are there any comments for first 4 patches? If not, could they be
accepted and merged?

-- 
Pali Rohár
pali.ro...@gmail.com


Re: [PATCH v2 0/6] wl1251: Fix MAC address for Nokia N900

2018-01-02 Thread Pali Rohár
On Friday 10 November 2017 00:38:22 Pali Rohár wrote:
> This patch series fix processing MAC address for wl1251 chip found in Nokia 
> N900.
> 
> Changes since v1:
> * Added Acked-by for Pavel Machek
> * Fixed grammar
> * Magic numbers for NVS offsets are replaced by defines
> * Check for validity of mac address NVS data is moved into function
> * Changed order of patches as Pavel requested
> 
> Pali Rohár (6):
>   wl1251: Update wl->nvs_len after wl->nvs is valid
>   wl1251: Generate random MAC address only if driver does not have
> valid
>   wl1251: Parse and use MAC address from supplied NVS data
>   wl1251: Set generated MAC address back to NVS data
>   firmware: Add request_firmware_prefer_user() function
>   wl1251: Use request_firmware_prefer_user() for loading NVS
> calibration data
> 
>  drivers/base/firmware_class.c  |   45 +-
>  drivers/net/wireless/ti/wl1251/Kconfig |1 +
>  drivers/net/wireless/ti/wl1251/main.c  |  104 
> ++--
>  include/linux/firmware.h   |9 +++
>  4 files changed, 138 insertions(+), 21 deletions(-)

Hi! Are there any comments for first 4 patches? If not, could they be
accepted and merged?

-- 
Pali Rohár
pali.ro...@gmail.com


Re: [PATCH] clk: Fix debugfs_create_*() usage

2018-01-02 Thread Stephen Boyd
On 01/02, Geert Uytterhoeven wrote:
> When exposing data access through debugfs, the correct
> debugfs_create_*() functions must be used, depending on data type.
> 
> Remove all casts from data pointers passed to debugfs_create_*()
> functions, as such casts prevent the compiler from flagging bugs.
> 
> clk_core.rate, .accuracy, and .flags are "unsigned long", hence casting
> to "u32 *" exposed the wrong halves on big-endian 64-bit systems.
> 
> Fix .rate and .accuracy, by using debugfs_create_ulong() instead.
> 
> Fix .flags by changing the field to "unsigned int", as a change to
> debugfs_create_x64() on 64-bit systems would change the user-visible
> formatting in debugfs.
> Note that __clk_get_flags() and clk_hw_get_flags() are left unchanged
> and still return "unsigned long", to avoid having to change all their
> users.  Likewise, of_clk_detect_critical() still takes "unsigned long",
> but the comment is updated as it is never passed a real pointer to
> clk_core.flags.
> 
> Signed-off-by: Geert Uytterhoeven 
> ---
> Looks like none of the 64-bit architectures support common clock yet?

arm64 does.

> ---
>  drivers/clk/clk.c | 24 
>  1 file changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
> index 5ec580914089510a..b23e0249f0e3c634 100644
> --- a/drivers/clk/clk.c
> +++ b/drivers/clk/clk.c
> @@ -58,7 +58,7 @@ struct clk_core {
>   unsigned long   new_rate;
>   struct clk_core *new_parent;
>   struct clk_core *new_child;
> - unsigned long   flags;
> + unsigned intflags;

This doesn't look good.

>   boolorphan;
>   unsigned intenable_count;
>   unsigned intprepare_count;
> @@ -2600,43 +2600,43 @@ static int clk_debug_create_one(struct clk_core 
> *core, struct dentry *pdentry)
>  
>   core->dentry = d;
>  
> - d = debugfs_create_u32("clk_rate", S_IRUGO, core->dentry,
> - (u32 *)&core->rate);
> + d = debugfs_create_ulong("clk_rate", S_IRUGO, core->dentry,
> +  &core->rate);

As you're changing these lines, can you also change S_IRUGO to
the octal values. That's the preferred style now.

>   if (!d)
>   goto err_out;
>  
> - d = debugfs_create_u32("clk_accuracy", S_IRUGO, core->dentry,
> - (u32 *)&core->accuracy);
> + d = debugfs_create_ulong("clk_accuracy", S_IRUGO, core->dentry,
> +  &core->accuracy);
>   if (!d)
>   goto err_out;
>  
>   d = debugfs_create_u32("clk_phase", S_IRUGO, core->dentry,
> - (u32 *)&core->phase);
> +&core->phase);
>   if (!d)
>   goto err_out;
>  
>   d = debugfs_create_x32("clk_flags", S_IRUGO, core->dentry,
> - (u32 *)&core->flags);
> +&core->flags);

Maybe we need a new debugfs API like debugfs_create_ulong_hex()
or something that prints out an unsigned long as a hex value?
Probably we should change it to pretty print the values and what
they correspond to, with words, because that's the least
confusing thing to do with regards to endianness. So the
clk_flags file would have something like

CLK_SET_RATE_PARENT
CLK_SET_RATE_GATE

if those flags are set.

We don't care about ABI here either. This is debugfs.

> @@ -3927,7 +3927,7 @@ static int parent_ready(struct device_node *np)
>   * of_clk_detect_critical() - set CLK_IS_CRITICAL flag from Device Tree
>   * @np: Device node pointer associated with clock provider
>   * @index: clock index
> - * @flags: pointer to clk_core->flags
> + * @flags: pointer to core clock flags

Please split this off into another patch.

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH] clk: Fix debugfs_create_*() usage

2018-01-02 Thread Stephen Boyd
On 01/02, Geert Uytterhoeven wrote:
> When exposing data access through debugfs, the correct
> debugfs_create_*() functions must be used, depending on data type.
> 
> Remove all casts from data pointers passed to debugfs_create_*()
> functions, as such casts prevent the compiler from flagging bugs.
> 
> clk_core.rate, .accuracy, and .flags are "unsigned long", hence casting
> to "u32 *" exposed the wrong halves on big-endian 64-bit systems.
> 
> Fix .rate and .accuracy, by using debugfs_create_ulong() instead.
> 
> Fix .flags by changing the field to "unsigned int", as a change to
> debugfs_create_x64() on 64-bit systems would change the user-visible
> formatting in debugfs.
> Note that __clk_get_flags() and clk_hw_get_flags() are left unchanged
> and still return "unsigned long", to avoid having to change all their
> users.  Likewise, of_clk_detect_critical() still takes "unsigned long",
> but the comment is updated as it is never passed a real pointer to
> clk_core.flags.
> 
> Signed-off-by: Geert Uytterhoeven 
> ---
> Looks like none of the 64-bit architectures support common clock yet?

arm64 does.

> ---
>  drivers/clk/clk.c | 24 
>  1 file changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
> index 5ec580914089510a..b23e0249f0e3c634 100644
> --- a/drivers/clk/clk.c
> +++ b/drivers/clk/clk.c
> @@ -58,7 +58,7 @@ struct clk_core {
>   unsigned long   new_rate;
>   struct clk_core *new_parent;
>   struct clk_core *new_child;
> - unsigned long   flags;
> + unsigned intflags;

This doesn't look good.

>   boolorphan;
>   unsigned intenable_count;
>   unsigned intprepare_count;
> @@ -2600,43 +2600,43 @@ static int clk_debug_create_one(struct clk_core 
> *core, struct dentry *pdentry)
>  
>   core->dentry = d;
>  
> - d = debugfs_create_u32("clk_rate", S_IRUGO, core->dentry,
> - (u32 *)&core->rate);
> + d = debugfs_create_ulong("clk_rate", S_IRUGO, core->dentry,
> +  &core->rate);

As you're changing these lines, can you also change S_IRUGO to
the octal values. That's the preferred style now.

>   if (!d)
>   goto err_out;
>  
> - d = debugfs_create_u32("clk_accuracy", S_IRUGO, core->dentry,
> - (u32 *)&core->accuracy);
> + d = debugfs_create_ulong("clk_accuracy", S_IRUGO, core->dentry,
> +  &core->accuracy);
>   if (!d)
>   goto err_out;
>  
>   d = debugfs_create_u32("clk_phase", S_IRUGO, core->dentry,
> - (u32 *)&core->phase);
> +&core->phase);
>   if (!d)
>   goto err_out;
>  
>   d = debugfs_create_x32("clk_flags", S_IRUGO, core->dentry,
> - (u32 *)&core->flags);
> +&core->flags);

Maybe we need a new debugfs API like debugfs_create_ulong_hex()
or something that prints out an unsigned long as a hex value?
Probably we should change it to pretty print the values and what
they correspond to, with words, because that's the least
confusing thing to do with regards to endianness. So the
clk_flags file would have something like

CLK_SET_RATE_PARENT
CLK_SET_RATE_GATE

if those flags are set.

We don't care about ABI here either. This is debugfs.

> @@ -3927,7 +3927,7 @@ static int parent_ready(struct device_node *np)
>   * of_clk_detect_critical() - set CLK_IS_CRITICAL flag from Device Tree
>   * @np: Device node pointer associated with clock provider
>   * @index: clock index
> - * @flags: pointer to clk_core->flags
> + * @flags: pointer to core clock flags

Please split this off into another patch.

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [Intel-gfx] Graphics on thinkpad x270 after dock/undock works only for the first time (CPU pipe B FIFO underrun)

2018-01-02 Thread Rodrigo Vivi
On Sat, Dec 30, 2017 at 12:53:58PM +0000, Jiri Kosina wrote:
> On Sat, 30 Dec 2017, Jiri Kosina wrote:
> 
> > Seems like disabling RC6 on the kernel command line works this around, and 
> > I can dock / undock several times in a row with the image always coming 
> > up properly on the external display.
> > 
> > On the first undock, the WARN_ONCE() below triggers, so I believe each 
> > undock leaks memory.
> > 
> > [   38.755084] Failed to release pages: bind_count=1, pages_pin_count=1, 
> > pin_global=0
> > [   38.755138] WARNING: CPU: 3 PID: 96 at 
> > ../drivers/gpu/drm/i915/i915_gem_userptr.c:89 cancel_userptr+0xe5/0xf0 
> > [i915]
> 
> OK, I am seeing this warning with current Linus' tree (5aa90a845) even 
> without any attempt to dock/undock, so it's probably unrelated to external 
> outputs and it only by coincidence appeared originally at the same time I 
> docked the machine.
> 
> So there are two separate issues on this machine with latest kernel 
> (neither of them probably being regression):
> 
> - I have to disable i915 RC6 at the kernel cmdline, otherwise external 
>   (dock) display gets output only randomly (seems like always only on 
>   first dock)

Joonas, Chris, time to bring rc6_enable back on next-fixes before we
remove this support entirely?

> 
> - the warning, which triggers at not really deterministic time after boot, 
>   but usually rather quickly

Jiri, could you please report these issues separately on bugs.freedesktop.org?
Are they regressions? Is a bisect possible?
Please attach the dmesg booting with drm.debug=0x1e

> 
> -- 
> Jiri Kosina
> SUSE Labs
> 
> ___
> Intel-gfx mailing list
> intel-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] Graphics on thinkpad x270 after dock/undock works only for the first time (CPU pipe B FIFO underrun)

2018-01-02 Thread Rodrigo Vivi
On Sat, Dec 30, 2017 at 12:53:58PM +0000, Jiri Kosina wrote:
> On Sat, 30 Dec 2017, Jiri Kosina wrote:
> 
> > Seems like disabling RC6 on the kernel command line works this around, and 
> > I can dock / undock several times in a row with the image always coming 
> > up properly on the external display.
> > 
> > On the first undock, the WARN_ONCE() below triggers, so I believe each 
> > undock leaks memory.
> > 
> > [   38.755084] Failed to release pages: bind_count=1, pages_pin_count=1, 
> > pin_global=0
> > [   38.755138] WARNING: CPU: 3 PID: 96 at 
> > ../drivers/gpu/drm/i915/i915_gem_userptr.c:89 cancel_userptr+0xe5/0xf0 
> > [i915]
> 
> OK, I am seeing this warning with current Linus' tree (5aa90a845) even 
> without any attempt to dock/undock, so it's probably unrelated to external 
> outputs and it only by coincidence appeared originally at the same time I 
> docked the machine.
> 
> So there are two separate issues on this machine with latest kernel 
> (neither of them probably being regression):
> 
> - I have to disable i915 RC6 at the kernel cmdline, otherwise external 
>   (dock) display gets output only randomly (seems like always only on 
>   first dock)

Joonas, Chris, time to bring rc6_enable back on next-fixes before we
remove this support entirely?

> 
> - the warning, which triggers at not really deterministic time after boot, 
>   but usually rather quickly

Jiri, could you please report these issues separately on bugs.freedesktop.org?
Are they regressions? Is a bisect possible?
Please attach the dmesg booting with drm.debug=0x1e

> 
> -- 
> Jiri Kosina
> SUSE Labs
> 
> ___
> Intel-gfx mailing list
> intel-...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [PATCH 2/3] dt-bindings: mtd: atmel-quadspi: add an optional property 'dmacap,memcpy'

2018-01-02 Thread Trent Piepho
On Tue, 2018-01-02 at 11:22 +0100, Ludovic Desroches wrote:
> On Wed, Dec 27, 2017 at 10:40:00PM +0100, Cyrille Pitchen wrote:
> 
> > Or maybe no change at all is required at the at_xdmac.c driver side: we
> > just don't care about the provided flags in the "dmas" property, especially
> > the "peripheral id". They would be ignored anyway when the atmel-quadspi.c
> > driver later calls dmaengine_prep_dma_memcpy(). So I could simply set the
> > dma cells to 0 in the device-tree?
> > 
> > Ludovic, what do you think about that ?
> 
> It may work but I won't do this. Usually, channels requested through the xlate
> function have their capabilities set to DMA_SLAVE and not DMA_MEMCPY.
> In the at_xdmac case, it won't be an issue but if you have a controller
> which has channels which can support only mem-to-mem or peripheral, it
> won't work.

Maybe one could create an "AT91_XDMAC_DT_" macro to indicate a memcpy
channel.  There are still unused bits for another flag.  It also looks
like at_xdma uses peripheral id 0x3f for memcpy transfers (will that
work with memcpy DMA on multiple channels at the same time?).  So
perhaps perid 0x3f could be the indication of wanting a memcpy channel,
rather than another flag bit.  But however it's done, one writes:

dmas = < AT91_XDMAC_DT_MEMCPY>; dma-names = "rx-tx";

I think one could have the quadspi driver automatically fill in the dma
cell in the dma specifier if it is not present in the device tree.  So
one could write "dmas = <>" and the driver adds the
AT91_XDMAC_DT_MEMCPY cell before xlating.  I'm not sure if that's a
good idea or not.

Re: [PATCH 2/3] dt-bindings: mtd: atmel-quadspi: add an optional property 'dmacap,memcpy'

2018-01-02 Thread Trent Piepho
On Tue, 2018-01-02 at 11:22 +0100, Ludovic Desroches wrote:
> On Wed, Dec 27, 2017 at 10:40:00PM +0100, Cyrille Pitchen wrote:
> 
> > Or maybe no change at all is required at the at_xdmac.c driver side: we
> > just don't care about the provided flags in the "dmas" property, especially
> > the "peripheral id". They would be ignored anyway when the atmel-quadspi.c
> > driver later calls dmaengine_prep_dma_memcpy(). So I could simply set the
> > dma cells to 0 in the device-tree?
> > 
> > Ludovic, what do you think about that ?
> 
> It may work but I won't do this. Usually, channels requested through the xlate
> function have their capabilities set to DMA_SLAVE and not DMA_MEMCPY.
> In the at_xdmac case, it won't be an issue but if you have a controller
> which has channels which can support only mem-to-mem or peripheral, it
> won't work.

Maybe one could create an "AT91_XDMAC_DT_" macro to indicate a memcpy
channel.  There are still unused bits for another flag.  It also looks
like at_xdma uses peripheral id 0x3f for memcpy transfers (will that
work with memcpy DMA on multiple channels at the same time?).  So
perhaps perid 0x3f could be the indication of wanting a memcpy channel,
rather than another flag bit.  But however it's done, one writes:

dmas = < AT91_XDMAC_DT_MEMCPY>; dma-names = "rx-tx";

I think one could have the quadspi driver automatically fill in the dma
cell in the dma specifier if it is not present in the device tree.  So
one could write "dmas = <>" and the driver adds the
AT91_XDMAC_DT_MEMCPY cell before xlating.  I'm not sure if that's a
good idea or not.

Re: [PATCH v2 1/9] PCI: Regroup all PCI related entries into drivers/pci/Makefile

2018-01-02 Thread Bjorn Helgaas
On Fri, Dec 29, 2017 at 09:21:56PM +0100, Cyrille Pitchen wrote:
> Hi Bjorn,
> 
> Le 28/12/2017 à 23:47, Bjorn Helgaas a écrit :
> > On Mon, Dec 18, 2017 at 07:16:01PM +0100, Cyrille Pitchen wrote:
> >> This patch cleans drivers/Makefile up by moving the pci/endpoint and
> >> pci/dwc entries from drivers/Makefile into drivers/pci/Makefile.
> > 
> > Thanks a lot for doing this!
> > 
> > s/This patch cleans/Clean up/
> >
> 
> updated for the next series
>  
> > Speaking of cleanup, this Makefile has useless comments and blank
> > lines.  Maybe you could add a new patch to remove them and reorder it
> > into a sensible order, with the Intel MID special case at the end and
> > the host/dwc/cadence stuff together?
> >
> 
> OK, I'm working on it. So right now I'm trying to sort entries by
> alphabetical order but the first test has just failed: kernel oops
> when calling pcied_init().

Sorting isn't a big deal.  The blank lines and useless comments do
make it hard to scan, but if we can't sort them, I'm fine with that.

But I *am* curious about the failure you observed.  That sounds like
we might have a dependency bug there and I'd like to fix that.  I
don't like to rely on link ordering because it's invisible in the
source code.

Can you post the reordering patch you used so I can investigate it?

> I guess there are more hidden dependencies than expected, solved by the
> link order. This cleanup might be a bit risky after all, especially since I
> won't be able to test all combinations or with all possible hardwares. 

Bjorn


Re: [PATCH v2 1/9] PCI: Regroup all PCI related entries into drivers/pci/Makefile

2018-01-02 Thread Bjorn Helgaas
On Fri, Dec 29, 2017 at 09:21:56PM +0100, Cyrille Pitchen wrote:
> Hi Bjorn,
> 
> Le 28/12/2017 à 23:47, Bjorn Helgaas a écrit :
> > On Mon, Dec 18, 2017 at 07:16:01PM +0100, Cyrille Pitchen wrote:
> >> This patch cleans drivers/Makefile up by moving the pci/endpoint and
> >> pci/dwc entries from drivers/Makefile into drivers/pci/Makefile.
> > 
> > Thanks a lot for doing this!
> > 
> > s/This patch cleans/Clean up/
> >
> 
> updated for the next series
>  
> > Speaking of cleanup, this Makefile has useless comments and blank
> > lines.  Maybe you could add a new patch to remove them and reorder it
> > into a sensible order, with the Intel MID special case at the end and
> > the host/dwc/cadence stuff together?
> >
> 
> OK, I'm working on it. So right now I'm trying to sort entries by
> alphabetical order but the first test has just failed: kernel oops
> when calling pcied_init().

Sorting isn't a big deal.  The blank lines and useless comments do
make it hard to scan, but if we can't sort them, I'm fine with that.

But I *am* curious about the failure you observed.  That sounds like
we might have a dependency bug there and I'd like to fix that.  I
don't like to rely on link ordering because it's invisible in the
source code.

Can you post the reordering patch you used so I can investigate it?

> I guess there are more hidden dependencies than expected, solved by the
> link order. This cleanup might be a bit risky after all, especially since I
> won't be able to test all combinations or with all possible hardwares. 

Bjorn


Re: [PATCH] PM / runtime: Rework pm_runtime_force_suspend/resume()

2018-01-02 Thread Rafael J. Wysocki
On Tuesday, January 2, 2018 2:04:04 PM CET Lukas Wunner wrote:
> On Tue, Jan 02, 2018 at 12:02:18PM +0100, Rafael J. Wysocki wrote:
> > On Tue, Jan 2, 2018 at 11:51 AM, Lukas Wunner  wrote:
> > > On Tue, Jan 02, 2018 at 01:56:28AM +0100, Rafael J. Wysocki wrote:
> > >> + if (atomic_read(&dev->power.usage_count) <= 1 &&
> > >> + atomic_read(&dev->power.child_count) == 0)
> > >> + pm_runtime_set_suspended(dev);
> > >>
> > >> - pm_runtime_set_suspended(dev);
> > >
> > > The ->runtime_suspend callback *has* been executed at this point.
> > > If the status is only updated conditionally, it may not reflect
> > > the device's actual power state correctly.  That doesn't seem to
> > > be a good idea.
> > 
> > It doesn't matter, because this is done with runtime PM disabled, isn't it?
> 
> It might not make a difference for the use case I have in mind, but
> pm_runtime_status_suspended() will return an incorrect result and is
> called from 47 files in 4.15-rc6 according to lxr.free-electrons.com.

Generally, the runtime PM status is only meaningful for devices with runtime PM
enabled.

There is an exception, which is during system suspend/resume, when runtime PM
is automatically disabled by the core, but that only under certain assumptions.

Basically, you have to assume that no one else will mess with the device
between the times you call pm_runtime_status_suspended() to check its runtime
PM status (or between the first time you do that and the last time runtime PM
has been enabled for the device).

This patch doesn't change the situation in that respect.

> > > The kerneldoc says:
> > >
> > > Typically this function may be invoked from a system suspend callback
> > > to make sure the device is put into low power state.
> > >
> > > That portion is not modified by your patch.
> > >
> > > "Typically" implies that it's legal to call pm_runtime_force_suspend() in
> > > *other* contexts than as a ->suspend hook.
> > 
> > It should only be used during system suspend anyway, however.
> 
> Then the kerneldoc is wrong.

It isn't wrong.  It may be incomplete, but the information in it is correct.

And making it more complete is not part of this patch IMO.

> > >> One addition that would be really helpful:  pm_runtime_force_suspend()
> > >> should also force-suspend all children and consumers of the given
> > >> device.  Likewise, those should be resumed on pm_runtime_force_resume().
> > >> Then I could just add a device link from the audio PCI device on the GPU
> > >> to the graphics PCI device and just call pm_runtime_force_*() on the
> > >> graphics device (supplier) to magically power them both off and on.
> > >
> > > Actually, the assumption is that pm_runtime_force_suspend() must be
> > > called for the children before it is called for the parent even
> > > without my patch, so it is just not going to work this way.
> > 
> > Moreover, what if those devices have nonzero usage counters?  There
> > may be other reasons for that than just dependencies, like for example
> > user space might have written "on" to their "control" files in sysfs.
> 
> In that case pm_runtime_force_suspend() should return a negative errno.

In which case it wouldn't be suitable for the system-wide PM callback role.

> I envision amending control_store() so that "off" can be written to the
> "control" file, allowing userspace to invoke pm_runtime_force_suspend()
> to force certain devices into runtime suspend.

But it isn't a good tool for that purpose, which is what I'm trying to tell you.

You need something else.

> The user would get back
> an error if the call failed for some reason (such as an active child or
> consumer of the to be force-suspended device).  That would be a clean
> replacement for the ON/OFF options we currently have for the
> vga_switcheroo debugfs control file.

Well, it looks like you are looking for an interface to invoke
pm_runtime_suspend() for the device as it behaves exactly the way you want. :-)

Thanks,
Rafael



Re: [PATCH] PM / runtime: Rework pm_runtime_force_suspend/resume()

2018-01-02 Thread Rafael J. Wysocki
On Tuesday, January 2, 2018 2:04:04 PM CET Lukas Wunner wrote:
> On Tue, Jan 02, 2018 at 12:02:18PM +0100, Rafael J. Wysocki wrote:
> > On Tue, Jan 2, 2018 at 11:51 AM, Lukas Wunner  wrote:
> > > On Tue, Jan 02, 2018 at 01:56:28AM +0100, Rafael J. Wysocki wrote:
> > >> + if (atomic_read(&dev->power.usage_count) <= 1 &&
> > >> + atomic_read(&dev->power.child_count) == 0)
> > >> + pm_runtime_set_suspended(dev);
> > >>
> > >> - pm_runtime_set_suspended(dev);
> > >
> > > The ->runtime_suspend callback *has* been executed at this point.
> > > If the status is only updated conditionally, it may not reflect
> > > the device's actual power state correctly.  That doesn't seem to
> > > be a good idea.
> > 
> > It doesn't matter, because this is done with runtime PM disabled, isn't it?
> 
> It might not make a difference for the use case I have in mind, but
> pm_runtime_status_suspended() will return an incorrect result and is
> called from 47 files in 4.15-rc6 according to lxr.free-electrons.com.

Generally, the runtime PM status is only meaningful for devices with runtime PM
enabled.

There is an exception, which is during system suspend/resume, when runtime PM
is automatically disabled by the core, but that only under certain assumptions.

Basically, you have to assume that no one else will mess with the device
between the times you call pm_runtime_status_suspended() to check its runtime
PM status (or between the first time you do that and the last time runtime PM
has been enabled for the device).

This patch doesn't change the situation in that respect.

> > > The kerneldoc says:
> > >
> > > Typically this function may be invoked from a system suspend callback
> > > to make sure the device is put into low power state.
> > >
> > > That portion is not modified by your patch.
> > >
> > > "Typically" implies that it's legal to call pm_runtime_force_suspend() in
> > > *other* contexts than as a ->suspend hook.
> > 
> > It should only be used during system suspend anyway, however.
> 
> Then the kerneldoc is wrong.

It isn't wrong.  It may be incomplete, but the information in it is correct.

And making it more complete is not part of this patch IMO.

> > >> One addition that would be really helpful:  pm_runtime_force_suspend()
> > >> should also force-suspend all children and consumers of the given
> > >> device.  Likewise, those should be resumed on pm_runtime_force_resume().
> > >> Then I could just add a device link from the audio PCI device on the GPU
> > >> to the graphics PCI device and just call pm_runtime_force_*() on the
> > >> graphics device (supplier) to magically power them both off and on.
> > >
> > > Actually, the assumption is that pm_runtime_force_suspend() must be
> > > called for the children before it is called for the parent even
> > > without my patch, so it is just not going to work this way.
> > 
> > Moreover, what if those devices have nonzero usage counters?  There
> > may be other reasons for that than just dependencies, like for example
> > user space might have written "on" to their "control" files in sysfs.
> 
> In that case pm_runtime_force_suspend() should return a negative errno.

In which case it wouldn't be suitable for the system-wide PM callback role.

> I envision amending control_store() so that "off" can be written to the
> "control" file, allowing userspace to invoke pm_runtime_force_suspend()
> to force certain devices into runtime suspend.

But it isn't a good tool for that purpose, which is what I'm trying to tell you.

You need something else.

> The user would get back
> an error if the call failed for some reason (such as an active child or
> consumer of the to be force-suspended device).  That would be a clean
> replacement for the ON/OFF options we currently have for the
> vga_switcheroo debugfs control file.

Well, it looks like you are looking for an interface to invoke
pm_runtime_suspend() for the device as it behaves exactly the way you want. :-)

Thanks,
Rafael



Re: [PATCH v2 4/6] clk: ingenic: Add JZ47xx TCU clocks driver

2018-01-02 Thread Stephen Boyd
On 01/01, Paul Cercueil wrote:
> diff --git a/drivers/clk/ingenic/tcu.c b/drivers/clk/ingenic/tcu.c
> new file mode 100644
> index 000000000000..36afe3f02f91
> --- /dev/null
> +++ b/drivers/clk/ingenic/tcu.c
> @@ -0,0 +1,336 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Ingenic JZ47xx SoC TCU clocks driver
> + * Copyright (C) 2018 Paul Cercueil 
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 

Used?

> +
> +#include 
> +
> +enum ingenic_version {
> + ID_JZ4740,
> + ID_JZ4770,
> + ID_JZ4780,
> +};
> +
> +struct ingenic_tcu {
> + struct device_node *np;

Is this used?

> + struct regmap *map;
> +
> + struct clk_onecell_data clocks;
> +};
> +
> +struct ingenic_tcu_clk_info {
> + struct clk_init_data init_data;
> + u8 gate_bit;
> + u8 tcsr_reg;
> +};
> +
> +struct ingenic_tcu_clk {
> + struct clk_hw hw;
> +
> + struct ingenic_tcu *tcu;
> + const struct ingenic_tcu_clk_info *info;
> +
> + unsigned int idx;
> +};
> +
> +#define to_tcu_clk(_hw) container_of(_hw, struct ingenic_tcu_clk, hw)
> +
> +static int ingenic_tcu_enable(struct clk_hw *hw)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + const struct ingenic_tcu_clk_info *info = tcu_clk->info;
> + struct ingenic_tcu *tcu = tcu_clk->tcu;
> +
> + regmap_write(tcu->map, REG_TSCR, BIT(info->gate_bit));
> + return 0;
> +}
> +
> +static void ingenic_tcu_disable(struct clk_hw *hw)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + struct ingenic_tcu *tcu = tcu_clk->tcu;
> + const struct ingenic_tcu_clk_info *info = tcu_clk->info;
> +
> + regmap_write(tcu->map, REG_TSSR, BIT(info->gate_bit));
> +}
> +
> +static int ingenic_tcu_is_enabled(struct clk_hw *hw)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + struct ingenic_tcu *tcu = tcu_clk->tcu;
> + const struct ingenic_tcu_clk_info *info = tcu_clk->info;
> + unsigned int value;
> +
> + regmap_read(tcu->map, REG_TSR, &value);
> +
> + return !(value & BIT(info->gate_bit));
> +}
> +
> +static u8 ingenic_tcu_get_parent(struct clk_hw *hw)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + struct ingenic_tcu *tcu = tcu_clk->tcu;
> + const struct ingenic_tcu_clk_info *info = tcu_clk->info;
> + unsigned int val = 0;
> + int ret;
> +
> + ret = regmap_read(tcu->map, info->tcsr_reg, &val);
> + WARN_ONCE(ret < 0, "Unable to read TCSR %i", tcu_clk->idx);
> +
> + return (u8) ffs(val & TCSR_PARENT_CLOCK_MASK) - 1;

Is the cast necessary?

> +}
> +
> +static int ingenic_tcu_set_parent(struct clk_hw *hw, u8 idx)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + struct ingenic_tcu *tcu = tcu_clk->tcu;
> + const struct ingenic_tcu_clk_info *info = tcu_clk->info;
> + int ret;
> +
> + /*
> +  * Our clock provider has the CLK_SET_PARENT_GATE flag set, so we know
> +  * that the clk is in unprepared state. To be able to access TCSR
> +  * we must ungate the clock supply and we gate it again when done.
> +  */
> +
> + regmap_write(tcu->map, REG_TSCR, BIT(info->gate_bit));
> +
> + ret = regmap_update_bits(tcu->map, info->tcsr_reg,
> + TCSR_PARENT_CLOCK_MASK, BIT(idx));
> + WARN_ONCE(ret < 0, "Unable to update TCSR %i", tcu_clk->idx);
> +
> + regmap_write(tcu->map, REG_TSSR, BIT(info->gate_bit));
> +
> + return 0;
> +}
> +
> +static unsigned long ingenic_tcu_recalc_rate(struct clk_hw *hw,
> + unsigned long parent_rate)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + struct ingenic_tcu *tcu = tcu_clk->tcu;
> + const struct ingenic_tcu_clk_info *info = tcu_clk->info;
> + unsigned int prescale;
> + int ret;
> +
> + ret = regmap_read(tcu->map, info->tcsr_reg, &prescale);
> + WARN_ONCE(ret < 0, "Unable to read TCSR %i", tcu_clk->idx);
> +
> + prescale = (prescale & TCSR_PRESCALE_MASK) >> TCSR_PRESCALE_LSB;
> +
> + return parent_rate >> (prescale * 2);
> +}
> +
> +static long ingenic_tcu_round_rate(struct clk_hw *hw, unsigned long req_rate,
> + unsigned long *parent_rate)
> +{
> + long rate = (long) *parent_rate;

Is there a reason why rate is signed here?

> + unsigned int shift;
> +
> + if (req_rate > rate)
> + return -EINVAL;
> +
> + for (shift = 0; shift < 10; shift += 2)
> + if ((rate >> shift) <= req_rate)
> + return rate >> shift;
> +
> + return rate >> 10;

Can it be?

for (shift = 0; shift < 10; shift += 2)
if ((rate >> shift) <= req_rate)
break;

return rate >> shift;

> +}
> +
> +static int ingenic_tcu_set_rate(struct clk_hw *hw, unsigned long req_rate,
> + unsigned long parent_rate)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + const 

Re: [PATCH v2 4/6] clk: ingenic: Add JZ47xx TCU clocks driver

2018-01-02 Thread Stephen Boyd
On 01/01, Paul Cercueil wrote:
> diff --git a/drivers/clk/ingenic/tcu.c b/drivers/clk/ingenic/tcu.c
> new file mode 100644
> index 000000000000..36afe3f02f91
> --- /dev/null
> +++ b/drivers/clk/ingenic/tcu.c
> @@ -0,0 +1,336 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Ingenic JZ47xx SoC TCU clocks driver
> + * Copyright (C) 2018 Paul Cercueil 
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 

Used?

> +
> +#include 
> +
> +enum ingenic_version {
> + ID_JZ4740,
> + ID_JZ4770,
> + ID_JZ4780,
> +};
> +
> +struct ingenic_tcu {
> + struct device_node *np;

Is this used?

> + struct regmap *map;
> +
> + struct clk_onecell_data clocks;
> +};
> +
> +struct ingenic_tcu_clk_info {
> + struct clk_init_data init_data;
> + u8 gate_bit;
> + u8 tcsr_reg;
> +};
> +
> +struct ingenic_tcu_clk {
> + struct clk_hw hw;
> +
> + struct ingenic_tcu *tcu;
> + const struct ingenic_tcu_clk_info *info;
> +
> + unsigned int idx;
> +};
> +
> +#define to_tcu_clk(_hw) container_of(_hw, struct ingenic_tcu_clk, hw)
> +
> +static int ingenic_tcu_enable(struct clk_hw *hw)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + const struct ingenic_tcu_clk_info *info = tcu_clk->info;
> + struct ingenic_tcu *tcu = tcu_clk->tcu;
> +
> + regmap_write(tcu->map, REG_TSCR, BIT(info->gate_bit));
> + return 0;
> +}
> +
> +static void ingenic_tcu_disable(struct clk_hw *hw)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + struct ingenic_tcu *tcu = tcu_clk->tcu;
> + const struct ingenic_tcu_clk_info *info = tcu_clk->info;
> +
> + regmap_write(tcu->map, REG_TSSR, BIT(info->gate_bit));
> +}
> +
> +static int ingenic_tcu_is_enabled(struct clk_hw *hw)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + struct ingenic_tcu *tcu = tcu_clk->tcu;
> + const struct ingenic_tcu_clk_info *info = tcu_clk->info;
> + unsigned int value;
> +
> + regmap_read(tcu->map, REG_TSR, &value);
> +
> + return !(value & BIT(info->gate_bit));
> +}
> +
> +static u8 ingenic_tcu_get_parent(struct clk_hw *hw)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + struct ingenic_tcu *tcu = tcu_clk->tcu;
> + const struct ingenic_tcu_clk_info *info = tcu_clk->info;
> + unsigned int val = 0;
> + int ret;
> +
> + ret = regmap_read(tcu->map, info->tcsr_reg, &val);
> + WARN_ONCE(ret < 0, "Unable to read TCSR %i", tcu_clk->idx);
> +
> + return (u8) ffs(val & TCSR_PARENT_CLOCK_MASK) - 1;

Is the cast necessary?

> +}
> +
> +static int ingenic_tcu_set_parent(struct clk_hw *hw, u8 idx)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + struct ingenic_tcu *tcu = tcu_clk->tcu;
> + const struct ingenic_tcu_clk_info *info = tcu_clk->info;
> + int ret;
> +
> + /*
> +  * Our clock provider has the CLK_SET_PARENT_GATE flag set, so we know
> +  * that the clk is in unprepared state. To be able to access TCSR
> +  * we must ungate the clock supply and we gate it again when done.
> +  */
> +
> + regmap_write(tcu->map, REG_TSCR, BIT(info->gate_bit));
> +
> + ret = regmap_update_bits(tcu->map, info->tcsr_reg,
> + TCSR_PARENT_CLOCK_MASK, BIT(idx));
> + WARN_ONCE(ret < 0, "Unable to update TCSR %i", tcu_clk->idx);
> +
> + regmap_write(tcu->map, REG_TSSR, BIT(info->gate_bit));
> +
> + return 0;
> +}
> +
> +static unsigned long ingenic_tcu_recalc_rate(struct clk_hw *hw,
> + unsigned long parent_rate)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + struct ingenic_tcu *tcu = tcu_clk->tcu;
> + const struct ingenic_tcu_clk_info *info = tcu_clk->info;
> + unsigned int prescale;
> + int ret;
> +
> + ret = regmap_read(tcu->map, info->tcsr_reg, &prescale);
> + WARN_ONCE(ret < 0, "Unable to read TCSR %i", tcu_clk->idx);
> +
> + prescale = (prescale & TCSR_PRESCALE_MASK) >> TCSR_PRESCALE_LSB;
> +
> + return parent_rate >> (prescale * 2);
> +}
> +
> +static long ingenic_tcu_round_rate(struct clk_hw *hw, unsigned long req_rate,
> + unsigned long *parent_rate)
> +{
> + long rate = (long) *parent_rate;

Is there a reason why rate is signed here?

> + unsigned int shift;
> +
> + if (req_rate > rate)
> + return -EINVAL;
> +
> + for (shift = 0; shift < 10; shift += 2)
> + if ((rate >> shift) <= req_rate)
> + return rate >> shift;
> +
> + return rate >> 10;

Can it be?

for (shift = 0; shift < 10; shift += 2)
if ((rate >> shift) <= req_rate)
break;

return rate >> shift;

> +}
> +
> +static int ingenic_tcu_set_rate(struct clk_hw *hw, unsigned long req_rate,
> + unsigned long parent_rate)
> +{
> + struct ingenic_tcu_clk *tcu_clk = to_tcu_clk(hw);
> + const struct ingenic_tcu_clk_info 

Re: [PATCH 4.9 00/75] 4.9.74-stable review

2018-01-02 Thread David Miller
From: Neal Cardwell 
Date: Tue, 2 Jan 2018 14:11:25 -0500

> Looks like these 2 patches will cherry-pick cleanly if cherry-picked
> in the following sequence, on top of 4.9.74-rc1, which already has
> 6c9e73ef9aa7 ("tcp_bbr: record "full bw reached" decision in new
> full_bw_reached bit"):
> 
> $ git checkout linux-stable-rc/linux-4.9.y
> 
> $ git cherry-pick 2f6c498e4f15
> Performing inexact rename detection: 100% (17803152/17803152), done.
> [detached HEAD 0982234c57e1] tcp_bbr: reset full pipe detection on
> loss recovery undo
>  Date: Thu Dec 7 12:43:31 2017 -0500
>  1 file changed, 4 insertions(+)
> 
> $ git cherry-pick 600647d467c6
> Performing inexact rename detection: 100% (17803152/17803152), done.
> [detached HEAD 7e866eccd083] tcp_bbr: reset long-term bandwidth
> sampling on loss recovery undo
>  Date: Thu Dec 7 12:43:32 2017 -0500
>  1 file changed, 1 insertion(+)
> 
> $ git log --oneline --decorate | head -3
> 7e866eccd083 (HEAD) tcp_bbr: reset long-term bandwidth sampling on
> loss recovery undo
> 0982234c57e1 tcp_bbr: reset full pipe detection on loss recovery undo
> 79070be7f1ae (linux-stable-rc/linux-4.9.y) Linux 4.9.74-rc1
> 
> I verified that this compiles without warnings, and boots, and BBR works.
> 
> Shall I prepare another version of these 2 patches, or do we think
> this recipe will be sufficient? (Sorry I am not more familiar with the
> backport-to-stable process.)

If this works and Greg is OK with it, I am fine with it too.


Re: [PATCH 4.9 00/75] 4.9.74-stable review

2018-01-02 Thread David Miller
From: Neal Cardwell 
Date: Tue, 2 Jan 2018 14:11:25 -0500

> Looks like these 2 patches will cherry-pick cleanly if cherry-picked
> in the following sequence, on top of 4.9.74-rc1, which already has
> 6c9e73ef9aa7 ("tcp_bbr: record "full bw reached" decision in new
> full_bw_reached bit"):
> 
> $ git checkout linux-stable-rc/linux-4.9.y
> 
> $ git cherry-pick 2f6c498e4f15
> Performing inexact rename detection: 100% (17803152/17803152), done.
> [detached HEAD 0982234c57e1] tcp_bbr: reset full pipe detection on
> loss recovery undo
>  Date: Thu Dec 7 12:43:31 2017 -0500
>  1 file changed, 4 insertions(+)
> 
> $ git cherry-pick 600647d467c6
> Performing inexact rename detection: 100% (17803152/17803152), done.
> [detached HEAD 7e866eccd083] tcp_bbr: reset long-term bandwidth
> sampling on loss recovery undo
>  Date: Thu Dec 7 12:43:32 2017 -0500
>  1 file changed, 1 insertion(+)
> 
> $ git log --oneline --decorate | head -3
> 7e866eccd083 (HEAD) tcp_bbr: reset long-term bandwidth sampling on
> loss recovery undo
> 0982234c57e1 tcp_bbr: reset full pipe detection on loss recovery undo
> 79070be7f1ae (linux-stable-rc/linux-4.9.y) Linux 4.9.74-rc1
> 
> I verified that this compiles without warnings, and boots, and BBR works.
> 
> Shall I prepare another version of these 2 patches, or do we think
> this recipe will be sufficient? (Sorry I am not more familiar with the
> backport-to-stable process.)

If this works and Greg is OK with it, I am fine with it too.


Re: [PATCH v2] drm/i915: Try EDID bitbanging on HDMI after failed read

2018-01-02 Thread Rodrigo Vivi
On Sun, Dec 31, 2017 at 10:34:54PM +0000, Stefan Brüns wrote:
> The ACK/NACK implementation as found in e.g. the G965 has the falling
> clock edge and the release of the data line after the ACK for the received
> byte happen at the same time.
> 
> This is conformant with the I2C specification, which allows a zero hold
> time, see footnote [3]: "A device must internally provide a hold time of
> at least 300 ns for the SDA signal (with respect to the V IH(min) of the
> SCL signal) to bridge the undefined region of the falling edge of SCL."
> 
> Some HDMI-to-VGA converters apparently fail to adhere to this requirement
> and latch SDA at the falling clock edge, so instead of an ACK
> sometimes a NACK is read and the slave (i.e. the EDID ROM) ends the
> transfer.
> 
> The bitbanging releases the data line for the ACK only 1/4 bit time after
> the falling clock edge, so a slave will see the correct value no matter
> if it samples at the rising or the falling clock edge or in the center.
> 
> Fallback to bitbanging is already done for the CRT connector.
> 
> Bug: https://bugs.freedesktop.org/show_bug.cgi?id=92685

s/Bug:/Bugzilla:

Did we get confirmation that this also fixes the Skylake issue
initially reported?

> 
> Signed-off-by: Stefan Brüns 
> 
> ---
> 
> Changes in v2:
> - Fix/enhance commit message, no code changes
> 
>  drivers/gpu/drm/i915/intel_hdmi.c | 14 +++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_hdmi.c 
> b/drivers/gpu/drm/i915/intel_hdmi.c
> index 4dea833f9d1b..847cda4c017c 100644
> --- a/drivers/gpu/drm/i915/intel_hdmi.c
> +++ b/drivers/gpu/drm/i915/intel_hdmi.c
> @@ -1573,12 +1573,20 @@ intel_hdmi_set_edid(struct drm_connector *connector)
>   struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
>   struct edid *edid;
>   bool connected = false;
> + struct i2c_adapter *i2c;
>  
>   intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
>  
> - edid = drm_get_edid(connector,
> - intel_gmbus_get_adapter(dev_priv,
> - intel_hdmi->ddc_bus));
> + i2c = intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus);
> +
> + edid = drm_get_edid(connector, i2c);
> +
> + if (!edid && !intel_gmbus_is_forced_bit(i2c)) {
> + DRM_DEBUG_KMS("HDMI GMBUS EDID read failed, retry using GPIO 
> bit-banging\n");
> + intel_gmbus_force_bit(i2c, true);
> + edid = drm_get_edid(connector, i2c);
> + intel_gmbus_force_bit(i2c, false);
> + }

Approach seems fine for this case.
I just wonder what would be the risks of forcing this bit and edid read when 
nothing is present on the other end?

>  
>   intel_hdmi_dp_dual_mode_detect(connector, edid != NULL);
>  
> -- 
> 2.15.1
> 
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH v2] drm/i915: Try EDID bitbanging on HDMI after failed read

2018-01-02 Thread Rodrigo Vivi
On Sun, Dec 31, 2017 at 10:34:54PM +0000, Stefan Brüns wrote:
> The ACK/NACK implementation as found in e.g. the G965 has the falling
> clock edge and the release of the data line after the ACK for the received
> byte happen at the same time.
> 
> This is conformant with the I2C specification, which allows a zero hold
> time, see footnote [3]: "A device must internally provide a hold time of
> at least 300 ns for the SDA signal (with respect to the V IH(min) of the
> SCL signal) to bridge the undefined region of the falling edge of SCL."
> 
> Some HDMI-to-VGA converters apparently fail to adhere to this requirement
> and latch SDA at the falling clock edge, so instead of an ACK
> sometimes a NACK is read and the slave (i.e. the EDID ROM) ends the
> transfer.
> 
> The bitbanging releases the data line for the ACK only 1/4 bit time after
> the falling clock edge, so a slave will see the correct value no matter
> if it samples at the rising or the falling clock edge or in the center.
> 
> Fallback to bitbanging is already done for the CRT connector.
> 
> Bug: https://bugs.freedesktop.org/show_bug.cgi?id=92685

s/Bug:/Bugzilla:

Did we get confirmation that this also fixes the Skylake issue
initially reported?

> 
> Signed-off-by: Stefan Brüns 
> 
> ---
> 
> Changes in v2:
> - Fix/enhance commit message, no code changes
> 
>  drivers/gpu/drm/i915/intel_hdmi.c | 14 +++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_hdmi.c 
> b/drivers/gpu/drm/i915/intel_hdmi.c
> index 4dea833f9d1b..847cda4c017c 100644
> --- a/drivers/gpu/drm/i915/intel_hdmi.c
> +++ b/drivers/gpu/drm/i915/intel_hdmi.c
> @@ -1573,12 +1573,20 @@ intel_hdmi_set_edid(struct drm_connector *connector)
>   struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector);
>   struct edid *edid;
>   bool connected = false;
> + struct i2c_adapter *i2c;
>  
>   intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS);
>  
> - edid = drm_get_edid(connector,
> - intel_gmbus_get_adapter(dev_priv,
> - intel_hdmi->ddc_bus));
> + i2c = intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus);
> +
> + edid = drm_get_edid(connector, i2c);
> +
> + if (!edid && !intel_gmbus_is_forced_bit(i2c)) {
> + DRM_DEBUG_KMS("HDMI GMBUS EDID read failed, retry using GPIO 
> bit-banging\n");
> + intel_gmbus_force_bit(i2c, true);
> + edid = drm_get_edid(connector, i2c);
> + intel_gmbus_force_bit(i2c, false);
> + }

Approach seems fine for this case.
I just wonder what would be the risks of forcing this bit and edid read when 
nothing is present on the other end?

>  
>   intel_hdmi_dp_dual_mode_detect(connector, edid != NULL);
>  
> -- 
> 2.15.1
> 
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH 4.9 00/75] 4.9.74-stable review

2018-01-02 Thread Neal Cardwell
On Tue, Jan 2, 2018 at 1:32 PM, David Miller  wrote:
> From: Neal Cardwell 
> Date: Tue, 2 Jan 2018 11:57:59 -0500
>
>> On Mon, Jan 1, 2018 at 9:31 AM, Greg Kroah-Hartman
>>  wrote:
>>> This is the start of the stable review cycle for the 4.9.74 release.
>>> There are 75 patches in this series, all will be posted as a response
>>> to this one.  If anyone has any issues with these being applied, please
>>> let me know.
>>>
>>> Responses should be made by Wed Jan  3 14:00:03 UTC 2018.
>>> Anything received after that time might be too late.
>>>
>>> The whole patch series can be found in one patch at:
>>> kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.74-rc1.gz
>>> or in the git tree and branch at:
>>>   git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
>>> linux-4.9.y
>>> and the diffstat can be found below.
>>
>> Hi Greg,
>>
>> In looking at the 4.9 and 4.14 patches yesterday, I noticed there were
>> two TCP BBR fixes that made it into 4.14 but not 4.9. Doing an
>> inventory of the TCP BBR fixes, AFAICT we have:
>>
>> c589e69b508d tcp_bbr: record "full bw reached" decision in new
>> full_bw_reached bit
>>  - in 4.9 and 4.14 (great)
>>
>> 2f6c498e4f15 tcp_bbr: reset full pipe detection on loss recovery undo
>>   - in 4.14 (but not 4.9)
>>
>> 600647d467c6 tcp_bbr: reset long-term bandwidth sampling on loss recovery 
>> undo
>>   - in 4.14 (but not 4.9)
>>
>> Lacking the second and third patches in 4.9 will not cause any new
>> problems, but it will miss out on some nice fixes. If it's possible to
>> get  2f6c498e4f15 and 600647d467c6 either into 4.9.74 or 4.9.75, I
>> would be very grateful.
>
> These were not straight-forward to backport and I felt the risk outweighed
> the gains.
>
> If you want to do the backport yourself and you feel confident in it,
> feel free.

Thanks, Greg and David. Looks like these 2 patches will cherry-pick
cleanly if cherry-picked in the following sequence, on top of
4.9.74-rc1, which already has 6c9e73ef9aa7 ("tcp_bbr: record "full bw
reached" decision in new full_bw_reached bit"):

$ git checkout linux-stable-rc/linux-4.9.y

$ git cherry-pick 2f6c498e4f15
Performing inexact rename detection: 100% (17803152/17803152), done.
[detached HEAD 0982234c57e1] tcp_bbr: reset full pipe detection on
loss recovery undo
 Date: Thu Dec 7 12:43:31 2017 -0500
 1 file changed, 4 insertions(+)

$ git cherry-pick 600647d467c6
Performing inexact rename detection: 100% (17803152/17803152), done.
[detached HEAD 7e866eccd083] tcp_bbr: reset long-term bandwidth
sampling on loss recovery undo
 Date: Thu Dec 7 12:43:32 2017 -0500
 1 file changed, 1 insertion(+)

$ git log --oneline --decorate | head -3
7e866eccd083 (HEAD) tcp_bbr: reset long-term bandwidth sampling on
loss recovery undo
0982234c57e1 tcp_bbr: reset full pipe detection on loss recovery undo
79070be7f1ae (linux-stable-rc/linux-4.9.y) Linux 4.9.74-rc1

I verified that this compiles without warnings, and boots, and BBR works.

Shall I prepare another version of these 2 patches, or do we think
this recipe will be sufficient? (Sorry I am not more familiar with the
backport-to-stable process.)

Thanks!
neal


Re: [PATCH 4.9 00/75] 4.9.74-stable review

2018-01-02 Thread Neal Cardwell
On Tue, Jan 2, 2018 at 1:32 PM, David Miller  wrote:
> From: Neal Cardwell 
> Date: Tue, 2 Jan 2018 11:57:59 -0500
>
>> On Mon, Jan 1, 2018 at 9:31 AM, Greg Kroah-Hartman
>>  wrote:
>>> This is the start of the stable review cycle for the 4.9.74 release.
>>> There are 75 patches in this series, all will be posted as a response
>>> to this one.  If anyone has any issues with these being applied, please
>>> let me know.
>>>
>>> Responses should be made by Wed Jan  3 14:00:03 UTC 2018.
>>> Anything received after that time might be too late.
>>>
>>> The whole patch series can be found in one patch at:
>>> kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.74-rc1.gz
>>> or in the git tree and branch at:
>>>   git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
>>> linux-4.9.y
>>> and the diffstat can be found below.
>>
>> Hi Greg,
>>
>> In looking at the 4.9 and 4.14 patches yesterday, I noticed there were
>> two TCP BBR fixes that made it into 4.14 but not 4.9. Doing an
>> inventory of the TCP BBR fixes, AFAICT we have:
>>
>> c589e69b508d tcp_bbr: record "full bw reached" decision in new
>> full_bw_reached bit
>>  - in 4.9 and 4.14 (great)
>>
>> 2f6c498e4f15 tcp_bbr: reset full pipe detection on loss recovery undo
>>   - in 4.14 (but not 4.9)
>>
>> 600647d467c6 tcp_bbr: reset long-term bandwidth sampling on loss recovery 
>> undo
>>   - in 4.14 (but not 4.9)
>>
>> Lacking the second and third patches in 4.9 will not cause any new
>> problems, but it will miss out on some nice fixes. If it's possible to
>> get  2f6c498e4f15 and 600647d467c6 either into 4.9.74 or 4.9.75, I
>> would be very grateful.
>
> These were not straight-forward to backport and I felt the risk outweighed
> the gains.
>
> If you want to do the backport yourself and you feel confident in it,
> feel free.

Thanks, Greg and David. Looks like these 2 patches will cherry-pick
cleanly if cherry-picked in the following sequence, on top of
4.9.74-rc1, which already has 6c9e73ef9aa7 ("tcp_bbr: record "full bw
reached" decision in new full_bw_reached bit"):

$ git checkout linux-stable-rc/linux-4.9.y

$ git cherry-pick 2f6c498e4f15
Performing inexact rename detection: 100% (17803152/17803152), done.
[detached HEAD 0982234c57e1] tcp_bbr: reset full pipe detection on
loss recovery undo
 Date: Thu Dec 7 12:43:31 2017 -0500
 1 file changed, 4 insertions(+)

$ git cherry-pick 600647d467c6
Performing inexact rename detection: 100% (17803152/17803152), done.
[detached HEAD 7e866eccd083] tcp_bbr: reset long-term bandwidth
sampling on loss recovery undo
 Date: Thu Dec 7 12:43:32 2017 -0500
 1 file changed, 1 insertion(+)

$ git log --oneline --decorate | head -3
7e866eccd083 (HEAD) tcp_bbr: reset long-term bandwidth sampling on
loss recovery undo
0982234c57e1 tcp_bbr: reset full pipe detection on loss recovery undo
79070be7f1ae (linux-stable-rc/linux-4.9.y) Linux 4.9.74-rc1

I verified that this compiles without warnings, and boots, and BBR works.

Shall I prepare another version of these 2 patches, or do we think
this recipe will be sufficient? (Sorry I am not more familiar with the
backport-to-stable process.)

Thanks!
neal


[GIT PULL rcu/next] RCU commits for 4.15

2018-01-02 Thread Paul E. McKenney
Hello, Ingo, and Happy New Year!

This pull request contains the following changes:

1.  Updates to use cond_resched() instead of cond_resched_rcu_qs()
where feasible (currently everywhere except in kernel/rcu and
in kernel/torture.c).  Also a couple of fixes to avoid sending
IPIs to offline CPUs.

http://lkml.kernel.org/r/20171201192122.ga19...@linux.vnet.ibm.com

2.  Updates to simplify RCU's dyntick-idle handling.

http://lkml.kernel.org/r/20171201193625.ga20...@linux.vnet.ibm.com

3.  Miscellaneous fixes.

http://lkml.kernel.org/r/20171201194139.ga22...@linux.vnet.ibm.com

4.  Updates to remove almost all uses of smp_read_barrier_depends()
and read_barrier_depends().

http://lkml.kernel.org/r/20171201195053.ga23...@linux.vnet.ibm.com

5.  Torture-test updates.

http://lkml.kernel.org/r/20171201200819.ga25...@linux.vnet.ibm.com

All of these changes have been subjected to 0day Test Robot and -next
testing, and are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git for-mingo

for you to fetch changes up to 1dfa55e01987288d847220b8c027204871440ed1:

  Merge branches 'cond_resched.2017.12.04a', 'dyntick.2017.11.28a', 
'fixes.2017.12.11a', 'srbd.2017.12.05a' and 'torture.2017.12.11a' into HEAD 
(2017-12-11 09:21:58 -0800)


Davidlohr Bueso (2):
  locking/locktorture: Fix rwsem reader_delay
  locking/locktorture: Fix num reader/writer corner cases

Paul E. McKenney (59):
  rcu: Avoid ->dynticks_nmi_nesting store tearing
  rcu: Reduce dyntick-idle state space
  rcu: Move rcu_nmi_{enter,exit}() to prepare for consolidation
  rcu: Clamp ->dynticks_nmi_nesting at eqs entry/exit
  rcu: Define rcu_irq_{enter,exit}() in terms of rcu_nmi_{enter,exit}()
  rcu: Make ->dynticks_nesting be a simple counter
  rcu: Eliminate rcu_irq_enter_disabled()
  rcu: Add tracing to irq/NMI dyntick-idle transitions
  rcu: Shrink ->dynticks_{nmi_,}nesting from long long to long
  rcu: Add ->dynticks field to rcu_dyntick trace event
  rcu: Stop duplicating lockdep checks in RCU's idle-entry code
  rcu: Avoid ->dynticks_nesting store tearing
  rcu: Fold rcu_eqs_enter_common() into rcu_eqs_enter()
  rcu: Fold rcu_eqs_exit_common() into rcu_eqs_exit()
  rcu: Simplify rcu_eqs_{enter,exit}() non-idle task debug code
  doc: Update dyntick-idle design documentation for NMI/irq consolidation
  srcu: Prohibit call_srcu() use under raw spinlocks
  torture: Suppress CPU stall warnings during shutdown ftrace dump
  torture: Prepare scripting for shift from %p to %pK
  sched: Stop resched_cpu() from sending IPIs to offline CPUs
  sched: Stop switched_to_rt() from sending IPIs to offline CPUs
  netfilter: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  mm: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  workqueue: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  trace: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  softirq: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  fs: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  doc: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  rcu: Account for rcu_all_qs() in cond_resched()
  doc: READ_ONCE() now implies smp_barrier_depends()
  mn10300: READ_ONCE() now implies smp_read_barrier_depends()
  drivers/net/ethernet/qlogic/qed: Fix __qed_spq_block() ordering
  fs/dcache: Use release-acquire for name/length update
  percpu: READ_ONCE() now implies smp_read_barrier_depends()
  rcu: Adjust read-side accessor comments for READ_ONCE()
  rtnetlink: Update now-misleading smp_read_barrier_depends() comment
  seqlock: Remove now-redundant smp_read_barrier_depends()
  uprobes: Remove now-redundant smp_read_barrier_depends()
  locking: Remove smp_read_barrier_depends() from 
queued_spin_lock_slowpath()
  tracepoint: Remove smp_read_barrier_depends() from comment
  lib/assoc_array: Remove smp_read_barrier_depends()
  mm/ksm: Remove now-redundant smp_read_barrier_depends()
  netfilter: Remove now-redundant smp_read_barrier_depends()
  keyring: Remove now-redundant smp_read_barrier_depends()
  drivers/infiniband: Remove now-redundant smp_read_barrier_depends()
  drivers/dma/ioat: Remove now-redundant smp_read_barrier_depends()
  doc: De-emphasize smp_read_barrier_depends
  genetlink: Remove smp_read_barrier_depends() from comment
  netlink: Remove smp_read_barrier_depends() from comment
  checkpatch: Add warnings for {smp_,}read_barrier_depends()
  drivers/vhost: Remove now-redundant read_barrier_depends()
  rcu: Add comment giving debug strategy for double call_rcu()
  torture: Reduce #ifdefs for 

[GIT PULL rcu/next] RCU commits for 4.15

2018-01-02 Thread Paul E. McKenney
Hello, Ingo, and Happy New Year!

This pull request contains the following changes:

1.  Updates to use cond_resched() instead of cond_resched_rcu_qs()
where feasible (currently everywhere except in kernel/rcu and
in kernel/torture.c).  Also a couple of fixes to avoid sending
IPIs to offline CPUs.

http://lkml.kernel.org/r/20171201192122.ga19...@linux.vnet.ibm.com

2.  Updates to simplify RCU's dyntick-idle handling.

http://lkml.kernel.org/r/20171201193625.ga20...@linux.vnet.ibm.com

3.  Miscellaneous fixes.

http://lkml.kernel.org/r/20171201194139.ga22...@linux.vnet.ibm.com

4.  Updates to remove almost all uses of smp_read_barrier_depends()
and read_barrier_depends().

http://lkml.kernel.org/r/20171201195053.ga23...@linux.vnet.ibm.com

5.  Torture-test updates.

http://lkml.kernel.org/r/20171201200819.ga25...@linux.vnet.ibm.com

All of these changes have been subjected to 0day Test Robot and -next
testing, and are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git for-mingo

for you to fetch changes up to 1dfa55e01987288d847220b8c027204871440ed1:

  Merge branches 'cond_resched.2017.12.04a', 'dyntick.2017.11.28a', 
'fixes.2017.12.11a', 'srbd.2017.12.05a' and 'torture.2017.12.11a' into HEAD 
(2017-12-11 09:21:58 -0800)


Davidlohr Bueso (2):
  locking/locktorture: Fix rwsem reader_delay
  locking/locktorture: Fix num reader/writer corner cases

Paul E. McKenney (59):
  rcu: Avoid ->dynticks_nmi_nesting store tearing
  rcu: Reduce dyntick-idle state space
  rcu: Move rcu_nmi_{enter,exit}() to prepare for consolidation
  rcu: Clamp ->dynticks_nmi_nesting at eqs entry/exit
  rcu: Define rcu_irq_{enter,exit}() in terms of rcu_nmi_{enter,exit}()
  rcu: Make ->dynticks_nesting be a simple counter
  rcu: Eliminate rcu_irq_enter_disabled()
  rcu: Add tracing to irq/NMI dyntick-idle transitions
  rcu: Shrink ->dynticks_{nmi_,}nesting from long long to long
  rcu: Add ->dynticks field to rcu_dyntick trace event
  rcu: Stop duplicating lockdep checks in RCU's idle-entry code
  rcu: Avoid ->dynticks_nesting store tearing
  rcu: Fold rcu_eqs_enter_common() into rcu_eqs_enter()
  rcu: Fold rcu_eqs_exit_common() into rcu_eqs_exit()
  rcu: Simplify rcu_eqs_{enter,exit}() non-idle task debug code
  doc: Update dyntick-idle design documentation for NMI/irq consolidation
  srcu: Prohibit call_srcu() use under raw spinlocks
  torture: Suppress CPU stall warnings during shutdown ftrace dump
  torture: Prepare scripting for shift from %p to %pK
  sched: Stop resched_cpu() from sending IPIs to offline CPUs
  sched: Stop switched_to_rt() from sending IPIs to offline CPUs
  netfilter: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  mm: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  workqueue: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  trace: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  softirq: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  fs: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  doc: Eliminate cond_resched_rcu_qs() in favor of cond_resched()
  rcu: Account for rcu_all_qs() in cond_resched()
  doc: READ_ONCE() now implies smp_barrier_depends()
  mn10300: READ_ONCE() now implies smp_read_barrier_depends()
  drivers/net/ethernet/qlogic/qed: Fix __qed_spq_block() ordering
  fs/dcache: Use release-acquire for name/length update
  percpu: READ_ONCE() now implies smp_read_barrier_depends()
  rcu: Adjust read-side accessor comments for READ_ONCE()
  rtnetlink: Update now-misleading smp_read_barrier_depends() comment
  seqlock: Remove now-redundant smp_read_barrier_depends()
  uprobes: Remove now-redundant smp_read_barrier_depends()
  locking: Remove smp_read_barrier_depends() from 
queued_spin_lock_slowpath()
  tracepoint: Remove smp_read_barrier_depends() from comment
  lib/assoc_array: Remove smp_read_barrier_depends()
  mm/ksm: Remove now-redundant smp_read_barrier_depends()
  netfilter: Remove now-redundant smp_read_barrier_depends()
  keyring: Remove now-redundant smp_read_barrier_depends()
  drivers/infiniband: Remove now-redundant smp_read_barrier_depends()
  drivers/dma/ioat: Remove now-redundant smp_read_barrier_depends()
  doc: De-emphasize smp_read_barrier_depends
  genetlink: Remove smp_read_barrier_depends() from comment
  netlink: Remove smp_read_barrier_depends() from comment
  checkpatch: Add warnings for {smp_,}read_barrier_depends()
  drivers/vhost: Remove now-redundant read_barrier_depends()
  rcu: Add comment giving debug strategy for double call_rcu()
  torture: Reduce #ifdefs for 

Re: [PATCH 0/3] Ktest: add email support

2018-01-02 Thread Tim Tianyang Chen
Hi Steve, did your mailer find all the patches? I made sure they all 
reply to the same mail ID this time.


Thanks,
Tim

On 12/15/2017 03:20 PM, Tim Tianyang Chen wrote:

This patch set will let users define a mailer, an email address and when to
receive notifications during automated testing. Users need to set up the
specified mailer prior to using this feature.

Tim Tianyang Chen (3):
   Ktest: add email support
   Ktest: use dodie for critical falures
   Ktest: add email options to sample.config

  ktest.pl| 131 +---
  sample.conf |  10 +
  2 files changed, 109 insertions(+), 32 deletions(-)





Re: [PATCH 0/3] Ktest: add email support

2018-01-02 Thread Tim Tianyang Chen
Hi Steve, did your mailer find all the patches? I made sure they all 
reply to the same mail ID this time.


Thanks,
Tim

On 12/15/2017 03:20 PM, Tim Tianyang Chen wrote:

This patch set will let users define a mailer, an email address and when to
receive notifications during automated testing. Users need to set up the
specified mailer prior to using this feature.

Tim Tianyang Chen (3):
   Ktest: add email support
   Ktest: use dodie for critical falures
   Ktest: add email options to sample.config

  ktest.pl| 131 +---
  sample.conf |  10 +
  2 files changed, 109 insertions(+), 32 deletions(-)





Re: [PATCH v2 0/4] Address error and recovery for AER and DPC

2018-01-02 Thread Sinan Kaya
On 1/2/2018 2:02 PM, Bjorn Helgaas wrote:
> I read that as suggesting that we should enable DPC support in Linux
> if and only if we also enable AER.  But I don't see anything in DPC
> that looks like that.  Should there be something there?  Should DPC be
> restructured so it's enabled and handled inside the AER driver instead
> of being a separate driver?

I think Keith posted a patch to do this. If firmware first is enabled, DPC
init is skipped after his patch.

Oza was able to plumb the DPC handling into error recovery callbacks of
the portdrv since the portdrv layer already provides these facilities such
as reset_link and resume.

The way DPC and AER work is almost identical from AER portdrv perspective.

I really like his plumbing. Putting DPC code into AER makes it more
convoluted in my opinion.

-- 
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux 
Foundation Collaborative Project.


Re: [PATCH v2 0/4] Address error and recovery for AER and DPC

2018-01-02 Thread Sinan Kaya
On 1/2/2018 2:02 PM, Bjorn Helgaas wrote:
> I read that as suggesting that we should enable DPC support in Linux
> if and only if we also enable AER.  But I don't see anything in DPC
> that looks like that.  Should there be something there?  Should DPC be
> restructured so it's enabled and handled inside the AER driver instead
> of being a separate driver?

I think Keith posted a patch to do this. If firmware first is enabled, DPC
init is skipped after his patch.

Oza was able to plumb the DPC handling into error recovery callbacks of
the portdrv since the portdrv layer already provides these facilities such
as reset_link and resume.

The way DPC and AER work is almost identical from AER portdrv perspective.

I really like his plumbing. Putting DPC code into AER makes it more
convoluted in my opinion.

-- 
Sinan Kaya
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux 
Foundation Collaborative Project.


Re: [PATCH v2 0/4] Address error and recovery for AER and DPC

2018-01-02 Thread Keith Busch
On Tue, Jan 02, 2018 at 01:02:15PM -0600, Bjorn Helgaas wrote:
> On Fri, Dec 29, 2017 at 12:54:15PM +0530, Oza Pawandeep wrote:
> > This patch set brings in support for DPC and AER to co-exist and not to
> > race for recovery.
> > 
> > The current implementation of AER and error message broadcasting to the
> > EP driver is tightly coupled and limited to AER service driver.
> > It is important to factor out broadcasting and other link handling
> > callbacks. So that not only when AER gets triggered, but also when DPC get
> > triggered, or both get triggered simultaneously (for e.g. ERR_FATAL),
> > callbacks are handled appropriately.
> > having modularized the code, the race between AER and DPC is handled
> > gracefully.
> > for e.g. when DPC is active and kicked in, AER should not attempt to do
> > recovery, because DPC takes care of it.
> 
> High-level question:
> 
> We have some convoluted code in negotiate_os_control() and
> aer_service_init() that (I think) essentially disables AER unless the
> platform firmware grants us permission to use it.
> 
> The last implementation note in PCIe r3.1, sec 6.2.10 says
> 
>   DPC may be controlled in some configurations by platform firmware
>   and in other configurations by the operating system. DPC
>   functionality is strongly linked with the functionality in Advanced
>   Error Reporting. To avoid conflicts over whether platform firmware
>   or the operating system have control of DPC, it is recommended that
>   platform firmware and operating systems always link the control of
>   DPC to the control of Advanced Error Reporting.
> 
> I read that as suggesting that we should enable DPC support in Linux
> if and only if we also enable AER.  But I don't see anything in DPC
> that looks like that.  Should there be something there?  Should DPC be
> restructured so it's enabled and handled inside the AER driver instead
> of being a separate driver?

Yes, I agree the two should be linked. I submitted a patch for that here,
though driver responsibilities are still separate in this series:

  https://marc.info/?l=linux-pci&m=151371742225111&w=2



Re: [PATCH v2 0/4] Address error and recovery for AER and DPC

2018-01-02 Thread Keith Busch
On Tue, Jan 02, 2018 at 01:02:15PM -0600, Bjorn Helgaas wrote:
> On Fri, Dec 29, 2017 at 12:54:15PM +0530, Oza Pawandeep wrote:
> > This patch set brings in support for DPC and AER to co-exist and not to
> > race for recovery.
> > 
> > The current implementation of AER and error message broadcasting to the
> > EP driver is tightly coupled and limited to AER service driver.
> > It is important to factor out broadcasting and other link handling
> > callbacks. So that not only when AER gets triggered, but also when DPC get
> > triggered, or both get triggered simultaneously (for e.g. ERR_FATAL),
> > callbacks are handled appropriately.
> > having modularized the code, the race between AER and DPC is handled
> > gracefully.
> > for e.g. when DPC is active and kicked in, AER should not attempt to do
> > recovery, because DPC takes care of it.
> 
> High-level question:
> 
> We have some convoluted code in negotiate_os_control() and
> aer_service_init() that (I think) essentially disables AER unless the
> platform firmware grants us permission to use it.
> 
> The last implementation note in PCIe r3.1, sec 6.2.10 says
> 
>   DPC may be controlled in some configurations by platform firmware
>   and in other configurations by the operating system. DPC
>   functionality is strongly linked with the functionality in Advanced
>   Error Reporting. To avoid conflicts over whether platform firmware
>   or the operating system have control of DPC, it is recommended that
>   platform firmware and operating systems always link the control of
>   DPC to the control of Advanced Error Reporting.
> 
> I read that as suggesting that we should enable DPC support in Linux
> if and only if we also enable AER.  But I don't see anything in DPC
> that looks like that.  Should there be something there?  Should DPC be
> restructured so it's enabled and handled inside the AER driver instead
> of being a separate driver?

Yes, I agree the two should be linked. I submitted a patch for that here,
though driver responsibilities are still separate in this series:

  https://marc.info/?l=linux-pci&m=151371742225111&w=2



Re: [PATCH v4 16/19] fs: only set S_VERSION when updating times if necessary

2018-01-02 Thread Jeff Layton
On Tue, 2018-01-02 at 17:50 +0100, Jan Kara wrote:
> On Fri 22-12-17 07:05:53, Jeff Layton wrote:
> > From: Jeff Layton 
> > 
> > We only really need to update i_version if someone has queried for it
> > since we last incremented it. By doing that, we can avoid having to
> > update the inode if the times haven't changed.
> > 
> > If the times have changed, then we go ahead and forcibly increment the
> > counter, under the assumption that we'll be going to the storage
> > anyway, and the increment itself is relatively cheap.
> > 
> > Signed-off-by: Jeff Layton 
> > ---
> >  fs/inode.c | 10 +++---
> >  1 file changed, 7 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/inode.c b/fs/inode.c
> > index 19e72f500f71..2fa920188759 100644
> > --- a/fs/inode.c
> > +++ b/fs/inode.c
> > @@ -1635,17 +1635,21 @@ static int relatime_need_update(const struct path 
> > *path, struct inode *inode,
> >  int generic_update_time(struct inode *inode, struct timespec *time, int 
> > flags)
> >  {
> > int iflags = I_DIRTY_TIME;
> > +   bool dirty = false;
> >  
> > if (flags & S_ATIME)
> > inode->i_atime = *time;
> > if (flags & S_VERSION)
> > -   inode_inc_iversion(inode);
> > +   dirty |= inode_maybe_inc_iversion(inode, dirty);
> > if (flags & S_CTIME)
> > inode->i_ctime = *time;
> > if (flags & S_MTIME)
> > inode->i_mtime = *time;
> > +   if ((flags & (S_ATIME | S_CTIME | S_MTIME)) &&
> > +   !(inode->i_sb->s_flags & SB_LAZYTIME))
> > +   dirty = true;
> 
> When you pass 'dirty' to inode_maybe_inc_iversion(), it is always false.
> Maybe this condition should be at the beginning of the function? Once you
> fix that the patch looks good so you can add:
> 
> Reviewed-by: Jan Kara 
> 

Thanks for the review! I've fixed it in my tree. I'll not re-post the
set unless I have to make another significant change or someone requests
it.

I did make one other change, and that was to drop the "const" qualifiers
on the integer arguments in the new API. David Howells pointed out that
they don't really help anything, and the prototypes look cleaner without
them.

This set is now in linux-next as well, so I'm going to try to get this
merged into v4.16, assuming no problems between now and the merge
window.
-- 
Jeff Layton 


Re: [PATCH v4 16/19] fs: only set S_VERSION when updating times if necessary

2018-01-02 Thread Jeff Layton
On Tue, 2018-01-02 at 17:50 +0100, Jan Kara wrote:
> On Fri 22-12-17 07:05:53, Jeff Layton wrote:
> > From: Jeff Layton 
> > 
> > We only really need to update i_version if someone has queried for it
> > since we last incremented it. By doing that, we can avoid having to
> > update the inode if the times haven't changed.
> > 
> > If the times have changed, then we go ahead and forcibly increment the
> > counter, under the assumption that we'll be going to the storage
> > anyway, and the increment itself is relatively cheap.
> > 
> > Signed-off-by: Jeff Layton 
> > ---
> >  fs/inode.c | 10 +++---
> >  1 file changed, 7 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/inode.c b/fs/inode.c
> > index 19e72f500f71..2fa920188759 100644
> > --- a/fs/inode.c
> > +++ b/fs/inode.c
> > @@ -1635,17 +1635,21 @@ static int relatime_need_update(const struct path 
> > *path, struct inode *inode,
> >  int generic_update_time(struct inode *inode, struct timespec *time, int 
> > flags)
> >  {
> > int iflags = I_DIRTY_TIME;
> > +   bool dirty = false;
> >  
> > if (flags & S_ATIME)
> > inode->i_atime = *time;
> > if (flags & S_VERSION)
> > -   inode_inc_iversion(inode);
> > +   dirty |= inode_maybe_inc_iversion(inode, dirty);
> > if (flags & S_CTIME)
> > inode->i_ctime = *time;
> > if (flags & S_MTIME)
> > inode->i_mtime = *time;
> > +   if ((flags & (S_ATIME | S_CTIME | S_MTIME)) &&
> > +   !(inode->i_sb->s_flags & SB_LAZYTIME))
> > +   dirty = true;
> 
> When you pass 'dirty' to inode_maybe_inc_iversion(), it is always false.
> Maybe this condition should be at the beginning of the function? Once you
> fix that the patch looks good so you can add:
> 
> Reviewed-by: Jan Kara 
> 

Thanks for the review! I've fixed it in my tree. I'll not re-post the
set unless I have to make another significant change or someone requests
it.

I did make one other change, and that was to drop the "const" qualifiers
on the integer arguments in the new API. David Howells pointed out that
they don't really help anything, and the prototypes look cleaner without
them.

This set is now in linux-next as well, so I'm going to try to get this
merged into v4.16, assuming no problems between now and the merge
window.
-- 
Jeff Layton 


Re: [PATCH v2 0/4] Address error and recovery for AER and DPC

2018-01-02 Thread Bjorn Helgaas
On Fri, Dec 29, 2017 at 12:54:15PM +0530, Oza Pawandeep wrote:
> This patch set brings in support for DPC and AER to co-exist and not to
> race for recovery.
> 
> The current implementation of AER and error message broadcasting to the
> EP driver is tightly coupled and limited to the AER service driver.
> It is important to factor out broadcasting and other link handling
> callbacks, so that not only when AER gets triggered, but also when DPC gets
> triggered, or both get triggered simultaneously (e.g. for ERR_FATAL),
> callbacks are handled appropriately.
> Having modularized the code, the race between AER and DPC is handled
> gracefully.
> For example, when DPC is active and kicked in, AER should not attempt to do
> recovery, because DPC takes care of it.

High-level question:

We have some convoluted code in negotiate_os_control() and
aer_service_init() that (I think) essentially disables AER unless the
platform firmware grants us permission to use it.

The last implementation note in PCIe r3.1, sec 6.2.10 says

  DPC may be controlled in some configurations by platform firmware
  and in other configurations by the operating system. DPC
  functionality is strongly linked with the functionality in Advanced
  Error Reporting. To avoid conflicts over whether platform firmware
  or the operating system have control of DPC, it is recommended that
  platform firmware and operating systems always link the control of
  DPC to the control of Advanced Error Reporting.

I read that as suggesting that we should enable DPC support in Linux
if and only if we also enable AER.  But I don't see anything in DPC
that looks like that.  Should there be something there?  Should DPC be
restructured so it's enabled and handled inside the AER driver instead
of being a separate driver?

Bjorn


Re: [PATCH v2 0/4] Address error and recovery for AER and DPC

2018-01-02 Thread Bjorn Helgaas
On Fri, Dec 29, 2017 at 12:54:15PM +0530, Oza Pawandeep wrote:
> This patch set brings in support for DPC and AER to co-exist and not to
> race for recovery.
> 
> The current implementation of AER and error message broadcasting to the
> EP driver is tightly coupled and limited to the AER service driver.
> It is important to factor out broadcasting and other link handling
> callbacks, so that not only when AER gets triggered, but also when DPC gets
> triggered, or both get triggered simultaneously (e.g. for ERR_FATAL),
> callbacks are handled appropriately.
> Having modularized the code, the race between AER and DPC is handled
> gracefully.
> For example, when DPC is active and kicked in, AER should not attempt to do
> recovery, because DPC takes care of it.

High-level question:

We have some convoluted code in negotiate_os_control() and
aer_service_init() that (I think) essentially disables AER unless the
platform firmware grants us permission to use it.

The last implementation note in PCIe r3.1, sec 6.2.10 says

  DPC may be controlled in some configurations by platform firmware
  and in other configurations by the operating system. DPC
  functionality is strongly linked with the functionality in Advanced
  Error Reporting. To avoid conflicts over whether platform firmware
  or the operating system have control of DPC, it is recommended that
  platform firmware and operating systems always link the control of
  DPC to the control of Advanced Error Reporting.

I read that as suggesting that we should enable DPC support in Linux
if and only if we also enable AER.  But I don't see anything in DPC
that looks like that.  Should there be something there?  Should DPC be
restructured so it's enabled and handled inside the AER driver instead
of being a separate driver?

Bjorn


Re: [PATCH 01/33] clk_ops: change round_rate() to return unsigned long

2018-01-02 Thread Stephen Boyd
On 12/31, Bryan O'Donoghue wrote:
> On 30/12/17 16:36, Mikko Perttunen wrote:
> >FWIW, we had this problem some years ago with the Tegra CPU clock
> >- then it was determined that a simpler solution was to have the
> >determine_rate callback support unsigned long rates - so clock
> >drivers that need to return rates higher than 2^31 can instead
> >implement the determine_rate callback. That is what's currently
> >implemented.
> >
> >Mikko
> 
> Granted we could work around it but, having both zero and less than
> zero indicate error means you can't support larger than LONG_MAX
> which is I think worth fixing.
> 

Ok. But can you implement the determine_rate op instead of the
round_rate op for your clk? It's not a work-around, it's the
preferred solution. That would allow rates larger than 2^31 for
the clk without pushing through a change to all the drivers to
express zero as "error" and non-zero as the rounded rate.

I'm not entirely opposed to this approach, because we probably
don't care to pass the particular error value from a clk provider
to a clk consumer about what the error is. It's actually what we
proposed as the solution for clk_round_rate() to return values
larger than LONG_MAX to consumers. But doing that consumer API
change or this provider side change is going to require us to
evaluate all the consumers of these clks to make sure they don't
check for some error value that's less than zero. This series
does half the work, by changing the provider side, while ignoring
the consumer side and any potential fallout of the less than zero
to zero return value change.

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH 01/33] clk_ops: change round_rate() to return unsigned long

2018-01-02 Thread Stephen Boyd
On 12/31, Bryan O'Donoghue wrote:
> On 30/12/17 16:36, Mikko Perttunen wrote:
> >FWIW, we had this problem some years ago with the Tegra CPU clock
> >- then it was determined that a simpler solution was to have the
> >determine_rate callback support unsigned long rates - so clock
> >drivers that need to return rates higher than 2^31 can instead
> >implement the determine_rate callback. That is what's currently
> >implemented.
> >
> >Mikko
> 
> Granted we could work around it but, having both zero and less than
> zero indicate error means you can't support larger than LONG_MAX
> which is I think worth fixing.
> 

Ok. But can you implement the determine_rate op instead of the
round_rate op for your clk? It's not a work-around, it's the
preferred solution. That would allow rates larger than 2^31 for
the clk without pushing through a change to all the drivers to
express zero as "error" and non-zero as the rounded rate.

I'm not entirely opposed to this approach, because we probably
don't care to pass the particular error value from a clk provider
to a clk consumer about what the error is. It's actually what we
proposed as the solution for clk_round_rate() to return values
larger than LONG_MAX to consumers. But doing that consumer API
change or this provider side change is going to require us to
evaluate all the consumers of these clks to make sure they don't
check for some error value that's less than zero. This series
does half the work, by changing the provider side, while ignoring
the consumer side and any potential fallout of the less than zero
to zero return value change.

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH] alpha: fix crash if pthread_create races with signal delivery

2018-01-02 Thread Mikulas Patocka
On alpha, a process will crash if it attempts to start a thread and a
signal is delivered at the same time. The crash can be reproduced with
this program: https://cygwin.com/ml/cygwin/2014-11/msg00473.html

The reason for the crash is this:
* we call the clone syscall
* we go to the function copy_process
* copy_process calls copy_thread_tls, which is a wrapper around copy_thread
* copy_thread sets the tls pointer: childti->pcb.unique = regs->r20
* copy_thread sets regs->r20 to zero
* we go back to copy_process
* copy_process checks "if (signal_pending(current))" and returns
  -ERESTARTNOINTR
* the clone syscall is restarted, but this time, regs->r20 is zero, so
  the new thread is created with zero tls pointer
* the new thread crashes in start_thread when attempting to access tls

The comment in the code says that setting the register r20 is some
compatibility with OSF/1. But OSF/1 doesn't use the CLONE_SETTLS flag, so
we don't have to zero r20 if CLONE_SETTLS is set. This patch fixes the bug
by zeroing regs->r20 only if CLONE_SETTLS is not set.

Signed-off-by: Mikulas Patocka 
Cc: sta...@vger.kernel.org

---
 arch/alpha/kernel/process.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

Index: linux-stable/arch/alpha/kernel/process.c
===
--- linux-stable.orig/arch/alpha/kernel/process.c   2017-12-31 
17:42:12.0 +0100
+++ linux-stable/arch/alpha/kernel/process.c2018-01-02 18:06:24.0 
+0100
@@ -265,12 +265,13 @@ copy_thread(unsigned long clone_flags, u
   application calling fork.  */
if (clone_flags & CLONE_SETTLS)
childti->pcb.unique = regs->r20;
+   else
+   regs->r20 = 0;  /* OSF/1 has some strange fork() semantics.  */
childti->pcb.usp = usp ?: rdusp();
*childregs = *regs;
childregs->r0 = 0;
childregs->r19 = 0;
childregs->r20 = 1; /* OSF/1 has some strange fork() semantics.  */
-   regs->r20 = 0;
stack = ((struct switch_stack *) regs) - 1;
*childstack = *stack;
childstack->r26 = (unsigned long) ret_from_fork;


[PATCH] alpha: fix crash if pthread_create races with signal delivery

2018-01-02 Thread Mikulas Patocka
On alpha, a process will crash if it attempts to start a thread and a
signal is delivered at the same time. The crash can be reproduced with
this program: https://cygwin.com/ml/cygwin/2014-11/msg00473.html

The reason for the crash is this:
* we call the clone syscall
* we go to the function copy_process
* copy_process calls copy_thread_tls, which is a wrapper around copy_thread
* copy_thread sets the tls pointer: childti->pcb.unique = regs->r20
* copy_thread sets regs->r20 to zero
* we go back to copy_process
* copy_process checks "if (signal_pending(current))" and returns
  -ERESTARTNOINTR
* the clone syscall is restarted, but this time, regs->r20 is zero, so
  the new thread is created with zero tls pointer
* the new thread crashes in start_thread when attempting to access tls

The comment in the code says that setting the register r20 is some
compatibility with OSF/1. But OSF/1 doesn't use the CLONE_SETTLS flag, so
we don't have to zero r20 if CLONE_SETTLS is set. This patch fixes the bug
by zeroing regs->r20 only if CLONE_SETTLS is not set.

Signed-off-by: Mikulas Patocka 
Cc: sta...@vger.kernel.org

---
 arch/alpha/kernel/process.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

Index: linux-stable/arch/alpha/kernel/process.c
===
--- linux-stable.orig/arch/alpha/kernel/process.c   2017-12-31 
17:42:12.0 +0100
+++ linux-stable/arch/alpha/kernel/process.c2018-01-02 18:06:24.0 
+0100
@@ -265,12 +265,13 @@ copy_thread(unsigned long clone_flags, u
   application calling fork.  */
if (clone_flags & CLONE_SETTLS)
childti->pcb.unique = regs->r20;
+   else
+   regs->r20 = 0;  /* OSF/1 has some strange fork() semantics.  */
childti->pcb.usp = usp ?: rdusp();
*childregs = *regs;
childregs->r0 = 0;
childregs->r19 = 0;
childregs->r20 = 1; /* OSF/1 has some strange fork() semantics.  */
-   regs->r20 = 0;
stack = ((struct switch_stack *) regs) - 1;
*childstack = *stack;
childstack->r26 = (unsigned long) ret_from_fork;


Re: pci driver loads right after unload

2018-01-02 Thread Bjorn Helgaas
[+cc Greg, linux-kernel]

Hi Max,

Thanks for the report!

On Tue, Jan 02, 2018 at 01:50:23AM +0200, Max Gurtovoy wrote:
> hi all,
> I encountered a strange phenomenon using 2 different pci drivers
> (nvme and mlx5_core) since 4.15-rc1:
> when I try to unload the modules using the "modprobe -r" cmd it calls
> the .probe function right after calling the .remove function and the
> module is not really unloaded.
> I think there is some race condition because when I added a
> msleep(1000) after "pci_unregister_driver(&nvme_driver);" (in the
> nvme module testing, it also worked in the mlx5_core), the issue
> seems to disappear.

You say "since 4.15-rc1".  Does that mean it's a regression?  If so,
what's the most recent kernel that does not have this problem?  Worst
case, you could bisect to find where it broke.

I don't see anything obvious in the drivers/pci changes between v4.14
and v4.15-rc1.  Module loading and driver binding is mostly driven by
the driver core and udev.  Maybe you could learn something with
"udevadm monitor" or by turning on some of the debug in
lib/kobject_uevent.c?

Bjorn


Re: pci driver loads right after unload

2018-01-02 Thread Bjorn Helgaas
[+cc Greg, linux-kernel]

Hi Max,

Thanks for the report!

On Tue, Jan 02, 2018 at 01:50:23AM +0200, Max Gurtovoy wrote:
> hi all,
> I encountered a strange phenomenon using 2 different pci drivers
> (nvme and mlx5_core) since 4.15-rc1:
> when I try to unload the modules using the "modprobe -r" cmd it calls
> the .probe function right after calling the .remove function and the
> module is not really unloaded.
> I think there is some race condition because when I added a
> msleep(1000) after "pci_unregister_driver(&nvme_driver);" (in the
> nvme module testing, it also worked in the mlx5_core), the issue
> seems to disappear.

You say "since 4.15-rc1".  Does that mean it's a regression?  If so,
what's the most recent kernel that does not have this problem?  Worst
case, you could bisect to find where it broke.

I don't see anything obvious in the drivers/pci changes between v4.14
and v4.15-rc1.  Module loading and driver binding is mostly driven by
the driver core and udev.  Maybe you could learn something with
"udevadm monitor" or by turning on some of the debug in
lib/kobject_uevent.c?

Bjorn


perf test BPF failing on 4.15.0-rc6

2018-01-02 Thread Arnaldo Carvalho de Melo
Hi Wang,

I just updated my machine to Fedora 27 and 4.15.0-rc6 and the
only test failing for me is:

[root@jouet linux]# perf test bpf
39: BPF filter:
39.1: Basic BPF filtering : FAILED!
39.2: BPF pinning : Skip
39.3: BPF prologue generation : Skip
39.4: BPF relocation checker  : Skip
[root@jouet linux]#

I haven't checked but perhaps the problem is that SyS_epoll_wait seems
to now be inlined in three places and perhaps the eBPF proggie is being
added to just one of them?

Seemingly relevant excerpt:

Open Debuginfo file: /lib/modules/4.15.0-rc6/build/vmlinux
Try to find probe point from debuginfo.
Matched function: SyS_epoll_wait [2f40eb7]
found inline addr: 0x812b6ff1
Probe point found: compat_SyS_epoll_pwait+129
found inline addr: 0x812b6de7
Probe point found: SyS_epoll_pwait+135
found inline addr: 0x812b6c80
Probe point found: SyS_epoll_wait+0
Found 3 probe_trace_events.

- Arnaldo

P.S.: Full -v output:

[root@jouet linux]# uname -a
Linux jouet 4.15.0-rc6 #4 SMP Tue Jan 2 14:30:53 -03 2018 x86_64 x86_64 x86_64 
GNU/Linux
[root@jouet linux]# gcc -v
Using built-in specs.
COLLECT_GCC=/usr/bin/gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/7/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-redhat-linux
Configured with: ../configure --enable-bootstrap 
--enable-languages=c,c++,objc,obj-c++,fortran,ada,go,lto --prefix=/usr 
--mandir=/usr/share/man --infodir=/usr/share/info 
--with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-shared 
--enable-threads=posix --enable-checking=release --enable-multilib 
--with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions 
--enable-gnu-unique-object --enable-linker-build-id 
--with-gcc-major-version-only --with-linker-hash-style=gnu --enable-plugin 
--enable-initfini-array --with-isl --enable-libmpx 
--enable-offload-targets=nvptx-none --without-cuda-driver 
--enable-gnu-indirect-function --with-tune=generic --with-arch_32=i686 
--build=x86_64-redhat-linux
Thread model: posix
gcc version 7.2.1 20170915 (Red Hat 7.2.1-2) (GCC) 
[root@jouet linux]# clang -v
clang version 6.0.0 (http://llvm.org/git/clang.git 
56cc8f8880db2ebc433eeb6b6a707c101467a186) (http://llvm.org/git/llvm.git 
3656d83960a4f3fedf6d8f19043abf52379f78c3)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /usr/local/bin
Found candidate GCC installation: /usr/lib/gcc/x86_64-redhat-linux/7
Selected GCC installation: /usr/lib/gcc/x86_64-redhat-linux/7
Candidate multilib: .;@m64
Candidate multilib: 32;@m32
Selected multilib: .;@m64
[root@jouet linux]# perf test -v BPF
39: BPF filter:
39.1: Basic BPF filtering :
--- start ---
test child forked, pid 24304
Kernel build dir is set to /lib/modules/4.15.0-rc6/build
set env: KBUILD_DIR=/lib/modules/4.15.0-rc6/build
unset env: KBUILD_OPTS
include option is set to  -nostdinc -isystem 
/usr/lib/gcc/x86_64-redhat-linux/7/include 
-I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  
-I/home/acme/git/linux/include -I./include 
-I/home/acme/git/linux/arch/x86/include/uapi 
-I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi 
-I./include/generated/uapi -include 
/home/acme/git/linux/include/linux/kconfig.h 
set env: NR_CPUS=4
set env: LINUX_VERSION_CODE=0x40f00
set env: CLANG_EXEC=/usr/local/bin/clang
set env: CLANG_OPTIONS=-xc 
set env: KERNEL_INC_OPTIONS= -nostdinc -isystem 
/usr/lib/gcc/x86_64-redhat-linux/7/include 
-I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  
-I/home/acme/git/linux/include -I./include 
-I/home/acme/git/linux/arch/x86/include/uapi 
-I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi 
-I./include/generated/uapi -include 
/home/acme/git/linux/include/linux/kconfig.h 
set env: WORKING_DIR=/lib/modules/4.15.0-rc6/build
set env: CLANG_SOURCE=-
llvm compiling command template: echo '/*
 * bpf-script-example.c
 * Test basic LLVM building
 */
#ifndef LINUX_VERSION_CODE
# error Need LINUX_VERSION_CODE
# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" 
into llvm section of ~/.perfconfig'
#endif
#define BPF_ANY 0
#define BPF_MAP_TYPE_ARRAY 2
#define BPF_FUNC_map_lookup_elem 1
#define BPF_FUNC_map_update_elem 2

static void *(*bpf_map_lookup_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_lookup_elem;
static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int 
flags) =
(void *) BPF_FUNC_map_update_elem;

struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};

#define SEC(NAME) __attribute__((section(NAME), used))
struct bpf_map_def SEC("maps") flip_table = {

perf test BPF failing on 4.15.0-rc6

2018-01-02 Thread Arnaldo Carvalho de Melo
Hi Wang,

I just updated my machine to Fedora 27 and 4.15.0-rc6 and the
only test failing for me is:

[root@jouet linux]# perf test bpf
39: BPF filter:
39.1: Basic BPF filtering : FAILED!
39.2: BPF pinning : Skip
39.3: BPF prologue generation : Skip
39.4: BPF relocation checker  : Skip
[root@jouet linux]#

I haven't checked but perhaps the problem is that SyS_epoll_wait seems
to now be inlined in three places and perhaps the eBPF proggie is being
added to just one of them?

Seemingly relevant excerpt:

Open Debuginfo file: /lib/modules/4.15.0-rc6/build/vmlinux
Try to find probe point from debuginfo.
Matched function: SyS_epoll_wait [2f40eb7]
found inline addr: 0x812b6ff1
Probe point found: compat_SyS_epoll_pwait+129
found inline addr: 0x812b6de7
Probe point found: SyS_epoll_pwait+135
found inline addr: 0x812b6c80
Probe point found: SyS_epoll_wait+0
Found 3 probe_trace_events.

- Arnaldo

P.S.: Full -v output:

[root@jouet linux]# uname -a
Linux jouet 4.15.0-rc6 #4 SMP Tue Jan 2 14:30:53 -03 2018 x86_64 x86_64 x86_64 
GNU/Linux
[root@jouet linux]# gcc -v
Using built-in specs.
COLLECT_GCC=/usr/bin/gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-redhat-linux/7/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-redhat-linux
Configured with: ../configure --enable-bootstrap 
--enable-languages=c,c++,objc,obj-c++,fortran,ada,go,lto --prefix=/usr 
--mandir=/usr/share/man --infodir=/usr/share/info 
--with-bugurl=http://bugzilla.redhat.com/bugzilla --enable-shared 
--enable-threads=posix --enable-checking=release --enable-multilib 
--with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions 
--enable-gnu-unique-object --enable-linker-build-id 
--with-gcc-major-version-only --with-linker-hash-style=gnu --enable-plugin 
--enable-initfini-array --with-isl --enable-libmpx 
--enable-offload-targets=nvptx-none --without-cuda-driver 
--enable-gnu-indirect-function --with-tune=generic --with-arch_32=i686 
--build=x86_64-redhat-linux
Thread model: posix
gcc version 7.2.1 20170915 (Red Hat 7.2.1-2) (GCC) 
[root@jouet linux]# clang -v
clang version 6.0.0 (http://llvm.org/git/clang.git 
56cc8f8880db2ebc433eeb6b6a707c101467a186) (http://llvm.org/git/llvm.git 
3656d83960a4f3fedf6d8f19043abf52379f78c3)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /usr/local/bin
Found candidate GCC installation: /usr/lib/gcc/x86_64-redhat-linux/7
Selected GCC installation: /usr/lib/gcc/x86_64-redhat-linux/7
Candidate multilib: .;@m64
Candidate multilib: 32;@m32
Selected multilib: .;@m64
[root@jouet linux]# perf test -v BPF
39: BPF filter:
39.1: Basic BPF filtering :
--- start ---
test child forked, pid 24304
Kernel build dir is set to /lib/modules/4.15.0-rc6/build
set env: KBUILD_DIR=/lib/modules/4.15.0-rc6/build
unset env: KBUILD_OPTS
include option is set to  -nostdinc -isystem 
/usr/lib/gcc/x86_64-redhat-linux/7/include 
-I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  
-I/home/acme/git/linux/include -I./include 
-I/home/acme/git/linux/arch/x86/include/uapi 
-I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi 
-I./include/generated/uapi -include 
/home/acme/git/linux/include/linux/kconfig.h 
set env: NR_CPUS=4
set env: LINUX_VERSION_CODE=0x40f00
set env: CLANG_EXEC=/usr/local/bin/clang
set env: CLANG_OPTIONS=-xc 
set env: KERNEL_INC_OPTIONS= -nostdinc -isystem 
/usr/lib/gcc/x86_64-redhat-linux/7/include 
-I/home/acme/git/linux/arch/x86/include -I./arch/x86/include/generated  
-I/home/acme/git/linux/include -I./include 
-I/home/acme/git/linux/arch/x86/include/uapi 
-I./arch/x86/include/generated/uapi -I/home/acme/git/linux/include/uapi 
-I./include/generated/uapi -include 
/home/acme/git/linux/include/linux/kconfig.h 
set env: WORKING_DIR=/lib/modules/4.15.0-rc6/build
set env: CLANG_SOURCE=-
llvm compiling command template: echo '/*
 * bpf-script-example.c
 * Test basic LLVM building
 */
#ifndef LINUX_VERSION_CODE
# error Need LINUX_VERSION_CODE
# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" 
into llvm section of ~/.perfconfig'
#endif
#define BPF_ANY 0
#define BPF_MAP_TYPE_ARRAY 2
#define BPF_FUNC_map_lookup_elem 1
#define BPF_FUNC_map_update_elem 2

static void *(*bpf_map_lookup_elem)(void *map, void *key) =
(void *) BPF_FUNC_map_lookup_elem;
static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int 
flags) =
(void *) BPF_FUNC_map_update_elem;

struct bpf_map_def {
unsigned int type;
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
};

#define SEC(NAME) __attribute__((section(NAME), used))
struct bpf_map_def SEC("maps") flip_table = {

<    1   2   3   4   5   6   7   8   9   10   >