[PATCH] debugfs: Add dummy implementation of few helpers

2017-06-28 Thread Viresh Kumar
This adds the missing dummy implementations of
debugfs_create_file_unsafe() and debugfs_create_ulong(), used when
CONFIG_DEBUG_FS is disabled.

Signed-off-by: Viresh Kumar 
---
 include/linux/debugfs.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index 9174b0d28582..222892287eb8 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -196,6 +196,14 @@ static inline struct dentry *debugfs_create_file(const char *name, umode_t mode,
return ERR_PTR(-ENODEV);
 }
 
+static inline struct dentry *debugfs_create_file_unsafe(const char *name,
+   umode_t mode, struct dentry *parent,
+   void *data,
+   const struct file_operations *fops)
+{
+   return ERR_PTR(-ENODEV);
+}
+
 static inline struct dentry *debugfs_create_file_size(const char *name, umode_t mode,
struct dentry *parent, void *data,
const struct file_operations *fops,
@@ -289,6 +297,14 @@ static inline struct dentry *debugfs_create_u64(const char *name, umode_t mode,
return ERR_PTR(-ENODEV);
 }
 
+static inline struct dentry *debugfs_create_ulong(const char *name,
+   umode_t mode,
+   struct dentry *parent,
+   unsigned long *value)
+{
+   return ERR_PTR(-ENODEV);
+}
+
 static inline struct dentry *debugfs_create_x8(const char *name, umode_t mode,
   struct dentry *parent,
   u8 *value)
-- 
2.13.0.71.gd7076ec9c9cb
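[Editor's note: for context, a minimal sketch of the kind of caller these stubs unblock (illustrative only, not part of the patch; the directory and counter names are made up). With CONFIG_DEBUG_FS=n the calls below resolve to the dummy helpers and simply return ERR_PTR(-ENODEV), so the driver builds and runs either way.]

	#include <linux/debugfs.h>

	static unsigned long example_count;
	static struct dentry *example_dir;

	static void example_debugfs_init(void)
	{
		example_dir = debugfs_create_dir("example", NULL);

		/* With CONFIG_DEBUG_FS=n this is the dummy stub added above;
		 * the return value can be ignored and the driver keeps working. */
		debugfs_create_ulong("count", 0444, example_dir, &example_count);
	}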



Re: [RFC 4/5] drivers: boot_constraint: Add debugfs support

2017-06-28 Thread Viresh Kumar
On 28-06-17, 08:46, Randy Dunlap wrote:
> On 06/28/2017 03:26 AM, Viresh Kumar wrote:
> > This patch adds debugfs support for boot constraints. This is how it
> > looks for a "vmmc-supply" constraint for the MMC device.
> > 
> 
> Hi,
> Does this build OK when DEBUG_FS is not enabled in kernel .config?

TBH, I hadn't tried this earlier, as I believed dummy implementations
of all the debugfs helpers were available.

It needs the following debugfs patch (that I sent just now), which
adds the dummy implementation of debugfs_create_ulong().

https://marc.info/?l=linux-kernel&m=149870936319587

-- 
viresh


[PATCH] aic7xxx: fix firmware build with O=path

2017-06-28 Thread Jakub Kicinski
Building the firmware with O=path has apparently been broken in aic7xxx
forever.  The message of the previous commit to the Makefile (from 2008)
already mentions this unfortunate state of affairs.  Fix this, mostly to
make randconfig builds more reliable.

Signed-off-by: Jakub Kicinski 
---
 drivers/scsi/aic7xxx/Makefile| 12 
 drivers/scsi/aic7xxx/aicasm/Makefile | 53 
 2 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/drivers/scsi/aic7xxx/Makefile b/drivers/scsi/aic7xxx/Makefile
index 741d81861d17..07b60a780c06 100644
--- a/drivers/scsi/aic7xxx/Makefile
+++ b/drivers/scsi/aic7xxx/Makefile
@@ -55,9 +55,9 @@ aicasm-7xxx-opts-$(CONFIG_AIC7XXX_REG_PRETTY_PRINT) := \
 
 ifeq ($(CONFIG_AIC7XXX_BUILD_FIRMWARE),y)
 $(obj)/aic7xxx_seq.h: $(src)/aic7xxx.seq $(src)/aic7xxx.reg $(obj)/aicasm/aicasm
-   $(obj)/aicasm/aicasm -I$(src) -r $(obj)/aic7xxx_reg.h \
+   $(obj)/aicasm/aicasm -I$(srctree)/$(src) -r $(obj)/aic7xxx_reg.h \
  $(aicasm-7xxx-opts-y) -o $(obj)/aic7xxx_seq.h \
- $(src)/aic7xxx.seq
+ $(srctree)/$(src)/aic7xxx.seq
 
 $(aic7xxx-gen-y): $(obj)/aic7xxx_seq.h
 else
@@ -72,14 +72,14 @@ aicasm-79xx-opts-$(CONFIG_AIC79XX_REG_PRETTY_PRINT) := \
 
 ifeq ($(CONFIG_AIC79XX_BUILD_FIRMWARE),y)
 $(obj)/aic79xx_seq.h: $(src)/aic79xx.seq $(src)/aic79xx.reg $(obj)/aicasm/aicasm
-   $(obj)/aicasm/aicasm -I$(src) -r $(obj)/aic79xx_reg.h \
+   $(obj)/aicasm/aicasm -I$(srctree)/$(src) -r $(obj)/aic79xx_reg.h \
  $(aicasm-79xx-opts-y) -o $(obj)/aic79xx_seq.h \
- $(src)/aic79xx.seq
+ $(srctree)/$(src)/aic79xx.seq
 
 $(aic79xx-gen-y): $(obj)/aic79xx_seq.h
 else
 $(obj)/aic79xx_reg_print.c: $(src)/aic79xx_reg_print.c_shipped
 endif
 
-$(obj)/aicasm/aicasm: $(src)/aicasm/*.[chyl]
-   $(MAKE) -C $(src)/aicasm
+$(obj)/aicasm/aicasm: $(srctree)/$(src)/aicasm/*.[chyl]
+   $(MAKE) -C $(srctree)/$(src)/aicasm OUTDIR=$(shell pwd)/$(obj)/aicasm/
diff --git a/drivers/scsi/aic7xxx/aicasm/Makefile b/drivers/scsi/aic7xxx/aicasm/Makefile
index b98c5c1056c3..45e2d49c1fff 100644
--- a/drivers/scsi/aic7xxx/aicasm/Makefile
+++ b/drivers/scsi/aic7xxx/aicasm/Makefile
@@ -1,19 +1,21 @@
 PROG=  aicasm
 
+OUTDIR ?= ./
+
 .SUFFIXES= .l .y .c .h
 
 CSRCS= aicasm.c aicasm_symbol.c
 YSRCS= aicasm_gram.y aicasm_macro_gram.y
 LSRCS= aicasm_scan.l aicasm_macro_scan.l
 
-GENHDRS=   aicdb.h $(YSRCS:.y=.h)
-GENSRCS=   $(YSRCS:.y=.c) $(LSRCS:.l=.c)
+GENHDRS=   $(addprefix ${OUTDIR}/,aicdb.h $(YSRCS:.y=.h))
+GENSRCS=   $(addprefix ${OUTDIR}/,$(YSRCS:.y=.c) $(LSRCS:.l=.c))
 
 SRCS=  ${CSRCS} ${GENSRCS}
 LIBS=  -ldb
 clean-files:= ${GENSRCS} ${GENHDRS} $(YSRCS:.y=.output) $(PROG)
 # Override default kernel CFLAGS.  This is a userland app.
-AICASM_CFLAGS:= -I/usr/include -I.
+AICASM_CFLAGS:= -I/usr/include -I. -I$(OUTDIR)
 LEX= flex
 YACC= bison
 YFLAGS= -d
@@ -32,22 +34,25 @@ YFLAGS+= -t -v
 LFLAGS= -d
 endif
 
-$(PROG):  ${GENHDRS} $(SRCS)
-   $(AICASM_CC) $(AICASM_CFLAGS) $(SRCS) -o $(PROG) $(LIBS)
+$(PROG):  $(OUTDIR) ${GENHDRS} $(SRCS)
+   $(AICASM_CC) $(AICASM_CFLAGS) $(SRCS) -o $(OUTDIR)/$(PROG) $(LIBS)
+
+$(OUTDIR):
+   mkdir -p $(OUTDIR)
 
-aicdb.h:
+$(OUTDIR)/aicdb.h:
@if [ -e "/usr/include/db4/db_185.h" ]; then\
-   echo "#include " > aicdb.h;   \
+   echo "#include " > $@;\
 elif [ -e "/usr/include/db3/db_185.h" ]; then  \
-   echo "#include " > aicdb.h;   \
+   echo "#include " > $@;\
 elif [ -e "/usr/include/db2/db_185.h" ]; then  \
-   echo "#include " > aicdb.h;   \
+   echo "#include " > $@;\
 elif [ -e "/usr/include/db1/db_185.h" ]; then  \
-   echo "#include " > aicdb.h;   \
+   echo "#include " > $@;\
 elif [ -e "/usr/include/db/db_185.h" ]; then   \
-   echo "#include " > aicdb.h;\
+   echo "#include " > $@; \
 elif [ -e "/usr/include/db_185.h" ]; then  \
-   echo "#include " > aicdb.h;   \
+   echo "#include " > $@;\
 else   \
echo "*** Install db development libraries";\
 fi
@@ -58,23 +63,23 @@ $(PROG):  ${GENHDRS} $(SRCS)
 # Create a dependency chain in generated files
 # to avoid concurrent invocations of the single
 # rule that builds them all.
-aicasm_gram.c: aicasm_gram.h
-aicasm_gram.c aicasm_gram.h: aicasm_gram.y
+$(OUTDIR)/aicasm_gram.c: $(OUTDIR)/aicasm_gram.h
+$(OUTDIR)/aicasm_gram.c $(OUTDIR)/aicasm_gram.h: aicasm_gram.y
$(YACC) $(YFLAGS) -b $(<:.y=) $<
-   mv $(<:.y=).tab.c $(<:.y=.c)
-   mv $(<:.y=).tab.h $(<:.y=.h)
+   mv $(<:.y=).tab.c 

Re: [kernel-hardening] [PATCH v5 3/3] x86/refcount: Implement fast refcount overflow protection

2017-06-28 Thread Li Kun

Hi Kees,


On 2017/5/31 5:39, Kees Cook wrote:

This protection is a modified version of the x86 PAX_REFCOUNT defense
from PaX/grsecurity. This speeds up the refcount_t API by duplicating
the existing atomic_t implementation with a single instruction added to
detect if the refcount has wrapped past INT_MAX (or below 0) resulting
in a negative value, where the handler then restores the refcount_t to
INT_MAX or saturates to INT_MIN / 2. With this overflow protection, the
use-after-free following a refcount_t wrap is blocked from happening,
avoiding the vulnerability entirely.

While this defense only perfectly protects the overflow case, as that
can be detected and stopped before the reference is freed and left to be
abused by an attacker, it also notices some of the "inc from 0" and "below
0" cases. However, these only indicate that a use-after-free has already
happened. Such notifications are likely avoidable by an attacker that has
already exploited a use-after-free vulnerability, but it's better to have
them than allow such conditions to remain universally silent.

On overflow detection (actually "negative value" detection), the refcount
value is reset to INT_MAX, the offending process is killed, and a report
and stack trace are generated. This allows the system to attempt to
keep operating. In the case of a below-zero decrement or other negative
value results, the refcount is saturated to INT_MIN / 2 to keep it from
reaching zero again. (For the INT_MAX reset, another option would be to
choose (INT_MAX - N) with some small N to provide some headroom for
legitimate users of the reference counter.)

On the matter of races, since the entire range beyond INT_MAX but before 0
is negative, every inc will trap, leaving no overflow-only race condition.

As for performance, this implementation adds a single "js" instruction to
the regular execution flow of a copy of the regular atomic_t operations.
Since this is a forward jump, it is by default the non-predicted path,
which will be reinforced by dynamic branch prediction. The result is
this protection having no measurable change in performance over standard
atomic_t operations. The error path, located in .text.unlikely, saves
the refcount location and then uses UD0 to fire a refcount exception
handler, which resets the refcount, reports the error, marks the process
to be killed, and returns to regular execution. This keeps the changes to
.text size minimal, avoiding return jumps and open-coded calls to the
error reporting routine.

Assembly comparison:

atomic_inc
.text:
81546149:   f0 ff 45 f4 lock incl -0xc(%rbp)

refcount_inc
.text:
81546149:   f0 ff 45 f4 lock incl -0xc(%rbp)
8154614d:   0f 88 80 d5 17 00   js 816c36d3
...
.text.unlikely:
816c36d3:   48 8d 4d f4 lea-0xc(%rbp),%rcx
816c36d7:   0f ff   (bad)

Thanks to PaX Team for various suggestions for improvement.

Signed-off-by: Kees Cook 
Reviewed-by: Josh Poimboeuf 
---
  arch/Kconfig|  9 +
  arch/x86/Kconfig|  1 +
  arch/x86/include/asm/asm.h  |  6 +++
  arch/x86/include/asm/refcount.h | 87 +
  arch/x86/mm/extable.c   | 40 +++
  include/linux/kernel.h  |  6 +++
  include/linux/refcount.h|  4 ++
  kernel/panic.c  | 22 +++
  8 files changed, 175 insertions(+)
  create mode 100644 arch/x86/include/asm/refcount.h

diff --git a/arch/Kconfig b/arch/Kconfig
index fba3bf186728..e9445ac0e899 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -867,6 +867,15 @@ config STRICT_MODULE_RWX
  config ARCH_WANT_RELAX_ORDER
bool
  
+config ARCH_HAS_REFCOUNT
+   bool
+   help
+ An architecture selects this when it has implemented refcount_t
+ using primitives that provide a faster runtime at the expense
+ of some full refcount state checks. The refcount overflow condition,
+ however, must be retained. Catching overflows is the primary
+ security concern for protecting against bugs in reference counts.
+
  config REFCOUNT_FULL
bool "Perform full reference count validation at the expense of speed"
help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cd18994a9555..65525f76b27c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -54,6 +54,7 @@ config X86
select ARCH_HAS_KCOVif X86_64
select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_PMEM_APIif X86_64
+   select ARCH_HAS_REFCOUNT
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 7a9df3beb89b..676ee5807d86 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -74,6 +74,9 @@
  # define _ASM_EXTABLE_EX(from, to)   
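[Editor's note: as an aside, here is a rough generic-C approximation of the scheme the commit message describes (illustrative only; the real patch emits a single x86 "js" instruction after the atomic op, handles the trap out of line, and also kills the offending process).]

	/* Sketch only; kernel context assumed (atomic_t, WARN_ONCE, INT_MAX). */
	static inline void refcount_inc_sketch(atomic_t *r)
	{
		int new = atomic_inc_return(r);

		/* Every value past INT_MAX is negative, so a wrap (or an
		 * increment from an already-negative value) lands here. */
		if (unlikely(new < 0)) {
			atomic_set(r, INT_MAX);	/* pin it so it cannot reach zero */
			WARN_ONCE(1, "refcount overflow detected\n");
		}
	}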

Re: [linux-next][PATCH] usb: dwc3: omap: remove IRQ_NOAUTOEN used with shared irq

2017-06-28 Thread Vignesh R


On Thursday 29 June 2017 05:01 AM, Strashko, Grygorii wrote:
> IRQ_NOAUTOEN can't be used with shared IRQs, and the kernel will now trigger
> a warning if that happens, since commit 04c848d39879 ("genirq: Warn when
> IRQ_NOAUTOEN is used with shared interrupts"). And this is the case for
> the OMAP DWC3 driver.
> 
> Hence, remove IRQ_NOAUTOEN flag and instead call disable_irq() before
> disabling PM runtime in probe error path handling.

Or, how about requesting the irq at the end of probe after extcon
registration?

> 
> Fixes: 12a7f17fac5b ("usb: dwc3: omap: fix race of pm runtime with...")
> Signed-off-by: Grygorii Strashko 
> ---
>  drivers/usb/dwc3/dwc3-omap.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c
> index 9892650..cd9bef5 100644
> --- a/drivers/usb/dwc3/dwc3-omap.c
> +++ b/drivers/usb/dwc3/dwc3-omap.c
> @@ -512,7 +512,6 @@ static int dwc3_omap_probe(struct platform_device *pdev)
>  
>   /* check the DMA Status */
>   reg = dwc3_omap_readl(omap->base, USBOTGSS_SYSCONFIG);
> - irq_set_status_flags(omap->irq, IRQ_NOAUTOEN);
>   ret = devm_request_threaded_irq(dev, omap->irq, dwc3_omap_interrupt,
>   dwc3_omap_interrupt_thread, IRQF_SHARED,
>   "dwc3-omap", omap);
> @@ -533,10 +532,10 @@ static int dwc3_omap_probe(struct platform_device *pdev)
>   }
>  
>   dwc3_omap_enable_irqs(omap);
> - enable_irq(omap->irq);
>   return 0;
>  
>  err1:
> + disable_irq(omap->irq);
>   pm_runtime_put_sync(dev);
>   pm_runtime_disable(dev);
>  
> 

-- 
Regards
Vignesh


[PATCH] hashtable: remove repeated phrase from a comment

2017-06-28 Thread Jakub Kicinski
"in a rcu enabled hashtable" is repeated twice in a comment.

Signed-off-by: Jakub Kicinski 
---
I'm not sure who would take this :S

 include/linux/hashtable.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h
index 661e5c2a8e2a..082dc1bd0801 100644
--- a/include/linux/hashtable.h
+++ b/include/linux/hashtable.h
@@ -167,7 +167,6 @@ static inline void hash_del_rcu(struct hlist_node *node)
 /**
  * hash_for_each_possible_rcu - iterate over all possible objects hashing to 
the
  * same bucket in an rcu enabled hashtable
- * in a rcu enabled hashtable
  * @name: hashtable to iterate
  * @obj: the type * to use as a loop cursor for each entry
  * @member: the name of the hlist_node within the struct
-- 
2.11.0



Re: [PATCH] cpufreq: Find transition latency dynamically

2017-06-28 Thread Viresh Kumar
On 02-06-17, 16:59, Viresh Kumar wrote:
> The transition_latency_ns represents the maximum time it can take for
> the hardware to switch from/to any frequency for a CPU.
> 
> The transition_latency_ns is used currently for two purposes:
> 
> o To check if the hardware latency is over the maximum allowed for a
>   governor (only for ondemand and conservative (why not schedutil?)) and
>   to decide if the governor can be used or not.
> 
> o To calculate the sampling_rate or rate_limit for the governors by
>   multiplying transition_latency_ns with a constant.
> 
> The platform drivers can also set this value to CPUFREQ_ETERNAL if they
> don't know this number and in that case we disallow use of ondemand and
> conservative governors as the latency would be higher than the maximum
> allowed for the governors.
> 
> In many cases this number is forged by the driver authors to get the
> default sampling rate to a desired value. In any case, the actual latency
> can differ from what is received from the hardware designers.
> 
> On top of that, what is provided by the drivers is most likely the time it
> takes the hardware to change frequency, which doesn't account for the
> software overhead involved.
> 
> In order to have guarantees about this number, this patch tries to
> calculate the latency dynamically at cpufreq driver registration time by
> first switching to min frequency, then to the max and finally back to
> the initial frequency. And the maximum of all three is used as the
> target_latency. Specifically the time it takes to go from min to max
> frequency (when the software runs the slowest) should be good enough,
> and even if there is a delta involved then it shouldn't be a lot.
> 
> For now this patch limits this feature to platforms which have set
> the transition latency to CPUFREQ_ETERNAL. Maybe we can convert everyone
> to use it in the future, but let's see.
> 
> This is tested on the ARM64 HiKey platform, which currently sets
> "clock-latency" to 500 us in DT, while with this patch the actual
> value increased to 800 us.
> 
> Signed-off-by: Viresh Kumar 
> ---
>  drivers/cpufreq/cpufreq.c | 63 
> +++
>  1 file changed, 63 insertions(+)

Hi Rafael,

Any inputs on this one ?

-- 
viresh
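[Editor's note: for reference, a rough sketch of the measurement idea described in the patch above (illustrative only; the function name is made up and the real patch in drivers/cpufreq/cpufreq.c takes more care with locking and error handling).]

	static u64 cpufreq_measure_latency_ns(struct cpufreq_policy *policy)
	{
		unsigned int initial = policy->cur;
		unsigned int freqs[] = { policy->min, policy->max, initial };
		u64 max_ns = 0;
		int i;

		/* Switch min -> max -> initial and keep the worst-case time. */
		for (i = 0; i < ARRAY_SIZE(freqs); i++) {
			ktime_t start = ktime_get();

			__cpufreq_driver_target(policy, freqs[i], CPUFREQ_RELATION_L);
			max_ns = max(max_ns, (u64)ktime_to_ns(ktime_sub(ktime_get(), start)));
		}

		return max_ns;	/* candidate transition latency, in ns */
	}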


Re: [PATCH v6 1/6] pinctrl: qcom: Add ipq8074 pinctrl driver

2017-06-28 Thread Bjorn Andersson
On Fri 09 Jun 02:41 PDT 2017, Varadarajan Narayanan wrote:

> Add initial pinctrl driver to support pin configuration with
> pinctrl framework for ipq8074.
> 
> Acked-by: Rob Herring  (bindings)
> Signed-off-by: Manoharan Vijaya Raghavan 
> Signed-off-by: Varadarajan Narayanan 

Sorry for the delay, I think this looks good.

Acked-by: Bjorn Andersson 

Regards,
Bjorn


Re: [PATCH v6 6/6] arm64: defconfig: Enable qcom ipq8074 clock and pinctrl

2017-06-28 Thread Bjorn Andersson
On Fri 09 Jun 02:41 PDT 2017, Varadarajan Narayanan wrote:

> From: Abhishek Sahu 
> 
> These configs are required for booting kernel in qcom
> ipq8074 boards.
> 
> Signed-off-by: Abhishek Sahu 
> Signed-off-by: Varadarajan Narayanan 

Acked-by: Bjorn Andersson 

Regards,
Bjorn


Re: [PATCH] futex: avoid undefined behaviour when shift exponent is negative

2017-06-28 Thread hpa
On June 28, 2017 7:12:04 PM PDT, zhong jiang  wrote:
>On 2017/6/29 5:43, h...@zytor.com wrote:
>> On June 27, 2017 9:35:10 PM PDT, zhong jiang 
>wrote:
>>> Hi,  Ingo
>>>
>>> Thank you for the comment.
>>> On 2017/6/22 0:40, Ingo Molnar wrote:
 * zhong jiang  wrote:

> when the shift exponent is negative, a left shift always yields zero;
> therefore, we
> modify the logic to avoid the warning.
>
> Signed-off-by: zhong jiang 
> ---
>  arch/x86/include/asm/futex.h | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/futex.h
>>> b/arch/x86/include/asm/futex.h
> index b4c1f54..2425fca 100644
> --- a/arch/x86/include/asm/futex.h
> +++ b/arch/x86/include/asm/futex.h
> @@ -49,8 +49,12 @@ static inline int futex_atomic_op_inuser(int
>>> encoded_op, u32 __user *uaddr)
>   int cmparg = (encoded_op << 20) >> 20;
>   int oldval = 0, ret, tem;
>  
> - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
> - oparg = 1 << oparg;
> + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
> + if (oparg >= 0)
> + oparg = 1 << oparg;
> + else
> + oparg = 0;
> + }
 Could we avoid all these complications by using an unsigned type?
>>> I think it is not feasible.  A negative shift exponent can legitimately
>>> occur and is reasonable;
>>>  as in the above case, a negative oparg is common.
>>>
>>> I think it can be avoided by the following change.
>>>
>>> diff --git a/arch/x86/include/asm/futex.h
>>> b/arch/x86/include/asm/futex.h
>>> index b4c1f54..3205e86 100644
>>> --- a/arch/x86/include/asm/futex.h
>>> +++ b/arch/x86/include/asm/futex.h
>>> @@ -50,7 +50,7 @@ static inline int futex_atomic_op_inuser(int
>>> encoded_op, u32 __user *uaddr)
>>>int oldval = 0, ret, tem;
>>>
>>>if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
>>> -   oparg = 1 << oparg;
>>> +   oparg = safe_shift(1, oparg);
>>>
>>>if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
>>>return -EFAULT;
>>> diff --git a/drivers/video/fbdev/core/fbmem.c
>>> b/drivers/video/fbdev/core/fbmem.c
>>> index 069fe79..b4edda3 100644
>>> --- a/drivers/video/fbdev/core/fbmem.c
>>> +++ b/drivers/video/fbdev/core/fbmem.c
>>> @@ -190,11 +190,6 @@ char* fb_get_buffer_offset(struct fb_info
>*info,
>>> struct fb_pixmap *buf, u32 size
>>>
>>> #ifdef CONFIG_LOGO
>>>
>>> -static inline unsigned safe_shift(unsigned d, int n)
>>> -{
>>> -   return n < 0 ? d >> -n : d << n;
>>> -}
>>> -
>>> static void fb_set_logocmap(struct fb_info *info,
>>>   const struct linux_logo *logo)
>>> {
>>> diff --git a/include/linux/kernel.h b/include/linux/kernel.h
>>> index d043ada..f3b8856 100644
>>> --- a/include/linux/kernel.h
>>> +++ b/include/linux/kernel.h
>>> @@ -841,6 +841,10 @@ static inline void ftrace_dump(enum
>>> ftrace_dump_mode oops_dump_mode) { }
>>>  */
>>> #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
>>>
>>> +static inline unsigned safe_shift(unsigned d, int n)
>>> +{
>>> +   return n < 0 ? d >> -n : d << n;
>>> +}
>>>
>>> Thanks
>>> zhongjiang
>>>
 Thanks,

Ingo

 .

>> What makes it reasonable?  It is totally ill-defined and doesn't do
>anything useful now?
> Thank you for the comments.
> 
>Maybe I misunderstood the meaning. I tested the negative cases on x86; all
>cases are zero, so that is how I came to this conclusion.
> 
>zj.c:15:8: warning: left shift count is negative
>[-Wshift-count-negative]
>  j = 1 << -2048;
>^
>[root@localhost zhongjiang]# ./zj
>j = 0
>j.c:15:8: warning: left shift count is negative
>[-Wshift-count-negative]
>  j = 1 << -2047;
>^
>[root@localhost zhongjiang]# ./zj
>j = 0
>
>I also insmod'ed a module into the kernel to test these cases; all of the
>results are zero.
>
>I wonder whether I am missing some point. Could you point it out to me,
>please?
>
>Thanks
>zhongjiang
> 
> 

When you use compile-time constants, the compiler generates the value at 
compile time, which can be totally different.
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.
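[Editor's note: to make the compile-time vs. runtime point concrete, a small userspace test (illustrative only) that feeds safe_shift() a runtime-negative exponent, which the compiler cannot fold away.]

	#include <stdio.h>

	static inline unsigned safe_shift(unsigned d, int n)
	{
		return n < 0 ? d >> -n : d << n;
	}

	int main(void)
	{
		int oparg = -3;	/* runtime value, so no -Wshift-count-negative */

		/* "1 << oparg" would be undefined behaviour for negative oparg;
		 * safe_shift() turns it into a right shift and yields 0 here. */
		printf("safe_shift(1, %d) = %u\n", oparg, safe_shift(1, oparg));
		printf("safe_shift(1, 4)  = %u\n", safe_shift(1, 4));
		return 0;
	}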


Re: [PATCH v3 2/3] dt-bindings: input: Add R_LRADC support for A83T

2017-06-28 Thread Ziping Chen
2017-06-28 1:31 GMT+08:00 Maxime Ripard :
> On Tue, Jun 27, 2017 at 11:18:17PM +0800, Ziping Chen wrote:
>> 2017-06-27 1:15 GMT+08:00 Maxime Ripard :
>> > Hi,
>> >
>> > On Sat, Jun 24, 2017 at 10:45:14AM +0800, Ziping Chen wrote:
>> >> From: Ziping Chen 
>> >>
>> >> Allwinner A83T SoC has a low res adc like the one
>> >> in Allwinner A10 SoC.
>> >>
>> >> Add binding for it.
>> >>
>> >> Signed-off-by: Ziping Chen 
>> >> Acked-by: Rob Herring 
>> >> ---
>> >>  Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt | 6 --
>> >>  1 file changed, 4 insertions(+), 2 deletions(-)
>> >>
>> >> diff --git a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt 
>> >> b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
>> >> index 4357e498ef04..525d85e3043f 100644
>> >> --- a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
>> >> +++ b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
>> >> @@ -2,12 +2,14 @@ Allwinner sun4i low res adc attached tablet keys
>> >>  
>> >>
>> >>  Required properties:
>> >> - - compatible: "allwinner,sun4i-a10-lradc-keys"
>> >> + - compatible: should be one of the following string:
>> >> + "allwinner,sun4i-a10-lradc-keys"
>> >> + "allwinner,sun8i-a83t-r-lradc-keys"
>> >
>> > This doesn't really have anything related to keys, and can be used
>> > purely as an ADC.
>> >
>> > I know this is the compatible that was used for the A10, but I'd
>> > rather drop the keys for the the new compatible.
>> >
>>
>> LRADC is named KEYADC in some other SoCs' User Manual(such as R40 User
>> Manual V1.0), thus LRADC is related to keys.
>
> It's still called LRADC on the A83T, so we'll use that name. If we
> ever need another compatible for the R40, then yes, keyadc would make
> sense in that case.

Hi,

So... should we use "sun4i-lradc" to replace "sun4i-lradc-keys"?

I think they should be kept separate: one is for the input subsystem, the other
is for the IIO subsystem.

>
> Maxime
>
> --
> Maxime Ripard, Free Electrons
> Embedded Linux and Kernel engineering
> http://free-electrons.com


Re: [PATCH] fs: ext4: inode->i_generation not assigned 0.

2017-06-28 Thread William Koh
On 6/28/17, 7:32 PM, "Andreas Dilger"  wrote:

On Jun 28, 2017, at 4:06 PM, Kyungchan Koh  wrote:
> 
> In fs/ext4/super.c, the function ext4_nfs_get_inode takes as input
> "generation" that can be used to specify the generation of the inode to
> be returned. When 0 is given as input, then inodes of any generation can
> be returned. Therefore, generation 0 is a special case that should be
> avoided when assigning generation to inodes.

I'd agree with this change to avoid assigning generation == 0 to real 
inodes.

Also, the separate question arises about whether we need to allow file handle
lookup with generation == 0?  That allows FID guessing easily, while requiring
a non-zero generation makes that a lot harder.

What are the cases where generation == 0 are used?

Honestly, I’m not too sure. I just noticed that generation 0 was a special
case from reading the code.

> A new inline function, ext4_inode_set_gen, will take care of the
> problem.  Now, inodes cannot have a generation of 0, so this patch fixes
> the issue.
> 
> Signed-off-by: Kyungchan Koh 
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 3219154..74c6677 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1549,6 +1549,14 @@ static inline int ext4_valid_inum(struct 
super_block *sb, unsigned long ino)
>ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
> }
> 
> +static inline void ext4_inode_set_gen(struct inode *inode,
> +   struct ext4_sb_info *sbi)
> +{
> + inode->i_generation = sbi->s_next_generation++;
> + if (!inode->i_generation)

This should be marked "unlikely()" since it happens at most once every 4B
file creations (though likely even less since it is unlikely that so many
files will be created in a single mount).

Got it.

> + inode->i_generation = sbi->s_next_generation++;
> +}
> +
> 
> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
> index 98ac2f1..d33f6f0 100644
> --- a/fs/ext4/ialloc.c
> +++ b/fs/ext4/ialloc.c
> @@ -1072,7 +1072,7 @@ struct inode *__ext4_new_inode(handle_t *handle, 
struct inode   }
>   spin_lock(&sbi->s_next_gen_lock);
> - inode->i_generation = sbi->s_next_generation++;
> + ext4_inode_set_gen(inode, sbi);
>   spin_unlock(&sbi->s_next_gen_lock);
> 
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 0c21e22..d52a467 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -160,8 +160,8 @@ static long swap_inode_boot_loader(struct super_block 
*sb,
> 
>   spin_lock(&sbi->s_next_gen_lock);
> - inode->i_generation = sbi->s_next_generation++;
> - inode_bl->i_generation = sbi->s_next_generation++;
> + ext4_inode_set_gen(inode, sbi);
> + ext4_inode_set_gen(inode_bl, sbi);
>   spin_unlock(&sbi->s_next_gen_lock);
> 


Cheers, Andreas

This is applicable to many fs, including ext2, ext4, exofs, jfs, and f2fs.
Therefore, a shared helper in linux/fs.h will allow for easy changes
in all fs. Is there any reason that might be a bad idea?

Best,
Kyungchan Koh 
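[Editor's note: for illustration, a sketch of the shared helper being floated here (hypothetical name and location, not an existing VFS API; it also folds in the unlikely() hint Andreas asked for). A caller would keep holding its own lock, s_next_gen_lock in the ext4 case, around the call exactly as the patch does.]

	/* Hypothetical helper, e.g. in include/linux/fs.h. */
	static inline void inode_set_next_generation(struct inode *inode, u32 *next_gen)
	{
		inode->i_generation = (*next_gen)++;

		/* Skip the reserved "match any generation" value 0; this fires
		 * at most once every 4B creations, hence unlikely(). */
		if (unlikely(inode->i_generation == 0))
			inode->i_generation = (*next_gen)++;
	}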







Re: [PATCH v6 5/6] arm64: dts: Add ipq8074 SoC and HK01 board support

2017-06-28 Thread Bjorn Andersson
On Fri 09 Jun 02:41 PDT 2017, Varadarajan Narayanan wrote:
> diff --git a/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts 
> b/arch/arm64/boot/dts/qcom/ipq8074-hk01.dts
[..]
> +
> + chosen {
> + bootargs = "root=/dev/ram0 rw init=/init";

As far as I know you can omit both root= and init= from this.

> + stdout-path = "serial0";
> + };
> +

Apart from that,

Acked-by: Bjorn Andersson 

Regards,
Bjorn


Re: [linux-sunxi] Re: [PATCH v3 2/3] dt-bindings: input: Add R_LRADC support for A83T

2017-06-28 Thread Ziping Chen
2017-06-28 1:36 GMT+08:00 Maxime Ripard :
> Hi,
>
> On Tue, Jun 27, 2017 at 11:29:10PM +0800, icen...@aosc.io wrote:
>> Maxime, here's another problem: if we have already a GP LRADC driver,
>> how can we tell the kernel to use it as IIO ADC rather than keys?
>
> The GPADC IIO driver is not for the LRADC driver, but the GPADC /
> temperature sensor.
>
> We used to have an LRADC IIO driver in the CHIP BSP written by Alex
> (in CC):
> https://github.com/NextThingCo/CHIP-linux/commit/8675b761c54be73dc7cc0113209f02e10cc63a27
>
> But he never mainlined it.
>
>> Should we introduce a new property for this once ready?
>
> We need to keep the current binding. We can just check for the
> presence or not of child nodes to see if it has some keys, and we'd
> need an IIO-to-input driver that is yet to be written.
>

Yes, then we need an iio-to-input driver...
So... can the driver (A83T LRADC keys) be applied now,
or should we wait for the iio-to-input driver?

Thanks
Ziping

> Maxime
>
> --
> Maxime Ripard, Free Electrons
> Embedded Linux and Kernel engineering
> http://free-electrons.com


linux-next: manual merge of the usb tree with the uuid tree

2017-06-28 Thread Stephen Rothwell
Hi Greg,

Today's linux-next merge of the usb tree got a conflict in:

  drivers/usb/misc/ucsi.c

between commit:

  94116f8126de ("ACPI: Switch to use generic guid_t in acpi_evaluate_dsm()")

from the uuid tree and commit:

  8243edf44152 ("usb: typec: ucsi: Add ACPI driver")

from the usb tree.

I fixed it up (the latter deleted the file, so I did that - I think the
new code is ok with the new guid handling) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging.  You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.

Almost right :-(

I got the following error:

drivers/usb/typec/ucsi/ucsi_acpi.c: In function 'ucsi_acpi_dsm':
drivers/usb/typec/ucsi/ucsi_acpi.c:33:48: error: passing argument 2 of 'acpi_evaluate_dsm' from incompatible pointer type [-Werror=incompatible-pointer-types]
  obj = acpi_evaluate_dsm(ACPI_HANDLE(ua->dev), ua->uuid.b, 1, func,
^
In file included from include/linux/acpi.h:44:0,
 from drivers/usb/typec/ucsi/ucsi_acpi.c:14:
include/acpi/acpi_bus.h:65:20: note: expected 'const guid_t * {aka const struct <anonymous> *}' but argument is of type '__u8 * {aka unsigned char *}'
 union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,  
^

I have applied the following merge fix patch (the first hunk is
probably not strictly necessary):

From: Stephen Rothwell 
Date: Thu, 29 Jun 2017 14:36:10 +1000
Subject: [PATCH] usb: typec: fix for "ACPI: Switch to use generic guid_t in
 acpi_evaluate_dsm()"

Signed-off-by: Stephen Rothwell 
---
 drivers/usb/typec/ucsi/ucsi_acpi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index 3fb2e48e1c91..7b7c9373a9b6 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -23,14 +23,14 @@ struct ucsi_acpi {
struct device *dev;
struct ucsi *ucsi;
struct ucsi_ppm ppm;
-   uuid_le uuid;
+   guid_t uuid;
 };
 
 static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func)
 {
union acpi_object *obj;
 
-   obj = acpi_evaluate_dsm(ACPI_HANDLE(ua->dev), ua->uuid.b, 1, func,
+   obj = acpi_evaluate_dsm(ACPI_HANDLE(ua->dev), &ua->uuid, 1, func,
NULL);
if (!obj) {
dev_err(ua->dev, "%s: failed to evaluate _DSM %d\n",
-- 
2.11.0

-- 
Cheers,
Stephen Rothwell


Re: [kernel-sched-cputime] question about probable bug in cputime_adjust()

2017-06-28 Thread Frans Klaver


On 29 June 2017 01:57:19 CEST, "Gustavo A. R. Silva"  
wrote:
 --- a/kernel/sched/cputime.c
 +++ b/kernel/sched/cputime.c
 @@ -637,9 +637,10 @@ static void cputime_adjust(struct task_cputime
>*curr,
  *= (rtime_i+1 - rtime_i) + utime_i
  *>= utime_i
  */
 -   if (stime < prev->stime)
 +   if (stime < prev->stime) {
 stime = prev->stime;
 -   utime = rtime - stime;
 +   utime = rtime - stime;
 +   }


 If you confirm this, I will send a patch in a full and proper form.

 I'd really appreciate your comments.
>>>
>>> If you do that, how would you meet the guarantee made in line 583?
>>>
>
>You are right, I see now.
>
>Then in this case the following patch would be the way to go:
>
>--- a/kernel/sched/cputime.c
>+++ b/kernel/sched/cputime.c
>@@ -615,10 +615,8 @@ static void cputime_adjust(struct task_cputime
>*curr,
>   * userspace. Once a task gets some ticks, the monotonicy code at
>  * 'update' will ensure things converge to the observed ratio.
>  */
>-   if (stime == 0) {
>-   utime = rtime;
>+   if (stime == 0)
> goto update;
>-   }
>
> if (utime == 0) {
> stime = rtime;
>
>
>but I think this one is even better:
>
>
>--- a/kernel/sched/cputime.c
>+++ b/kernel/sched/cputime.c
>@@ -615,19 +615,11 @@ static void cputime_adjust(struct task_cputime
>*curr,
>   * userspace. Once a task gets some ticks, the monotonicy code at
>  * 'update' will ensure things converge to the observed ratio.
>  */
>-   if (stime == 0) {
>-   utime = rtime;
>-   goto update;
>-   }
>-
>-   if (utime == 0) {
>+   if (stime != 0 && utime == 0)
> stime = rtime;
>-   goto update;
>-   }
>-
>-   stime = scale_stime(stime, rtime, stime + utime);
>+   else
>+   stime = scale_stime(stime, rtime, stime + utime);

I don't think it is better. The stime == 0 case is gone now, so scale_stime()
will be called in that case. This whole if/else block should only be executed
if stime != 0.
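[Editor's note: for reference, a sketch of the control flow Frans is describing (illustrative only; the real cputime_adjust() also has the 'update' label and the prev->stime/prev->utime monotonicity handling, which are omitted here).]

	/* Both special cases must bypass scale_stime(); only the
	 * remaining case gets scaled. */
	if (stime == 0) {
		utime = rtime;
	} else if (utime == 0) {
		stime = rtime;
	} else {
		stime = scale_stime(stime, rtime, stime + utime);
		utime = rtime - stime;
	}
	/* ...followed by the existing monotonicity/update logic... */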


[PATCH] staging: lustre: lnet: remove dead code

2017-06-28 Thread Dmitriy Cherkasov
Remove code which was permanently disabled with ifdefs.

This also resolves the following checkpatch warning which was
triggered by the dead code:

WARNING: space prohibited before semicolon

Signed-off-by: Dmitriy Cherkasov 
---
 drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h 
b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
index 5540de6..7c487fa 100644
--- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
+++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h
@@ -521,13 +521,7 @@ struct ksock_proto {
 
 static inline __u32 ksocknal_csum(__u32 crc, unsigned char const *p, size_t 
len)
 {
-#if 1
return crc32_le(crc, p, len);
-#else
-   while (len-- > 0)
-   crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff) ;
-   return crc;
-#endif
 }
 
 static inline int
-- 
1.9.1



Re: [PATCH] fs: ext4: inode->i_generation not assigned 0.

2017-06-28 Thread Darrick J. Wong
[add linux-xfs to cc]

On Thu, Jun 29, 2017 at 04:37:14AM +, William Koh wrote:
> On 6/28/17, 7:32 PM, "Andreas Dilger"  wrote:
> 
> On Jun 28, 2017, at 4:06 PM, Kyungchan Koh  wrote:
> > 
> > In fs/ext4/super.c, the function ext4_nfs_get_inode takes as input
> > "generation" that can be used to specify the generation of the inode to
> > be returned. When 0 is given as input, then inodes of any generation can
> > be returned. Therefore, generation 0 is a special case that should be
> > avoided when assigning generation to inodes.
> 
> I'd agree with this change to avoid assigning generation == 0 to real 
> inodes.
> 
> Also, the separate question arises about whether we need to allow file 
> handle
> lookup with generation == 0?  That allows FID guessing easily, while 
> requiring
> a non-zero generation makes that a lot harder.
> 
> What are the cases where generation == 0 are used?
> 
> Honestly, I’m not too sure. I just noticed that generation 0 was a special
> case from reading the code.
> 
> > A new inline function, ext4_inode_set_gen, will take care of the
> > problem.  Now, inodes cannot have a generation of 0, so this patch fixes
> > the issue.
> > 
> > Signed-off-by: Kyungchan Koh 
> > 
> > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> > index 3219154..74c6677 100644
> > --- a/fs/ext4/ext4.h
> > +++ b/fs/ext4/ext4.h
> > @@ -1549,6 +1549,14 @@ static inline int ext4_valid_inum(struct 
> super_block *sb, unsigned long ino)
> >  ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
> > }
> > 
> > +static inline void ext4_inode_set_gen(struct inode *inode,
> > + struct ext4_sb_info *sbi)
> > +{
> > +   inode->i_generation = sbi->s_next_generation++;
> > +   if (!inode->i_generation)
> 
> This should be marked "unlikely()" since it happens at most once every 4B
> file creations (though likely even less since it is unlikely that so many
> files will be created in a single mount).
> 
> Got it.
> 
> > +   inode->i_generation = sbi->s_next_generation++;
> > +}
> > +
> > 
> > diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
> > index 98ac2f1..d33f6f0 100644
> > --- a/fs/ext4/ialloc.c
> > +++ b/fs/ext4/ialloc.c
> > @@ -1072,7 +1072,7 @@ struct inode *__ext4_new_inode(handle_t *handle, 
> struct inode }
> > spin_lock(&sbi->s_next_gen_lock);
> > -   inode->i_generation = sbi->s_next_generation++;
> > +   ext4_inode_set_gen(inode, sbi);
> > spin_unlock(&sbi->s_next_gen_lock);
> > 
> > diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> > index 0c21e22..d52a467 100644
> > --- a/fs/ext4/ioctl.c
> > +++ b/fs/ext4/ioctl.c
> > @@ -160,8 +160,8 @@ static long swap_inode_boot_loader(struct 
> super_block *sb,
> > 
> > spin_lock(&sbi->s_next_gen_lock);
> > -   inode->i_generation = sbi->s_next_generation++;
> > -   inode_bl->i_generation = sbi->s_next_generation++;
> > +   ext4_inode_set_gen(inode, sbi);
> > +   ext4_inode_set_gen(inode_bl, sbi);
> > spin_unlock(&sbi->s_next_gen_lock);
> > 
> 
> 
> Cheers, Andreas
> 
> This is applicable to many fs, including ext2, ext4, exofs, jfs, and f2fs.
> Therefore, a shared helper in linux/fs.h will allow for easy changes
> in all fs. Is there any reason that might be a bad idea?

AFAICT, i_generation == 0 in XFS and btrfs is just as valid as any other
number.  There is no special casing of zero in either filesystem.

So now, my curiosity intrigued, I surveyed all the Linux filesystems
that can export to NFS.  I see that there are actually quite a few fs
(ext[2-4], exofs, efs, fat, jfs, f2fs, isofs, nilfs2, reiserfs, udf,
ufs) that treat zero as a special value meaning "ignore generation
check"; others (xfs, btrfs, fuse, ntfs, ocfs2) that don't consider zero
special and always require a match; and still others (affs, befs, ceph,
gfs2, jffs2, squashfs) that don't check at all.

That, to me, strongly suggests that more research is necessary to figure
out why some of the filesystems that support i_generation reserve zero
as a special value to disable generation checks and why others always
require an exact match.  Until we can recapture why things are the way
they are, it doesn't make much sense to have a helper that only applies
to half the filesystems.

Granted, the contents of a file handle are generally left up to the
individual filesystem, and the behaviors are very different, so I also
don't see that much value in hoisting i_generation updates to the VFS
level.

I guess it wouldn't really matter if XFS stopped writing i_generation =
0 onto disk, but I'm too curious about this odd difference in behavior
to let it go just yet. :)

--D

> 
> Best,
> Kyungchan Koh 
> 
> 
> 
> 

Re: [ANNOUNCE] v4.11.7-rt3

2017-06-28 Thread Mike Galbraith
On Thu, 2017-06-29 at 04:55 +0200, Mike Galbraith wrote:
> 
> cpus_allowed became cpus_mask.  Anything (crash.. hohum, yet again)
> that rummages around in the kernel's gizzard will have to adapt.

(wrt crash: nope, it doesn't care for a change)


Re: [PATCH 6/8] drm: Allow DSI devices to be registered before the host registers.

2017-06-28 Thread Archit Taneja



On 06/28/2017 01:28 AM, Eric Anholt wrote:

When a mipi_dsi_host is registered, the DT is walked to find any child
nodes with compatible strings.  Those get registered as DSI devices,
and most DSI panel drivers are mipi_dsi_drivers that attach to those nodes.

There is one special case currently, the adv7533 bridge, where the
bridge probes on I2C, and during the bridge attach step it looks up
the mipi_dsi_host and registers the mipi_dsi_device (for its own stub
mipi_dsi_driver).

For the Raspberry Pi panel, though, we also need to attach on I2C (our
control bus), but don't have a bridge driver.  The lack of a bridge's
attach() step like adv7533 uses means that we aren't able to delay the
mipi_dsi_device creation until the mipi_dsi_host is present.

To fix this, we extend mipi_dsi_device_register_full() to allow being
called with a NULL host, which puts the device on a queue waiting for
a host to appear.  When a new host is registered, we fill in the host
value and finish the device creation process.


This is quite a nice idea. The only thing that bothers me is that the
info.of_node usage varies between child nodes (mipi_dsi_devs) and non-child
nodes (i2c control bus).

For DSI children expressed in DT, the of_node in info holds the DT node
corresponding to the DSI child itself. For non-DT ones, this patch assumes
that info.of_node stores the DSI host DT node. I think it should be okay as
long as we mention the usage in a comment somewhere. The other option is to
have a new info.host_node field to keep track of the host DT node.

Thanks,
Archit
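[Editor's note: for illustration, a hypothetical caller of the new NULL-host path (assumed driver code, not part of this series; struct mipi_dsi_device_info field names as assumed here).]

	static int example_register_panel_dsi(struct device_node *host_node)
	{
		struct mipi_dsi_device_info info = {
			.type    = "example-panel",	/* made-up device name */
			.channel = 0,
			.node    = host_node,	/* DSI host DT node, per the
						 * info.of_node convention above */
		};
		struct mipi_dsi_device *dsi;

		/* With a NULL host the device is parked on unattached_device_list
		 * and completed later from mipi_dsi_host_register(). */
		dsi = mipi_dsi_device_register_full(NULL, &info);

		return PTR_ERR_OR_ZERO(dsi);
	}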



Signed-off-by: Eric Anholt 
---
  drivers/gpu/drm/drm_mipi_dsi.c | 49 --
  include/drm/drm_mipi_dsi.h |  3 +++
  2 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index 1160a579e0dc..9cdd68a7dc0d 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -45,6 +45,13 @@
   * subset of the MIPI DCS command set.
   */
  
+static DEFINE_MUTEX(host_lock);

+static LIST_HEAD(host_list);
+/* List of struct mipi_dsi_device which were registered while no host
+ * was available.
+ */
+static LIST_HEAD(unattached_device_list);
+
  static int mipi_dsi_device_match(struct device *dev, struct device_driver 
*drv)
  {
struct mipi_dsi_device *dsi = to_mipi_dsi_device(dev);
@@ -138,10 +145,12 @@ static struct mipi_dsi_device 
*mipi_dsi_device_alloc(struct mipi_dsi_host *host)
  
  	dsi->host = host;

	dsi->dev.bus = &mipi_dsi_bus_type;
-   dsi->dev.parent = host->dev;
	dsi->dev.type = &mipi_dsi_device_type;
  
-	device_initialize(&dsi->dev);

+   if (dsi->host) {
+   dsi->dev.parent = host->dev;
+   device_initialize(&dsi->dev);
+   }
  
  	return dsi;

  }
@@ -206,7 +215,7 @@ mipi_dsi_device_register_full(struct mipi_dsi_host *host,
  const struct mipi_dsi_device_info *info)
  {
struct mipi_dsi_device *dsi;
-   struct device *dev = host->dev;
+   struct device *dev = host ? host->dev : NULL;
int ret;
  
  	if (!info) {

@@ -230,11 +239,17 @@ mipi_dsi_device_register_full(struct mipi_dsi_host *host,
dsi->channel = info->channel;
strlcpy(dsi->name, info->type, sizeof(dsi->name));
  
-	ret = mipi_dsi_device_add(dsi);

-   if (ret) {
-   dev_err(dev, "failed to add DSI device %d\n", ret);
-   kfree(dsi);
-   return ERR_PTR(ret);
+   if (!dsi->host) {
+   mutex_lock(&host_lock);
+   list_add(&dsi->list, &unattached_device_list);
+   mutex_unlock(&host_lock);
+   } else {
+   ret = mipi_dsi_device_add(dsi);
+   if (ret) {
+   dev_err(dev, "failed to add DSI device %d\n", ret);
+   kfree(dsi);
+   return ERR_PTR(ret);
+   }
}
  
  	return dsi;

@@ -251,9 +266,6 @@ void mipi_dsi_device_unregister(struct mipi_dsi_device *dsi)
  }
  EXPORT_SYMBOL(mipi_dsi_device_unregister);
  
-static DEFINE_MUTEX(host_lock);

-static LIST_HEAD(host_list);
-
  /**
   * of_find_mipi_dsi_host_by_node() - find the MIPI DSI host matching a
   * device tree node
@@ -285,6 +297,7 @@ EXPORT_SYMBOL(of_find_mipi_dsi_host_by_node);
  int mipi_dsi_host_register(struct mipi_dsi_host *host)
  {
struct device_node *node;
+   struct mipi_dsi_device *dsi, *temp;
  
  	for_each_available_child_of_node(host->dev->of_node, node) {

/* skip nodes without reg property */
@@ -295,6 +308,20 @@ int mipi_dsi_host_register(struct mipi_dsi_host *host)
  
	mutex_lock(&host_lock);
	list_add_tail(&host->list, &host_list);
+
+   /* If any DSI devices were registered under our OF node, then
+* connect our host to it and probe them now.
+*/
+   list_for_each_entry_safe(dsi, temp, &unattached_device_list, list) {
+   if (of_get_parent(dsi->dev.of_node) == 

Re: [PATCH 1/2] drm/msm: gpu: don't abuse dma_alloc for non-DMA allocations

2017-06-28 Thread Bjorn Andersson
On Tue 20 Jun 13:16 PDT 2017, Arnd Bergmann wrote:

> In zap_shader_load_mdt(), we pass a pointer to a phys_addr_t
> into dmam_alloc_coherent, which the compiler warns about:
> 
> drivers/gpu/drm/msm/adreno/a5xx_gpu.c: In function 'zap_shader_load_mdt':
> drivers/gpu/drm/msm/adreno/a5xx_gpu.c:54:50: error: passing argument 3 of 
> 'dmam_alloc_coherent' from incompatible pointer type 
> [-Werror=incompatible-pointer-types]
> 
> The returned DMA address is later passed on to a function that
> takes a phys_addr_t, so it's clearly wrong to use the DMA
> mapping interface here: the memory may be uncached, or the
> address may be completely wrong if there is an IOMMU connected
> to the device.
> 
> My interpretation is that using dmam_alloc_coherent() had two
> purposes:
> 
>  a) get a chunk of consecutive memory that may be larger than
> the limit for kmalloc()
> 
>  b) use the devres infrastructure to simplify the unwinding
> in the error case.
> 
> I think ideally we'd use a devres-based version of
> alloc_pages_exact() here, but since that doesn't exist,
> let's use devm_get_free_pages() instead. This wastes a little
> memory as the size gets rounded up to a power of two, but
> is otherwise harmless. If we want to save memory here, calling
> devm_free_pages() to release the memory once it is no longer
> needed is probably better anyway.
> 
> Fixes: 7c65817e6d38 ("drm/msm: gpu: Enable zap shader for A5XX")
> Signed-off-by: Arnd Bergmann 

Acked-by: Bjorn Andersson 

Regards,
Bjorn

> ---
>  drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c 
> b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> index b4b54f1c24bc..eee9ac81aaa1 100644
> --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> @@ -51,11 +51,13 @@ static int zap_shader_load_mdt(struct device *dev, const 
> char *fwname)
>   }
>  
>   /* Allocate memory for the firmware image */
> - mem_region = dmam_alloc_coherent(dev, mem_size, &mem_phys, GFP_KERNEL);
> + mem_region = (void *)devm_get_free_pages(dev, GFP_KERNEL,
> + get_order(mem_size));
>   if (!mem_region) {
>   ret = -ENOMEM;
>   goto out;
>   }
> + mem_phys = virt_to_phys(mem_region);
>  
>   /* Load the rest of the MDT */
>   ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, mem_region, mem_phys,
> -- 
> 2.9.0
> 
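[Editor's note: for completeness, a sketch of the follow-up suggested in the commit message (illustrative only, reusing the variables of zap_shader_load_mdt(); not part of this patch) — freeing the staging buffer once the MDT has been loaded would give back the memory lost to the power-of-two rounding.]

	mem_region = (void *)devm_get_free_pages(dev, GFP_KERNEL,
						 get_order(mem_size));
	if (!mem_region)
		return -ENOMEM;
	mem_phys = virt_to_phys(mem_region);

	ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, mem_region, mem_phys,
			    mem_size);

	/* ...once the secure world has consumed the image... */
	devm_free_pages(dev, (unsigned long)mem_region);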


Re: linux-next: build warnings after merge of the scsi-mkp tree

2017-06-28 Thread Stephen Rothwell
Hi James,

This has now migrated to the scsi tree.

On Wed, 28 Jun 2017 15:55:10 +1000 Stephen Rothwell  
wrote:
>
> After merging the scsi-mkp tree, today's linux-next build
> (powerpc_ppc64_defconfig) produced these warnings:
> 
> In file included from include/linux/byteorder/big_endian.h:4:0,
>  from arch/powerpc/include/uapi/asm/byteorder.h:13,
>  from include/asm-generic/bitops/le.h:5,
>  from arch/powerpc/include/asm/bitops.h:246,
>  from include/linux/bitops.h:36,
>  from include/linux/kernel.h:10,
>  from include/asm-generic/bug.h:15,
>  from arch/powerpc/include/asm/bug.h:127,
>  from include/linux/bug.h:4,
>  from arch/powerpc/include/asm/mmu.h:125,
>  from arch/powerpc/include/asm/lppaca.h:36,
>  from arch/powerpc/include/asm/paca.h:21,
>  from arch/powerpc/include/asm/current.h:16,
>  from include/linux/sched.h:11,
>  from include/linux/blkdev.h:4,
>  from include/linux/blk-mq.h:4,
>  from drivers/scsi/qla2xxx/qla_nvme.h:10,
>  from drivers/scsi/qla2xxx/qla_nvme.c:7:
> drivers/scsi/qla2xxx/qla_nvme.c: In function 'qla2x00_start_nvme_mq':
> include/uapi/linux/byteorder/big_endian.h:32:26: warning: large integer 
> implicitly truncated to unsigned type [-Woverflow]
>  #define __cpu_to_le32(x) ((__force __le32)__swab32((x)))
>   ^
> include/linux/byteorder/generic.h:87:21: note: in expansion of macro 
> '__cpu_to_le32'
>  #define cpu_to_le32 __cpu_to_le32
>  ^
> drivers/scsi/qla2xxx/qla_nvme.c:444:27: note: in expansion of macro 
> 'cpu_to_le32'
> cont_pkt->entry_type = cpu_to_le32(CONTINUE_A64_TYPE);
>^
> drivers/scsi/qla2xxx/qla_nvme.c: At top level:
> drivers/scsi/qla2xxx/qla_nvme.c:667:13: warning: 
> 'qla_nvme_unregister_remote_port' defined but not used [-Wunused-function]
>  static void qla_nvme_unregister_remote_port(struct work_struct *work)
>  ^
> drivers/scsi/qla2xxx/qla_nvme.c:604:12: warning: 'qla_nvme_wait_on_rport_del' 
> defined but not used [-Wunused-function]
>  static int qla_nvme_wait_on_rport_del(fc_port_t *fcport)
> ^
> drivers/scsi/qla2xxx/qla_nvme.c:634:13: warning: 'qla_nvme_abort_all' defined 
> but not used [-Wunused-function]
>  static void qla_nvme_abort_all(fc_port_t *fcport)
>  ^
> 
> Introduced by commit
> 
>   e84067d74301 ("scsi: qla2xxx: Add FC-NVMe F/W initialization and transport 
> registration")

-- 
Cheers,
Stephen Rothwell
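[Editor's note: for anyone puzzled by the first warning above, a minimal illustration of the truncation (assumed field width, not the actual qla2xxx structures): on big-endian builds cpu_to_le32() byte-swaps the constant, and the swapped 32-bit value no longer fits into the 8-bit entry_type field.]

	/* Kernel build context assumed (u8, cpu_to_le32). */
	struct example_iocb {
		u8 entry_type;			/* 8-bit IOCB entry type */
	};

	static void example_fill(struct example_iocb *pkt)
	{
		/* On big-endian, cpu_to_le32(0x0A) == 0x0A000000, which cannot
		 * fit into u8: "large integer implicitly truncated". */
		pkt->entry_type = cpu_to_le32(0x0A);	/* 0x0A: made-up type code */
	}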


[PATCH] ASoC: rsnd: constify dev_pm_ops structures.

2017-06-28 Thread Arvind Yadav
dev_pm_ops are not supposed to change at runtime. All functions
working with dev_pm_ops provided by  work with const
dev_pm_ops. So mark the non-const structs as const.

File size before:
   text    data     bss     dec     hex filename
   8172     920       0    9092    2384 sound/soc/sh/rcar/core.o

File size after adding 'const':
   text    data     bss     dec     hex filename
   8364     728       0    9092    2384 sound/soc/sh/rcar/core.o

Signed-off-by: Arvind Yadav 
---
 sound/soc/sh/rcar/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c
index 1744015..e327436 100644
--- a/sound/soc/sh/rcar/core.c
+++ b/sound/soc/sh/rcar/core.c
@@ -1281,7 +1281,7 @@ static int rsnd_resume(struct device *dev)
return 0;
 }
 
-static struct dev_pm_ops rsnd_pm_ops = {
+static const struct dev_pm_ops rsnd_pm_ops = {
.suspend= rsnd_suspend,
.resume = rsnd_resume,
 };
-- 
1.9.1



Re: [PATCH] thermal/intel_powerclamp: add const to thermal_cooling_device_ops structure

2017-06-28 Thread Bhumika Goyal
On Thu, Jun 29, 2017 at 8:30 AM, Zhang Rui  wrote:
> On Wed, 2017-06-21 at 12:39 +0530, Bhumika Goyal wrote:
>> Declare thermal_cooling_device_ops structure as const as it is only
>> passed
>> as an argument to the function thermal_cooling_device_register and
>> this
>> argument is of type const. So, declare the structure as const.
>>
> I checked the source and the code is written in this way as well in
> some other drivers, why not fix them altogether?
>

Hey,

I have already sent the patches for the drivers requiring this change.
Some of them got applied as well.

Thanks,
Bhumika

> thanks,
> rui
>> Signed-off-by: Bhumika Goyal 
>> ---
>>  drivers/thermal/intel_powerclamp.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/thermal/intel_powerclamp.c
>> b/drivers/thermal/intel_powerclamp.c
>> index d718cd1..e4c68b7 100644
>> --- a/drivers/thermal/intel_powerclamp.c
>> +++ b/drivers/thermal/intel_powerclamp.c
>> @@ -659,7 +659,7 @@ static int powerclamp_set_cur_state(struct
>> thermal_cooling_device *cdev,
>>  }
>>
>>  /* bind to generic thermal layer as cooling device*/
>> -static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
>> +static const struct thermal_cooling_device_ops
>> powerclamp_cooling_ops = {
>>   .get_max_state = powerclamp_get_max_state,
>>   .get_cur_state = powerclamp_get_cur_state,
>>   .set_cur_state = powerclamp_set_cur_state,


Re: [PATCH 2/2] drm/msm: gpu: call qcom_mdt interfaces only for ARCH_QCOM

2017-06-28 Thread Bjorn Andersson
On Tue 20 Jun 13:16 PDT 2017, Arnd Bergmann wrote:

> When compile-testing for something other than ARCH_QCOM,
> we run into a link error:
> 
> drivers/gpu/drm/msm/adreno/a5xx_gpu.o: In function `a5xx_hw_init':
> a5xx_gpu.c:(.text.a5xx_hw_init+0x600): undefined reference to 
> `qcom_mdt_get_size'
> a5xx_gpu.c:(.text.a5xx_hw_init+0x93c): undefined reference to `qcom_mdt_load'
> 
> There is already an #ifdef that tries to check for CONFIG_QCOM_MDT_LOADER,
> but that symbol is only meaningful when building for ARCH_QCOM.
> 
> This adds a compile-time check for ARCH_QCOM, and clarifies the
> Kconfig select statement so we don't even try it for other targets.
> 
> The check for CONFIG_QCOM_MDT_LOADER can then go away, which also
> improves compile-time coverage and makes the code a little nicer
> to read.

Acked-by: Bjorn Andersson 

Regards,
Bjorn


Re: [PATCH] acpi: thermal: honor "mode" sysfs file setting

2017-06-28 Thread Zhang Rui
On Thu, 2017-06-29 at 00:14 +0200, Rafael J. Wysocki wrote:
> On Thursday, June 22, 2017 02:45:42 PM Enric Balletbo i Serra wrote:
> > 
> > From: Sameer Nanda 
> > 
> > Under each thermal zone there is a file called "mode". Writing
> > enabled
> > or disabled to this file allows a given thermal zone to be enabled
> > or
> > disabled. Honor writes to this file by enabling or disabling the
> > polling timers.
> > 
> > With this change, in the acpi_thermal_add path,
> > acpi_thermal_get_info
> > gets called before acpi_thermal_register_thermal_zone. Since
> > tz_enabled
> > was getting set to 1 only in acpi_thermal_register_thermal_zone,
> > acpi_thermal_get_info ended up disabling thermal polling so moved
> > the
> > setting of tz_enabled to 1 into acpi_thermal_add itself.
> > 
> > After this patch echoing enabled|disabled to "mode" sysfs will
> > start/stop
> > the polling of the temperature.
> > 
I see, so there are three pieces of logic that decide the polling frequency:

1. returned by _TZP, according to ACPI spec
2. overridden by thermal.tzp module parameter
3. cleared when the thermal zone is disabled and restored when the
thermal zone is re-enabled (missing in current code)

What we are doing in this patch is introducing the third logic and moving
all the code for 1, 2 and 3 into acpi_thermal_get_polling_frequency().
To align with this change, tz->tz_enabled = 1 is moved earlier, before
acpi_thermal_get_polling_frequency() is invoked in acpi_thermal_add().
Right?

> > Signed-off-by: Sameer Nanda 
> > Signed-off-by: Enric Balletbo i Serra  > >
> Rui, Srinivas, can you please have a look at this one and let me know
> what you
> think?
> 
> > 
> > ---
> >  drivers/acpi/thermal.c | 28 +---
> >  1 file changed, 21 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c
> > index 1d0417b..68ad9fe 100644
> > --- a/drivers/acpi/thermal.c
> > +++ b/drivers/acpi/thermal.c
> > @@ -223,6 +223,17 @@ static int
> > acpi_thermal_get_polling_frequency(struct acpi_thermal *tz)
> >     if (!tz)
> >     return -EINVAL;
> >  
> > +   if (tz->tz_enabled == THERMAL_DEVICE_DISABLED) {
> > +   tz->polling_frequency = 0;
> > +   return 0;
> > +   }
> > +
> > +   /* Get default polling frequency [_TZP] (optional) */
> > +   if (tzp) {
> > +   tz->polling_frequency = tzp;
> > +   return 0;
> > +   }
> > +
> >     status = acpi_evaluate_integer(tz->device->handle, "_TZP",
> > NULL, );
> >     if (ACPI_FAILURE(status))
> >     return -ENODEV;
> > @@ -582,6 +593,14 @@ static int thermal_set_mode(struct
> > thermal_zone_device *thermal,
> >     ACPI_DEBUG_PRINT((ACPI_DB_INFO,
> >     "%s kernel ACPI thermal control\n",
> >     tz->tz_enabled ? "Enable" : "Disable"));
> > +
> > +   acpi_thermal_get_polling_frequency(tz);
> > +
> > +   mutex_lock(&tz->thermal_zone->lock);
> > +   tz->thermal_zone->polling_delay = tz->polling_frequency * 100;
> > +   tz->thermal_zone->passive_delay = tz->polling_frequency * 100;

To me, this policy, "disable polling when the thermal zone is disabled,
re-enable polling when the thermal zone is enabled", applies to all
thermal zones, so it is better implemented in the thermal subsystem
rather than in the ACPI thermal driver.

BTW, two other comments about the code itself:
1. I don't like the way tz->polling_delay/tz->passive_delay are
changed here. If we have to do this, it is better to use a thermal API, and we
should invoke monitor_thermal_zone() right after updating these two
fields.
2. passive_delay is set to the wrong value here.

thanks,
rui
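[Editor's note: a sketch of the direction suggested in comment 1 above (hypothetical helper inside the thermal core; monitor_thermal_zone() is static to thermal_core.c, so this is illustrative only, not an existing API).]

	static void thermal_zone_set_polling_delay(struct thermal_zone_device *tz,
						   int delay_ms)
	{
		mutex_lock(&tz->lock);
		tz->polling_delay = delay_ms;
		mutex_unlock(&tz->lock);

		/* Re-arm (or cancel, for delay_ms == 0) the polling work. */
		monitor_thermal_zone(tz);
	}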
> > +   mutex_unlock(&tz->thermal_zone->lock);
> > +
> >     acpi_thermal_check(tz);
> >     }
> >     return 0;
> > @@ -930,8 +949,6 @@ static int
> > acpi_thermal_register_thermal_zone(struct acpi_thermal *tz)
> >     if (ACPI_FAILURE(status))
> >     return -ENODEV;
> >  
> > -   tz->tz_enabled = 1;
> > -
> >     dev_info(>device->dev, "registered as
> > thermal_zone%d\n",
> >      tz->thermal_zone->id);
> >     return 0;
> > @@ -1039,11 +1056,7 @@ static int acpi_thermal_get_info(struct
> > acpi_thermal *tz)
> >     if (!result)
> >     tz->flags.cooling_mode = 1;
> >  
> > -   /* Get default polling frequency [_TZP] (optional) */
> > -   if (tzp)
> > -   tz->polling_frequency = tzp;
> > -   else
> > -   acpi_thermal_get_polling_frequency(tz);
> > +   acpi_thermal_get_polling_frequency(tz);
> >  
> >     return 0;
> >  }
> > @@ -1088,6 +1101,7 @@ static int acpi_thermal_add(struct
> > acpi_device *device)
> >     return -ENOMEM;
> >  
> >     tz->device = device;
> > +   tz->tz_enabled = 1;
> >     strcpy(tz->name, device->pnp.bus_id);
> >     strcpy(acpi_device_name(device),
> > ACPI_THERMAL_DEVICE_NAME);
> >     strcpy(acpi_device_class(device), ACPI_THERMAL_CLASS);
> > 
> --
> To unsubscribe from this list: send the line 

[PATCH v5 0/6] g_NCR5380: PDMA fixes and cleanup

2017-06-28 Thread Finn Thain
Ondrej, would you please test this new series?

Changed since v1:
- PDMA transfer residual is calculated earlier.
- End of DMA flag check is now polled (if there is any residual).

Changed since v2:
- Bail out of transfer loops when Gated IRQ gets asserted.
- Make udelay conditional on board type.
- Drop sg_tablesize patch due to performance regression.

Changed since v3:
- Add Ondrej's workaround for corrupt WRITE commands on DTC boards.
- Reset the 53c400 logic after any short PDMA transfer.
- Don't fail the transfer if the 53c400 logic got a reset.

Changed since v4:
- Bail out of transfer loops when Gated IRQ gets asserted. (Again.)
- Always call wait_for_53c80_registers() at end of transfer.
- Drain chip buffers after PDMA receive is interrupted.
- Rework residual calculation.
- Add new patch to correct DMA terminology.


Finn Thain (2):
  g_NCR5380: Cleanup comments and whitespace
  g_NCR5380: Use unambiguous terminology for PDMA send and receive

Ondrej Zary (4):
  g_NCR5380: Fix PDMA transfer size
  g_NCR5380: End PDMA transfer correctly on target disconnection
  g_NCR5380: Limit PDMA send to 512 B to avoid data corruption on
DTC3181E
  g_NCR5380: Re-work PDMA loops

 drivers/scsi/g_NCR5380.c | 260 +--
 1 file changed, 139 insertions(+), 121 deletions(-)

-- 
2.13.0



[PATCH v5 5/6] g_NCR5380: Re-work PDMA loops

2017-06-28 Thread Finn Thain
From: Ondrej Zary 

The polling loops in pread() and pwrite() can easily become infinite
loops and hang the machine.

On DTC chips, IRQ can arrive late and we miss it because we only check
once. Merge the IRQ check into host buffer wait and add polling limit.

Also place a limit on polling for 53C80 registers accessibility.

[Use NCR5380_poll_politely2() for register polling. Rely on polling for
gated IRQ rather than polling for phase error, like the algorithm in the
datasheet. Calculate residual from block count register instead of the
loop counter. Factor-out common code as wait_for_53c80_access(). -- F.T.]

Signed-off-by: Ondrej Zary 
Signed-off-by: Finn Thain 
---
 drivers/scsi/g_NCR5380.c | 168 +--
 1 file changed, 88 insertions(+), 80 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 5fd227bb1830..f7e50d2bca07 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -482,6 +482,30 @@ static void generic_NCR5380_release_resources(struct 
Scsi_Host *instance)
release_mem_region(base, region_size);
 }
 
+/* wait_for_53c80_access - wait for 53C80 registers to become accessible
+ * @hostdata: scsi host private data
+ *
+ * The registers within the 53C80 logic block are inaccessible until
+ * bit 7 in the 53C400 control status register gets asserted.
+ */
+
+static void wait_for_53c80_access(struct NCR5380_hostdata *hostdata)
+{
+   int count = 1;
+
+   do {
+   if (hostdata->board == BOARD_DTC3181E)
+   udelay(4); /* DTC436 chip hangs without this */
+   if (NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)
+   return;
+   } while (--count > 0);
+
+   scmd_printk(KERN_ERR, hostdata->connected,
+   "53c80 registers not accessible, device will be reset\n");
+   NCR5380_write(hostdata->c400_ctl_status, CSR_RESET);
+   NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
+}
+
 /**
  * generic_NCR5380_pread - pseudo DMA read
  * @hostdata: scsi host private data
@@ -494,18 +518,23 @@ static void generic_NCR5380_release_resources(struct 
Scsi_Host *instance)
 static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
 unsigned char *dst, int len)
 {
-   int blocks = len / 128;
+   int residual;
int start = 0;
 
NCR5380_write(hostdata->c400_ctl_status, CSR_BASE | CSR_TRANS_DIR);
-   NCR5380_write(hostdata->c400_blk_cnt, blocks);
-   while (1) {
-   if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
+   NCR5380_write(hostdata->c400_blk_cnt, len / 128);
+
+   while (start < len) {
+   if (NCR5380_poll_politely2(hostdata, hostdata->c400_ctl_status,
+  CSR_HOST_BUF_NOT_RDY, 0,
+  hostdata->c400_ctl_status,
+  CSR_GATED_53C80_IRQ,
+  CSR_GATED_53C80_IRQ, HZ / 64) < 0)
+   break;
+
+   if (NCR5380_read(hostdata->c400_ctl_status) &
+   CSR_HOST_BUF_NOT_RDY)
break;
-   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ)
-   goto out_wait;
-   while (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_HOST_BUF_NOT_RDY)
-   ; /* FIXME - no timeout */
 
if (hostdata->io_port && hostdata->io_width == 2)
insw(hostdata->io_port + hostdata->c400_host_buf,
@@ -516,44 +545,30 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
else
memcpy_fromio(dst + start,
hostdata->io + NCR53C400_host_buffer, 128);
-
start += 128;
-   blocks--;
-   }
-
-   if (blocks) {
-   while (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_HOST_BUF_NOT_RDY)
-   ; /* FIXME - no timeout */
 
-   if (hostdata->io_port && hostdata->io_width == 2)
-   insw(hostdata->io_port + hostdata->c400_host_buf,
-   dst + start, 64);
-   else if (hostdata->io_port)
-   insb(hostdata->io_port + hostdata->c400_host_buf,
-   dst + start, 128);
-   else
-   memcpy_fromio(dst + start,
-   hostdata->io + NCR53C400_host_buffer, 128);
-
-   start += 128;
-   blocks--;
+   if (NCR5380_read(hostdata->c400_ctl_status) &
+   CSR_GATED_53C80_IRQ)
+   break;
}
 
-   if (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ))
-  

[PATCH v5 1/6] g_NCR5380: Fix PDMA transfer size

2017-06-28 Thread Finn Thain
From: Ondrej Zary 

generic_NCR5380_dma_xfer_len() incorrectly uses cmd->transfersize
which causes rescan-scsi-bus and CD-ROM access to hang the system.
Use cmd->SCp.this_residual instead, like other NCR5380 drivers.

Signed-off-by: Ondrej Zary 
Signed-off-by: Finn Thain 
---
 drivers/scsi/g_NCR5380.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 67c8dac321ad..14ef4e8c4713 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -76,6 +76,7 @@
 #define IRQ_AUTO 254
 
 #define MAX_CARDS 8
+#define DMA_MAX_SIZE 32768
 
 /* old-style parameters for compatibility */
 static int ncr_irq = -1;
@@ -629,23 +630,16 @@ static inline int generic_NCR5380_pwrite(struct 
NCR5380_hostdata *hostdata,
 static int generic_NCR5380_dma_xfer_len(struct NCR5380_hostdata *hostdata,
 struct scsi_cmnd *cmd)
 {
-   int transfersize = cmd->transfersize;
+   int transfersize = cmd->SCp.this_residual;
 
if (hostdata->flags & FLAG_NO_PSEUDO_DMA)
return 0;
 
-   /* Limit transfers to 32K, for xx400 & xx406
-* pseudoDMA that transfers in 128 bytes blocks.
-*/
-   if (transfersize > 32 * 1024 && cmd->SCp.this_residual &&
-   !(cmd->SCp.this_residual % transfersize))
-   transfersize = 32 * 1024;
-
/* 53C400 datasheet: non-modulo-128-byte transfers should use PIO */
if (transfersize % 128)
transfersize = 0;
 
-   return transfersize;
+   return min(transfersize, DMA_MAX_SIZE);
 }
 
 /*
-- 
2.13.0



[PATCH v5 3/6] g_NCR5380: Cleanup comments and whitespace

2017-06-28 Thread Finn Thain
Signed-off-by: Finn Thain 
---
 drivers/scsi/g_NCR5380.c | 61 ++--
 1 file changed, 28 insertions(+), 33 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 911a4300ea51..dedaed2d16e4 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -1,17 +1,17 @@
 /*
  * Generic Generic NCR5380 driver
- * 
+ *
  * Copyright 1993, Drew Eckhardt
- * Visionary Computing
- * (Unix and Linux consulting and custom programming)
- * d...@colorado.edu
- *  +1 (303) 440-4894
+ * Visionary Computing
+ * (Unix and Linux consulting and custom programming)
+ * d...@colorado.edu
+ * +1 (303) 440-4894
  *
  * NCR53C400 extensions (c) 1994,1995,1996, Kevin Lentin
- *k.len...@cs.monash.edu.au
+ * k.len...@cs.monash.edu.au
  *
  * NCR53C400A extensions (c) 1996, Ingmar Baumgart
- *ing...@gonzo.schwaben.de
+ * ing...@gonzo.schwaben.de
  *
  * DTC3181E extensions (c) 1997, Ronald van Cuijlenborg
  * ronald.van.cuijlenb...@tip.nl or nu...@dds.nl
@@ -481,15 +481,14 @@ static void generic_NCR5380_release_resources(struct 
Scsi_Host *instance)
 }
 
 /**
- * generic_NCR5380_pread - pseudo DMA read
- * @hostdata: scsi host private data
- * @dst: buffer to read into
- * @len: buffer length
+ * generic_NCR5380_pread - pseudo DMA read
+ * @hostdata: scsi host private data
+ * @dst: buffer to write into
+ * @len: transfer size
  *
- * Perform a pseudo DMA mode read from an NCR53C400 or equivalent
- * controller
+ * Perform a pseudo DMA mode receive from a 53C400 or equivalent device.
  */
- 
+
 static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
 unsigned char *dst, int len)
 {
@@ -508,10 +507,10 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
 
if (hostdata->io_port && hostdata->io_width == 2)
insw(hostdata->io_port + hostdata->c400_host_buf,
-   dst + start, 64);
+dst + start, 64);
else if (hostdata->io_port)
insb(hostdata->io_port + hostdata->c400_host_buf,
-   dst + start, 128);
+dst + start, 128);
else
memcpy_fromio(dst + start,
hostdata->io + NCR53C400_host_buffer, 128);
@@ -558,13 +557,12 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
 }
 
 /**
- * generic_NCR5380_pwrite - pseudo DMA write
- * @hostdata: scsi host private data
- * @dst: buffer to read into
- * @len: buffer length
+ * generic_NCR5380_pwrite - pseudo DMA write
+ * @hostdata: scsi host private data
+ * @src: buffer to read from
+ * @len: transfer size
  *
- * Perform a pseudo DMA mode read from an NCR53C400 or equivalent
- * controller
+ * Perform a pseudo DMA mode send to a 53C400 or equivalent device.
  */
 
 static inline int generic_NCR5380_pwrite(struct NCR5380_hostdata *hostdata,
@@ -603,10 +601,10 @@ static inline int generic_NCR5380_pwrite(struct 
NCR5380_hostdata *hostdata,
 
if (hostdata->io_port && hostdata->io_width == 2)
outsw(hostdata->io_port + hostdata->c400_host_buf,
-   src + start, 64);
+ src + start, 64);
else if (hostdata->io_port)
outsb(hostdata->io_port + hostdata->c400_host_buf,
-   src + start, 128);
+ src + start, 128);
else
memcpy_toio(hostdata->io + NCR53C400_host_buffer,
src + start, 128);
@@ -656,10 +654,8 @@ static int generic_NCR5380_dma_residual(struct 
NCR5380_hostdata *hostdata)
return hostdata->pdma_residual;
 }
 
-/*
- * Include the NCR5380 core code that we build our driver around   
- */
- 
+/* Include the core driver code. */
+
 #include "NCR5380.c"
 
 static struct scsi_host_template driver_template = {
@@ -679,11 +675,10 @@ static struct scsi_host_template driver_template = {
.max_sectors= 128,
 };
 
-
 static int generic_NCR5380_isa_match(struct device *pdev, unsigned int ndev)
 {
int ret = generic_NCR5380_init_one(_template, pdev, base[ndev],
- irq[ndev], card[ndev]);
+  irq[ndev], card[ndev]);
if (ret) {
if (base[ndev])
printk(KERN_WARNING "Card not found at address 
0x%03x\n",
@@ -695,7 +690,7 @@ static int generic_NCR5380_isa_match(struct device *pdev, 
unsigned int ndev)
 }
 
 static int generic_NCR5380_isa_remove(struct device *pdev,
-  

[PATCH v5 4/6] g_NCR5380: Limit PDMA send to 512 B to avoid data corruption on DTC3181E

2017-06-28 Thread Finn Thain
From: Ondrej Zary 

The corruption is always the same: one byte missing at the beginning of
a 128 B block. It happens only with slow Quantum LPS 240 drive, not with
faster IBM DORS-32160. It's not clear what causes this. Documentation
for the DTC436 chip has not been made available. Hence this workaround.

Signed-off-by: Finn Thain 
---
 drivers/scsi/g_NCR5380.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index dedaed2d16e4..5fd227bb1830 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -45,7 +45,8 @@
int c400_blk_cnt; \
int c400_host_buf; \
int io_width; \
-   int pdma_residual
+   int pdma_residual; \
+   int board
 
 #define NCR5380_dma_xfer_lengeneric_NCR5380_dma_xfer_len
 #define NCR5380_dma_recv_setup  generic_NCR5380_pread
@@ -316,6 +317,7 @@ static int generic_NCR5380_init_one(struct 
scsi_host_template *tpnt,
}
hostdata = shost_priv(instance);
 
+   hostdata->board = board;
hostdata->io = iomem;
hostdata->region_size = region_size;
 
@@ -644,7 +646,12 @@ static int generic_NCR5380_dma_xfer_len(struct 
NCR5380_hostdata *hostdata,
 
/* 53C400 datasheet: non-modulo-128-byte transfers should use PIO */
if (transfersize % 128)
-   transfersize = 0;
+   return 0;
+
+   /* Limit PDMA send to 512 B to avoid random corruption on DTC3181E */
+   if (hostdata->board == BOARD_DTC3181E &&
+   cmd->sc_data_direction == DMA_TO_DEVICE)
+   transfersize = min(cmd->SCp.this_residual, 512);
 
return min(transfersize, DMA_MAX_SIZE);
 }
-- 
2.13.0



[PATCH v5 2/6] g_NCR5380: End PDMA transfer correctly on target disconnection

2017-06-28 Thread Finn Thain
From: Ondrej Zary 

When an IRQ arrives during PDMA transfer, pread() and pwrite() return
without waiting for the 53C80 registers to be ready and this ends up
messing up the chip state. This was observed with SONY CDU-55S which is
slow enough to disconnect during 4096-byte reads.

IRQ during PDMA is not an error so don't return -1. Instead, store the
remaining byte count for use by NCR5380_dma_residual().

[Poll for the BASR_END_DMA_TRANSFER condition rather than remove the
error message -- F.T.]

Signed-off-by: Ondrej Zary 
Signed-off-by: Finn Thain 
---
 drivers/scsi/g_NCR5380.c | 48 +++-
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 14ef4e8c4713..911a4300ea51 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -44,12 +44,13 @@
int c400_ctl_status; \
int c400_blk_cnt; \
int c400_host_buf; \
-   int io_width
+   int io_width; \
+   int pdma_residual
 
 #define NCR5380_dma_xfer_lengeneric_NCR5380_dma_xfer_len
 #define NCR5380_dma_recv_setup  generic_NCR5380_pread
 #define NCR5380_dma_send_setup  generic_NCR5380_pwrite
-#define NCR5380_dma_residualNCR5380_dma_residual_none
+#define NCR5380_dma_residualgeneric_NCR5380_dma_residual
 
 #define NCR5380_intrgeneric_NCR5380_intr
 #define NCR5380_queue_command   generic_NCR5380_queue_command
@@ -500,10 +501,8 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
while (1) {
if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
break;
-   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ) {
-   printk(KERN_ERR "53C400r: Got 53C80_IRQ start=%d, 
blocks=%d\n", start, blocks);
-   return -1;
-   }
+   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ)
+   goto out_wait;
while (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_HOST_BUF_NOT_RDY)
; /* FIXME - no timeout */
 
@@ -542,13 +541,19 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
if (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ))
printk("53C400r: no 53C80 gated irq after transfer");
 
+out_wait:
+   hostdata->pdma_residual = len - start;
+
/* wait for 53C80 registers to be available */
while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG))
;
 
-   if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER))
-   printk(KERN_ERR "53C400r: no end dma signal\n");
-   
+   if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
+ BASR_END_DMA_TRANSFER, BASR_END_DMA_TRANSFER,
+ HZ / 64) < 0)
+   scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA 
timeout (%d)\n",
+   __func__, hostdata->pdma_residual);
+
return 0;
 }
 
@@ -571,10 +576,8 @@ static inline int generic_NCR5380_pwrite(struct 
NCR5380_hostdata *hostdata,
NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
NCR5380_write(hostdata->c400_blk_cnt, blocks);
while (1) {
-   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ) {
-   printk(KERN_ERR "53C400w: Got 53C80_IRQ start=%d, 
blocks=%d\n", start, blocks);
-   return -1;
-   }
+   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ)
+   goto out_wait;
 
if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
break;
@@ -612,18 +615,24 @@ static inline int generic_NCR5380_pwrite(struct 
NCR5380_hostdata *hostdata,
blocks--;
}
 
+out_wait:
+   hostdata->pdma_residual = len - start;
+
/* wait for 53C80 registers to be available */
while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)) {
udelay(4); /* DTC436 chip hangs without this */
/* FIXME - no timeout */
}
 
-   if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER)) {
-   printk(KERN_ERR "53C400w: no end dma signal\n");
-   }
-
while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT))
;   // TIMEOUT
+
+   if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
+ BASR_END_DMA_TRANSFER, BASR_END_DMA_TRANSFER,
+ HZ / 64) < 0)
+   scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA 
timeout (%d)\n",
+   __func__, hostdata->pdma_residual);
+
return 0;
 }
 
@@ 

[linux-next][PATCH] usb: dwc3: core: Call dwc3_core_get_phy() before initializing phys

2017-06-28 Thread Vignesh R
commit f54edb539c116 ("usb: dwc3: core: initialize ULPI before trying to
get the PHY") moved call to dwc3_core_get_phy() from dwc3_probe() to
dwc3_core_init() after dwc3_core_soft_reset(). But
dwc3_core_soft_reset() calls phy_init(), therefore dwc3_core_get_phy()
needs to be called before dwc3_core_soft_reset().

Fix this by moving call to dwc3_core_get_phy() before
dwc3_core_soft_reset().

This fixes the following abort seen on DRA7xx platforms
[   24.769118] usb usb2: SerialNumber: xhci-hcd.1.auto
[   24.781144] hub 2-0:1.0: USB hub found
[   24.787836] hub 2-0:1.0: 1 port detected
[   24.809939] Unhandled fault: imprecise external abort (0x1406) at 0x

Reported-by: Carlos Hernandez 
Signed-off-by: Vignesh R 
---
 drivers/usb/dwc3/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 326b302fc440..03474d3575ab 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -766,15 +766,15 @@ static int dwc3_core_init(struct dwc3 *dwc)
dwc->maximum_speed = USB_SPEED_HIGH;
}
 
-   ret = dwc3_core_soft_reset(dwc);
+   ret = dwc3_core_get_phy(dwc);
if (ret)
goto err0;
 
-   ret = dwc3_phy_setup(dwc);
+   ret = dwc3_core_soft_reset(dwc);
if (ret)
goto err0;
 
-   ret = dwc3_core_get_phy(dwc);
+   ret = dwc3_phy_setup(dwc);
if (ret)
goto err0;
 
-- 
2.13.0



[PATCH v5 6/6] g_NCR5380: Use unambiguous terminology for PDMA send and receive

2017-06-28 Thread Finn Thain
The word "read" may be used to mean "DMA read operation" or
"SCSI READ command", though a READ command implies writing to memory.

Signed-off-by: Finn Thain 
---
 drivers/scsi/g_NCR5380.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index f7e50d2bca07..b8b1ed2806bb 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -49,8 +49,8 @@
int board
 
 #define NCR5380_dma_xfer_lengeneric_NCR5380_dma_xfer_len
-#define NCR5380_dma_recv_setup  generic_NCR5380_pread
-#define NCR5380_dma_send_setup  generic_NCR5380_pwrite
+#define NCR5380_dma_recv_setup  generic_NCR5380_precv
+#define NCR5380_dma_send_setup  generic_NCR5380_psend
 #define NCR5380_dma_residualgeneric_NCR5380_dma_residual
 
 #define NCR5380_intrgeneric_NCR5380_intr
@@ -507,7 +507,7 @@ static void wait_for_53c80_access(struct NCR5380_hostdata 
*hostdata)
 }
 
 /**
- * generic_NCR5380_pread - pseudo DMA read
+ * generic_NCR5380_precv - pseudo DMA receive
  * @hostdata: scsi host private data
  * @dst: buffer to write into
  * @len: transfer size
@@ -515,7 +515,7 @@ static void wait_for_53c80_access(struct NCR5380_hostdata 
*hostdata)
  * Perform a pseudo DMA mode receive from a 53C400 or equivalent device.
  */
 
-static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
+static inline int generic_NCR5380_precv(struct NCR5380_hostdata *hostdata,
 unsigned char *dst, int len)
 {
int residual;
@@ -574,7 +574,7 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
 }
 
 /**
- * generic_NCR5380_pwrite - pseudo DMA write
+ * generic_NCR5380_psend - pseudo DMA send
  * @hostdata: scsi host private data
  * @src: buffer to read from
  * @len: transfer size
@@ -582,8 +582,8 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
  * Perform a pseudo DMA mode send to a 53C400 or equivalent device.
  */
 
-static inline int generic_NCR5380_pwrite(struct NCR5380_hostdata *hostdata,
- unsigned char *src, int len)
+static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
+unsigned char *src, int len)
 {
int residual;
int start = 0;
-- 
2.13.0



Re: [PATCH v4 0/5] g_NCR5380: PDMA fixes and cleanup

2017-06-28 Thread Finn Thain
On Wed, 28 Jun 2017, Ondrej Zary wrote:

> 
> Now read seems to work on non-DTC chips. Writes continue in PDMA after 
> disconnect but there's a corruption - one 128 B block missing on 
> disconnect.
> 
> On DTC, the log is spammed with errors like this:
> sd 2:0:1:0: [sdb] tag#0 generic_NCR5380_pread: End of DMA timeout (0)
> 
> They're caused by read corruption on DTC: pread() is breaking at 
> start=3968 because of an end-of-DMA IRQ (BASR=0x98) but pdma_residual is 
> set to zero (block counter is zero because the data was read into the 
> buffer but we did not read it from there). So we lose one buffer of data 
> on each 4 KB read.
>

But the algorithm in the datasheet never reads from the buffer after the 
block counter reaches zero. (Of course, the only datasheet we have is for 
a 53c400 device not a DTC436 so all bets are off.)

Anyway, the corrupted data that you describe is telling. I think you're 
right, we have to drain the buffer even when Gated IRQ has been asserted 
(or find a better way to calculate the residual).
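
Roughly the kind of drain I have in mind after the loop breaks on a
gated IRQ (a sketch only, io_port/insb case shown for brevity; this is
not the code in v5):

	/* If a 128 B block already made it into the 53c400 host buffer,
	 * drain it before computing the residual, so a disconnect does
	 * not silently cost us that block.
	 */
	if (start < len &&
	    !(NCR5380_read(hostdata->c400_ctl_status) & CSR_HOST_BUF_NOT_RDY)) {
		insb(hostdata->io_port + hostdata->c400_host_buf,
		     dst + start, 128);
		start += 128;
	}
	hostdata->pdma_residual = len - start;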

I can see a theoretical problem with the code I sent. If the 53c80 raises 
IRQ during the outsb() or insb(), we could still end up with start == end, 
which could mess up both the residual and the handling for an incomplete 
transfer.

> The PDMA is then reset which probably means BASR_END_DMA_TRANSFER will 
> not be asserted.
> 

But the BASR_END_DMA_TRANSFER flag is latched, and resetting the 53c400 
logic should not affect 53c80 registers (assuming they are accessible). So 
the reset does not explain the log messages.

Maybe your BASR=0x98 observations do not coincide with the log messages. 
Or maybe we need to wait for registers to become accessible after the 
reset.

I've attempted to address all these issues in v5.

Thanks.

-- 


[PATCH V2 0/4] sched: cpufreq: Allow remote callbacks

2017-06-28 Thread Viresh Kumar
Hi,

Here is the second version of this series. The first [1] version was
sent several months back.

With Android UI and benchmarks the latency of cpufreq response to
certain scheduling events can become very critical. Currently, callbacks
into schedutil are only made from the scheduler if the target CPU of the
event is the same as the current CPU. This means there are certain
situations where a target CPU may not run schedutil for some time.

One testcase to show this behavior is where a task starts running on
CPU0, then a new task is also spawned on CPU0 by a task on CPU1. If the
system is configured such that new tasks should receive maximum demand
initially, this should result in CPU0 increasing frequency immediately.
Because of the above-mentioned limitation, though, this does not occur.
This is verified using ftrace with the sample [2] application.

Maybe the ideal solution is to always allow remote callbacks but that
has its own challenges:

o There is no protection required for single CPU per policy case today,
  and adding any kind of locking there, to supply remote callbacks,
  isn't really a good idea.

o If the local CPU isn't part of the same cpufreq policy as the target
  CPU, then we wouldn't be able to do fast switching at all and would
  have to use some kind of bottom half to schedule work on the target
  CPU to do the real switching. That may be overkill as well.


Taking the above challenges into consideration, this version proposes a
much simpler diff compared to the first version.

This series only allows remote callbacks for target CPUs that share the
cpufreq policy with the local CPU. Locking is mostly in place everywhere
and we wouldn't be required to change a lot of things.

This series is tested with a couple of use cases (Android: hackbench,
recentfling, galleryfling, vellamo; Ubuntu: hackbench) on an ARM hikey
board (64-bit octa-core, single policy). Only galleryfling showed minor
improvements, while the others didn't show much deviation.

The reason is that this patchset only targets a corner case, where the
following all need to be true for performance to improve, and that
doesn't happen too often with these tests:

- Task is migrated to another CPU.
- The task has maximum demand initially, and should take the CPU to
  higher OPPs.
- And the target CPU doesn't call into schedutil until the next tick.


V1->V2:
- Don't support remote callbacks for unshared cpufreq policies.
- Don't support remote callbacks where local CPU isn't part of the
  target CPU's cpufreq policy.
- Dropped dvfs_possible_from_any_cpu flag.

--
viresh

[1] https://marc.info/?l=linux-pm=148906015927796=2
[2] http://pastebin.com/7LkMSRxE


Steve Muckle (1):
  intel_pstate: Ignore scheduler cpufreq callbacks on remote CPUs

Viresh Kumar (3):
  cpufreq: schedutil: Process remote callback for shared policies
  cpufreq: governor: Process remote callback for shared policies
  sched: cpufreq: Enable remote sched cpufreq callbacks

 drivers/cpufreq/cpufreq_governor.c |  4 
 drivers/cpufreq/intel_pstate.c |  3 +++
 include/linux/sched/cpufreq.h  |  1 +
 kernel/sched/cpufreq.c |  1 +
 kernel/sched/cpufreq_schedutil.c   | 19 ++-
 kernel/sched/deadline.c|  2 +-
 kernel/sched/fair.c|  8 +---
 kernel/sched/rt.c  |  2 +-
 kernel/sched/sched.h   | 10 ++
 9 files changed, 32 insertions(+), 18 deletions(-)

-- 
2.13.0.71.gd7076ec9c9cb



[PATCH V2 2/4] cpufreq: governor: Process remote callback for shared policies

2017-06-28 Thread Viresh Kumar
This patch updates the legacy governors (ondemand/conservative) to
process cpufreq utilization update hooks called for remote CPUs
(i.e. for updates to the runqueue of a non-local CPU).

Based on initial work from Steve Muckle.

Signed-off-by: Viresh Kumar 
---
 drivers/cpufreq/cpufreq_governor.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/cpufreq/cpufreq_governor.c 
b/drivers/cpufreq/cpufreq_governor.c
index 47e24b5384b3..0b49fc8bb91d 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -275,6 +275,10 @@ static void dbs_update_util_handler(struct 
update_util_data *data, u64 time,
struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
u64 delta_ns, lst;
 
+   /* Allow remote callbacks only on the CPUs sharing cpufreq policy */
+   if (!cpumask_test_cpu(smp_processor_id(), policy_dbs->policy->cpus))
+   return;
+
/*
 * The work may not be allowed to be queued up right now.
 * Possible reasons:
-- 
2.13.0.71.gd7076ec9c9cb



[PATCH V2 4/4] sched: cpufreq: Enable remote sched cpufreq callbacks

2017-06-28 Thread Viresh Kumar
Now that all clients properly support (or ignore) remote scheduler
cpufreq callbacks, remove the restriction that such callbacks only be
made on the local CPU.

Also remove cpufreq_update_this_cpu() as all its users are migrated to
use cpufreq_update_util() instead.

Based on initial work from Steve Muckle.

Signed-off-by: Steve Muckle 
Signed-off-by: Viresh Kumar 
---
 kernel/sched/deadline.c |  2 +-
 kernel/sched/fair.c |  8 +---
 kernel/sched/rt.c   |  2 +-
 kernel/sched/sched.h| 10 ++
 4 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a2ce59015642..512d51226998 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -763,7 +763,7 @@ static void update_curr_dl(struct rq *rq)
}
 
/* kick cpufreq (see the comment in kernel/sched/sched.h). */
-   cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
+   cpufreq_update_util(rq, SCHED_CPUFREQ_DL);
 
schedstat_set(curr->se.statistics.exec_max,
  max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c77e4b1d51c0..77ef663e1380 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3215,7 +3215,9 @@ static inline void set_tg_cfs_propagate(struct cfs_rq 
*cfs_rq) {}
 
 static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 {
-   if (_rq()->cfs == cfs_rq) {
+   struct rq *rq = rq_of(cfs_rq);
+
+   if (>cfs == cfs_rq) {
/*
 * There are a few boundary cases this might miss but it should
 * get called often enough that that should (hopefully) not be
@@ -3232,7 +3234,7 @@ static inline void cfs_rq_util_change(struct cfs_rq 
*cfs_rq)
 *
 * See cpu_util().
 */
-   cpufreq_update_util(rq_of(cfs_rq), 0);
+   cpufreq_update_util(rq, 0);
}
 }
 
@@ -4792,7 +4794,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, 
int flags)
 * passed.
 */
if (p->in_iowait)
-   cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
+   cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
 
for_each_sched_entity(se) {
if (se->on_rq)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 979b7341008a..1e626e49f7fc 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -959,7 +959,7 @@ static void update_curr_rt(struct rq *rq)
return;
 
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
-   cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
+   cpufreq_update_util(rq, SCHED_CPUFREQ_RT);
 
schedstat_set(curr->se.statistics.exec_max,
  max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 6dda2aab731e..cce497b5837c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1987,19 +1987,13 @@ static inline void cpufreq_update_util(struct rq *rq, 
unsigned int flags)
 {
struct update_util_data *data;
 
-   data = rcu_dereference_sched(*this_cpu_ptr(_update_util_data));
+   data = rcu_dereference_sched(*per_cpu_ptr(_update_util_data,
+ cpu_of(rq)));
if (data)
data->func(data, rq_clock(rq), flags);
 }
-
-static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
-{
-   if (cpu_of(rq) == smp_processor_id())
-   cpufreq_update_util(rq, flags);
-}
 #else
 static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
-static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) 
{}
 #endif /* CONFIG_CPU_FREQ */
 
 #ifdef arch_scale_freq_capacity
-- 
2.13.0.71.gd7076ec9c9cb



[PATCH V2 1/4] cpufreq: schedutil: Process remote callback for shared policies

2017-06-28 Thread Viresh Kumar
This patch updates the schedutil governor to process cpufreq utilization
update hooks called for remote CPUs (i.e. for updates to the runqueue
of a non-local CPU). For now, we only support remote callbacks for CPUs
which share their cpufreq policy with the local CPU.

It may not be worth allowing remote callbacks in other cases, as the
local CPU wouldn't be able to perform the frequency update itself.

The schedutil governor already has proper locking in place for shared
policy update hooks.

This also adds a new field "cpu" in "struct update_util_data", to
identify the remote CPU.

Based on initial work from Steve Muckle.

Signed-off-by: Steve Muckle 
Signed-off-by: Viresh Kumar 
---
 include/linux/sched/cpufreq.h|  1 +
 kernel/sched/cpufreq.c   |  1 +
 kernel/sched/cpufreq_schedutil.c | 19 ++-
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index d2be2ccbb372..8256a8f35f22 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -16,6 +16,7 @@
 #ifdef CONFIG_CPU_FREQ
 struct update_util_data {
void (*func)(struct update_util_data *data, u64 time, unsigned int 
flags);
+   unsigned int cpu;
 };
 
 void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index dbc51442ecbc..ee4c596b71b4 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -42,6 +42,7 @@ void cpufreq_add_update_util_hook(int cpu, struct 
update_util_data *data,
return;
 
data->func = func;
+   data->cpu = cpu;
rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
 }
 EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 076a2e31951c..3f9cae9ab326 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -154,12 +154,12 @@ static unsigned int get_next_freq(struct sugov_policy 
*sg_policy,
return cpufreq_driver_resolve_freq(policy, freq);
 }
 
-static void sugov_get_util(unsigned long *util, unsigned long *max)
+static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu)
 {
-   struct rq *rq = this_rq();
+   struct rq *rq = cpu_rq(cpu);
unsigned long cfs_max;
 
-   cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id());
+   cfs_max = arch_scale_cpu_capacity(NULL, cpu);
 
*util = min(rq->cfs.avg.util_avg, cfs_max);
*max = cfs_max;
@@ -218,6 +218,10 @@ static void sugov_update_single(struct update_util_data 
*hook, u64 time,
unsigned int next_f;
bool busy;
 
+   /* Remote callbacks aren't allowed for policies which aren't shared */
+   if (smp_processor_id() != hook->cpu)
+   return;
+
sugov_set_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time;
 
@@ -229,7 +233,7 @@ static void sugov_update_single(struct update_util_data 
*hook, u64 time,
if (flags & SCHED_CPUFREQ_RT_DL) {
next_f = policy->cpuinfo.max_freq;
} else {
-   sugov_get_util(, );
+   sugov_get_util(, , hook->cpu);
sugov_iowait_boost(sg_cpu, , );
next_f = get_next_freq(sg_policy, util, max);
/*
@@ -287,10 +291,15 @@ static void sugov_update_shared(struct update_util_data 
*hook, u64 time,
 {
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, 
update_util);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
+   struct cpufreq_policy *policy = sg_policy->policy;
unsigned long util, max;
unsigned int next_f;
 
-   sugov_get_util(, );
+   /* Allow remote callbacks only on the CPUs sharing cpufreq policy */
+   if (!cpumask_test_cpu(smp_processor_id(), policy->cpus))
+   return;
+
+   sugov_get_util(, , hook->cpu);
 
raw_spin_lock(_policy->update_lock);
 
-- 
2.13.0.71.gd7076ec9c9cb



[PATCH] usb: dwc3: pci: constify dev_pm_ops structures.

2017-06-28 Thread Arvind Yadav
dev_pm_ops are not supposed to change at runtime. All functions
provided by the driver core that work with dev_pm_ops take them as
const. So mark the non-const structs as const.

File size before:
   text    data     bss     dec     hex filename
   2707     456       0    3163     c5b drivers/usb/dwc3/dwc3-pci.o

File size after adding 'const':
   text    data     bss     dec     hex filename
   2899     264       0    3163     c5b drivers/usb/dwc3/dwc3-pci.o

Signed-off-by: Arvind Yadav 
---
 drivers/usb/dwc3/dwc3-pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index a15ec71..9b0fb05 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -342,7 +342,7 @@ static int dwc3_pci_resume(struct device *dev)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-static struct dev_pm_ops dwc3_pci_dev_pm_ops = {
+static const struct dev_pm_ops dwc3_pci_dev_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(dwc3_pci_suspend, dwc3_pci_resume)
SET_RUNTIME_PM_OPS(dwc3_pci_runtime_suspend, dwc3_pci_runtime_resume,
NULL)
-- 
1.9.1



[PATCH V2 3/4] intel_pstate: Ignore scheduler cpufreq callbacks on remote CPUs

2017-06-28 Thread Viresh Kumar
From: Steve Muckle 

In preparation for scheduler cpufreq callbacks happening on remote
CPUs, check for this case in intel_pstate, which currently requires the
callback to run on the local CPU. Such callbacks are ignored for now.

Signed-off-by: Steve Muckle 
Signed-off-by: Viresh Kumar 
---
 drivers/cpufreq/intel_pstate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 4ce501148790..7a2a8ee579ef 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1755,6 +1755,9 @@ static void intel_pstate_update_util(struct 
update_util_data *data, u64 time,
struct cpudata *cpu = container_of(data, struct cpudata, update_util);
u64 delta_ns;
 
+   if (smp_processor_id() != data->cpu)
+   return;
+
if (flags & SCHED_CPUFREQ_IOWAIT) {
cpu->iowait_boost = int_tofp(1);
} else if (cpu->iowait_boost) {
-- 
2.13.0.71.gd7076ec9c9cb



Re: [PATCH] thermal/intel_powerclamp: add const to thermal_cooling_device_ops structure

2017-06-28 Thread Zhang Rui
On Thu, 2017-06-29 at 10:41 +0530, Bhumika Goyal wrote:
> On Thu, Jun 29, 2017 at 8:30 AM, Zhang Rui 
> wrote:
> > 
> > On Wed, 2017-06-21 at 12:39 +0530, Bhumika Goyal wrote:
> > > 
> > > Declare thermal_cooling_device_ops structure as const as it is
> > > only
> > > passed
> > > as an argument to the function thermal_cooling_device_register
> > > and
> > > this
> > > argument is of type const. So, declare the structure as const.
> > > 
> > I checked the source and the code is written in this way as well in
> > some other drivers, why not fix them altogether?
> > 
> Hey,
> 
> I have already sent the patches for the drivers requiring this
> change.
> Some of them got applied as well.

I mean the other drivers that use thermal_cooling_device_ops.

Surely I can apply this patch, but still it would be nice if you could
check the other places that define thermal_cooling_device_ops and fix
all of them in one go. :)

thanks,
rui
> 
> Thanks,
> Bhumika
> 
> > 
> > thanks,
> > rui
> > > 
> > > Signed-off-by: Bhumika Goyal 
> > > ---
> > >  drivers/thermal/intel_powerclamp.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/thermal/intel_powerclamp.c
> > > b/drivers/thermal/intel_powerclamp.c
> > > index d718cd1..e4c68b7 100644
> > > --- a/drivers/thermal/intel_powerclamp.c
> > > +++ b/drivers/thermal/intel_powerclamp.c
> > > @@ -659,7 +659,7 @@ static int powerclamp_set_cur_state(struct
> > > thermal_cooling_device *cdev,
> > >  }
> > > 
> > >  /* bind to generic thermal layer as cooling device*/
> > > -static struct thermal_cooling_device_ops powerclamp_cooling_ops
> > > = {
> > > +static const struct thermal_cooling_device_ops
> > > powerclamp_cooling_ops = {
> > >   .get_max_state = powerclamp_get_max_state,
> > >   .get_cur_state = powerclamp_get_cur_state,
> > >   .set_cur_state = powerclamp_set_cur_state,


linux-next: manual merge of the kspp tree with the file-locks tree

2017-06-28 Thread Stephen Rothwell
Hi Kees,

Today's linux-next merge of the kspp tree got a conflict in:

  include/linux/fs.h

between commit:

  1844a66c1c89 ("fs: new infrastructure for writeback error handling and 
reporting")

from the file-locks tree and commit:

  3abc2b3fcf5c ("randstruct: Mark various structs for randomization")

from the kspp tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc include/linux/fs.h
index 000cf03ec441,8f28143486c4..
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -293,8 -275,7 +293,8 @@@ struct kiocb 
void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
void*private;
int ki_flags;
 +  enum rw_hintki_hint;
- };
+ } __randomize_layout;
  
  static inline bool is_sync_kiocb(struct kiocb *kiocb)
  {
@@@ -401,8 -392,7 +401,8 @@@ struct address_space 
gfp_t   gfp_mask;   /* implicit gfp mask for 
allocations */
struct list_headprivate_list;   /* ditto */
void*private_data;  /* ditto */
 +  errseq_twb_err;
- } __attribute__((aligned(sizeof(long;
+ } __attribute__((aligned(sizeof(long __randomize_layout;
/*
 * On most architectures that alignment is already the case; but
 * must be enforced here for CRIS, to let the least significant bit
@@@ -880,9 -868,8 +880,10 @@@ struct file 
struct list_headf_tfile_llink;
  #endif /* #ifdef CONFIG_EPOLL */
struct address_space*f_mapping;
 +  errseq_tf_wb_err; /* data writeback error tracking */
 +  errseq_tf_md_wb_err; /* metadata wb error tracking */
- } __attribute__((aligned(4)));/* lest something weird decides that 2 
is OK */
+ } __randomize_layout
+   __attribute__((aligned(4)));/* lest something weird decides that 2 
is OK */
  
  struct file_handle {
__u32 handle_bytes;


Re: [PATCH] ib_isert: prevent NULL pointer dereference in isert_login_recv_done()

2017-06-28 Thread Sagi Grimberg



Just tested this patch, I wasn't able to reproduce the NULL pointer
dereference or any other bugs, so this fix seems safe enough to me.

Tested-by: Andrea Righi 


Can you test just the one liner fix below?


@@ -1452,7 +1452,7 @@
 isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct isert_conn *isert_conn = wc->qp->qp_context;
-	struct ib_device *ib_dev = isert_conn->cm_id->device;
+	struct ib_device *ib_dev = isert_conn->device->ib_device;
 
 	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		isert_print_wc(wc, "login recv");


linux-next: build warning after merge of the rtc tree

2017-06-28 Thread Stephen Rothwell
Hi Alexandre,

After merging the rtc tree, today's linux-next build (arm
multi_v7_defconfig) produced this warning:

drivers/rtc/rtc-brcmstb-waketimer.c: In function 'brcmstb_waketmr_settime':
drivers/rtc/rtc-brcmstb-waketimer.c:142:6: warning: unused variable 'ret' 
[-Wunused-variable]
  int ret;
  ^

Introduced by commit

  9f4ad359c801 ("rtc: brcmstb-waketimer: Add Broadcom STB wake-timer")

-- 
Cheers,
Stephen Rothwell


[PATCH] net: ibm: ibmveth: constify dev_pm_ops structures.

2017-06-28 Thread Arvind Yadav
dev_pm_ops are not supposed to change at runtime. All functions
provided by the driver core that work with dev_pm_ops take them as
const. So mark the non-const structs as const.

File size before:
   text    data     bss     dec     hex filename
  15426    1256       0   16682    412a drivers/net/ethernet/ibm/ibmveth.o

File size after adding 'const':
   text    data     bss     dec     hex filename
  15618    1064       0   16682    412a drivers/net/ethernet/ibm/ibmveth.o

Signed-off-by: Arvind Yadav 
---
 drivers/net/ethernet/ibm/ibmveth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c 
b/drivers/net/ethernet/ibm/ibmveth.c
index 72ab7b6..02b26bf 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1843,7 +1843,7 @@ static int ibmveth_resume(struct device *dev)
 };
 MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
 
-static struct dev_pm_ops ibmveth_pm_ops = {
+static const struct dev_pm_ops ibmveth_pm_ops = {
.resume = ibmveth_resume
 };
 
-- 
1.9.1



Re: [PATCH v6 0/3] Add new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag

2017-06-28 Thread Ding Tianhong
ping

On 2017/6/22 20:15, Ding Tianhong wrote:
> Some devices have problems with Transaction Layer Packets with the Relaxed
> Ordering Attribute set.  This patch set adds a new PCIe Device Flag,
> PCI_DEV_FLAGS_NO_RELAXED_ORDERING, a set of PCI Quirks to catch some known
> devices with Relaxed Ordering issues, and a use of this new flag by the
> cxgb4 driver to avoid using Relaxed Ordering with problematic Root Complex
> Ports.
> 
> It's been years since I've submitted kernel.org patches, so I apologise
> for the almost certain submission errors.
> 
> v2: Alexander pointed out that v1 was only part of the whole solution:
> a platform with known issues could use the new flag to indicate that it
> is not safe to enable the relaxed ordering attribute, and then we need
> to clear the relaxed ordering enable bit in the PCI configuration space
> when initializing the device. So add a new second patch to modify the
> PCI initialization code to clear the relaxed ordering enable bit in the
> event that the root complex doesn't want relaxed ordering enabled.
> 
> The third patch is based on v1's second patch and is only changed to
> query the relaxed ordering enable bit in the PCI configuration space,
> to allow the Chelsio NIC to send TLPs with the relaxed ordering
> attribute set.
> 
> This version doesn't try to drop the defines for the Intel drivers in
> favour of the new way of checking whether to enable relaxed ordering,
> because that is not the hardest part at the moment; we can fix it in a
> later patchset once these patches reach their goal.
> 
> v3: Redesigned the logic of pci_configure_relaxed_ordering() at
> configuration time. If a PCIe device doesn't have the relaxed ordering
> attribute enabled by default, we should not touch the PCIe
> configuration at all; otherwise we should check whether any of the
> devices above us is marked as not supporting relaxed ordering via the
> PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag, and if the result indicates
> that relaxed ordering is not supported we should update our device to
> disable relaxed ordering in configuration space. If the device above us
> doesn't exist or isn't a PCIe device, we shouldn't do anything and skip
> updating relaxed ordering, because we are probably running in a guest.
> 
> v4: Renamed the functions pcie_get_relaxed_ordering and
> pcie_disable_relaxed_ordering according to John's suggestion, modified
> the description, and used true/false as the return value.
> 
> We shouldn't enable the relaxed ordering attribute for a PCIe device
> based on a setting in the root complex configuration space, so fix
> that for cxgb4.
> 
> Fix some format issues.
> 
> v5: Removed the unnecessary code from functions which only return a
> bool value, and added a check for VF devices.
> 
> Made this patch set based on 4.12-rc5.
> 
> v6: Fixed the logic error in deciding whether cxgb4 needs to enable
> the relaxed ordering attribute.
>  
> Casey Leedom (2):
>   PCI: Add new PCIe Fabric End Node flag,
> PCI_DEV_FLAGS_NO_RELAXED_ORDERING
>   net/cxgb4: Use new PCI_DEV_FLAGS_NO_RELAXED_ORDERING flag
> 
> Ding Tianhong (1):
>   PCI: Enable PCIe Relaxed Ordering if supported
> 
>  drivers/net/ethernet/chelsio/cxgb4/cxgb4.h  |  1 +
>  drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 17 ++
>  drivers/net/ethernet/chelsio/cxgb4/sge.c|  5 +--
>  drivers/pci/pci.c   | 32 +++
>  drivers/pci/probe.c | 41 
> +
>  drivers/pci/quirks.c| 38 +++
>  include/linux/pci.h |  4 +++
>  7 files changed, 136 insertions(+), 2 deletions(-)
> 
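
For reference, a rough sketch of the probe-time logic the cover letter
describes (illustration only: the upstream-walk helper name is invented
here, and this is not the code from the series itself):

static bool pci_upstream_blocks_relaxed_ordering(struct pci_dev *dev)
{
	/*
	 * Walk towards the root complex: a flagged device anywhere
	 * above us means Relaxed Ordering must not be used below it.
	 */
	for (dev = pci_upstream_bridge(dev); dev;
	     dev = pci_upstream_bridge(dev))
		if (dev->dev_flags & PCI_DEV_FLAGS_NO_RELAXED_ORDERING)
			return true;

	return false;
}

static void pci_configure_relaxed_ordering(struct pci_dev *dev)
{
	u16 ctl;

	if (pcie_capability_read_word(dev, PCI_EXP_DEVCTL, &ctl))
		return;

	/* Only touch devices that have Relaxed Ordering enabled. */
	if (!(ctl & PCI_EXP_DEVCTL_RELAX_EN))
		return;

	if (pci_upstream_blocks_relaxed_ordering(dev))
		pcie_capability_clear_word(dev, PCI_EXP_DEVCTL,
					   PCI_EXP_DEVCTL_RELAX_EN);
}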



Re: [PATCH v1] xen/input: add multi-touch support

2017-06-28 Thread Oleksandr Andrushchenko

ping

On 06/23/2017 09:09 AM, Oleksandr Andrushchenko wrote:

From: Oleksandr Andrushchenko 

Extend xen_kbdfront to provide multi-touch support
to unprivileged domains.

Signed-off-by: Oleksandr Andrushchenko 

---
Changes since initial:
  - use input_set_capability instead of setting flags directly
  - input_mt_init_slots: give userspace a better chance of figuring
    out how to handle the device: use INPUT_MT_DIRECT,
    drop INPUT_MT_DROP_UNUSED
  - add error handling for input_mt_init_slots
  - remove module parameters
  - remove odd unlikely
---
  drivers/input/misc/xen-kbdfront.c | 135 +-
  1 file changed, 133 insertions(+), 2 deletions(-)

diff --git a/drivers/input/misc/xen-kbdfront.c 
b/drivers/input/misc/xen-kbdfront.c
index eb770613a9bd..9fa005038773 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -17,6 +17,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  
  #include 

@@ -34,11 +35,14 @@
  struct xenkbd_info {
struct input_dev *kbd;
struct input_dev *ptr;
+   struct input_dev *mtouch;
struct xenkbd_page *page;
int gref;
int irq;
struct xenbus_device *xbdev;
char phys[32];
+   /* current MT slot/contact ID we are injecting events in */
+   int mtouch_cur_contact_id;
  };
  
  enum { KPARAM_X, KPARAM_Y, KPARAM_CNT };

@@ -100,6 +104,60 @@ static irqreturn_t input_handler(int rq, void *dev_id)
input_report_rel(dev, REL_WHEEL,
 -event->pos.rel_z);
break;
+   case XENKBD_TYPE_MTOUCH:
+   dev = info->mtouch;
+   if (unlikely(!dev))
+   break;
+   if (event->mtouch.contact_id !=
+   info->mtouch_cur_contact_id) {
+   info->mtouch_cur_contact_id =
+   event->mtouch.contact_id;
+   input_mt_slot(dev, event->mtouch.contact_id);
+   }
+   switch (event->mtouch.event_type) {
+   case XENKBD_MT_EV_DOWN:
+   input_mt_report_slot_state(dev, MT_TOOL_FINGER,
+  true);
+   input_event(dev, EV_ABS, ABS_MT_POSITION_X,
+   event->mtouch.u.pos.abs_x);
+   input_event(dev, EV_ABS, ABS_MT_POSITION_Y,
+   event->mtouch.u.pos.abs_y);
+   input_event(dev, EV_ABS, ABS_X,
+   event->mtouch.u.pos.abs_x);
+   input_event(dev, EV_ABS, ABS_Y,
+   event->mtouch.u.pos.abs_y);
+   break;
+   case XENKBD_MT_EV_UP:
+   input_mt_report_slot_state(dev, MT_TOOL_FINGER,
+  false);
+   break;
+   case XENKBD_MT_EV_MOTION:
+   input_event(dev, EV_ABS, ABS_MT_POSITION_X,
+   event->mtouch.u.pos.abs_x);
+   input_event(dev, EV_ABS, ABS_MT_POSITION_Y,
+   event->mtouch.u.pos.abs_y);
+   input_event(dev, EV_ABS, ABS_X,
+   event->mtouch.u.pos.abs_x);
+   input_event(dev, EV_ABS, ABS_Y,
+   event->mtouch.u.pos.abs_y);
+   break;
+   case XENKBD_MT_EV_SYN:
+   input_mt_sync_frame(dev);
+   break;
+   case XENKBD_MT_EV_SHAPE:
+   input_event(dev, EV_ABS, ABS_MT_TOUCH_MAJOR,
+   event->mtouch.u.shape.major);
+   input_event(dev, EV_ABS, ABS_MT_TOUCH_MINOR,
+   event->mtouch.u.shape.minor);
+   break;
+   case XENKBD_MT_EV_ORIENT:
+   input_event(dev, EV_ABS, ABS_MT_ORIENTATION,
+   event->mtouch.u.orientation);
+   break;
+   }
+   /* only report syn when requested */
+   if (event->mtouch.event_type != XENKBD_MT_EV_SYN)
+   dev = NULL;
}
if (dev)
input_sync(dev);
@@ -115,9 

[PATCH] net: smc91x: constify dev_pm_ops structures.

2017-06-28 Thread Arvind Yadav
dev_pm_ops are not supposed to change at runtime. All functions
provided by the driver core that work with dev_pm_ops take them as
const. So mark the non-const structs as const.

File size before:
   text    data     bss     dec     hex filename
  18709     401       0   19110    4aa6 drivers/net/ethernet/smsc/smc91x.o

File size after adding 'const':
   text    data     bss     dec     hex filename
  18901     201       0   19102    4a9e drivers/net/ethernet/smsc/smc91x.o

Signed-off-by: Arvind Yadav 
---
 drivers/net/ethernet/smsc/smc91x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/smsc/smc91x.c 
b/drivers/net/ethernet/smsc/smc91x.c
index 91e9bd7..83cf052 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -2488,7 +2488,7 @@ static int smc_drv_resume(struct device *dev)
return 0;
 }
 
-static struct dev_pm_ops smc_drv_pm_ops = {
+static const struct dev_pm_ops smc_drv_pm_ops = {
.suspend= smc_drv_suspend,
.resume = smc_drv_resume,
 };
-- 
1.9.1



Re: [PATCH] thermal/intel_powerclamp: add const to thermal_cooling_device_ops structure

2017-06-28 Thread Bhumika Goyal
On Thu, Jun 29, 2017 at 11:02 AM, Zhang Rui  wrote:
> On Thu, 2017-06-29 at 10:41 +0530, Bhumika Goyal wrote:
>> On Thu, Jun 29, 2017 at 8:30 AM, Zhang Rui 
>> wrote:
>> >
>> > On Wed, 2017-06-21 at 12:39 +0530, Bhumika Goyal wrote:
>> > >
>> > > Declare thermal_cooling_device_ops structure as const as it is
>> > > only
>> > > passed
>> > > as an argument to the function thermal_cooling_device_register
>> > > and
>> > > this
>> > > argument is of type const. So, declare the structure as const.
>> > >
>> > I checked the source and the code is written in this way as well in
>> > some other drivers, why not fix them altogether?
>> >
>> Hey,
>>
>> I have already sent the patches for the drivers requiring this
>> change.
>> Some of them got applied as well.
>
> I mean the other drivers that use thermal_cooling_device_ops.
>

Actually, I also meant the same but maybe I wasn't clear enough :)

> Surely I can apply this patch, but still it would be nice if you could
> check the other places that define thermal_cooling_device_ops and fix
> all of them in one go. :)
>

I checked again and there remains only one file in power/supply where
thermal_cooling_device_ops can be made const. I will submit a patch
for it but I think I will have to create a separate patch because the
maintainers are different.
All other places are either already const or I have submitted patches
for them or cannot be made const.

Thanks,
Bhumika

> thanks,
> rui
>>
>> Thanks,
>> Bhumika
>>
>> >
>> > thanks,
>> > rui
>> > >
>> > > Signed-off-by: Bhumika Goyal 
>> > > ---
>> > >  drivers/thermal/intel_powerclamp.c | 2 +-
>> > >  1 file changed, 1 insertion(+), 1 deletion(-)
>> > >
>> > > diff --git a/drivers/thermal/intel_powerclamp.c
>> > > b/drivers/thermal/intel_powerclamp.c
>> > > index d718cd1..e4c68b7 100644
>> > > --- a/drivers/thermal/intel_powerclamp.c
>> > > +++ b/drivers/thermal/intel_powerclamp.c
>> > > @@ -659,7 +659,7 @@ static int powerclamp_set_cur_state(struct
>> > > thermal_cooling_device *cdev,
>> > >  }
>> > >
>> > >  /* bind to generic thermal layer as cooling device*/
>> > > -static struct thermal_cooling_device_ops powerclamp_cooling_ops
>> > > = {
>> > > +static const struct thermal_cooling_device_ops
>> > > powerclamp_cooling_ops = {
>> > >   .get_max_state = powerclamp_get_max_state,
>> > >   .get_cur_state = powerclamp_get_cur_state,
>> > >   .set_cur_state = powerclamp_set_cur_state,


[PATCH] net: freescale: gianfar : constify dev_pm_ops structures.

2017-06-28 Thread Arvind Yadav
dev_pm_ops are not supposed to change at runtime. All functions
provided by the driver core that work with dev_pm_ops take them as
const. So mark the non-const structs as const.

File size before:
   text    data     bss     dec     hex filename
  19057     392       0   19449    4bf9 drivers/net/ethernet/freescale/gianfar.o

File size after adding 'const':
   text    data     bss     dec     hex filename
  19249     192       0   19441    4bf1 drivers/net/ethernet/freescale/gianfar.o

Signed-off-by: Arvind Yadav 
---
 drivers/net/ethernet/freescale/gianfar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c 
b/drivers/net/ethernet/freescale/gianfar.c
index 0ff166e..e3b0501 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1718,7 +1718,7 @@ static int gfar_restore(struct device *dev)
return 0;
 }
 
-static struct dev_pm_ops gfar_pm_ops = {
+static const struct dev_pm_ops gfar_pm_ops = {
.suspend = gfar_suspend,
.resume = gfar_resume,
.freeze = gfar_suspend,
-- 
1.9.1



Re: [PATCH] futex: avoid undefined behaviour when shift exponent is negative

2017-06-28 Thread zhong jiang
On 2017/6/29 12:29, h...@zytor.com wrote:
> On June 28, 2017 7:12:04 PM PDT, zhong jiang  wrote:
>> On 2017/6/29 5:43, h...@zytor.com wrote:
>>> On June 27, 2017 9:35:10 PM PDT, zhong jiang 
>> wrote:
 Hi,  Ingo

 Thank you for the comment.
 On 2017/6/22 0:40, Ingo Molnar wrote:
> * zhong jiang  wrote:
>
>> when the shift exponent is negative, the left shift is always zero.
>> Therefore, we modify the logic to avoid the warning.
>>
>> Signed-off-by: zhong jiang 
>> ---
>>  arch/x86/include/asm/futex.h | 8 ++--
>>  1 file changed, 6 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/futex.h
 b/arch/x86/include/asm/futex.h
>> index b4c1f54..2425fca 100644
>> --- a/arch/x86/include/asm/futex.h
>> +++ b/arch/x86/include/asm/futex.h
>> @@ -49,8 +49,12 @@ static inline int futex_atomic_op_inuser(int
 encoded_op, u32 __user *uaddr)
>>  int cmparg = (encoded_op << 20) >> 20;
>>  int oldval = 0, ret, tem;
>>  
>> -if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
>> -oparg = 1 << oparg;
>> +if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
>> +if (oparg >= 0)
>> +oparg = 1 << oparg;
>> +else
>> +oparg = 0;
>> +}
> Could we avoid all these complications by using an unsigned type?
 I don't think that is feasible: a negative shift exponent can
 legitimately occur and is reasonable. As in the case above, a negative
 oparg is common.

 I think it can be avoided by the following change.

 diff --git a/arch/x86/include/asm/futex.h
 b/arch/x86/include/asm/futex.h
 index b4c1f54..3205e86 100644
 --- a/arch/x86/include/asm/futex.h
 +++ b/arch/x86/include/asm/futex.h
 @@ -50,7 +50,7 @@ static inline int futex_atomic_op_inuser(int
 encoded_op, u32 __user *uaddr)
int oldval = 0, ret, tem;

if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 -   oparg = 1 << oparg;
 +   oparg = safe_shift(1, oparg);

if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
 diff --git a/drivers/video/fbdev/core/fbmem.c
 b/drivers/video/fbdev/core/fbmem.c
 index 069fe79..b4edda3 100644
 --- a/drivers/video/fbdev/core/fbmem.c
 +++ b/drivers/video/fbdev/core/fbmem.c
 @@ -190,11 +190,6 @@ char* fb_get_buffer_offset(struct fb_info
>> *info,
 struct fb_pixmap *buf, u32 size

 #ifdef CONFIG_LOGO

 -static inline unsigned safe_shift(unsigned d, int n)
 -{
 -   return n < 0 ? d >> -n : d << n;
 -}
 -
 static void fb_set_logocmap(struct fb_info *info,
   const struct linux_logo *logo)
 {
 diff --git a/include/linux/kernel.h b/include/linux/kernel.h
 index d043ada..f3b8856 100644
 --- a/include/linux/kernel.h
 +++ b/include/linux/kernel.h
 @@ -841,6 +841,10 @@ static inline void ftrace_dump(enum
 ftrace_dump_mode oops_dump_mode) { }
  */
 #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)

 +static inline unsigned safe_shift(unsigned d, int n)
 +{
 +   return n < 0 ? d >> -n : d << n;
 +}

 Thanks
 zhongjiang

> Thanks,
>
>   Ingo
>
> .
>
>>> What makes it reasonable?  It is totally ill-defined and doesn't do
>> anything useful now?
>> Thank you for the comments.
>>
>> Maybe I misunderstood the meaning. I tested the negative cases on x86; all
>> cases are zero, so I came to that conclusion.
>>
>> zj.c:15:8: warning: left shift count is negative
>> [-Wshift-count-negative]
>>  j = 1 << -2048;
>>^
>> [root@localhost zhongjiang]# ./zj
>> j = 0
>> j.c:15:8: warning: left shift count is negative
>> [-Wshift-count-negative]
>>  j = 1 << -2047;
>>^
>> [root@localhost zhongjiang]# ./zj
>> j = 0
>>
>> I loaded (insmod) a module into the kernel to test the test cases; all of the
>> results are zero.
>>
>> I wonder whether I am missing some point. Could you point it out to me,
>> please?
>>
>> Thanks
>> zhongjiang
>>
>>
> When you use compile-time constants, the compiler generates the value at 
> compile time, which can be totally different.
 Yes, I tested that. Thanks.
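
To make the point above concrete, a small sketch (illustrative only; both
shifts are undefined behaviour, and the outputs are simply what current
gcc/x86 happen to produce): the constant shift is folded by the compiler,
while the run-time shift goes through the x86 SHL instruction, which masks
the count to 5 bits.

#include <stdio.h>

int main(void)
{
	volatile int n = -2048;		/* forces a run-time shift */
	int folded  = 1 << -2048;	/* constant: gcc warns and folds a value (0 in the tests quoted above) */
	int runtime = 1 << n;		/* run-time: x86 SHL masks the count, -2048 & 31 == 0, so 1 */

	printf("folded = %d, runtime = %d\n", folded, runtime);
	return 0;
}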

 Thanks
 zhongjiang



Re: [PATCH] lib/zstd: use div_u64() to let it build on 32-bit

2017-06-28 Thread Adam Borowski
On Tue, Jun 27, 2017 at 05:27:51AM +, Nick Terrell wrote:
> Adam, I’ve applied the same patch in my tree. I’ll send out the update [1]
> once it's reviewed, since I also reduced the stack usage of functions
> using over 1 KB of stack space.
> 
> I have userland tests set up mocking the linux kernel headers, and tested
> 32-bit mode there, but neglected to test the kernel on a 32-bit VM, which
> I’ve now corrected. Thanks for testing the patch on your ARM machine!

Is there a version I should be testing?

I got a bunch of those:
[10170.448783] kworker/u8:6: page allocation stalls for 60720ms, order:0, 
mode:0x14000c2(GFP_KERNEL|__GFP_HIGHMEM), nodemask=(null)
[10170.448819] kworker/u8:6 cpuset=/ mems_allowed=0
[10170.448842] CPU: 3 PID: 13430 Comm: kworker/u8:6 Not tainted 
4.12.0-rc7-00034-gdff47ed160bb #1
[10170.448846] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
[10170.448872] Workqueue: btrfs-endio btrfs_endio_helper
[10170.448910] [] (unwind_backtrace) from [] 
(show_stack+0x10/0x14)
[10170.448925] [] (show_stack) from [] 
(dump_stack+0x78/0x8c)
[10170.448942] [] (dump_stack) from [] 
(warn_alloc+0xc0/0x170)
[10170.448952] [] (warn_alloc) from [] 
(__alloc_pages_nodemask+0x97c/0xe30)
[10170.448964] [] (__alloc_pages_nodemask) from [] 
(__vmalloc_node_range+0x144/0x27c)
[10170.448976] [] (__vmalloc_node_range) from [] 
(__vmalloc_node.constprop.10+0x48/0x50)
[10170.448982] [] (__vmalloc_node.constprop.10) from [] 
(vmalloc+0x2c/0x34)
[10170.448990] [] (vmalloc) from [] 
(zstd_alloc_workspace+0x6c/0xb8)
[10170.448997] [] (zstd_alloc_workspace) from [] 
(find_workspace+0x120/0x1f4)
[10170.449002] [] (find_workspace) from [] 
(end_compressed_bio_read+0x1d4/0x3b0)
[10170.449016] [] (end_compressed_bio_read) from [] 
(process_one_work+0x1d8/0x3f0)
[10170.449026] [] (process_one_work) from [] 
(worker_thread+0x38/0x558)
[10170.449035] [] (worker_thread) from [] 
(kthread+0x124/0x154)
[10170.449042] [] (kthread) from [] 
(ret_from_fork+0x14/0x3c)

which never happened with compress=lzo, and a 2GB RAM machine that runs 4
threads of various builds runs into memory pressure quite often.  On the
other hand, I used 4.11 for lzo so this needs more testing before I can
blame the zstd code.

Also, I had network problems all day today so the machine was mostly idle
instead of doing further tests -- not quite going to pull sources to build
over a phone connection.

I'm on linus:4.12-rc7 with only a handful of btrfs patches (v3 of Qu's chunk
check, some misc crap) -- I guess I should use at least btrfs-for-4.13.  Or
would you prefer full-blown next?


Meow!
-- 
⢀⣴⠾⠻⢶⣦⠀ 
⣾⠁⢠⠒⠀⣿⡁ A dumb species has no way to open a tuna can.
⢿⡄⠘⠷⠚⠋⠀ A smart species invents a can opener.
⠈⠳⣄ A master species delegates.


Re: [PATCH] futex: avoid undefined behaviour when shift exponent is negative

2017-06-28 Thread zhong jiang
Hi, Thomas

Thank you for clarification.
On 2017/6/29 6:13, Thomas Gleixner wrote:
> On Wed, 28 Jun 2017, zhong jiang wrote:
>> On 2017/6/22 0:40, Ingo Molnar wrote:
>>> * zhong jiang  wrote:
>>>
 when the shift exponent is negative, a left shift always yields zero; therefore,
 we modify the logic to avoid the warning.

 Signed-off-by: zhong jiang 
 ---
  arch/x86/include/asm/futex.h | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)

 diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
 index b4c1f54..2425fca 100644
 --- a/arch/x86/include/asm/futex.h
 +++ b/arch/x86/include/asm/futex.h
 @@ -49,8 +49,12 @@ static inline int futex_atomic_op_inuser(int 
 encoded_op, u32 __user *uaddr)
int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret, tem;
  
 -  if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 -  oparg = 1 << oparg;
 +  if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
 +  if (oparg >= 0)
 +  oparg = 1 << oparg;
 +  else
 +  oparg = 0;
 +  }
>>> Could we avoid all these complications by using an unsigned type?
>>   I think it is not feasible. A negative shift exponent is likely to
>>   occur and is not unreasonable.
> What is reasonable about a negative shift value?
>
>> as in the case above, a negative oparg is common.
> That's simply wrong. If oparg is negative and the SHIFT bit is set then the
> result is undefined today and there is no way that this can be used at
> all.
>
> On x86:
>
>1 << -1= 0x8000
>1 << -2048 = 0x0001
>1 << -2047 = 0x0002
  But I tested the cases on x86_64 and all results are zero. I wonder whether
  it is related to gcc or not.

  zj.c:15:8: warning: left shift count is negative [-Wshift-count-negative]
  j = 1 << -2048;
^
[root@localhost zhongjiang]# ./zj
j = 0

 Thanks
 zhongjiang
> Anything using a shift value < 0 or > 31 will get crap as a
> result. Rightfully so because it's just undefined.
>
> Yes I know that the insanity of user space is unlimited, but anything
> attempting this is so broken that we cannot break it further by making that
> shift arg unsigned and actually limit it to 0-31
> Thanks,
>
>   tglx
>
>
>
> .
>
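
A minimal sketch of the direction suggested here -- treating the shift amount
as unsigned and masking it into the defined 0-31 range -- purely illustrative,
not the actual futex fix:

#include <stdio.h>

/* Illustrative only: with an unsigned shift amount masked to 0-31,
 * the shift count is always well defined. */
static unsigned int shifted_oparg(unsigned int oparg)
{
	return 1U << (oparg & 31);
}

int main(void)
{
	printf("%u\n", shifted_oparg(3));			/* 8 */
	printf("%u\n", shifted_oparg((unsigned int)-2048));	/* -2048 & 31 == 0 -> 1 */
	return 0;
}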




Re: [PATCH] char: ipmi: constify bmc_dev_attr_group and bmc_device_type

2017-06-28 Thread Corey Minyard

Applied, thanks.

-corey

On 06/23/2017 12:13 AM, Arvind Yadav wrote:

File size before:
   text    data   bss     dec    hex  filename
  25678    1024    92   26794   68aa  drivers/char/ipmi/ipmi_msghandler.o

File size After adding 'const':
   text    data   bss     dec    hex  filename
  25806     896    92   26794   68aa  drivers/char/ipmi/ipmi_msghandler.o

Signed-off-by: Arvind Yadav 
---
  drivers/char/ipmi/ipmi_msghandler.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/char/ipmi/ipmi_msghandler.c 
b/drivers/char/ipmi/ipmi_msghandler.c
index 9f69995..d60c05c 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -2397,7 +2397,7 @@ static umode_t bmc_dev_attr_is_visible(struct kobject 
*kobj,
return mode;
  }
  
-static struct attribute_group bmc_dev_attr_group = {

+static const struct attribute_group bmc_dev_attr_group = {
.attrs  = bmc_dev_attrs,
.is_visible = bmc_dev_attr_is_visible,
  };
@@ -2407,7 +2407,7 @@ static umode_t bmc_dev_attr_is_visible(struct kobject 
*kobj,
NULL
  };
  
-static struct device_type bmc_device_type = {

+static const struct device_type bmc_device_type = {
.groups = bmc_dev_attr_groups,
  };
  





Re: [PATCH] fs: ext4: inode->i_generation not assigned 0.

2017-06-28 Thread Andreas Dilger
On Jun 28, 2017, at 4:06 PM, Kyungchan Koh  wrote:
> 
> In fs/ext4/super.c, the function ext4_nfs_get_inode takes as input
> "generation" that can be used to specify the generation of the inode to
> be returned. When 0 is given as input, then inodes of any generation can
> be returned. Therefore, generation 0 is a special case that should be
> avoided when assigning generation to inodes.

I'd agree with this change to avoid assigning generation == 0 to real inodes.

Also, the separate question arises about whether we need to allow file handle
lookup with generation == 0?  That allows FID guessing easily, while requiring
a non-zero generation makes that a lot harder.

What are the cases where generation == 0 are used?

> A new inline function, ext4_inode_set_gen, will take care of the
> problem.  Now, inodes cannot have a generation of 0, so this patch fixes
> the issue.
> 
> Signed-off-by: Kyungchan Koh 
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 3219154..74c6677 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1549,6 +1549,14 @@ static inline int ext4_valid_inum(struct super_block 
> *sb, unsigned long ino)
>ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
> }
> 
> +static inline void ext4_inode_set_gen(struct inode *inode,
> +   struct ext4_sb_info *sbi)
> +{
> + inode->i_generation = sbi->s_next_generation++;
> + if (!inode->i_generation)

This should be marked "unlikely()" since it happens at most once every 4B
file creations (though likely even less since it is unlikely that so many
files will be created in a single mount).

> + inode->i_generation = sbi->s_next_generation++;
> +}
> +
> 
> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
> index 98ac2f1..d33f6f0 100644
> --- a/fs/ext4/ialloc.c
> +++ b/fs/ext4/ialloc.c
> @@ -1072,7 +1072,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct 
> inode   }
>   spin_lock(>s_next_gen_lock);
> - inode->i_generation = sbi->s_next_generation++;
> + ext4_inode_set_gen(inode, sbi);
>   spin_unlock(>s_next_gen_lock);
> 
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 0c21e22..d52a467 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -160,8 +160,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
> 
>   spin_lock(>s_next_gen_lock);
> - inode->i_generation = sbi->s_next_generation++;
> - inode_bl->i_generation = sbi->s_next_generation++;
> + ext4_inode_set_gen(inode, sbi);
> + ext4_inode_set_gen(inode_bl, sbi);
>   spin_unlock(>s_next_gen_lock);
> 
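
For concreteness, a sketch of the helper with the unlikely() annotation folded
in (illustrative only, not the patch as submitted; the caller is still expected
to hold s_next_gen_lock):

static inline void ext4_inode_set_gen(struct inode *inode,
				      struct ext4_sb_info *sbi)
{
	inode->i_generation = sbi->s_next_generation++;
	/* generation 0 means "any generation" to ext4_nfs_get_inode(), skip it */
	if (unlikely(!inode->i_generation))
		inode->i_generation = sbi->s_next_generation++;
}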


Cheers, Andreas









Re: [PATCH v7] drm/sun4i: hdmi: Implement I2C adapter for A10s DDC bus

2017-06-28 Thread Chen-Yu Tsai
Hi,

On Wed, Jun 28, 2017 at 6:52 PM, Jonathan Liu  wrote:
> The documentation for drm_do_get_edid in drivers/gpu/drm/drm_edid.c states:
> "As in the general case the DDC bus is accessible by the kernel at the I2C
> level, drivers must make all reasonable efforts to expose it as an I2C
> adapter and use drm_get_edid() instead of abusing this function."
>
> Exposing the DDC bus as an I2C adapter is more beneficial as it can be used
> for purposes other than reading the EDID such as modifying the EDID or
> using the HDMI DDC pins as an I2C bus through the I2C dev interface from
> userspace (e.g. i2c-tools).
>
> Implement this for A10s.
>
> Signed-off-by: Jonathan Liu 
> ---
> Changes for v7:
>  - Fix mixed declarations and code compiler warning for level variable
>
> Changes for v6:
>  - Use fixed byte time of 100 us instead of dynamically calculating from DDC
>clock that is set to a fixed 100 MHz rate anyway
>  - Change is_fifo_flag_unset to not read the status register as well to be
>more consistent with is_err_status
>
> Changes for v5:
>  - Use devm_kzalloc instead of devm_kmemdup and remove const struct 
> i2c_adapter
>  - Rework to use readl_poll_timeout for checking FIFO status
>
> Changes for v4:
>  - Carry over copyright from initial I2C code into sun4i_hdmi_i2c.c
>  - Clean up indentation in sun4i_hdmi.h
>  - Rename SUN4I_HDMI_DDC_MAX_TRANSFER_SIZE to SUN4I_HDMI_DDC_BYTE_COUNT_MAX
>and group it under the SUN4I_HDMI_DDC_BYTE_COUNT_REG define, changing the
>value to use the GENMASK macro to make it clear that it is derived from
>the width of the field in the register
>  - Fix SUN4I_HDMI_DDC_INT_STATUS_DDC_TX_FIFO_UNDERFLOW typo which should be
>SUN4I_HDMI_DDC_INT_STATUS_DDC_TX_FIFO_OVERFLOW
>  - Remove redundant rewriting of SUN4I_HDMI_DDC_INT_STATUS_REG register
>  - Change struct i2c_adapter to be const by using devm_kmemdup on creation
>  - Return -ETIMEDOUT instead of -EIO if there is timeout while transferring an
>I2C message
>  - Instead of waiting for 1-2 bytes to transfer, wait for the time it would
>take for remaining bytes to transfer (limited by FIFO size)
>  - Add additional comments
>
> Changes for v3:
>  - Explain why drm_do_get_edid should be used and why it's better to expose it
>as an I2C adapter in commit message
>  - Reorder bit defines in descending order for consistency
>  - Keep old unused macros instead of removing them
>  - The v2 algorithm split large transfers into 16 byte transfers but this may
>cause a large single write to be treated as multiple writes causing data
>corruption. The algorithm has been reworked to not split larger transfers
>and make more use of the FIFO to avoid this.
>  - Moved the creation of the DDC clock from sun4i_hdmi_enc.c to
>sun4i_hdmi_i2c.c
>  - Reformatted code
>  - Instead of masking bits that we don't want to check for errors, explicitly
>check the error bits
>  - Clear error bits at start of transfer in case of error from previous 
> transfer
>  - Poll for completion of FIFO clear after setting FIFO clear bit
>
> Changes for v2:
>  - Rebased against Maxime's sunxi-drm/for-next branch
>  - Fix up error paths in sun4i_hdmi_bind so that the I2C adapter is deleted if
>any of the calls after the I2C adapter is created fails
>  - Remove unnecessary includes in sun4i_hdmi_i2c.c
>
>  drivers/gpu/drm/sun4i/Makefile |   1 +
>  drivers/gpu/drm/sun4i/sun4i_hdmi.h |  23 
>  drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 101 ++
>  drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c | 234 
> +
>  4 files changed, 269 insertions(+), 90 deletions(-)
>  create mode 100644 drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c
>

[...]

> diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c 
> b/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c
> new file mode 100644
> index ..ce954ee25ae4
> --- /dev/null
> +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c
> @@ -0,0 +1,234 @@

[...]

> +static int fifo_transfer(struct sun4i_hdmi *hdmi, u8 *buf, int len, bool 
> read)
> +{
> +   /*
> +* 1 byte takes 9 clock cycles (8 bits + 1 ACK) = 90 us for 100 MHz
> +* clock. As clock rate is fixed, just round it up to 100 us.

This looks fishy. I2C busses are never that fast. Maybe kHz?

ChenYu

> +*/
> +   const unsigned long byte_time_ns = 100;
> +   u32 int_status;
> +   u32 fifo_status;
> +   /* Read needs empty flag unset, write needs full flag unset */
> +   u32 flag = read ? SUN4I_HDMI_DDC_FIFO_STATUS_EMPTY :
> + SUN4I_HDMI_DDC_FIFO_STATUS_FULL;
> +   int level;
> +   int ret;
> +
> +   /* Wait until error or FIFO ready */
> +   ret = readl_poll_timeout(hdmi->base + SUN4I_HDMI_DDC_INT_STATUS_REG,
> +int_status,
> +is_err_status(int_status) ||
> +

[PATCH v7 3/4] KVM: async_pf: Force a nested vmexit if the injected #PF is async_pf

2017-06-28 Thread Wanpeng Li
From: Wanpeng Li 

Add a nested_apf field to vcpu->arch.exception to identify an async page
fault, and construct the expected vm-exit information fields. Force a
nested VM exit from nested_vmx_check_exception() if the injected #PF is an
async page fault.

Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Signed-off-by: Wanpeng Li 
---
 arch/x86/include/asm/kvm_emulate.h |  1 +
 arch/x86/include/asm/kvm_host.h|  2 ++
 arch/x86/kvm/svm.c | 16 ++--
 arch/x86/kvm/vmx.c | 17 ++---
 arch/x86/kvm/x86.c |  9 -
 5 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_emulate.h 
b/arch/x86/include/asm/kvm_emulate.h
index 722d0e5..fde36f1 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -23,6 +23,7 @@ struct x86_exception {
u16 error_code;
bool nested_page_fault;
u64 address; /* cr2 or nested page fault gpa */
+   u8 async_page_fault;
 };
 
 /*
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e20d8a8..71aef4b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -545,6 +545,7 @@ struct kvm_vcpu_arch {
bool reinject;
u8 nr;
u32 error_code;
+   u8 nested_apf;
} exception;
 
struct kvm_queued_interrupt {
@@ -646,6 +647,7 @@ struct kvm_vcpu_arch {
u32 id;
bool send_user_only;
u32 host_apf_reason;
+   unsigned long nested_apf_token;
} apf;
 
/* OSVW MSRs (AMD only) */
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8f263bf..49cdb8e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2367,15 +2367,19 @@ static int nested_svm_check_exception(struct vcpu_svm 
*svm, unsigned nr,
if (!is_guest_mode(>vcpu))
return 0;
 
+   vmexit = nested_svm_intercept(svm);
+   if (vmexit != NESTED_EXIT_DONE)
+   return 0;
+
svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = error_code;
-   svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
-
-   vmexit = nested_svm_intercept(svm);
-   if (vmexit == NESTED_EXIT_DONE)
-   svm->nested.exit_required = true;
+   if (svm->vcpu.arch.exception.nested_apf)
+   svm->vmcb->control.exit_info_2 = 
svm->vcpu.arch.apf.nested_apf_token;
+   else
+   svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
 
+   svm->nested.exit_required = true;
return vmexit;
 }
 
@@ -2568,7 +2572,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
vmexit = NESTED_EXIT_DONE;
/* async page fault always cause vmexit */
else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
-svm->vcpu.arch.apf.host_apf_reason != 0)
+svm->vcpu.arch.exception.nested_apf != 0)
vmexit = NESTED_EXIT_DONE;
break;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d20f794..8724ea6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2418,13 +2418,24 @@ static void skip_emulated_instruction(struct kvm_vcpu 
*vcpu)
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
  */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
 {
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+   unsigned int nr = vcpu->arch.exception.nr;
 
-   if (!(vmcs12->exception_bitmap & (1u << nr)))
+   if (!((vmcs12->exception_bitmap & (1u << nr)) ||
+   (nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
return 0;
 
+   if (vcpu->arch.exception.nested_apf) {
+   vmcs_write32(VM_EXIT_INTR_ERROR_CODE, 
vcpu->arch.exception.error_code);
+   nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+   PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
+   INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
+   vcpu->arch.apf.nested_apf_token);
+   return 1;
+   }
+
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
  vmcs_read32(VM_EXIT_INTR_INFO),
  vmcs_readl(EXIT_QUALIFICATION));
@@ -2441,7 +2452,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
if (!reinject && is_guest_mode(vcpu) &&
-   nested_vmx_check_exception(vcpu, nr))
+   

[PATCH v7 4/4] KVM: async_pf: Let host know whether the guest support delivery async_pf as #PF vmexit

2017-06-28 Thread Wanpeng Li
From: Wanpeng Li 

Adds another flag bit (bit 2) to MSR_KVM_ASYNC_PF_EN. If bit 2 is 1, async
page faults are delivered to L1 as #PF vmexits; if bit 2 is 0, 
kvm_can_do_async_pf
returns 0 if in guest mode.

Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Signed-off-by: Wanpeng Li 
---
 Documentation/virtual/kvm/msr.txt| 5 +++--
 arch/x86/include/asm/kvm_host.h  | 1 +
 arch/x86/include/uapi/asm/kvm_para.h | 1 +
 arch/x86/kernel/kvm.c| 7 ++-
 arch/x86/kvm/mmu.c   | 2 +-
 arch/x86/kvm/vmx.c   | 2 +-
 arch/x86/kvm/x86.c   | 5 +++--
 7 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/Documentation/virtual/kvm/msr.txt 
b/Documentation/virtual/kvm/msr.txt
index 0a9ea51..1ebecc1 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt
@@ -166,10 +166,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
 MSR_KVM_ASYNC_PF_EN: 0x4b564d02
data: Bits 63-6 hold 64-byte aligned physical address of a
64 byte memory area which must be in guest RAM and must be
-   zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
+   zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
when asynchronous page faults are enabled on the vcpu 0 when
disabled. Bit 1 is 1 if asynchronous page faults can be injected
-   when vcpu is in cpl == 0.
+   when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
+   are delivered to L1 as #PF vmexits.
 
First 4 byte of 64 byte memory location will be written to by
the hypervisor at the time of asynchronous page fault (APF)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 71aef4b..a981ab8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -648,6 +648,7 @@ struct kvm_vcpu_arch {
bool send_user_only;
u32 host_apf_reason;
unsigned long nested_apf_token;
+   bool delivery_as_pf_vmexit;
} apf;
 
/* OSVW MSRs (AMD only) */
diff --git a/arch/x86/include/uapi/asm/kvm_para.h 
b/arch/x86/include/uapi/asm/kvm_para.h
index cff0bb6..a965e5b 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -67,6 +67,7 @@ struct kvm_clock_pairing {
 
 #define KVM_ASYNC_PF_ENABLED   (1 << 0)
 #define KVM_ASYNC_PF_SEND_ALWAYS   (1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2)
 
 /* Operations for KVM_HC_MMU_OP */
 #define KVM_MMU_OP_WRITE_PTE1
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 43e10d6..71c17a5 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -330,7 +330,12 @@ static void kvm_guest_cpu_init(void)
 #ifdef CONFIG_PREEMPT
pa |= KVM_ASYNC_PF_SEND_ALWAYS;
 #endif
-   wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
+   pa |= KVM_ASYNC_PF_ENABLED;
+
+   /* Async page fault support for L1 hypervisor is optional */
+   if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
+   (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0x, 
pa >> 32) < 0)
+   wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
__this_cpu_write(apf_reason.enabled, 1);
printk(KERN_INFO"KVM setup async PF for cpu %d\n",
   smp_processor_id());
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4a7dc00..fb8c35f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3705,7 +3705,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
 kvm_event_needs_reinjection(vcpu)))
return false;
 
-   if (is_guest_mode(vcpu))
+   if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
return false;
 
return kvm_x86_ops->interrupt_allowed(vcpu);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8724ea6..4f616db 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8001,7 +8001,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
if (is_nmi(intr_info))
return false;
else if (is_page_fault(intr_info))
-   return enable_ept;
+   return !vmx->vcpu.arch.apf.host_apf_reason && 
enable_ept;
else if (is_no_device(intr_info) &&
 !(vmcs12->guest_cr0 & X86_CR0_TS))
return false;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2a4520f..fdeeb66 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2065,8 +2065,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, 
u64 data)
 {
gpa_t gpa = data & ~0x3f;
 
-   /* Bits 2:5 are reserved, Should be zero */
-   if (data & 0x3c)
+   /* Bits 3:5 are reserved, 

[PATCH v7 2/4] KVM: async_pf: Add L1 guest async_pf #PF vmexit handler

2017-06-28 Thread Wanpeng Li
From: Wanpeng Li 

This patch adds the L1 guest async page fault #PF vmexit handler: such a
#PF is converted into a vmexit from L2 to L1, which is then handled by L1
in the same way as an ordinary async page fault.

Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Signed-off-by: Wanpeng Li 
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu.c  | 33 +
 arch/x86/kvm/mmu.h  |  2 ++
 arch/x86/kvm/svm.c  | 36 +---
 arch/x86/kvm/vmx.c  | 12 +---
 5 files changed, 46 insertions(+), 38 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1f01bfb..e20d8a8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -645,6 +645,7 @@ struct kvm_vcpu_arch {
u64 msr_val;
u32 id;
bool send_user_only;
+   u32 host_apf_reason;
} apf;
 
/* OSVW MSRs (AMD only) */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cb82259..4a7dc00 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -46,6 +46,7 @@
 #include 
 #include 
 #include 
+#include "trace.h"
 
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
@@ -3736,6 +3737,38 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool 
prefault, gfn_t gfn,
return false;
 }
 
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+   u64 fault_address)
+{
+   int r = 1;
+
+   switch (vcpu->arch.apf.host_apf_reason) {
+   default:
+   /* TDP won't cause page fault directly */
+   WARN_ON_ONCE(tdp_enabled);
+   trace_kvm_page_fault(fault_address, error_code);
+
+   if (kvm_event_needs_reinjection(vcpu))
+   kvm_mmu_unprotect_page_virt(vcpu, fault_address);
+   r = kvm_mmu_page_fault(vcpu, fault_address, error_code, NULL, 
0);
+   break;
+   case KVM_PV_REASON_PAGE_NOT_PRESENT:
+   vcpu->arch.apf.host_apf_reason = 0;
+   local_irq_disable();
+   kvm_async_pf_task_wait(fault_address);
+   local_irq_enable();
+   break;
+   case KVM_PV_REASON_PAGE_READY:
+   vcpu->arch.apf.host_apf_reason = 0;
+   local_irq_disable();
+   kvm_async_pf_task_wake(fault_address);
+   local_irq_enable();
+   break;
+   }
+   return r;
+}
+EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
+
 static bool
 check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
 {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 330bf3a..2ae88f0 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -77,6 +77,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 bool accessed_dirty);
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+   u64 fault_address);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e1f8e89..8f263bf 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -192,7 +192,6 @@ struct vcpu_svm {
 
unsigned int3_injected;
unsigned long int3_rip;
-   u32 apf_reason;
 
/* cached guest cpuid flags for faster access */
bool nrips_enabled  : 1;
@@ -2071,34 +2070,9 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned 
long value)
 static int pf_interception(struct vcpu_svm *svm)
 {
u64 fault_address = svm->vmcb->control.exit_info_2;
-   u64 error_code;
-   int r = 1;
+   u64 error_code = svm->vmcb->control.exit_info_1;
 
-   switch (svm->apf_reason) {
-   default:
-   error_code = svm->vmcb->control.exit_info_1;
-
-   trace_kvm_page_fault(fault_address, error_code);
-   if (!npt_enabled && kvm_event_needs_reinjection(>vcpu))
-   kvm_mmu_unprotect_page_virt(>vcpu, fault_address);
-   r = kvm_mmu_page_fault(>vcpu, fault_address, error_code,
-   svm->vmcb->control.insn_bytes,
-   svm->vmcb->control.insn_len);
-   break;
-   case KVM_PV_REASON_PAGE_NOT_PRESENT:
-   svm->apf_reason = 0;
-   local_irq_disable();
-   kvm_async_pf_task_wait(fault_address);
-   local_irq_enable();
-   break;
-   case KVM_PV_REASON_PAGE_READY:
-   svm->apf_reason = 0;
-   local_irq_disable();
-   kvm_async_pf_task_wake(fault_address);
-   local_irq_enable();
-   

[PATCH] btrfs: Keep one more workspace around

2017-06-28 Thread Nick Terrell
> Is there a version I should be testing?

Not yet, I'm working on v2 of the patch set, which will be ready soon.

> I got a bunch of those:
> [10170.448783] kworker/u8:6: page allocation stalls for 60720ms, order:0, 
> mode:0x14000c2(GFP_KERNEL|__GFP_HIGHMEM), nodemask=(null)
> [10170.448819] kworker/u8:6 cpuset=/ mems_allowed=0
> [10170.448842] CPU: 3 PID: 13430 Comm: kworker/u8:6 Not tainted 
> 4.12.0-rc7-00034-gdff47ed160bb #1
> [10170.448846] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
> [10170.448872] Workqueue: btrfs-endio btrfs_endio_helper
> [10170.448910] [] (unwind_backtrace) from [] 
> (show_stack+0x10/0x14)
> [10170.448925] [] (show_stack) from [] 
> (dump_stack+0x78/0x8c)
> [10170.448942] [] (dump_stack) from [] 
> (warn_alloc+0xc0/0x170)
> [10170.448952] [] (warn_alloc) from [] 
> (__alloc_pages_nodemask+0x97c/0xe30)
> [10170.448964] [] (__alloc_pages_nodemask) from [] 
> (__vmalloc_node_range+0x144/0x27c)
> [10170.448976] [] (__vmalloc_node_range) from [] 
> (__vmalloc_node.constprop.10+0x48/0x50)
> [10170.448982] [] (__vmalloc_node.constprop.10) from [] 
> (vmalloc+0x2c/0x34)
> [10170.448990] [] (vmalloc) from [] 
> (zstd_alloc_workspace+0x6c/0xb8)
> [10170.448997] [] (zstd_alloc_workspace) from [] 
> (find_workspace+0x120/0x1f4)
> [10170.449002] [] (find_workspace) from [] 
> (end_compressed_bio_read+0x1d4/0x3b0)
> [10170.449016] [] (end_compressed_bio_read) from [] 
> (process_one_work+0x1d8/0x3f0)
> [10170.449026] [] (process_one_work) from [] 
> (worker_thread+0x38/0x558)
> [10170.449035] [] (worker_thread) from [] 
> (kthread+0x124/0x154)
> [10170.449042] [] (kthread) from [] 
> (ret_from_fork+0x14/0x3c)
>
> which never happened with compress=lzo, and a 2GB RAM machine that runs 4
> threads of various builds runs into memory pressure quite often.  On the
> other hand, I used 4.11 for lzo so this needs more testing before I can
> blame the zstd code.

I'm not sure what is causing the symptom of stalls in vmalloc(), but I
think I know what is causing vmalloc() to be called so often. It's probably
showing up for zstd and not lzo because zstd requires more memory.

find_workspace() allocates up to num_online_cpus() + 1 workspaces.
free_workspace() will only keep num_online_cpus() workspaces. When
(de)compressing we will allocate num_online_cpus() + 1 workspaces, then
free one, and repeat. Instead, we can just keep num_online_cpus() + 1
workspaces around, and never have to allocate/free another workspace in the
common case.
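
To make the counting above concrete, a small user-space sketch (illustrative
only; the names and the fixed worker count are assumptions, not the btrfs
code) showing that a keep-limit of num_online_cpus() forces an extra
allocation every round, while num_online_cpus() + 1 only allocates the
initial workspaces:

#include <stdio.h>

#define NUM_CPUS 2			/* stand-in for num_online_cpus() */

static int idle_ws;			/* workspaces on the idle list */
static int allocations;			/* how often we had to "vmalloc" a new one */

static void get_ws(void)
{
	if (idle_ws > 0)
		idle_ws--;		/* reuse an idle workspace */
	else
		allocations++;		/* none idle: allocate a fresh one */
}

static void put_ws(int keep_limit)
{
	if (idle_ws < keep_limit)
		idle_ws++;		/* keep it cached */
	/* else: freed immediately, must be reallocated next time */
}

static int simulate(int keep_limit)
{
	idle_ws = 0;
	allocations = 0;
	for (int round = 0; round < 1000; round++) {
		for (int i = 0; i < NUM_CPUS + 1; i++)	/* burst of cpus+1 users */
			get_ws();
		for (int i = 0; i < NUM_CPUS + 1; i++)
			put_ws(keep_limit);
	}
	return allocations;
}

int main(void)
{
	printf("keep %d workspaces: %d allocations\n", NUM_CPUS, simulate(NUM_CPUS));
	printf("keep %d workspaces: %d allocations\n", NUM_CPUS + 1, simulate(NUM_CPUS + 1));
	return 0;
}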

I tested on a Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. I mounted a
BtrFS partition with -o compress-force={lzo,zlib,zstd} and logged whenever
a workspace was allocated of freed. Then I copied vmlinux (527 MB) to the
partition. Before the patch, during the copy it would allocate and free 5-6
workspaces. After, it only allocated the initial 3. This held true for lzo,
zlib, and zstd.

> I'm on linus:4.12-rc7 with only a handful of btrfs patches (v3 of Qu's chunk
> check, some misc crap) -- I guess I should use at least btrfs-for-4.13.  Or
> would you prefer full-blown next?

Whatever is convenient for you. The relevant code in BtrFS hasn't changed
for a few months, so it shouldn't matter too much.

Signed-off-by: Nick Terrell 
---
 fs/btrfs/compression.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 3beb0d0..1a0ef55 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -874,7 +874,7 @@ static void free_workspace(int type, struct list_head 
*workspace)
int *free_ws= _comp_ws[idx].free_ws;

spin_lock(ws_lock);
-   if (*free_ws < num_online_cpus()) {
+   if (*free_ws <= num_online_cpus()) {
list_add(workspace, idle_ws);
(*free_ws)++;
spin_unlock(ws_lock);
--
2.9.3


[PATCH v7 0/4] KVM: async_pf: Fix async pf exception injection

2017-06-28 Thread Wanpeng Li
 INFO: task gnome-terminal-:1734 blocked for more than 120 seconds.
   Not tainted 4.12.0-rc4+ #8
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 gnome-terminal- D0  1734   1015 0x
 Call Trace:
  __schedule+0x3cd/0xb30
  schedule+0x40/0x90
  kvm_async_pf_task_wait+0x1cc/0x270
  ? __vfs_read+0x37/0x150
  ? prepare_to_swait+0x22/0x70
  do_async_page_fault+0x77/0xb0
  ? do_async_page_fault+0x77/0xb0
  async_page_fault+0x28/0x30

This is triggered by running both win7 and win2016 on L1 KVM simultaneously
and then stressing memory on L1. I can observe this hang on L1 when at least
~70% of the swap area on L0 is occupied.

This happens because an async pf that should have been injected into L1 was
injected into L2: the L2 guest starts receiving page faults with a bogus %cr2
(actually the apf token from the host), and the L1 guest starts accumulating
tasks stuck in D state in kvm_async_pf_task_wait() because the PAGE_READY
async_pfs never arrive.

This patchset fixes it according to Radim's proposal "force a nested VM exit 
from nested_vmx_check_exception if the injected #PF is async_pf and handle 
the #PF VM exit in L1". https://www.spinics.net/lists/kvm/msg142498.html

v6 -> v7:
 * drop KVM_GET/PUT_VCPU_EVENTS stuff for nested_apf

v5 -> v6: 
 * move vcpu_svm's apf_reason to vcpu->arch.apf.host_apf_reason
 * introduce function kvm_handle_page_fault() to be used by both VMX/SVM
 * introduce svm's codes posted by Paolo
 * introduce nested_apf 
 * better set MSR_KVM_ASYNC_PF_EN 

v4 -> v5:
 * utilize wrmsr_safe for MSR_KVM_ASYNC_PF_EN

v3 -> v4:
 * reuse pad field in kvm_vcpu_events for async_page_fault
 * update kvm_vcpu_events API documentations
 * change async_page_fault type in vcpu->arch.exception from bool to u8

v2 -> v3:
 * add the flag to the userspace interface(KVM_GET/PUT_VCPU_EVENTS)

v1 -> v2:
 * remove nested_vmx_check_exception nr parameter
 * construct a simple special vm-exit information field for async pf
 * introduce nested_apf_token to vcpu->arch.apf to avoid change the CR2 
   visible in L2 guest 
 * avoid pass the apf directed towards it (L1) into L2 if there is L3 
   at the moment

Wanpeng Li (4):
  KVM: x86: Simplify kvm_x86_ops->queue_exception parameter
  KVM: async_pf: Add L1 guest async_pf #PF vmexit handler
  KVM: async_pf: Force a nested vmexit if the injected #PF is async_pf
  KVM: async_pf: Let host know whether the guest support delivery async_pf as 
#PF vmexit

 Documentation/virtual/kvm/msr.txt|  5 ++--
 arch/x86/include/asm/kvm_emulate.h   |  1 +
 arch/x86/include/asm/kvm_host.h  |  8 +++--
 arch/x86/include/uapi/asm/kvm_para.h |  1 +
 arch/x86/kernel/kvm.c|  7 -
 arch/x86/kvm/mmu.c   | 35 +-
 arch/x86/kvm/mmu.h   |  2 ++
 arch/x86/kvm/svm.c   | 58 
 arch/x86/kvm/vmx.c   | 39 +++-
 arch/x86/kvm/x86.c   | 19 +++-
 10 files changed, 108 insertions(+), 67 deletions(-)

-- 
2.7.4



[PATCH v7 1/4] KVM: x86: Simplify kvm_x86_ops->queue_exception parameter

2017-06-28 Thread Wanpeng Li
From: Wanpeng Li 

This patch removes all arguments except the first in
kvm_x86_ops->queue_exception, since the callbacks can extract those arguments
from vcpu->arch.exception themselves; do the same in
nested_{vmx,svm}_check_exception.

Cc: Paolo Bonzini 
Cc: Radim Krčmář 
Signed-off-by: Wanpeng Li 
---
 arch/x86/include/asm/kvm_host.h | 4 +---
 arch/x86/kvm/svm.c  | 8 +---
 arch/x86/kvm/vmx.c  | 8 +---
 arch/x86/kvm/x86.c  | 5 +
 4 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 695605e..1f01bfb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -948,9 +948,7 @@ struct kvm_x86_ops {
unsigned char *hypercall_addr);
void (*set_irq)(struct kvm_vcpu *vcpu);
void (*set_nmi)(struct kvm_vcpu *vcpu);
-   void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
-   bool has_error_code, u32 error_code,
-   bool reinject);
+   void (*queue_exception)(struct kvm_vcpu *vcpu);
void (*cancel_injection)(struct kvm_vcpu *vcpu);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ba9891a..e1f8e89 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -631,11 +631,13 @@ static void skip_emulated_instruction(struct kvm_vcpu 
*vcpu)
svm_set_interrupt_shadow(vcpu, 0);
 }
 
-static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-   bool has_error_code, u32 error_code,
-   bool reinject)
+static void svm_queue_exception(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
+   unsigned nr = vcpu->arch.exception.nr;
+   bool has_error_code = vcpu->arch.exception.has_error_code;
+   bool reinject = vcpu->arch.exception.reinject;
+   u32 error_code = vcpu->arch.exception.error_code;
 
/*
 * If we are within a nested VM we'd better #VMEXIT and let the guest
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ca5d2b9..df825bb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2431,11 +2431,13 @@ static int nested_vmx_check_exception(struct kvm_vcpu 
*vcpu, unsigned nr)
return 1;
 }
 
-static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-   bool has_error_code, u32 error_code,
-   bool reinject)
+static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
+   unsigned nr = vcpu->arch.exception.nr;
+   bool has_error_code = vcpu->arch.exception.has_error_code;
+   bool reinject = vcpu->arch.exception.reinject;
+   u32 error_code = vcpu->arch.exception.error_code;
u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
if (!reinject && is_guest_mode(vcpu) &&
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0e846f0..7511c0a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6347,10 +6347,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, 
bool req_int_win)
kvm_update_dr7(vcpu);
}
 
-   kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
- vcpu->arch.exception.has_error_code,
- vcpu->arch.exception.error_code,
- vcpu->arch.exception.reinject);
+   kvm_x86_ops->queue_exception(vcpu);
return 0;
}
 
-- 
2.7.4



linux-next: manual merge of the kvm-arm tree with the iommu tree

2017-06-28 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the kvm-arm tree got a conflict in:

  Documentation/arm64/silicon-errata.txt

between commits:

  e5b829de053d ("iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum 
#74")
  f935448acf46 ("iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum 
#126")

from the iommu tree and commit:

  690a341577f9 ("arm64: Add workaround for Cavium Thunder erratum 30115")

from the kvm-arm tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc Documentation/arm64/silicon-errata.txt
index 856479525776,f5f93dca54b7..
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@@ -62,8 -62,7 +62,9 @@@ stable kernels
  | Cavium | ThunderX GICv3  | #23154  | CAVIUM_ERRATUM_23154   
 |
  | Cavium | ThunderX Core   | #27456  | CAVIUM_ERRATUM_27456   
 |
  | Cavium | ThunderX SMMUv2 | #27704  | N/A
 |
+ | Cavium | ThunderX Core   | #30115  | CAVIUM_ERRATUM_30115   
 |
 +| Cavium | ThunderX2 SMMUv3| #74 | N/A
 |
 +| Cavium | ThunderX2 SMMUv3| #126| N/A
 |
  || | |
 |
  | Freescale/NXP  | LS2080A/LS1043A | A-008585| FSL_ERRATUM_A008585
 |
  || | |
 |


[PATCH 10/16] switchtec_ntb: initialize hardware for doorbells and messages

2017-06-28 Thread Logan Gunthorpe
This patch sets up some hardware registers and creates interrupt service
routines for the doorbells and messages.

There are 64 doorbells in the switch that are shared between all
partitions. The upper 4 doorbells are also shared with the messages
and are therefore not used. Thus, this code provides 28 doorbells
for each partition.

Signed-off-by: Logan Gunthorpe 
Reviewed-by: Stephen Bates 
Reviewed-by: Kurt Schwemmer 
---
 drivers/ntb/hw/mscc/switchtec_ntb.c | 144 
 1 file changed, 144 insertions(+)

diff --git a/drivers/ntb/hw/mscc/switchtec_ntb.c 
b/drivers/ntb/hw/mscc/switchtec_ntb.c
index ff03424153d9..4f48b34ce0b5 100644
--- a/drivers/ntb/hw/mscc/switchtec_ntb.c
+++ b/drivers/ntb/hw/mscc/switchtec_ntb.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 
 MODULE_DESCRIPTION("Microsemi Switchtec(tm) NTB Driver");
 MODULE_VERSION("0.1");
@@ -77,6 +78,9 @@ struct switchtec_ntb {
int self_partition;
int peer_partition;
 
+   int doorbell_irq;
+   int message_irq;
+
struct ntb_info_regs __iomem *mmio_ntb;
struct ntb_ctrl_regs __iomem *mmio_ctrl;
struct ntb_dbmsg_regs __iomem *mmio_dbmsg;
@@ -88,6 +92,11 @@ struct switchtec_ntb {
struct shared_mw __iomem *peer_shared;
dma_addr_t self_shared_dma;
 
+   u64 db_mask;
+   u64 db_valid_mask;
+   int db_shift;
+   int db_peer_shift;
+
int nr_direct_mw;
int nr_lut_mw;
int direct_mw_to_bar[MAX_DIRECT_MW];
@@ -196,6 +205,49 @@ static void switchtec_ntb_init_mw(struct switchtec_ntb 
*sndev)
sndev->nr_direct_mw, cnt - sndev->nr_direct_mw);
 }
 
+/*
+ * There are 64 doorbells in the switch hardware but this is
+ * shared among all partitions. So we must split them in half
+ * (32 for each partition). However, the message interrupts are
+ * also shared with the top 4 doorbells so we just limit this to
+ * 28 doorbells per partition
+ */
+static void switchtec_ntb_init_db(struct switchtec_ntb *sndev)
+{
+   sndev->db_valid_mask = 0x0FFF;
+
+   if (sndev->self_partition < sndev->peer_partition) {
+   sndev->db_shift = 0;
+   sndev->db_peer_shift = 32;
+   } else {
+   sndev->db_shift = 32;
+   sndev->db_peer_shift = 0;
+   }
+
+   sndev->db_mask = 0x0FFFULL;
+   iowrite64(~sndev->db_mask, >mmio_self_dbmsg->idb_mask);
+   iowrite64(sndev->db_valid_mask << sndev->db_peer_shift,
+ >mmio_self_dbmsg->odb_mask);
+}
+
+static void switchtec_ntb_init_msgs(struct switchtec_ntb *sndev)
+{
+   int i;
+   u32 msg_map = 0;
+
+   for (i = 0; i < ARRAY_SIZE(sndev->mmio_self_dbmsg->imsg); i++) {
+   int m = i | sndev->peer_partition << 2;
+
+   msg_map |= m << i * 8;
+   }
+
+   iowrite32(msg_map, >mmio_self_dbmsg->msg_map);
+
+   for (i = 0; i < ARRAY_SIZE(sndev->mmio_self_dbmsg->imsg); i++)
+   iowrite64(NTB_DBMSG_IMSG_STATUS | NTB_DBMSG_IMSG_MASK,
+ >mmio_self_dbmsg->imsg[i]);
+}
+
 static int switchtec_ntb_init_req_id_table(struct switchtec_ntb *sndev)
 {
int rc = 0;
@@ -342,6 +394,87 @@ static void switchtec_ntb_deinit_shared_mw(struct 
switchtec_ntb *sndev)
  sndev->self_shared_dma);
 }
 
+static irqreturn_t switchtec_ntb_doorbell_isr(int irq, void *dev)
+{
+   struct switchtec_ntb *sndev = dev;
+
+   dev_dbg(>stdev->dev, "doorbell\n");
+
+   return IRQ_HANDLED;
+}
+
+static irqreturn_t switchtec_ntb_message_isr(int irq, void *dev)
+{
+   int i;
+   struct switchtec_ntb *sndev = dev;
+
+   for (i = 0; i < ARRAY_SIZE(sndev->mmio_self_dbmsg->imsg); i++) {
+   u64 msg = ioread64(>mmio_self_dbmsg->imsg[i]);
+
+   if (msg & NTB_DBMSG_IMSG_STATUS) {
+   dev_dbg(>stdev->dev, "message: %d %08x\n", i,
+   (u32)msg);
+   iowrite8(1, >mmio_self_dbmsg->imsg[i].status);
+   }
+   }
+
+   return IRQ_HANDLED;
+}
+
+static int switchtec_ntb_init_db_msg_irq(struct switchtec_ntb *sndev)
+{
+   int i;
+   int rc;
+   int doorbell_irq = 0;
+   int message_irq = 0;
+   int event_irq;
+   int idb_vecs = sizeof(sndev->mmio_self_dbmsg->idb_vec_map);
+
+   event_irq = ioread32(>stdev->mmio_part_cfg->vep_vector_number);
+
+   while (doorbell_irq == event_irq)
+   doorbell_irq++;
+   while (message_irq == doorbell_irq ||
+  message_irq == event_irq)
+   message_irq++;
+
+   dev_dbg(>stdev->dev, "irqs - event: %d, db: %d, msgs: %d",
+   event_irq, doorbell_irq, message_irq);
+
+   for (i = 0; i < idb_vecs - 4; i++)
+   iowrite8(doorbell_irq,
+>mmio_self_dbmsg->idb_vec_map[i]);
+
+ 

[PATCH 11/16] switchtec_ntb: add skeleton ntb driver

2017-06-28 Thread Logan Gunthorpe
This patch simply adds a skeleton NTB driver which will be filled
out in subsequent patches.

Signed-off-by: Logan Gunthorpe 
Reviewed-by: Stephen Bates 
Reviewed-by: Kurt Schwemmer 
---
 drivers/ntb/hw/mscc/switchtec_ntb.c | 148 +++-
 include/linux/ntb.h |   3 +
 2 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/drivers/ntb/hw/mscc/switchtec_ntb.c 
b/drivers/ntb/hw/mscc/switchtec_ntb.c
index 4f48b34ce0b5..0587b2380bcc 100644
--- a/drivers/ntb/hw/mscc/switchtec_ntb.c
+++ b/drivers/ntb/hw/mscc/switchtec_ntb.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 MODULE_DESCRIPTION("Microsemi Switchtec(tm) NTB Driver");
 MODULE_VERSION("0.1");
@@ -73,6 +74,7 @@ struct shared_mw {
 #define LUT_SIZE SZ_64K
 
 struct switchtec_ntb {
+   struct ntb_dev ntb;
struct switchtec_dev *stdev;
 
int self_partition;
@@ -159,10 +161,148 @@ static int switchtec_ntb_part_op(struct switchtec_ntb 
*sndev,
return -EIO;
 }
 
+static int switchtec_ntb_mw_count(struct ntb_dev *ntb, int pidx)
+{
+   return 0;
+}
+
+static int switchtec_ntb_mw_get_align(struct ntb_dev *ntb, int pidx,
+ int widx, resource_size_t *addr_align,
+ resource_size_t *size_align,
+ resource_size_t *size_max)
+{
+   return 0;
+}
+
+static int switchtec_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int widx,
+ dma_addr_t addr, resource_size_t size)
+{
+   return 0;
+}
+
+static int switchtec_ntb_peer_mw_count(struct ntb_dev *ntb)
+{
+   return 0;
+}
+
+static int switchtec_ntb_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
+ phys_addr_t *base,
+ resource_size_t *size)
+{
+   return 0;
+}
+
+static u64 switchtec_ntb_link_is_up(struct ntb_dev *ntb,
+   enum ntb_speed *speed,
+   enum ntb_width *width)
+{
+   return 0;
+}
+
+static int switchtec_ntb_link_enable(struct ntb_dev *ntb,
+enum ntb_speed max_speed,
+enum ntb_width max_width)
+{
+   return 0;
+}
+
+static int switchtec_ntb_link_disable(struct ntb_dev *ntb)
+{
+   return 0;
+}
+
+static u64 switchtec_ntb_db_valid_mask(struct ntb_dev *ntb)
+{
+   return 0;
+}
+
+static int switchtec_ntb_db_vector_count(struct ntb_dev *ntb)
+{
+   return 0;
+}
+
+static u64 switchtec_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
+{
+   return 0;
+}
+
+static u64 switchtec_ntb_db_read(struct ntb_dev *ntb)
+{
+   return 0;
+}
+
+static int switchtec_ntb_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+   return 0;
+}
+
+static int switchtec_ntb_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+   return 0;
+}
+
+static int switchtec_ntb_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+   return 0;
+}
+
+static int switchtec_ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+   return 0;
+}
+
+static int switchtec_ntb_spad_count(struct ntb_dev *ntb)
+{
+   return 0;
+}
+
+static u32 switchtec_ntb_spad_read(struct ntb_dev *ntb, int idx)
+{
+   return 0;
+}
+
+static int switchtec_ntb_spad_write(struct ntb_dev *ntb, int idx, u32 val)
+{
+   return 0;
+}
+
+static int switchtec_ntb_peer_spad_write(struct ntb_dev *ntb, int pidx,
+int sidx, u32 val)
+{
+   return 0;
+}
+
+static const struct ntb_dev_ops switchtec_ntb_ops = {
+   .mw_count   = switchtec_ntb_mw_count,
+   .mw_get_align   = switchtec_ntb_mw_get_align,
+   .mw_set_trans   = switchtec_ntb_mw_set_trans,
+   .peer_mw_count  = switchtec_ntb_peer_mw_count,
+   .peer_mw_get_addr   = switchtec_ntb_peer_mw_get_addr,
+   .link_is_up = switchtec_ntb_link_is_up,
+   .link_enable= switchtec_ntb_link_enable,
+   .link_disable   = switchtec_ntb_link_disable,
+   .db_valid_mask  = switchtec_ntb_db_valid_mask,
+   .db_vector_count= switchtec_ntb_db_vector_count,
+   .db_vector_mask = switchtec_ntb_db_vector_mask,
+   .db_read= switchtec_ntb_db_read,
+   .db_clear   = switchtec_ntb_db_clear,
+   .db_set_mask= switchtec_ntb_db_set_mask,
+   .db_clear_mask  = switchtec_ntb_db_clear_mask,
+   .peer_db_set= switchtec_ntb_peer_db_set,
+   .spad_count = switchtec_ntb_spad_count,
+   .spad_read  = switchtec_ntb_spad_read,
+   .spad_write = switchtec_ntb_spad_write,
+   .peer_spad_write= switchtec_ntb_peer_spad_write,
+};
+
 static void 

Re: [PATCH v7] drm/sun4i: hdmi: Implement I2C adapter for A10s DDC bus

2017-06-28 Thread Jonathan Liu
Hi Chen-Yu,

On 29 June 2017 at 12:47, Chen-Yu Tsai  wrote:
> Hi,
>
> On Wed, Jun 28, 2017 at 6:52 PM, Jonathan Liu  wrote:
>> The documentation for drm_do_get_edid in drivers/gpu/drm/drm_edid.c states:
>> "As in the general case the DDC bus is accessible by the kernel at the I2C
>> level, drivers must make all reasonable efforts to expose it as an I2C
>> adapter and use drm_get_edid() instead of abusing this function."
>>
>> Exposing the DDC bus as an I2C adapter is more beneficial as it can be used
>> for purposes other than reading the EDID such as modifying the EDID or
>> using the HDMI DDC pins as an I2C bus through the I2C dev interface from
>> userspace (e.g. i2c-tools).
>>
>> Implement this for A10s.
>>
>> Signed-off-by: Jonathan Liu 
>> ---
>> Changes for v7:
>>  - Fix mixed declarations and code compiler warning for level variable
>>
>> Changes for v6:
>>  - Use fixed byte time of 100 us instead of dynamically calculating from DDC
>>clock that is set to a fixed 100 MHz rate anyway
>>  - Change is_fifo_flag_unset to not read the status register as well to be
>>more consistent with is_err_status
>>
>> Changes for v5:
>>  - Use devm_kzalloc instead of devm_kmemdup and remove const struct 
>> i2c_adapter
>>  - Rework to use readl_poll_timeout for checking FIFO status
>>
>> Changes for v4:
>>  - Carry over copyright from initial I2C code into sun4i_hdmi_i2c.c
>>  - Clean up indentation in sun4i_hdmi.h
>>  - Rename SUN4I_HDMI_DDC_MAX_TRANSFER_SIZE to SUN4I_HDMI_DDC_BYTE_COUNT_MAX
>>and group it under the SUN4I_HDMI_DDC_BYTE_COUNT_REG define, changing the
>>value to use the GENMASK macro to make it clear that it is derived from
>>the width of the field in the register
>>  - Fix SUN4I_HDMI_DDC_INT_STATUS_DDC_TX_FIFO_UNDERFLOW typo which should be
>>SUN4I_HDMI_DDC_INT_STATUS_DDC_TX_FIFO_OVERFLOW
>>  - Remove redundant rewriting of SUN4I_HDMI_DDC_INT_STATUS_REG register
>>  - Change struct i2c_adapter to be const by using devm_kmemdup on creation
>>  - Return -ETIMEDOUT instead of -EIO if there is timeout while transferring 
>> an
>>I2C message
>>  - Instead of waiting for 1-2 bytes to transfer, wait for the time it would
>>take for remaining bytes to transfer (limited by FIFO size)
>>  - Add additional comments
>>
>> Changes for v3:
>>  - Explain why drm_do_get_edid should be used and why it's better to expose 
>> it
>>as an I2C adapter in commit message
>>  - Reorder bit defines in descending order for consistency
>>  - Keep old unused macros instead of removing them
>>  - The v2 algorithm split large transfers into 16 byte transfers but this may
>>cause a large single write to be treated as multiple writes causing data
>>corruption. The algorithm has been reworked to not split larger transfers
>>and make more use of the FIFO to avoid this.
>>  - Moved the creation of the DDC clock from sun4i_hdmi_enc.c to
>>sun4i_hdmi_i2c.c
>>  - Reformatted code
>>  - Instead of masking bits that we don't want to check for errors, explicitly
>>check the error bits
>>  - Clear error bits at start of transfer in case of error from previous 
>> transfer
>>  - Poll for completion of FIFO clear after setting FIFO clear bit
>>
>> Changes for v2:
>>  - Rebased against Maxime's sunxi-drm/for-next branch
>>  - Fix up error paths in sun4i_hdmi_bind so that the I2C adapter is deleted 
>> if
>>any of the calls after the I2C adapter is created fails
>>  - Remove unnecessary includes in sun4i_hdmi_i2c.c
>>
>>  drivers/gpu/drm/sun4i/Makefile |   1 +
>>  drivers/gpu/drm/sun4i/sun4i_hdmi.h |  23 
>>  drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c | 101 ++
>>  drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c | 234 
>> +
>>  4 files changed, 269 insertions(+), 90 deletions(-)
>>  create mode 100644 drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c
>>
>
> [...]
>
>> diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c 
>> b/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c
>> new file mode 100644
>> index ..ce954ee25ae4
>> --- /dev/null
>> +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c
>> @@ -0,0 +1,234 @@
>
> [...]
>
>> +static int fifo_transfer(struct sun4i_hdmi *hdmi, u8 *buf, int len, bool 
>> read)
>> +{
>> +   /*
>> +* 1 byte takes 9 clock cycles (8 bits + 1 ACK) = 90 us for 100 MHz
>> +* clock. As clock rate is fixed, just round it up to 100 us.
>
> This looks fishy. I2C busses are never that fast. Maybe kHz?
>
> ChenYu
>

You're right it should be 100 kHz. I will fix that.
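
For reference, a quick user-space check of the byte-time arithmetic (assuming
the 100 kHz DDC clock and 9 clock cycles per byte from the comment being
fixed):

#include <stdio.h>

int main(void)
{
	const unsigned long long clk_hz = 100000;	/* 100 kHz DDC clock */
	const unsigned long long cycles = 9;		/* 8 data bits + 1 ACK per byte */
	/* nanoseconds per byte, rounded up */
	unsigned long long byte_time_ns = (cycles * 1000000000ULL + clk_hz - 1) / clk_hz;

	printf("%llu ns (~%llu us) per byte\n", byte_time_ns, byte_time_ns / 1000);
	return 0;
}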

>> +*/
>> +   const unsigned long byte_time_ns = 100;
>> +   u32 int_status;
>> +   u32 fifo_status;
>> +   /* Read needs empty flag unset, write needs full flag unset */
>> +   u32 flag = read ? SUN4I_HDMI_DDC_FIFO_STATUS_EMPTY :
>> + SUN4I_HDMI_DDC_FIFO_STATUS_FULL;
>> +   int level;
>> +   int ret;
>> +
>> +   /* Wait until error or FIFO 

Re: [PATCH] fs: ext4: inode->i_generation not assigned 0.

2017-06-28 Thread William Koh
On 6/28/17, 7:32 PM, "Andreas Dilger"  wrote:

On Jun 28, 2017, at 4:06 PM, Kyungchan Koh  wrote:
> 
> In fs/ext4/super.c, the function ext4_nfs_get_inode takes as input
> "generation" that can be used to specify the generation of the inode to
> be returned. When 0 is given as input, then inodes of any generation can
> be returned. Therefore, generation 0 is a special case that should be
> avoided when assigning generation to inodes.

I'd agree with this change to avoid assigning generation == 0 to real 
inodes.

Also, the separate question arises about whether we need to allow file 
handle
lookup with generation == 0?  That allows FID guessing easily, while 
requiring
a non-zero generation makes that a lot harder.

What are the cases where generation == 0 are used?

Honestly, I’m not too sure. I just noticed that generation 0 was a special
case from reading the code.

> A new inline function, ext4_inode_set_gen, will take care of the
> problem.  Now, inodes cannot have a generation of 0, so this patch fixes
> the issue.
> 
> Signed-off-by: Kyungchan Koh 
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 3219154..74c6677 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1549,6 +1549,14 @@ static inline int ext4_valid_inum(struct 
super_block *sb, unsigned long ino)
>ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
> }
> 
> +static inline void ext4_inode_set_gen(struct inode *inode,
> +   struct ext4_sb_info *sbi)
> +{
> + inode->i_generation = sbi->s_next_generation++;
> + if (!inode->i_generation)

This should be marked "unlikely()" since it happens at most once every 4B
file creations (though likely even less since it is unlikely that so many
files will be created in a single mount).

Got it.

> + inode->i_generation = sbi->s_next_generation++;
> +}
> +
> 
> diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
> index 98ac2f1..d33f6f0 100644
> --- a/fs/ext4/ialloc.c
> +++ b/fs/ext4/ialloc.c
> @@ -1072,7 +1072,7 @@ struct inode *__ext4_new_inode(handle_t *handle, 
struct inode   }
>   spin_lock(>s_next_gen_lock);
> - inode->i_generation = sbi->s_next_generation++;
> + ext4_inode_set_gen(inode, sbi);
>   spin_unlock(>s_next_gen_lock);
> 
> diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
> index 0c21e22..d52a467 100644
> --- a/fs/ext4/ioctl.c
> +++ b/fs/ext4/ioctl.c
> @@ -160,8 +160,8 @@ static long swap_inode_boot_loader(struct super_block 
*sb,
> 
>   spin_lock(>s_next_gen_lock);
> - inode->i_generation = sbi->s_next_generation++;
> - inode_bl->i_generation = sbi->s_next_generation++;
> + ext4_inode_set_gen(inode, sbi);
> + ext4_inode_set_gen(inode_bl, sbi);
>   spin_unlock(>s_next_gen_lock);
> 


Cheers, Andreas

This is applicable to many fs, including ext2, ext4, exofs, jfs, and f2fs.
Therefore, a shared helper in linux/fs.h will allow for easy changes
in all fs. Is there any reason that might be a bad idea?

Best,
Kyungchan Koh 







linux-next: manual merge of the usb tree with the uuid tree

2017-06-28 Thread Stephen Rothwell
Hi Greg,

Today's linux-next merge of the usb tree got a conflict in:

  drivers/usb/misc/ucsi.c

between commit:

  94116f8126de ("ACPI: Switch to use generic guid_t in acpi_evaluate_dsm()")

from the uuid tree and commit:

  8243edf44152 ("usb: typec: ucsi: Add ACPI driver")

from the usb tree.

I fixed it up (the latter deleted the file, so I did that - I think the
new code is ok with then new guid handling) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging.  You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.

Almost right :-(

I got the following error:

drivers/usb/typec/ucsi/ucsi_acpi.c: In function 'ucsi_acpi_dsm':
drivers/usb/typec/ucsi/ucsi_acpi.c:33:48: error: passing argument 2 of 
'acpi_evaluate_dsm' from incompatible pointer type 
[-Werror=incompatible-pointer-types]
  obj = acpi_evaluate_dsm(ACPI_HANDLE(ua->dev), ua->uuid.b, 1, func,
^
In file included from include/linux/acpi.h:44:0,
 from drivers/usb/typec/ucsi/ucsi_acpi.c:14:
include/acpi/acpi_bus.h:65:20: note: expected 'const guid_t * {aka const struct 
 *}' but argument is of type '__u8 * {aka unsigned char *}'
 union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,  
^

I have applied the following merge fix patch (the first hunk is
probably not strictly necessary):

From: Stephen Rothwell 
Date: Thu, 29 Jun 2017 14:36:10 +1000
Subject: [PATCH] usb: typec: fix for "ACPI: Switch to use generic guid_t in
 acpi_evaluate_dsm()"

Signed-off-by: Stephen Rothwell 
---
 drivers/usb/typec/ucsi/ucsi_acpi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c 
b/drivers/usb/typec/ucsi/ucsi_acpi.c
index 3fb2e48e1c91..7b7c9373a9b6 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -23,14 +23,14 @@ struct ucsi_acpi {
struct device *dev;
struct ucsi *ucsi;
struct ucsi_ppm ppm;
-   uuid_le uuid;
+   guid_t uuid;
 };
 
 static int ucsi_acpi_dsm(struct ucsi_acpi *ua, int func)
 {
union acpi_object *obj;
 
-   obj = acpi_evaluate_dsm(ACPI_HANDLE(ua->dev), ua->uuid.b, 1, func,
+   obj = acpi_evaluate_dsm(ACPI_HANDLE(ua->dev), &ua->uuid, 1, func,
NULL);
if (!obj) {
dev_err(ua->dev, "%s: failed to evaluate _DSM %d\n",
-- 
2.11.0

-- 
Cheers,
Stephen Rothwell


[PATCH v5 2/6] g_NCR5380: End PDMA transfer correctly on target disconnection

2017-06-28 Thread Finn Thain
From: Ondrej Zary 

When an IRQ arrives during PDMA transfer, pread() and pwrite() return
without waiting for the 53C80 registers to be ready and this ends up
messing up the chip state. This was observed with SONY CDU-55S which is
slow enough to disconnect during 4096-byte reads.

IRQ during PDMA is not an error so don't return -1. Instead, store the
remaining byte count for use by NCR5380_dma_residual().

[Poll for the BASR_END_DMA_TRANSFER condition rather than remove the
error message -- F.T.]
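
The matching accessor is not visible in the hunks below; a minimal sketch of
what it presumably looks like (an assumption based on the pdma_residual field
this patch adds to the hostdata):

static int generic_NCR5380_dma_residual(struct NCR5380_hostdata *hostdata)
{
	return hostdata->pdma_residual;
}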

Signed-off-by: Ondrej Zary 
Signed-off-by: Finn Thain 
---
 drivers/scsi/g_NCR5380.c | 48 +++-
 1 file changed, 31 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index 14ef4e8c4713..911a4300ea51 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -44,12 +44,13 @@
int c400_ctl_status; \
int c400_blk_cnt; \
int c400_host_buf; \
-   int io_width
+   int io_width; \
+   int pdma_residual
 
 #define NCR5380_dma_xfer_len  generic_NCR5380_dma_xfer_len
 #define NCR5380_dma_recv_setup  generic_NCR5380_pread
 #define NCR5380_dma_send_setup  generic_NCR5380_pwrite
-#define NCR5380_dma_residual  NCR5380_dma_residual_none
+#define NCR5380_dma_residual  generic_NCR5380_dma_residual
 
 #define NCR5380_intr  generic_NCR5380_intr
 #define NCR5380_queue_command   generic_NCR5380_queue_command
@@ -500,10 +501,8 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
while (1) {
if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
break;
-   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ) {
-   printk(KERN_ERR "53C400r: Got 53C80_IRQ start=%d, 
blocks=%d\n", start, blocks);
-   return -1;
-   }
+   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ)
+   goto out_wait;
while (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_HOST_BUF_NOT_RDY)
; /* FIXME - no timeout */
 
@@ -542,13 +541,19 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
if (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_GATED_53C80_IRQ))
printk("53C400r: no 53C80 gated irq after transfer");
 
+out_wait:
+   hostdata->pdma_residual = len - start;
+
/* wait for 53C80 registers to be available */
while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG))
;
 
-   if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER))
-   printk(KERN_ERR "53C400r: no end dma signal\n");
-   
+   if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
+ BASR_END_DMA_TRANSFER, BASR_END_DMA_TRANSFER,
+ HZ / 64) < 0)
+   scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA 
timeout (%d)\n",
+   __func__, hostdata->pdma_residual);
+
return 0;
 }
 
@@ -571,10 +576,8 @@ static inline int generic_NCR5380_pwrite(struct 
NCR5380_hostdata *hostdata,
NCR5380_write(hostdata->c400_ctl_status, CSR_BASE);
NCR5380_write(hostdata->c400_blk_cnt, blocks);
while (1) {
-   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ) {
-   printk(KERN_ERR "53C400w: Got 53C80_IRQ start=%d, 
blocks=%d\n", start, blocks);
-   return -1;
-   }
+   if (NCR5380_read(hostdata->c400_ctl_status) & 
CSR_GATED_53C80_IRQ)
+   goto out_wait;
 
if (NCR5380_read(hostdata->c400_blk_cnt) == 0)
break;
@@ -612,18 +615,24 @@ static inline int generic_NCR5380_pwrite(struct 
NCR5380_hostdata *hostdata,
blocks--;
}
 
+out_wait:
+   hostdata->pdma_residual = len - start;
+
/* wait for 53C80 registers to be available */
while (!(NCR5380_read(hostdata->c400_ctl_status) & CSR_53C80_REG)) {
udelay(4); /* DTC436 chip hangs without this */
/* FIXME - no timeout */
}
 
-   if (!(NCR5380_read(BUS_AND_STATUS_REG) & BASR_END_DMA_TRANSFER)) {
-   printk(KERN_ERR "53C400w: no end dma signal\n");
-   }
-
while (!(NCR5380_read(TARGET_COMMAND_REG) & TCR_LAST_BYTE_SENT))
;   // TIMEOUT
+
+   if (NCR5380_poll_politely(hostdata, BUS_AND_STATUS_REG,
+ BASR_END_DMA_TRANSFER, BASR_END_DMA_TRANSFER,
+ HZ / 64) < 0)
+   scmd_printk(KERN_ERR, hostdata->connected, "%s: End of DMA 
timeout (%d)\n",
+

[PATCH v5 6/6] g_NCR5380: Use unambiguous terminology for PDMA send and receive

2017-06-28 Thread Finn Thain
The word "read" may be used to mean "DMA read operation" or
"SCSI READ command", though a READ command implies writing to memory.

Signed-off-by: Finn Thain 
---
 drivers/scsi/g_NCR5380.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/g_NCR5380.c b/drivers/scsi/g_NCR5380.c
index f7e50d2bca07..b8b1ed2806bb 100644
--- a/drivers/scsi/g_NCR5380.c
+++ b/drivers/scsi/g_NCR5380.c
@@ -49,8 +49,8 @@
int board
 
 #define NCR5380_dma_xfer_len  generic_NCR5380_dma_xfer_len
-#define NCR5380_dma_recv_setup  generic_NCR5380_pread
-#define NCR5380_dma_send_setup  generic_NCR5380_pwrite
+#define NCR5380_dma_recv_setup  generic_NCR5380_precv
+#define NCR5380_dma_send_setup  generic_NCR5380_psend
 #define NCR5380_dma_residual  generic_NCR5380_dma_residual
 
 #define NCR5380_intr  generic_NCR5380_intr
@@ -507,7 +507,7 @@ static void wait_for_53c80_access(struct NCR5380_hostdata 
*hostdata)
 }
 
 /**
- * generic_NCR5380_pread - pseudo DMA read
+ * generic_NCR5380_precv - pseudo DMA receive
  * @hostdata: scsi host private data
  * @dst: buffer to write into
  * @len: transfer size
@@ -515,7 +515,7 @@ static void wait_for_53c80_access(struct NCR5380_hostdata 
*hostdata)
  * Perform a pseudo DMA mode receive from a 53C400 or equivalent device.
  */
 
-static inline int generic_NCR5380_pread(struct NCR5380_hostdata *hostdata,
+static inline int generic_NCR5380_precv(struct NCR5380_hostdata *hostdata,
 unsigned char *dst, int len)
 {
int residual;
@@ -574,7 +574,7 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
 }
 
 /**
- * generic_NCR5380_pwrite - pseudo DMA write
+ * generic_NCR5380_psend - pseudo DMA send
  * @hostdata: scsi host private data
  * @src: buffer to read from
  * @len: transfer size
@@ -582,8 +582,8 @@ static inline int generic_NCR5380_pread(struct 
NCR5380_hostdata *hostdata,
  * Perform a pseudo DMA mode send to a 53C400 or equivalent device.
  */
 
-static inline int generic_NCR5380_pwrite(struct NCR5380_hostdata *hostdata,
- unsigned char *src, int len)
+static inline int generic_NCR5380_psend(struct NCR5380_hostdata *hostdata,
+unsigned char *src, int len)
 {
int residual;
int start = 0;
-- 
2.13.0



[linux-next][PATCH] usb: dwc3: core: Call dwc3_core_get_phy() before initializing phys

2017-06-28 Thread Vignesh R
commit f54edb539c116 ("usb: dwc3: core: initialize ULPI before trying to
get the PHY") moved call to dwc3_core_get_phy() from dwc3_probe() to
dwc3_core_init() after dwc3_core_soft_reset(). But
dwc3_core_soft_reset() calls phy_init(), therefore dwc3_core_get_phy()
needs to be called before dwc3_core_soft_reset().

Fix this by moving call to dwc3_core_get_phy() before
dwc3_core_soft_reset().

This fixes the following abort seen on DRA7xx platforms
[   24.769118] usb usb2: SerialNumber: xhci-hcd.1.auto
[   24.781144] hub 2-0:1.0: USB hub found
[   24.787836] hub 2-0:1.0: 1 port detected
[   24.809939] Unhandled fault: imprecise external abort (0x1406) at 0x

Reported-by: Carlos Hernandez 
Signed-off-by: Vignesh R 
---
 drivers/usb/dwc3/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 326b302fc440..03474d3575ab 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -766,15 +766,15 @@ static int dwc3_core_init(struct dwc3 *dwc)
dwc->maximum_speed = USB_SPEED_HIGH;
}
 
-   ret = dwc3_core_soft_reset(dwc);
+   ret = dwc3_core_get_phy(dwc);
if (ret)
goto err0;
 
-   ret = dwc3_phy_setup(dwc);
+   ret = dwc3_core_soft_reset(dwc);
if (ret)
goto err0;
 
-   ret = dwc3_core_get_phy(dwc);
+   ret = dwc3_phy_setup(dwc);
if (ret)
goto err0;
 
-- 
2.13.0



Re: [PATCH] ib_isert: prevent NULL pointer dereference in isert_login_recv_done()

2017-06-28 Thread Sagi Grimberg



Just tested this patch, I wasn't able to reproduce the NULL pointer
dereference or any other bugs, so this fix seems safe enough to me.

Tested-by: Andrea Righi 


Can you test just the one liner fix below?


@@ -1452,7 +1452,7 @@
  isert_login_recv_done(struct ib_cq *cq, struct ib_wc *wc)
  {
struct isert_conn *isert_conn = wc->qp->qp_context;
-   struct ib_device *ib_dev = isert_conn->cm_id->device;
+   struct ib_device *ib_dev = isert_conn->device->ib_device;
  
  	if (unlikely(wc->status != IB_WC_SUCCESS)) {

isert_print_wc(wc, "login recv");


[PATCH] net: ibm: ibmveth: constify dev_pm_ops structures.

2017-06-28 Thread Arvind Yadav
dev_pm_ops are not supposed to change at runtime. All functions
working with dev_pm_ops provided by <linux/device.h> work with const
dev_pm_ops. So mark the non-const structs as const.

File size before:
   text    data     bss     dec     hex filename
  15426    1256       0   16682    412a drivers/net/ethernet/ibm/ibmveth.o

File size After adding 'const':
   text    data     bss     dec     hex filename
  15618    1064       0   16682    412a drivers/net/ethernet/ibm/ibmveth.o

Signed-off-by: Arvind Yadav 
---
 drivers/net/ethernet/ibm/ibmveth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.c 
b/drivers/net/ethernet/ibm/ibmveth.c
index 72ab7b6..02b26bf 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1843,7 +1843,7 @@ static int ibmveth_resume(struct device *dev)
 };
 MODULE_DEVICE_TABLE(vio, ibmveth_device_table);
 
-static struct dev_pm_ops ibmveth_pm_ops = {
+static const struct dev_pm_ops ibmveth_pm_ops = {
.resume = ibmveth_resume
 };
 
-- 
1.9.1



[PATCH] net: freescale: gianfar : constify dev_pm_ops structures.

2017-06-28 Thread Arvind Yadav
dev_pm_ops are not supposed to change at runtime. All functions
working with dev_pm_ops provided by <linux/device.h> work with const
dev_pm_ops. So mark the non-const structs as const.

File size before:
   text    data     bss     dec     hex filename
  19057     392       0   19449    4bf9 drivers/net/ethernet/freescale/gianfar.o

File size After adding 'const':
   text    data     bss     dec     hex filename
  19249     192       0   19441    4bf1 drivers/net/ethernet/freescale/gianfar.o

Signed-off-by: Arvind Yadav 
---
 drivers/net/ethernet/freescale/gianfar.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/gianfar.c 
b/drivers/net/ethernet/freescale/gianfar.c
index 0ff166e..e3b0501 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1718,7 +1718,7 @@ static int gfar_restore(struct device *dev)
return 0;
 }
 
-static struct dev_pm_ops gfar_pm_ops = {
+static const struct dev_pm_ops gfar_pm_ops = {
.suspend = gfar_suspend,
.resume = gfar_resume,
.freeze = gfar_suspend,
-- 
1.9.1



Re: [PATCH] thermal/intel_powerclamp: add const to thermal_cooling_device_ops structure

2017-06-28 Thread Bhumika Goyal
On Thu, Jun 29, 2017 at 11:02 AM, Zhang Rui  wrote:
> On Thu, 2017-06-29 at 10:41 +0530, Bhumika Goyal wrote:
>> On Thu, Jun 29, 2017 at 8:30 AM, Zhang Rui 
>> wrote:
>> >
>> > On Wed, 2017-06-21 at 12:39 +0530, Bhumika Goyal wrote:
>> > >
>> > > Declare thermal_cooling_device_ops structure as const as it is
>> > > only
>> > > passed
>> > > as an argument to the function thermal_cooling_device_register
>> > > and
>> > > this
>> > > argument is of type const. So, declare the structure as const.
>> > >
>> > I checked the source and the code is written in this way as well in
>> > some other drivers, why not fix them altogether?
>> >
>> Hey,
>>
>> I have already sent the patches for the drivers requiring this
>> change.
>> Some of them got applied as well.
>
> I mean the other drivers that use thermal_cooling_device_ops.
>

Actually, I also meant the same but maybe I wasn't clear enough :)

> Surely I can apply this patch, but still it would be nice if you could
> check the other places that define thermal_cooling_device_ops and fix
> all of them in one go. :)
>

I checked again and there remains only one file, in power/supply, where
thermal_cooling_device_ops can be made const. I will submit a patch for it,
but it will have to be a separate patch because the maintainers are different.
All other places are either already const, or I have submitted patches for
them, or they cannot be made const.

Thanks,
Bhumika

> thanks,
> rui
>>
>> Thanks,
>> Bhumika
>>
>> >
>> > thanks,
>> > rui
>> > >
>> > > Signed-off-by: Bhumika Goyal 
>> > > ---
>> > >  drivers/thermal/intel_powerclamp.c | 2 +-
>> > >  1 file changed, 1 insertion(+), 1 deletion(-)
>> > >
>> > > diff --git a/drivers/thermal/intel_powerclamp.c
>> > > b/drivers/thermal/intel_powerclamp.c
>> > > index d718cd1..e4c68b7 100644
>> > > --- a/drivers/thermal/intel_powerclamp.c
>> > > +++ b/drivers/thermal/intel_powerclamp.c
>> > > @@ -659,7 +659,7 @@ static int powerclamp_set_cur_state(struct
>> > > thermal_cooling_device *cdev,
>> > >  }
>> > >
>> > >  /* bind to generic thermal layer as cooling device*/
>> > > -static struct thermal_cooling_device_ops powerclamp_cooling_ops
>> > > = {
>> > > +static const struct thermal_cooling_device_ops
>> > > powerclamp_cooling_ops = {
>> > >   .get_max_state = powerclamp_get_max_state,
>> > >   .get_cur_state = powerclamp_get_cur_state,
>> > >   .set_cur_state = powerclamp_set_cur_state,


Re: [PATCH] futex: avoid undefined behaviour when shift exponent is negative

2017-06-28 Thread zhong jiang
On 2017/6/29 12:29, h...@zytor.com wrote:
> On June 28, 2017 7:12:04 PM PDT, zhong jiang  wrote:
>> On 2017/6/29 5:43, h...@zytor.com wrote:
>>> On June 27, 2017 9:35:10 PM PDT, zhong jiang 
>> wrote:
 Hi,  Ingo

 Thank you for the comment.
 On 2017/6/22 0:40, Ingo Molnar wrote:
> * zhong jiang  wrote:
>
>> when the shift exponent is negative, a left shift is always zero. Therefore,
>> we modify the logic to avoid the warning.
>>
>> Signed-off-by: zhong jiang 
>> ---
>>  arch/x86/include/asm/futex.h | 8 ++--
>>  1 file changed, 6 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/futex.h
 b/arch/x86/include/asm/futex.h
>> index b4c1f54..2425fca 100644
>> --- a/arch/x86/include/asm/futex.h
>> +++ b/arch/x86/include/asm/futex.h
>> @@ -49,8 +49,12 @@ static inline int futex_atomic_op_inuser(int
 encoded_op, u32 __user *uaddr)
>>  int cmparg = (encoded_op << 20) >> 20;
>>  int oldval = 0, ret, tem;
>>  
>> -if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
>> -oparg = 1 << oparg;
>> +if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
>> +if (oparg >= 0)
>> +oparg = 1 << oparg;
>> +else
>> +oparg = 0;
>> +}
> Could we avoid all these complications by using an unsigned type?
 I think it is not feasible: a negative shift exponent can legitimately
 occur and is reasonable.
  As in the above case, a negative oparg is common.

 I think it can be avoided by the following change.

 diff --git a/arch/x86/include/asm/futex.h
 b/arch/x86/include/asm/futex.h
 index b4c1f54..3205e86 100644
 --- a/arch/x86/include/asm/futex.h
 +++ b/arch/x86/include/asm/futex.h
 @@ -50,7 +50,7 @@ static inline int futex_atomic_op_inuser(int
 encoded_op, u32 __user *uaddr)
int oldval = 0, ret, tem;

if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
 -   oparg = 1 << oparg;
 +   oparg = safe_shift(1, oparg);

if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
 diff --git a/drivers/video/fbdev/core/fbmem.c
 b/drivers/video/fbdev/core/fbmem.c
 index 069fe79..b4edda3 100644
 --- a/drivers/video/fbdev/core/fbmem.c
 +++ b/drivers/video/fbdev/core/fbmem.c
 @@ -190,11 +190,6 @@ char* fb_get_buffer_offset(struct fb_info
>> *info,
 struct fb_pixmap *buf, u32 size

 #ifdef CONFIG_LOGO

 -static inline unsigned safe_shift(unsigned d, int n)
 -{
 -   return n < 0 ? d >> -n : d << n;
 -}
 -
 static void fb_set_logocmap(struct fb_info *info,
   const struct linux_logo *logo)
 {
 diff --git a/include/linux/kernel.h b/include/linux/kernel.h
 index d043ada..f3b8856 100644
 --- a/include/linux/kernel.h
 +++ b/include/linux/kernel.h
 @@ -841,6 +841,10 @@ static inline void ftrace_dump(enum
 ftrace_dump_mode oops_dump_mode) { }
  */
 #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)

 +static inline unsigned safe_shift(unsigned d, int n)
 +{
 +   return n < 0 ? d >> -n : d << n;
 +}

 Thanks
 zhongjiang

> Thanks,
>
>   Ingo
>
> .
>
>>> What makes it reasonable?  It is totally ill-defined and doesn't do
>> anything useful now?
>> Thanks you for comments.
>>
>> Maybe I mistook the meaning. I tested the negative cases on x86; all
>> cases are zero, so I came to that conclusion.
>>
>> zj.c:15:8: warning: left shift count is negative
>> [-Wshift-count-negative]
>>  j = 1 << -2048;
>>^
>> [root@localhost zhongjiang]# ./zj
>> j = 0
>> j.c:15:8: warning: left shift count is negative
>> [-Wshift-count-negative]
>>  j = 1 << -2047;
>>^
>> [root@localhost zhongjiang]# ./zj
>> j = 0
>>
>> I insmod'ed a module into the kernel to test the testcases; all of the
>> results are zero.
>>
>> I wonder whether I am missing some point. Could you point it out to me,
>> please?
>>
>> Thanks
>> zhongjiang
>>
>>
> When you use compile-time constants, the compiler generates the value at 
> compile time, which can be totally different.
 Yes, I tested that. Thanks.
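
For illustration, a minimal user-space sketch of the difference (an assumed
example, not the module used above): with a literal constant the compiler
folds the shift at compile time after warning, while a runtime negative count
is undefined behaviour whose result depends on how the CPU masks it; a
safe_shift()-style helper is well defined either way.

#include <stdio.h>

static inline unsigned safe_shift(unsigned d, int n)
{
	return n < 0 ? d >> -n : d << n;
}

int main(void)
{
	volatile int n = -3;	/* runtime shift count, not a constant */

	/* 1u << n is undefined behaviour for a negative count */
	printf("1u << n    = %u\n", 1u << n);
	/* safe_shift(1, -3) is a well-defined right shift: 1 >> 3 == 0 */
	printf("safe_shift = %u\n", safe_shift(1, n));
	return 0;
}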

 Thanks
 zhongjiang



[PATCH 15/15] mm/migrate: allow migrate_vma() to alloc new page on empty entry v3

2017-06-28 Thread Jérôme Glisse
This allows the caller of migrate_vma() to allocate a new page for an empty
CPU page table entry. It only supports anonymous memory, and it won't allow
a new page to be instantiated if userfaultfd is armed.

This is useful to device drivers that want to migrate a range of virtual
addresses and would rather allocate new memory up front than have to fault
later on.
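
To make the driver-side contract concrete, here is a minimal sketch of an
alloc_and_copy() callback handling such entries (the callback prototype and
the dummy_alloc_device_page() helper are assumptions for illustration, not
part of this patch):

static void dummy_alloc_and_copy(struct vm_area_struct *vma,
				 const unsigned long *src,
				 unsigned long *dst,
				 unsigned long start,
				 unsigned long end,
				 void *private)
{
	unsigned long addr, i;

	for (addr = start, i = 0; addr < end; addr += PAGE_SIZE, i++) {
		struct page *dpage;

		if (!(src[i] & MIGRATE_PFN_MIGRATE))
			continue;

		/*
		 * MIGRATE_PFN_MIGRATE with no source page means the CPU page
		 * table entry was empty: the driver may allocate device memory
		 * for it (anonymous vma only, no userfaultfd).
		 */
		if (!migrate_pfn_to_page(src[i])) {
			dpage = dummy_alloc_device_page(private, addr);
			if (!dpage)
				continue;	/* leave dst[i] == 0, entry is skipped */
			/* other flags (e.g. MIGRATE_PFN_LOCKED) depend on how
			 * the driver prepared the new page */
			dst[i] = migrate_pfn(page_to_pfn(dpage)) |
				 MIGRATE_PFN_DEVICE;
		}
	}
}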

Changed since v2:
  - differentiate between empty CPU page table entry and non empty
  - improve code comments explaining how this works
Changed since v1:
  - 5 level page table fix

Signed-off-by: Jérôme Glisse 
---
 include/linux/migrate.h |   9 +++
 mm/migrate.c| 158 ++--
 2 files changed, 161 insertions(+), 6 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index c84541a9610f..c65d0875cdc9 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -205,6 +205,15 @@ static inline unsigned long migrate_pfn(unsigned long pfn)
  * driver should avoid setting MIGRATE_PFN_ERROR unless it is really in an
  * unrecoverable state.
  *
+ * For empty entries inside the CPU page table (pte_none() or pmd_none() is true)
+ * we do set the MIGRATE_PFN_MIGRATE flag inside the corresponding source array,
+ * thus allowing the device driver to allocate device memory for those unbacked
+ * virtual addresses. For this the device driver simply has to allocate device
+ * memory and properly set the destination entry like for regular migration. Note
+ * that this can still fail, and thus the device driver must check if the
+ * migration was successful for those entries inside the finalize_and_map()
+ * callback, just like for regular migration.
+ *
  * THE alloc_and_copy() CALLBACK MUST NOT CHANGE ANY OF THE SRC ARRAY ENTRIES
  * OR BAD THINGS WILL HAPPEN !
  *
diff --git a/mm/migrate.c b/mm/migrate.c
index c3ff98a4c3dc..643ea61ca9bb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2102,7 +2103,23 @@ static int migrate_vma_collect_hole(unsigned long start,
struct mm_walk *walk)
 {
struct migrate_vma *migrate = walk->private;
-   unsigned long addr, next;
+   unsigned long addr;
+
+   for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
+   migrate->cpages++;
+   migrate->dst[migrate->npages] = 0;
+   migrate->src[migrate->npages++] = MIGRATE_PFN_MIGRATE;
+   }
+
+   return 0;
+}
+
+static int migrate_vma_collect_thp(unsigned long start,
+   unsigned long end,
+   struct mm_walk *walk)
+{
+   struct migrate_vma *migrate = walk->private;
+   unsigned long addr;
 
for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
migrate->dst[migrate->npages] = 0;
@@ -2123,10 +2140,14 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
spinlock_t *ptl;
pte_t *ptep;
 
-   if (pmd_none(*pmdp) || pmd_trans_unstable(pmdp)) {
+   if (pmd_none(*pmdp)) {
/* FIXME support THP */
return migrate_vma_collect_hole(start, end, walk);
}
+   if (pmd_trans_unstable(pmdp)) {
+   /* FIXME support THP */
+   return migrate_vma_collect_thp(start, end, walk);
+   }
 
   ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
arch_enter_lazy_mmu_mode();
@@ -2141,7 +2162,9 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
pfn = pte_pfn(pte);
 
if (pte_none(pte)) {
-   mpfn = pfn = 0;
+   mpfn = MIGRATE_PFN_MIGRATE;
+   migrate->cpages++;
+   pfn = 0;
goto next;
}
 
@@ -2454,6 +2477,118 @@ static void migrate_vma_unmap(struct migrate_vma 
*migrate)
}
 }
 
+static void migrate_vma_insert_page(struct migrate_vma *migrate,
+   unsigned long addr,
+   struct page *page,
+   unsigned long *src,
+   unsigned long *dst)
+{
+   struct vm_area_struct *vma = migrate->vma;
+   struct mm_struct *mm = vma->vm_mm;
+   struct mem_cgroup *memcg;
+   spinlock_t *ptl;
+   pgd_t *pgdp;
+   p4d_t *p4dp;
+   pud_t *pudp;
+   pmd_t *pmdp;
+   pte_t *ptep;
+   pte_t entry;
+
+   /* Only allow populating anonymous memory */
+   if (!vma_is_anonymous(vma))
+   goto abort;
+
+   pgdp = pgd_offset(mm, addr);
+   p4dp = p4d_alloc(mm, pgdp, addr);
+   if (!p4dp)
+   goto abort;
+   pudp = pud_alloc(mm, p4dp, addr);
+   if (!pudp)
+   goto abort;
+   pmdp = pmd_alloc(mm, pudp, addr);
+   if (!pmdp)
+   goto abort;
+
+   if (pmd_trans_unstable(pmdp) || 

[PATCH 09/15] mm/hmm/devmem: device memory hotplug using ZONE_DEVICE v6

2017-06-28 Thread Jérôme Glisse
This introduces a simple struct and associated helpers for device drivers
to use when hotplugging un-addressable device memory as ZONE_DEVICE. It
will find an unused physical address range and trigger memory hotplug for
it, which allocates and initializes struct pages for the device memory.
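
For orientation, a minimal sketch of how a driver might use the new helpers
(the dummy_* callbacks are hypothetical; the ops structure and the
hmm_devmem_add() prototype are the ones added by this patch, assuming
ERR_PTR-style error returns):

static void dummy_devmem_free(struct hmm_devmem *devmem, struct page *page);
static int dummy_devmem_fault(struct hmm_devmem *devmem,
			      struct vm_area_struct *vma,
			      unsigned long addr, struct page *page,
			      unsigned int flags, pmd_t *pmdp);

static const struct hmm_devmem_ops dummy_devmem_ops = {
	.free	= dummy_devmem_free,
	.fault	= dummy_devmem_fault,
};

static int dummy_register_memory(struct device *dev, unsigned long size)
{
	struct hmm_devmem *devmem;

	devmem = hmm_devmem_add(&dummy_devmem_ops, dev, size);
	if (IS_ERR(devmem))
		return PTR_ERR(devmem);

	/* struct pages now exist for [devmem->pfn_first, devmem->pfn_last) */
	return 0;
}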

Changed since v5:
  - kernel configuration simplification
  - remove now unused device driver helper
Changed since v4:
  - enable device_private_key static key when adding device memory
Changed since v3:
  - s/device unaddressable/device private/
Changed since v2:
  - s/SECTION_SIZE/PA_SECTION_SIZE
Changed since v1:
  - change to adapt to new add_pages() helper
  - make this x86-64 only for now

Signed-off-by: Jérôme Glisse 
Signed-off-by: Evgeny Baskakov 
Signed-off-by: John Hubbard 
Signed-off-by: Mark Hairgrove 
Signed-off-by: Sherry Cheung 
Signed-off-by: Subhash Gutti 
---
 include/linux/hmm.h | 104 +++
 mm/hmm.c| 375 +++-
 2 files changed, 478 insertions(+), 1 deletion(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 248a6e09cb56..2fd539b0eab3 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -72,6 +72,11 @@
 
 #if IS_ENABLED(CONFIG_HMM)
 
+#include 
+#include 
+#include 
+
+
 struct hmm;
 
 /*
@@ -322,6 +327,105 @@ int hmm_vma_fault(struct vm_area_struct *vma,
 #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 
 
+#if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
+struct hmm_devmem;
+
+struct page *hmm_vma_alloc_locked_page(struct vm_area_struct *vma,
+  unsigned long addr);
+
+/*
+ * struct hmm_devmem_ops - callback for ZONE_DEVICE memory events
+ *
+ * @free: called when the refcount on the page reaches 1 and it is thus no longer used
+ * @fault: called when there is a page fault to unaddressable memory
+ */
+struct hmm_devmem_ops {
+   void (*free)(struct hmm_devmem *devmem, struct page *page);
+   int (*fault)(struct hmm_devmem *devmem,
+struct vm_area_struct *vma,
+unsigned long addr,
+struct page *page,
+unsigned int flags,
+pmd_t *pmdp);
+};
+
+/*
+ * struct hmm_devmem - track device memory
+ *
+ * @completion: completion object for device memory
+ * @pfn_first: first pfn for this resource (set by hmm_devmem_add())
+ * @pfn_last: last pfn for this resource (set by hmm_devmem_add())
+ * @resource: IO resource reserved for this chunk of memory
+ * @pagemap: device page map for that chunk
+ * @device: device to bind resource to
+ * @ops: memory operations callback
+ * @ref: per CPU refcount
+ *
+ * This is a helper structure for device drivers that do not wish to implement
+ * the gory details related to hotplugging new memory and allocating struct
+ * pages.
+ *
+ * Device drivers can directly use ZONE_DEVICE memory on their own if they
+ * wish to do so.
+ */
+struct hmm_devmem {
+   struct completion   completion;
+   unsigned long   pfn_first;
+   unsigned long   pfn_last;
+   struct resource *resource;
+   struct device   *device;
+   struct dev_pagemap  pagemap;
+   const struct hmm_devmem_ops *ops;
+   struct percpu_ref   ref;
+};
+
+/*
+ * To add (hotplug) device memory, HMM assumes that there is no real resource
+ * that reserves a range in the physical address space (this is intended to be
+ * used by unaddressable device memory). It will reserve a physical range big
+ * enough and allocate struct pages for it.
+ *
+ * The device driver can wrap the hmm_devmem struct inside a private device
+ * driver struct. The device driver must call hmm_devmem_remove() before the
+ * device goes away and before freeing the hmm_devmem struct memory.
+ */
+struct hmm_devmem *hmm_devmem_add(const struct hmm_devmem_ops *ops,
+ struct device *device,
+ unsigned long size);
+void hmm_devmem_remove(struct hmm_devmem *devmem);
+
+/*
+ * hmm_devmem_page_set_drvdata - set per-page driver data field
+ *
+ * @page: pointer to struct page
+ * @data: driver data value to set
+ *
+ * Because the page cannot be on the lru we have an unsigned long that the driver
+ * can use to store a per-page field. This is just a simple helper to do that.
+ */
+static inline void hmm_devmem_page_set_drvdata(struct page *page,
+  unsigned long data)
+{
+   unsigned long *drvdata = (unsigned long *)&page->pgmap;
+
+   drvdata[1] = data;
+}
+
+/*
+ * hmm_devmem_page_get_drvdata - get per page driver data field
+ *
+ * @page: pointer to struct page
+ * Return: driver data value
+ */
+static inline unsigned long hmm_devmem_page_get_drvdata(struct page *page)
+{
+   unsigned long *drvdata = (unsigned 

[PATCH 14/15] mm/migrate: support un-addressable ZONE_DEVICE page in migration v2

2017-06-28 Thread Jérôme Glisse
Allow unmapping and restoring the special swap entries of un-addressable
ZONE_DEVICE memory.

Changed since v1:
  - s/device unaddressable/device private/

Signed-off-by: Jérôme Glisse 
Cc: Kirill A. Shutemov 
---
 include/linux/migrate.h |  10 +++-
 mm/migrate.c| 134 ++--
 mm/page_vma_mapped.c|  10 
 mm/rmap.c   |  25 +
 4 files changed, 150 insertions(+), 29 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ce87a2946897..c84541a9610f 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -146,12 +146,18 @@ static inline int migrate_misplaced_transhuge_page(struct 
mm_struct *mm,
 
 #ifdef CONFIG_MIGRATION
 
+/*
+ * Watch out for PAE architecture, which has an unsigned long, and might not
+ * have enough bits to store all physical address and flags. So far we have
+ * enough room for all our flags.
+ */
 #define MIGRATE_PFN_VALID  (1UL << 0)
 #define MIGRATE_PFN_MIGRATE(1UL << 1)
 #define MIGRATE_PFN_LOCKED (1UL << 2)
 #define MIGRATE_PFN_WRITE  (1UL << 3)
-#define MIGRATE_PFN_ERROR  (1UL << 4)
-#define MIGRATE_PFN_SHIFT  5
+#define MIGRATE_PFN_DEVICE (1UL << 4)
+#define MIGRATE_PFN_ERROR  (1UL << 5)
+#define MIGRATE_PFN_SHIFT  6
 
 static inline struct page *migrate_pfn_to_page(unsigned long mpfn)
 {
diff --git a/mm/migrate.c b/mm/migrate.c
index d11fb4c8d785..c3ff98a4c3dc 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -227,7 +228,15 @@ static bool remove_migration_pte(struct page *page, struct 
vm_area_struct *vma,
if (is_write_migration_entry(entry))
pte = maybe_mkwrite(pte, vma);
 
-   flush_dcache_page(new);
+   if (unlikely(is_zone_device_page(new)) &&
+   is_device_private_page(new)) {
+   entry = make_device_private_entry(new, pte_write(pte));
+   pte = swp_entry_to_pte(entry);
+   if (pte_swp_soft_dirty(*pvmw.pte))
+   pte = pte_mksoft_dirty(pte);
+   } else
+   flush_dcache_page(new);
+
 #ifdef CONFIG_HUGETLB_PAGE
if (PageHuge(new)) {
pte = pte_mkhuge(pte);
@@ -2131,17 +2140,40 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
pte = *ptep;
pfn = pte_pfn(pte);
 
-   if (!pte_present(pte)) {
+   if (pte_none(pte)) {
mpfn = pfn = 0;
goto next;
}
 
+   if (!pte_present(pte)) {
+   mpfn = pfn = 0;
+
+   /*
+* Only care about unaddressable device page special
+* page table entry. Other special swap entries are not
+* migratable, and we ignore regular swapped page.
+*/
+   entry = pte_to_swp_entry(pte);
+   if (!is_device_private_entry(entry))
+   goto next;
+
+   page = device_private_entry_to_page(entry);
+   mpfn = migrate_pfn(page_to_pfn(page))|
+   MIGRATE_PFN_DEVICE | MIGRATE_PFN_MIGRATE;
+   if (is_write_device_private_entry(entry))
+   mpfn |= MIGRATE_PFN_WRITE;
+   } else {
+   page = vm_normal_page(migrate->vma, addr, pte);
+   mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
+   mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
+   }
+
/* FIXME support THP */
-   page = vm_normal_page(migrate->vma, addr, pte);
if (!page || !page->mapping || PageTransCompound(page)) {
mpfn = pfn = 0;
goto next;
}
+   pfn = page_to_pfn(page);
 
/*
 * By getting a reference on the page we pin it and that blocks
@@ -2154,8 +2186,6 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 */
get_page(page);
migrate->cpages++;
-   mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
-   mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
 
/*
 * Optimize for the common case where page is only mapped once
@@ -2186,6 +2216,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
}
 
 next:
+   migrate->dst[migrate->npages] = 0;
migrate->src[migrate->npages++] = mpfn;
}
arch_leave_lazy_mmu_mode();
@@ -2255,6 +2286,15 @@ static bool migrate_vma_check_page(struct page 

[PATCH 13/15] mm/migrate: migrate_vma() unmap page from vma while collecting pages

2017-06-28 Thread Jérôme Glisse
The common case for migration of a virtual address range is that pages are
mapped only once, inside the vma in which the migration is taking place.
Because we already walk the CPU page table for that range, we can directly
do the unmap there and set up the special migration swap entries.

Signed-off-by: Jérôme Glisse 
Signed-off-by: Evgeny Baskakov 
Signed-off-by: John Hubbard 
Signed-off-by: Mark Hairgrove 
Signed-off-by: Sherry Cheung 
Signed-off-by: Subhash Gutti 
---
 mm/migrate.c | 114 ++-
 1 file changed, 98 insertions(+), 16 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index c2080658b905..d11fb4c8d785 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2110,7 +2110,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 {
struct migrate_vma *migrate = walk->private;
struct mm_struct *mm = walk->vma->vm_mm;
-   unsigned long addr = start;
+   unsigned long addr = start, unmapped = 0;
spinlock_t *ptl;
pte_t *ptep;
 
@@ -2120,9 +2120,12 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
}
 
   ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl);
+   arch_enter_lazy_mmu_mode();
+
for (; addr < end; addr += PAGE_SIZE, ptep++) {
unsigned long mpfn, pfn;
struct page *page;
+   swp_entry_t entry;
pte_t pte;
 
pte = *ptep;
@@ -2154,11 +2157,44 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
 
+   /*
+* Optimize for the common case where page is only mapped once
+* in one process. If we can lock the page, then we can safely
+* set up a special migration page table entry now.
+*/
+   if (trylock_page(page)) {
+   pte_t swp_pte;
+
+   mpfn |= MIGRATE_PFN_LOCKED;
+   ptep_get_and_clear(mm, addr, ptep);
+
+   /* Setup special migration page table entry */
+   entry = make_migration_entry(page, pte_write(pte));
+   swp_pte = swp_entry_to_pte(entry);
+   if (pte_soft_dirty(pte))
+   swp_pte = pte_swp_mksoft_dirty(swp_pte);
+   set_pte_at(mm, addr, ptep, swp_pte);
+
+   /*
+* This is like regular unmap: we remove the rmap and
+* drop page refcount. Page won't be freed, as we took
+* a reference just above.
+*/
+   page_remove_rmap(page, false);
+   put_page(page);
+   unmapped++;
+   }
+
 next:
migrate->src[migrate->npages++] = mpfn;
}
+   arch_leave_lazy_mmu_mode();
pte_unmap_unlock(ptep - 1, ptl);
 
+   /* Only flush the TLB if we actually modified any entries */
+   if (unmapped)
+   flush_tlb_range(walk->vma, start, end);
+
return 0;
 }
 
@@ -2183,7 +2219,13 @@ static void migrate_vma_collect(struct migrate_vma 
*migrate)
mm_walk.mm = migrate->vma->vm_mm;
mm_walk.private = migrate;
 
+   mmu_notifier_invalidate_range_start(mm_walk.mm,
+   migrate->start,
+   migrate->end);
   walk_page_range(migrate->start, migrate->end, &mm_walk);
+   mmu_notifier_invalidate_range_end(mm_walk.mm,
+ migrate->start,
+ migrate->end);
 
migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT);
 }
@@ -2239,12 +2281,16 @@ static void migrate_vma_prepare(struct migrate_vma 
*migrate)
 
for (i = 0; i < npages; i++) {
struct page *page = migrate_pfn_to_page(migrate->src[i]);
+   bool remap = true;
 
if (!page)
continue;
 
-   lock_page(page);
-   migrate->src[i] |= MIGRATE_PFN_LOCKED;
+   if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) {
+   remap = false;
+   lock_page(page);
+   migrate->src[i] |= MIGRATE_PFN_LOCKED;
+   }
 
if (!PageLRU(page) && allow_drain) {
/* Drain CPU's pagevec */
@@ -2253,21 +2299,50 @@ static void migrate_vma_prepare(struct migrate_vma 
*migrate)
}
 
if (isolate_lru_page(page)) {
-   migrate->src[i] = 0;
-   unlock_page(page);
-   migrate->cpages--;
-   

Re: [PATCH v4 1/6] clk: sunxi-ng: div: Add support for fixed post-divider

2017-06-28 Thread Priit Laes
On Tue, Jun 27, 2017 at 11:46:07AM +0200, Maxime Ripard wrote:
> Hi!
> 
> On Sun, Jun 25, 2017 at 11:45:42PM +0300, Priit Laes wrote:
> > SATA clock on sun4i/sun7i is of type (parent) / M / 6 where
> > 6 is fixed post-divider.
> > 
> > Signed-off-by: Priit Laes 
> > ---
> >  drivers/clk/sunxi-ng/ccu_div.c | 12 ++--
> >  drivers/clk/sunxi-ng/ccu_div.h |  3 ++-
> >  2 files changed, 12 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/clk/sunxi-ng/ccu_div.c b/drivers/clk/sunxi-ng/ccu_div.c
> > index c0e5c10..de30e15 100644
> > --- a/drivers/clk/sunxi-ng/ccu_div.c
> > +++ b/drivers/clk/sunxi-ng/ccu_div.c
> > @@ -62,8 +62,13 @@ static unsigned long ccu_div_recalc_rate(struct clk_hw 
> > *hw,
> > parent_rate = ccu_mux_helper_apply_prediv(>common, >mux, -1,
> >   parent_rate);
> >  
> > -   return divider_recalc_rate(hw, parent_rate, val, cd->div.table,
> > -  cd->div.flags);
> > +   val = divider_recalc_rate(hw, parent_rate, val, cd->div.table,
> > + cd->div.flags);
> > +
> > +   if (cd->common.features & CCU_FEATURE_FIXED_POSTDIV)
> > +   val /= cd->fixed_post_div;
> > +
> > +   return val;
> >  }
> >  
> >  static int ccu_div_determine_rate(struct clk_hw *hw,
> > @@ -89,6 +94,9 @@ static int ccu_div_set_rate(struct clk_hw *hw, unsigned 
> > long rate,
> > val = divider_get_val(rate, parent_rate, cd->div.table, cd->div.width,
> >   cd->div.flags);
> >  
> > +   if (cd->common.features & CCU_FEATURE_FIXED_POSTDIV)
> > +   val *= cd->fixed_post_div;
> > +
> > spin_lock_irqsave(cd->common.lock, flags);
> >  
> > reg = readl(cd->common.base + cd->common.reg);
> 
> You also need to adjust the round_rate callback to take into account
> the post divider before calling divider_round_rate_parent, and after
> since that function can modify the parent_rate.

Is there a way to trigger this function? I don't see it getting called.

> > diff --git a/drivers/clk/sunxi-ng/ccu_div.h b/drivers/clk/sunxi-ng/ccu_div.h
> > index 08d0744..f3a5028 100644
> > --- a/drivers/clk/sunxi-ng/ccu_div.h
> > +++ b/drivers/clk/sunxi-ng/ccu_div.h
> > @@ -86,9 +86,10 @@ struct ccu_div_internal {
> >  struct ccu_div {
> > u32 enable;
> >  
> > -   struct ccu_div_internal div;
> > +   struct ccu_div_internal div;
> 
> Spurious change?

Nope, it was not indented the same way as other variables.
Should I send it as separate patch?

> 
> Thanks!
> Maxime
> 
> -- 
> Maxime Ripard, Free Electrons
> Embedded Linux and Kernel engineering
> http://free-electrons.com




[PATCH 08/15] mm/ZONE_DEVICE: special case put_page() for device private pages v2

2017-06-28 Thread Jérôme Glisse
A ZONE_DEVICE page that reaches a refcount of 1 is free, i.e. it no longer
has any user. For device private pages this is important to catch, and thus
we need to special-case put_page() for them.
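
For context, this refcount transition feeds straight into the dev_pagemap
page_free() callback; a minimal sketch of such a callback (the driver
structure and allocator below are hypothetical):

/*
 * Called from put_zone_device_private_page() once the refcount is back to 1,
 * i.e. the last user of the device private page just dropped its reference.
 */
static void dummy_devmem_page_free(struct page *page, void *data)
{
	struct dummy_devmem *ddevmem = data;

	/* hand the backing device memory back to the driver's allocator */
	dummy_free_device_page(ddevmem, page);
}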

Changed since v1:
  - use static key to disable special code path in put_page() by
default
  - uninline put_zone_device_private_page()
  - fix build issues with some kernel config related to header
inter-dependency

Signed-off-by: Jérôme Glisse 
Cc: Kirill A. Shutemov 
Cc: Dan Williams 
Cc: Ross Zwisler 
---
 include/linux/memremap.h | 13 +
 include/linux/mm.h   | 31 ++-
 kernel/memremap.c| 19 ++-
 mm/hmm.c |  8 
 4 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 45a97f7a612f..57546a07a558 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -126,6 +126,14 @@ struct dev_pagemap {
 void *devm_memremap_pages(struct device *dev, struct resource *res,
struct percpu_ref *ref, struct vmem_altmap *altmap);
 struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
+
+static inline bool is_zone_device_page(const struct page *page);
+
+static inline bool is_device_private_page(const struct page *page)
+{
+   return is_zone_device_page(page) &&
+   page->pgmap->type == MEMORY_DEVICE_PRIVATE;
+}
 #else
 static inline void *devm_memremap_pages(struct device *dev,
struct resource *res, struct percpu_ref *ref,
@@ -144,6 +152,11 @@ static inline struct dev_pagemap 
*find_dev_pagemap(resource_size_t phys)
 {
return NULL;
 }
+
+static inline bool is_device_private_page(const struct page *page)
+{
+   return false;
+}
 #endif
 
 /**
diff --git a/include/linux/mm.h b/include/linux/mm.h
index d53add704a7d..330a216ac315 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct mempolicy;
 struct anon_vma;
@@ -788,25 +789,25 @@ static inline bool is_zone_device_page(const struct page 
*page)
 {
return page_zonenum(page) == ZONE_DEVICE;
 }
-
-static inline bool is_device_private_page(const struct page *page)
-{
-   /* See MEMORY_DEVICE_PRIVATE in include/linux/memory_hotplug.h */
-   return ((page_zonenum(page) == ZONE_DEVICE) &&
-   (page->pgmap->type == MEMORY_DEVICE_PRIVATE));
-}
 #else
 static inline bool is_zone_device_page(const struct page *page)
 {
return false;
 }
+#endif
 
-static inline bool is_device_private_page(const struct page *page)
+#ifdef CONFIG_DEVICE_PRIVATE
+void put_zone_device_private_page(struct page *page);
+#else
+static inline void put_zone_device_private_page(struct page *page)
 {
-   return false;
 }
 #endif
 
+static inline bool is_device_private_page(const struct page *page);
+
+DECLARE_STATIC_KEY_FALSE(device_private_key);
+
 static inline void get_page(struct page *page)
 {
page = compound_head(page);
@@ -822,6 +823,18 @@ static inline void put_page(struct page *page)
 {
page = compound_head(page);
 
+   /*
+* For private device pages we need to catch the refcount transition from
+* 2 to 1: when the refcount reaches one it means the private device page is
+* free and we need to inform the device driver through a callback. See
+* include/linux/memremap.h and HMM for details.
+*/
+   if (static_branch_unlikely(&device_private_key) &&
+   unlikely(is_device_private_page(page))) {
+   put_zone_device_private_page(page);
+   return;
+   }
+
if (put_page_testzero(page))
__put_page(page);
 }
diff --git a/kernel/memremap.c b/kernel/memremap.c
index cd596d4a7356..b9baa6c07918 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -11,7 +11,6 @@
  * General Public License for more details.
  */
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -464,3 +463,21 @@ struct vmem_altmap *to_vmem_altmap(unsigned long 
memmap_start)
return pgmap ? pgmap->altmap : NULL;
 }
 #endif /* CONFIG_ZONE_DEVICE */
+
+
+#ifdef CONFIG_DEVICE_PRIVATE
+void put_zone_device_private_page(struct page *page)
+{
+   int count = page_ref_dec_return(page);
+
+   /*
+* If refcount is 1 then page is freed and refcount is stable as nobody
+* holds a reference on the page.
+*/
+   if (count == 1)
+   page->pgmap->page_free(page, page->pgmap->data);
+   else if (!count)
+   __put_page(page);
+}
+EXPORT_SYMBOL(put_zone_device_private_page);
+#endif /* CONFIG_DEVICE_PRIVATE */
diff --git a/mm/hmm.c b/mm/hmm.c
index e7d5a363d6e5..ff9011ef51f3 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -25,9 +25,17 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 
+/*
+ * Device private memory see HMM 

[PATCH 05/15] mm/hmm/mirror: device page fault handler

2017-06-28 Thread Jérôme Glisse
This handles page faults on behalf of a device driver; unlike handle_mm_fault()
it does not trigger migration back to system memory for device memory.
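
A minimal sketch of how a mirror driver would call it (helper name assumed;
the prototype is the one added below):

static int dummy_mirror_fault_range(struct vm_area_struct *vma,
				    struct hmm_range *range,
				    unsigned long start, unsigned long end,
				    hmm_pfn_t *pfns, bool write)
{
	int ret;

	/* caller holds mmap_sem for read */
	ret = hmm_vma_fault(vma, range, start, end, pfns, write, false);
	if (ret == -EAGAIN)
		return ret;	/* mmap_sem was dropped, retake it and retry */
	if (ret)
		return ret;

	/* success is per address: inspect pfns[] to see what actually faulted */
	return 0;
}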

Signed-off-by: Jérôme Glisse 
Signed-off-by: Evgeny Baskakov 
Signed-off-by: John Hubbard 
Signed-off-by: Mark Hairgrove 
Signed-off-by: Sherry Cheung 
Signed-off-by: Subhash Gutti 
---
 include/linux/hmm.h |  27 ++
 mm/hmm.c| 256 +---
 2 files changed, 271 insertions(+), 12 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index f254856cd727..248a6e09cb56 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -292,6 +292,33 @@ int hmm_vma_get_pfns(struct vm_area_struct *vma,
 unsigned long end,
 hmm_pfn_t *pfns);
 bool hmm_vma_range_done(struct vm_area_struct *vma, struct hmm_range *range);
+
+
+/*
+ * Fault memory on behalf of device driver. Unlike handle_mm_fault(), this will
+ * not migrate any device memory back to system memory. The hmm_pfn_t array 
will
+ * be updated with the fault result and current snapshot of the CPU page table
+ * for the range.
+ *
+ * The mmap_sem must be taken in read mode before entering and it might be
+ * dropped by the function if the block argument is false. In that case, the
+ * function returns -EAGAIN.
+ *
+ * Return value does not reflect if the fault was successful for every single
+ * address or not. Therefore, the caller must inspect the hmm_pfn_t array to
+ * determine fault status for each address.
+ *
+ * Trying to fault inside an invalid vma will result in -EINVAL.
+ *
+ * See the function description in mm/hmm.c for further documentation.
+ */
+int hmm_vma_fault(struct vm_area_struct *vma,
+ struct hmm_range *range,
+ unsigned long start,
+ unsigned long end,
+ hmm_pfn_t *pfns,
+ bool write,
+ bool block);
 #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 
 
diff --git a/mm/hmm.c b/mm/hmm.c
index 472d2370a242..e7d5a363d6e5 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -236,6 +236,36 @@ void hmm_mirror_unregister(struct hmm_mirror *mirror)
 }
 EXPORT_SYMBOL(hmm_mirror_unregister);
 
+struct hmm_vma_walk {
+   struct hmm_range*range;
+   unsigned long   last;
+   boolfault;
+   boolblock;
+   boolwrite;
+};
+
+static int hmm_vma_do_fault(struct mm_walk *walk,
+   unsigned long addr,
+   hmm_pfn_t *pfn)
+{
+   unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_REMOTE;
+   struct hmm_vma_walk *hmm_vma_walk = walk->private;
+   struct vm_area_struct *vma = walk->vma;
+   int r;
+
+   flags |= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY;
+   flags |= hmm_vma_walk->write ? FAULT_FLAG_WRITE : 0;
+   r = handle_mm_fault(vma, addr, flags);
+   if (r & VM_FAULT_RETRY)
+   return -EBUSY;
+   if (r & VM_FAULT_ERROR) {
+   *pfn = HMM_PFN_ERROR;
+   return -EFAULT;
+   }
+
+   return -EAGAIN;
+}
+
 static void hmm_pfns_special(hmm_pfn_t *pfns,
 unsigned long addr,
 unsigned long end)
@@ -259,34 +289,62 @@ static int hmm_pfns_bad(unsigned long addr,
return 0;
 }
 
+static void hmm_pfns_clear(hmm_pfn_t *pfns,
+  unsigned long addr,
+  unsigned long end)
+{
+   for (; addr < end; addr += PAGE_SIZE, pfns++)
+   *pfns = 0;
+}
+
 static int hmm_vma_walk_hole(unsigned long addr,
 unsigned long end,
 struct mm_walk *walk)
 {
-   struct hmm_range *range = walk->private;
+   struct hmm_vma_walk *hmm_vma_walk = walk->private;
+   struct hmm_range *range = hmm_vma_walk->range;
hmm_pfn_t *pfns = range->pfns;
unsigned long i;
 
+   hmm_vma_walk->last = addr;
i = (addr - range->start) >> PAGE_SHIFT;
-   for (; addr < end; addr += PAGE_SIZE, i++)
+   for (; addr < end; addr += PAGE_SIZE, i++) {
pfns[i] = HMM_PFN_EMPTY;
+   if (hmm_vma_walk->fault) {
+   int ret;
 
-   return 0;
+   ret = hmm_vma_do_fault(walk, addr, &pfns[i]);
+   if (ret != -EAGAIN)
+   return ret;
+   }
+   }
+
+   return hmm_vma_walk->fault ? -EAGAIN : 0;
 }
 
 static int hmm_vma_walk_clear(unsigned long addr,
  unsigned long end,
  struct mm_walk *walk)
 {
-   struct hmm_range *range = walk->private;
+   struct hmm_vma_walk *hmm_vma_walk = walk->private;
+   struct hmm_range 

[PATCH 10/15] mm/hmm/devmem: dummy HMM device for ZONE_DEVICE memory v3

2017-06-28 Thread Jérôme Glisse
This introduces a dummy HMM device class so a device driver can use it to
create an hmm_device for the sole purpose of registering device memory.
It is useful to device drivers that want to manage multiple physical
device memories under the same struct device umbrella.
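
A minimal usage sketch (error handling simplified; hmm_devmem_add() is from
the earlier patch in this series, the helper name is hypothetical):

static struct hmm_device *dummy_bind_memories(void *drvdata,
					      const struct hmm_devmem_ops *ops,
					      unsigned long size)
{
	struct hmm_device *hmm_device;
	struct hmm_devmem *devmem;

	hmm_device = hmm_device_new(drvdata);
	if (IS_ERR_OR_NULL(hmm_device))
		return hmm_device ?: ERR_PTR(-ENOMEM);

	/* several hmm_devmem_add() calls can hang off the same fake device */
	devmem = hmm_devmem_add(ops, &hmm_device->device, size);
	if (IS_ERR(devmem)) {
		hmm_device_put(hmm_device);
		return ERR_CAST(devmem);
	}

	return hmm_device;
}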

Changed since v2:
  - use device_initcall() and drop everything that is module specific
Changed since v1:
  - Improve commit message
  - Add drvdata parameter to set on struct device

Signed-off-by: Jérôme Glisse 
Signed-off-by: Evgeny Baskakov 
Signed-off-by: John Hubbard 
Signed-off-by: Mark Hairgrove 
Signed-off-by: Sherry Cheung 
Signed-off-by: Subhash Gutti 
---
 include/linux/hmm.h | 22 +-
 mm/hmm.c| 88 +
 2 files changed, 109 insertions(+), 1 deletion(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 2fd539b0eab3..458d0d6d82f3 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -72,11 +72,11 @@
 
 #if IS_ENABLED(CONFIG_HMM)
 
+#include 
 #include 
 #include 
 #include 
 
-
 struct hmm;
 
 /*
@@ -423,6 +423,26 @@ static inline unsigned long 
hmm_devmem_page_get_drvdata(struct page *page)
 
return drvdata[1];
 }
+
+
+/*
+ * struct hmm_device - fake device to hang device memory onto
+ *
+ * @device: device struct
+ * @minor: device minor number
+ */
+struct hmm_device {
+   struct device   device;
+   unsigned intminor;
+};
+
+/*
+ * A device driver that wants to handle multiple devices' memory through a
+ * single fake device can use hmm_device to do so. This is purely a helper and
+ * it is not strictly needed in order to make use of any HMM functionality.
+ */
+struct hmm_device *hmm_device_new(void *drvdata);
+void hmm_device_put(struct hmm_device *hmm_device);
 #endif /* IS_ENABLED(CONFIG_DEVICE_PRIVATE) */
 
 
diff --git a/mm/hmm.c b/mm/hmm.c
index ce403b7a7ee3..4e01c9ba9cc1 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -19,6 +19,7 @@
  */
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1102,4 +1103,91 @@ void hmm_devmem_remove(struct hmm_devmem *devmem)
devm_release_mem_region(device, start, size);
 }
 EXPORT_SYMBOL(hmm_devmem_remove);
+
+/*
+ * A device driver that wants to handle multiple devices' memory through a
+ * single fake device can use hmm_device to do so. This is purely a helper
+ * and it is not needed in order to make use of any HMM functionality.
+ */
+#define HMM_DEVICE_MAX 256
+
+static DECLARE_BITMAP(hmm_device_mask, HMM_DEVICE_MAX);
+static DEFINE_SPINLOCK(hmm_device_lock);
+static struct class *hmm_device_class;
+static dev_t hmm_device_devt;
+
+static void hmm_device_release(struct device *device)
+{
+   struct hmm_device *hmm_device;
+
+   hmm_device = container_of(device, struct hmm_device, device);
+   spin_lock(&hmm_device_lock);
+   clear_bit(hmm_device->minor, hmm_device_mask);
+   spin_unlock(&hmm_device_lock);
+
+   kfree(hmm_device);
+}
+
+struct hmm_device *hmm_device_new(void *drvdata)
+{
+   struct hmm_device *hmm_device;
+   int ret;
+
+   hmm_device = kzalloc(sizeof(*hmm_device), GFP_KERNEL);
+   if (!hmm_device)
+   return ERR_PTR(-ENOMEM);
+
+   ret = alloc_chrdev_region(&hmm_device->device.devt, 0, 1, "hmm_device");
+   if (ret < 0) {
+   kfree(hmm_device);
+   return NULL;
+   }
+
+   spin_lock(&hmm_device_lock);
+   hmm_device->minor = find_first_zero_bit(hmm_device_mask, 
HMM_DEVICE_MAX);
+   if (hmm_device->minor >= HMM_DEVICE_MAX) {
+   spin_unlock(&hmm_device_lock);
+   kfree(hmm_device);
+   return NULL;
+   }
+   set_bit(hmm_device->minor, hmm_device_mask);
+   spin_unlock(&hmm_device_lock);
+
+   dev_set_name(&hmm_device->device, "hmm_device%d", hmm_device->minor);
+   hmm_device->device.devt = MKDEV(MAJOR(hmm_device_devt),
+   hmm_device->minor);
+   hmm_device->device.release = hmm_device_release;
+   dev_set_drvdata(&hmm_device->device, drvdata);
+   hmm_device->device.class = hmm_device_class;
+   device_initialize(&hmm_device->device);
+
+   return hmm_device;
+}
+EXPORT_SYMBOL(hmm_device_new);
+
+void hmm_device_put(struct hmm_device *hmm_device)
+{
+   put_device(&hmm_device->device);
+}
+EXPORT_SYMBOL(hmm_device_put);
+
+static int __init hmm_init(void)
+{
+   int ret;
+
+   ret = alloc_chrdev_region(&hmm_device_devt, 0,
+ HMM_DEVICE_MAX,
+ "hmm_device");
+   if (ret)
+   return ret;
+
+   hmm_device_class = class_create(THIS_MODULE, "hmm_device");
+   if (IS_ERR(hmm_device_class)) {
+   unregister_chrdev_region(hmm_device_devt, HMM_DEVICE_MAX);
+   return PTR_ERR(hmm_device_class);
+   }
+   return 0;
+}
+

[PATCH 11/15] mm/migrate: new migrate mode MIGRATE_SYNC_NO_COPY

2017-06-28 Thread Jérôme Glisse
Introduce a new migration mode that allows offloading the copy to
a device DMA engine. This changes the workflow of migration, and
not all address_space migratepage callbacks can support it, so
this needs to be tested in those cases.

This is intended to be used by migrate_vma(), which itself is used
for things like HMM (see include/linux/hmm.h).

Signed-off-by: Jérôme Glisse 
---
 fs/aio.c |  8 +++
 fs/f2fs/data.c   |  5 -
 fs/hugetlbfs/inode.c |  5 -
 fs/ubifs/file.c  |  5 -
 include/linux/migrate.h  |  5 +
 include/linux/migrate_mode.h |  5 +
 mm/balloon_compaction.c  |  8 +++
 mm/migrate.c | 52 ++--
 mm/zsmalloc.c|  8 +++
 9 files changed, 86 insertions(+), 15 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 34027b67e2f4..e908a30a1c8a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -373,6 +373,14 @@ static int aio_migratepage(struct address_space *mapping, 
struct page *new,
pgoff_t idx;
int rc;
 
+   /*
+* We cannot support the _NO_COPY case here, because copy needs to
+* happen under the ctx->completion_lock. That does not work with the
+* migration workflow of MIGRATE_SYNC_NO_COPY.
+*/
+   if (mode == MIGRATE_SYNC_NO_COPY)
+   return -EINVAL;
+
rc = 0;
 
/* mapping->private_lock here protects against the kioctx teardown.  */
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7697d03e8a98..1d441c091a38 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2235,7 +2235,10 @@ int f2fs_migrate_page(struct address_space *mapping,
SetPagePrivate(newpage);
set_page_private(newpage, page_private(page));
 
-   migrate_page_copy(newpage, page);
+   if (mode != MIGRATE_SYNC_NO_COPY)
+   migrate_page_copy(newpage, page);
+   else
+   migrate_page_states(newpage, page);
 
return MIGRATEPAGE_SUCCESS;
 }
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 52388611635e..1db9c8df886d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -846,7 +846,10 @@ static int hugetlbfs_migrate_page(struct address_space 
*mapping,
rc = migrate_huge_page_move_mapping(mapping, newpage, page);
if (rc != MIGRATEPAGE_SUCCESS)
return rc;
-   migrate_page_copy(newpage, page);
+   if (mode != MIGRATE_SYNC_NO_COPY)
+   migrate_page_copy(newpage, page);
+   else
+   migrate_page_states(newpage, page);
 
return MIGRATEPAGE_SUCCESS;
 }
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 2cda3d67e2d0..b2292be50de0 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1482,7 +1482,10 @@ static int ubifs_migrate_page(struct address_space 
*mapping,
SetPagePrivate(newpage);
}
 
-   migrate_page_copy(newpage, page);
+   if (mode != MIGRATE_SYNC_NO_COPY)
+   migrate_page_copy(newpage, page);
+   else
+   migrate_page_states(newpage, page);
return MIGRATEPAGE_SUCCESS;
 }
 #endif
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 3e0d405dc842..e646ae44077d 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -59,6 +59,7 @@ extern void putback_movable_page(struct page *page);
 
 extern int migrate_prep(void);
 extern int migrate_prep_local(void);
+extern void migrate_page_states(struct page *newpage, struct page *page);
 extern void migrate_page_copy(struct page *newpage, struct page *page);
 extern int migrate_huge_page_move_mapping(struct address_space *mapping,
  struct page *newpage, struct page *page);
@@ -79,6 +80,10 @@ static inline int isolate_movable_page(struct page *page, 
isolate_mode_t mode)
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
 
+static inline void migrate_page_states(struct page *newpage, struct page *page)
+{
+}
+
 static inline void migrate_page_copy(struct page *newpage,
 struct page *page) {}
 
diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
index ebf3d89a3919..bdf66af9b937 100644
--- a/include/linux/migrate_mode.h
+++ b/include/linux/migrate_mode.h
@@ -6,11 +6,16 @@
  * on most operations but not ->writepage as the potential stall time
  * is too significant
  * MIGRATE_SYNC will block when migrating pages
+ * MIGRATE_SYNC_NO_COPY will block when migrating pages but will not copy pages
+ * with the CPU. Instead, page copy happens outside the migratepage()
+ * callback and is likely using a DMA engine. See migrate_vma() and HMM
+ * (mm/hmm.c) for users of this mode.
  */
 enum migrate_mode {
MIGRATE_ASYNC,
MIGRATE_SYNC_LIGHT,
MIGRATE_SYNC,
+   MIGRATE_SYNC_NO_COPY,
 };
 
 #endif /* 

Re: [lttng-dev] Clarification on SCHED_FIFO support

2017-06-28 Thread Steven Rostedt
On Wed, 28 Jun 2017 18:22:38 + (UTC)
Mathieu Desnoyers  wrote:

> - On Jun 27, 2017, at 7:36 PM, Sukanya Sekar sse...@andrew.cmu.edu wrote:
> 
> > Greetings!
> > We are exploring the LTTng Kernel Tracer (version 2.9) on Ubuntu 16.04. We 
> > are
> > particularly interested in real-time applications. As far as we explored, we
> > couldn't find support for SCHED_FIFO events. Also, we found a comment in the
> > source code (.../sched.h) indicating that the sched_stat support to
> > SCHED_FIFO/RR is yet to be added.  
> 
> > So we would like to confirm if SCHED_FIFO isn't supported in the latest 
> > version
> > of the tool.  
> 
> This is a problem in the upstream Linux kernel instrumentation.
> 
> I am assuming you refer to the sched_stat_template event class, used by 
> sched_stat_wait, sched_stat_sleep, sched_stat_iowait, and sched_stat_blocked 
> events. For those specific events, LTTng modules has the same limitation as 
> the Linux kernel scheduler instrumentation, where we find this comment 
> (include/trace/events/sched.h): 
> 
> /* 
> * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE 
> * adding sched_stat support to SCHED_FIFO/RR would be welcome. 
> */ 
> 
> This is not specific to LTTng. Ftrace, Perf, and SystemTAP have the same 
> limitation 
> when using this scheduler tracepoint. 
> 
> In the past year, Julien Desfossez proposed enhancements to the Linux kernel 
> scheduler instrumentation [1], but there are still some disagreements on how
> to expose the new interfaces to user-space. 
> 
> Through a follow up IRC discussion we had with Peter Zijlstra and Steven 
> Rostedt,
> we found out that they have opinions on how to evolve the kernel ABI exposed 
> by
> Ftrace and Perf (keeping a single event for sched_switch, not having unneeded
> information in the event, exposing multiple numbered event format files 
> format,
> format2, format3..., cumulative enabling semantic (enabling format3 enables 
> format2 and 
> format), and so on), but the set of requirements is still unclear, and they 
> have not 
> formulated an ABI proposal that those involved generally agree on. 
> 
> IMHO, part of the issue here is to mistake kernel ABIs for end user tooling 
> interfaces. 
> If, for instance, trace-cmd exposes the new sched_switch_{rt,fail,dl} events 
> as a single 
> synthetic sched_switch event from a user perspective, what do we really gain 
> by
> complexifying the kernel ABI to still allow enabling a single event instead 
> of 3 using
> "echo" from a shell ?
> 
> As soon as the Linux kernel adds the proper instrumentation to deal with 
> those 
> scheduler policies, we will add support for it in LTTng (only for the newer 
> kernels 
> that contain that instrumentation, of course). 
> 

You can propose settling this as a [TECH TOPIC] for kernel summit in
Prague.

-- Steve


Re: [PATCH v3 net-next 03/12] nfp: change bpf verifier hooks to match new verifier data structures

2017-06-28 Thread Daniel Borkmann

On 06/27/2017 02:57 PM, Edward Cree wrote:

Signed-off-by: Edward Cree 


Acked-by: Daniel Borkmann 


Re: [PATCH 1/2] irqchip: RISC-V Local Interrupt Controller Driver

2017-06-28 Thread Thomas Gleixner
On Mon, 26 Jun 2017, Palmer Dabbelt wrote:
> +DEFINE_PER_CPU(struct riscv_irq_data, riscv_irq_data);
> +DEFINE_PER_CPU(atomic_long_t, riscv_early_sie);
> +
> +static void riscv_software_interrupt(void)
> +{
> +#ifdef CONFIG_SMP
> + irqreturn_t ret;
> +
> + ret = handle_ipi();
> + if (ret != IRQ_NONE)
> + return;
> +#endif
> +
> + BUG();

Are you sure you want to crash the system just because a spurious interrupt
happened?

> +}
> +
> +asmlinkage void __irq_entry do_IRQ(unsigned int cause, struct pt_regs *regs)
> +{
> + struct pt_regs *old_regs = set_irq_regs(regs);
> + struct irq_domain *domain;
> +
> + irq_enter();
> +
> + /* There are three classes of interrupt: timer, software, and

Please use proper multiline comment formatting:

/*
 * bla.
 * foo.
 */

> +  * external devices.  We dispatch between them here.  External
> +  * device interrupts use the generic IRQ mechanisms.
> +  */
> + switch (cause) {
> + case INTERRUPT_CAUSE_TIMER:
> + riscv_timer_interrupt();
> + break;
> + case INTERRUPT_CAUSE_SOFTWARE:
> + riscv_software_interrupt();
> + break;
> + default:
> + domain = per_cpu(riscv_irq_data, smp_processor_id()).domain;
> + generic_handle_irq(irq_find_mapping(domain, cause));
> + break;
> + }
> +
> + irq_exit();
> + set_irq_regs(old_regs);
> +}
> +
> +static int riscv_irqdomain_map(struct irq_domain *d, unsigned int irq,
> +irq_hw_number_t hwirq)
> +{
> + struct riscv_irq_data *data = d->host_data;
> +
> + irq_set_chip_and_handler(irq, &data->chip, handle_simple_irq);
> + irq_set_chip_data(irq, data);
> + irq_set_noprobe(irq);
> +
> + return 0;
> +}
> +
> +static const struct irq_domain_ops riscv_irqdomain_ops = {
> + .map= riscv_irqdomain_map,
> + .xlate  = irq_domain_xlate_onecell,
> +};
> +
> +static void riscv_irq_mask(struct irq_data *d)
> +{
> + struct riscv_irq_data *data = irq_data_get_irq_chip_data(d);
> +
> + BUG_ON(smp_processor_id() != data->hart);

Crashing the machine is the last resort if there is no chance to handle a
situation gracefully. Something like

if (WARN_ON_ONCE(smp_processor_id() != data->hart))
do_something_sensible();
else
.

might at least keep the machine halfways functional for debugging.

> + csr_clear(sie, 1 << (long)d->hwirq);
> +}
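
A minimal sketch of how the mask helper from the quoted patch might adopt that
pattern; simply returning when called on a foreign hart is an assumption made
here for illustration, not something the review prescribes:

	static void riscv_irq_mask(struct irq_data *d)
	{
		struct riscv_irq_data *data = irq_data_get_irq_chip_data(d);

		/* Warn once and bail out instead of taking the machine down. */
		if (WARN_ON_ONCE(smp_processor_id() != data->hart))
			return;

		csr_clear(sie, 1 << (long)d->hwirq);
	}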
> +
> +static void riscv_irq_unmask(struct irq_data *d)
> +{
> + struct riscv_irq_data *data = irq_data_get_irq_chip_data(d);
> +
> + BUG_ON(smp_processor_id() != data->hart);
> + csr_set(sie, 1 << (long)d->hwirq);
> +}
> +
> +static void riscv_irq_enable_helper(void *d)
> +{
> + riscv_irq_unmask(d);
> +}
> +
> +static void riscv_irq_enable(struct irq_data *d)
> +{
> + struct riscv_irq_data *data = irq_data_get_irq_chip_data(d);
> +
> + /* There are two phases to setting up an interrupt: first we set a bit
> +  * in this bookkeeping structure, which is used by trap_init to
> +  * initialize SIE for each hart as it comes up.

And what exactly has this to do with irq_enable()? Why would you call that
for an interrupt which solely goes to an offline cpu?

> +static void riscv_irq_disable(struct irq_data *d)
> +{
> + struct riscv_irq_data *data = irq_data_get_irq_chip_data(d);
> +
> + /* This is the mirror of riscv_irq_enable. */
> + atomic_long_and(~(1 << (long)d->hwirq),
> + &per_cpu(riscv_early_sie, data->hart));
> + if (data->hart == smp_processor_id())
> + riscv_irq_mask(d);
> + else if (cpu_online(data->hart))
> + smp_call_function_single(data->hart,
> +  riscv_irq_disable_helper,
> +  d,
> +  true);

Same question as above.

> +}
> +
> +static void riscv_irq_mask_noop(struct irq_data *d) { }
> +
> +static void riscv_irq_unmask_noop(struct irq_data *d) { }
> 
> +static void riscv_irq_enable_noop(struct irq_data *d)
> +{
> + struct device_node *data = irq_data_get_irq_chip_data(d);
> + u32 hart;
> +
> + if (!of_property_read_u32(data, "reg", &hart))
> + printk(
> +   KERN_WARNING "enabled interrupt %d for missing hart %d (this interrupt has no handler)\n",

Has no handler? I really have a hard time to understand the logic here.

> +   (int)d->hwirq, hart);
> +}
> +
> +static struct irq_chip riscv_noop_chip = {
> + .name = "riscv,cpu-intc,noop",
> + .irq_mask = riscv_irq_mask_noop,
> + .irq_unmask = riscv_irq_unmask_noop,
> + .irq_enable = riscv_irq_enable_noop,

Please write that in tabular fashion:

.name   = "riscv,cpu-intc,noop",
.irq_mask   = riscv_irq_mask_noop,

> +};
> +
> +static int riscv_irqdomain_map_noop(struct 

[tip:x86/platform] x86/PCI: Remove duplicate defines

2017-06-28 Thread tip-bot for Thomas Gleixner
Commit-ID:  9304d1621e6019c63497f8a4aad09d003916dbe9
Gitweb: http://git.kernel.org/tip/9304d1621e6019c63497f8a4aad09d003916dbe9
Author: Thomas Gleixner 
AuthorDate: Thu, 16 Mar 2017 22:50:03 +0100
Committer:  Thomas Gleixner 
CommitDate: Wed, 28 Jun 2017 22:32:55 +0200

x86/PCI: Remove duplicate defines

For some historic reason these defines are duplicated and also available in
arch/x86/include/asm/pci_x86.h.

Remove them.

Signed-off-by: Thomas Gleixner 
Acked-by: Bjorn Helgaas 
Cc: Andi Kleen 
Cc: Peter Zijlstra 
Cc: Stephane Eranian 
Cc: Borislav Petkov 
Cc: linux-...@vger.kernel.org
Link: http://lkml.kernel.org/r/20170316215056.967808...@linutronix.de
Signed-off-by: Thomas Gleixner 

---
 arch/x86/include/asm/pci.h | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index f513cc2..473a729 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -77,14 +77,8 @@ static inline bool is_vmd(struct pci_bus *bus)
 
 extern unsigned int pcibios_assign_all_busses(void);
 extern int pci_legacy_init(void);
-# ifdef CONFIG_ACPI
-#  define x86_default_pci_init pci_acpi_init
-# else
-#  define x86_default_pci_init pci_legacy_init
-# endif
 #else
-# define pcibios_assign_all_busses()   0
-# define x86_default_pci_init  NULL
+static inline int pcibios_assign_all_busses(void) { return 0; }
 #endif
 
 extern unsigned long pci_mem_start;


[tip:irq/core] blk-mq: Include all present CPUs in the default queue mapping

2017-06-28 Thread tip-bot for Christoph Hellwig
Commit-ID:  5f042e7cbd9ebd3580077dcdc21f35e68c2adf5f
Gitweb: http://git.kernel.org/tip/5f042e7cbd9ebd3580077dcdc21f35e68c2adf5f
Author: Christoph Hellwig 
AuthorDate: Mon, 26 Jun 2017 12:20:56 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 28 Jun 2017 23:00:06 +0200

blk-mq: Include all present CPUs in the default queue mapping

This way we get a nice distribution independent of the current cpu
online / offline state.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Jens Axboe 
Cc: Keith Busch 
Cc: linux-bl...@vger.kernel.org
Cc: linux-n...@lists.infradead.org
Link: http://lkml.kernel.org/r/20170626102058.10200-2-...@lst.de
Signed-off-by: Thomas Gleixner 

---
 block/blk-mq-cpumap.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 8e61e86..5eaecd4 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -35,7 +35,6 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
 {
unsigned int *map = set->mq_map;
unsigned int nr_queues = set->nr_hw_queues;
-   const struct cpumask *online_mask = cpu_online_mask;
unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
cpumask_var_t cpus;
 
@@ -44,7 +43,7 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
 
cpumask_clear(cpus);
nr_cpus = nr_uniq_cpus = 0;
-   for_each_cpu(i, online_mask) {
+   for_each_present_cpu(i) {
nr_cpus++;
first_sibling = get_first_sibling(i);
if (!cpumask_test_cpu(first_sibling, cpus))
@@ -54,7 +53,7 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set)
 
queue = 0;
for_each_possible_cpu(i) {
-   if (!cpumask_test_cpu(i, online_mask)) {
+   if (!cpumask_test_cpu(i, cpu_present_mask)) {
map[i] = 0;
continue;
}


[tip:irq/core] blk-mq: Create hctx for each present CPU

2017-06-28 Thread tip-bot for Christoph Hellwig
Commit-ID:  4b855ad37194f7bdbb200ce7a1c7051fecb56a08
Gitweb: http://git.kernel.org/tip/4b855ad37194f7bdbb200ce7a1c7051fecb56a08
Author: Christoph Hellwig 
AuthorDate: Mon, 26 Jun 2017 12:20:57 +0200
Committer:  Thomas Gleixner 
CommitDate: Wed, 28 Jun 2017 23:00:07 +0200

blk-mq: Create hctx for each present CPU

Currently we only create hctx for online CPUs, which can lead to a lot
of churn due to frequent soft offline / online operations.  Instead
allocate one for each present CPU to avoid this and dramatically simplify
the code.

Signed-off-by: Christoph Hellwig 
Reviewed-by: Jens Axboe 
Cc: Keith Busch 
Cc: linux-bl...@vger.kernel.org
Cc: linux-n...@lists.infradead.org
Link: http://lkml.kernel.org/r/20170626102058.10200-3-...@lst.de
Signed-off-by: Thomas Gleixner 

---
 block/blk-mq.c | 120 +
 block/blk-mq.h |   5 --
 include/linux/cpuhotplug.h |   1 -
 3 files changed, 11 insertions(+), 115 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index bb66c96..dd390e2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -37,9 +37,6 @@
 #include "blk-wbt.h"
 #include "blk-mq-sched.h"
 
-static DEFINE_MUTEX(all_q_mutex);
-static LIST_HEAD(all_q_list);
-
 static void blk_mq_poll_stats_start(struct request_queue *q);
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
 static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync);
@@ -1975,8 +1972,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;
 
-   /* If the cpu isn't online, the cpu is mapped to first hctx */
-   if (!cpu_online(i))
+   /* If the cpu isn't present, the cpu is mapped to first hctx */
+   if (!cpu_present(i))
continue;
 
hctx = blk_mq_map_queue(q, i);
@@ -2019,8 +2016,7 @@ static void blk_mq_free_map_and_requests(struct blk_mq_tag_set *set,
}
 }
 
-static void blk_mq_map_swqueue(struct request_queue *q,
-  const struct cpumask *online_mask)
+static void blk_mq_map_swqueue(struct request_queue *q)
 {
unsigned int i, hctx_idx;
struct blk_mq_hw_ctx *hctx;
@@ -2038,13 +2034,11 @@ static void blk_mq_map_swqueue(struct request_queue *q,
}
 
/*
-* Map software to hardware queues
+* Map software to hardware queues.
+*
+* If the cpu isn't present, the cpu is mapped to first hctx.
 */
-   for_each_possible_cpu(i) {
-   /* If the cpu isn't online, the cpu is mapped to first hctx */
-   if (!cpumask_test_cpu(i, online_mask))
-   continue;
-
+   for_each_present_cpu(i) {
hctx_idx = q->mq_map[i];
/* unmapped hw queue can be remapped after CPU topo changed */
if (!set->tags[hctx_idx] &&
@@ -2330,16 +2324,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
blk_queue_softirq_done(q, set->ops->complete);
 
blk_mq_init_cpu_queues(q, set->nr_hw_queues);
-
-   get_online_cpus();
-   mutex_lock(&all_q_mutex);
-
-   list_add_tail(&q->all_q_node, &all_q_list);
	blk_mq_add_queue_tag_set(set, q);
-   blk_mq_map_swqueue(q, cpu_online_mask);
-
-   mutex_unlock(&all_q_mutex);
-   put_online_cpus();
+   blk_mq_map_swqueue(q);
 
if (!(set->flags & BLK_MQ_F_NO_SCHED)) {
int ret;
@@ -2365,18 +2351,12 @@ void blk_mq_free_queue(struct request_queue *q)
 {
struct blk_mq_tag_set   *set = q->tag_set;
 
-   mutex_lock(&all_q_mutex);
-   list_del_init(&q->all_q_node);
-   mutex_unlock(&all_q_mutex);
-
blk_mq_del_queue_tag_set(q);
-
blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 }
 
 /* Basically redo blk_mq_init_queue with queue frozen */
-static void blk_mq_queue_reinit(struct request_queue *q,
-   const struct cpumask *online_mask)
+static void blk_mq_queue_reinit(struct request_queue *q)
 {
	WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
 
@@ -2389,76 +2369,12 @@ static void blk_mq_queue_reinit(struct request_queue *q,
 * involves free and re-allocate memory, worthy doing?)
 */
 
-   blk_mq_map_swqueue(q, online_mask);
+   blk_mq_map_swqueue(q);
 
blk_mq_sysfs_register(q);
blk_mq_debugfs_register_hctxs(q);
 }
 
-/*
- * New online cpumask which is going to be set in this hotplug event.
- * Declare this cpumasks as global as cpu-hotplug operation is invoked
- * one-by-one and dynamically allocating this could result in a failure.
- */
-static struct cpumask cpuhp_online_new;
-
-static void blk_mq_queue_reinit_work(void)
-{
-   struct request_queue *q;
-
-   

[PATCH v5 2/2]Input: Add driver for Goodix GTx5 series touchscreen

2017-06-28 Thread Wang Yafei
This driver is for Goodix GTx5 series touchscreen controllers
such as the GT8589 and GT7589. The driver is designed with a
hierarchical structure so that it can easily be modified to support
subsequent controllers. Some zones of the touchscreen can be configured
as buttons (depending on the hardware), which is why the driver handles
both button and multitouch events.

A brief description of the driver structure
- Core Layer: responsible for basic input event reporting, GPIO/pinctrl,
  interrupt handling, power resource management and sub-module management.
- Hardware Layer: responsible for controller initialization, IRQ handling
  as well as bus reads/writes.
- External Module Layer: used to support additional features such as
  firmware update, debug tools and gesture wakeup.

Signed-off-by: Wang Yafei 
---
Changes in v2:
- replace touchscreen properties according to the description in
  Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
  - Dropped all compat stuff for older kernels
  - Removed Android stuff (EARLY_SUSPEND, CONFIG_FB)
  - Use device_property_read_*  get device properties
  - Use get-unaligned_*() API
  - Use dev_err() dev_dbg() for logging
  - Remove pinctrl functions
  - Remove some unused functions

Changes in v3:
  - Modify Kconfig

Changes in v4:
  - Unify functions name and file name
  - Recheck code spell and style problems
  - Modify code comments
  - Remove firmware update and debug tool modules

Changes in v5:
  - Modify DT related functions
  - Remove irq-gpio
---
 drivers/input/touchscreen/Kconfig |   12 +
 drivers/input/touchscreen/Makefile|1 +
 drivers/input/touchscreen/gtx5_core.c | 1259 +
 drivers/input/touchscreen/gtx5_core.h |  398 +++
 drivers/input/touchscreen/gtx5_i2c.c  |  802 +
 5 files changed, 2472 insertions(+)
 create mode 100644 drivers/input/touchscreen/gtx5_core.c
 create mode 100644 drivers/input/touchscreen/gtx5_core.h
 create mode 100644 drivers/input/touchscreen/gtx5_i2c.c

diff --git a/drivers/input/touchscreen/Kconfig 
b/drivers/input/touchscreen/Kconfig
index cf26ca4..e879af8 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -344,6 +344,18 @@ config TOUCHSCREEN_GOODIX
  To compile this driver as a module, choose M here: the
  module will be called goodix.
 
+config TOUCHSCREEN_GTX5
+   tristate "Goodix GTx5 touchscreen"
+   depends on I2C && OF
+   depends on GPIOLIB
+   help
+ Say Y here if you have a touchscreen using Goodix GTx5 series
+ controller.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here
+
 config TOUCHSCREEN_ILI210X
tristate "Ilitek ILI210X based touchscreen"
depends on I2C
diff --git a/drivers/input/touchscreen/Makefile 
b/drivers/input/touchscreen/Makefile
index 18e4769..00643d4 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_TOUCHSCREEN_EGALAX)  += egalax_ts.o
 obj-$(CONFIG_TOUCHSCREEN_EGALAX_SERIAL)+= egalax_ts_serial.o
 obj-$(CONFIG_TOUCHSCREEN_FUJITSU)  += fujitsu_ts.o
 obj-$(CONFIG_TOUCHSCREEN_GOODIX)   += goodix.o
+obj-$(CONFIG_TOUCHSCREEN_GTX5) += gtx5_i2c.o gtx5_core.o
 obj-$(CONFIG_TOUCHSCREEN_ILI210X)  += ili210x.o
 obj-$(CONFIG_TOUCHSCREEN_IMX6UL_TSC)   += imx6ul_tsc.o
 obj-$(CONFIG_TOUCHSCREEN_INEXIO)   += inexio.o
diff --git a/drivers/input/touchscreen/gtx5_core.c 
b/drivers/input/touchscreen/gtx5_core.c
new file mode 100644
index 000..72a58ce
--- /dev/null
+++ b/drivers/input/touchscreen/gtx5_core.c
@@ -0,0 +1,1259 @@
+/*
+ * Goodix GTx5 Touchscreen Driver
+ * Core layer of gtx5 touchscreen driver.
+ *
+ * Copyright (C) 2015 - 2016 Goodix, Inc.
+ *
+ * Authors:  Wang Yafei 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2 of the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "gtx5_core.h"
+
+#define INPUT_TYPE_B_PROTOCOL
+
+#define GOOIDX_INPUT_PHYS  "gtx5_ts/input0"
+#define PINCTRL_STATE_ACTIVE"pmx_ts_active"
+#define PINCTRL_STATE_SUSPEND   "pmx_ts_suspend"
+
+/*
+ * struct gtx5_modules - external modules container
+ * @head: external modules list
+ * @initialized: whether this struct is initialized
+ * @mutex: module mutex lock
+ * @count: current number of registered external module
+ * @wq: workqueue for module register work
+ * @core_exit: if gtx5 touch core exit, then no
+ *   registration is allowed.
+ * @core_data: core_data pointer
+ */
+struct gtx5_modules {
+   struct list_head head;
+   bool initialized;
+   struct mutex mutex;
+   unsigned int count;
+   struct workqueue_struct *wq;
+   bool core_exit;
+   

[PATCH] mm: convert three more cases to kvmalloc

2017-06-28 Thread Mikulas Patocka
Hi

I'm submitting this for the next merge window.

Mikulas



From: Mikulas Patocka 

The patch a7c3e901 ("mm: introduce kv[mz]alloc helpers") converted a lot 
of kernel code to kvmalloc. This patch converts three more forgotten 
cases.

Signed-off-by: Mikulas Patocka 

---
 fs/file.c |   12 +---
 kernel/bpf/syscall.c  |   11 +--
 kernel/cgroup/cgroup-v1.c |7 +--
 3 files changed, 3 insertions(+), 27 deletions(-)

Index: linux-2.6/fs/file.c
===
--- linux-2.6.orig/fs/file.c
+++ linux-2.6/fs/file.c
@@ -32,17 +32,7 @@ unsigned int sysctl_nr_open_max =
 
 static void *alloc_fdmem(size_t size)
 {
-   /*
-* Very large allocations can stress page reclaim, so fall back to
-* vmalloc() if the allocation size will be considered "large" by the VM.
-*/
-   if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-   void *data = kmalloc(size, GFP_KERNEL_ACCOUNT |
-__GFP_NOWARN | __GFP_NORETRY);
-   if (data != NULL)
-   return data;
-   }
-   return __vmalloc(size, GFP_KERNEL_ACCOUNT, PAGE_KERNEL);
+   return kvmalloc(size, GFP_KERNEL_ACCOUNT);
 }
 
 static void __free_fdtable(struct fdtable *fdt)
Index: linux-2.6/kernel/bpf/syscall.c
===
--- linux-2.6.orig/kernel/bpf/syscall.c
+++ linux-2.6/kernel/bpf/syscall.c
@@ -58,16 +58,7 @@ void *bpf_map_area_alloc(size_t size)
 * trigger under memory pressure as we really just want to
 * fail instead.
 */
-   const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
-   void *area;
-
-   if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
-   area = kmalloc(size, GFP_USER | flags);
-   if (area != NULL)
-   return area;
-   }
-
-   return __vmalloc(size, GFP_KERNEL | flags, PAGE_KERNEL);
+   return kvmalloc(size, GFP_USER | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO);
 }
 
 void bpf_map_area_free(void *area)
Index: linux-2.6/kernel/cgroup/cgroup-v1.c
===
--- linux-2.6.orig/kernel/cgroup/cgroup-v1.c
+++ linux-2.6/kernel/cgroup/cgroup-v1.c
@@ -184,15 +184,10 @@ struct cgroup_pidlist {
 /*
  * The following two functions "fix" the issue where there are more pids
  * than kmalloc will give memory for; in such cases, we use vmalloc/vfree.
- * TODO: replace with a kernel-wide solution to this problem
  */
-#define PIDLIST_TOO_LARGE(c) ((c) * sizeof(pid_t) > (PAGE_SIZE * 2))
 static void *pidlist_allocate(int count)
 {
-   if (PIDLIST_TOO_LARGE(count))
-   return vmalloc(count * sizeof(pid_t));
-   else
-   return kmalloc(count * sizeof(pid_t), GFP_KERNEL);
+   return kvmalloc(count * sizeof(pid_t), GFP_KERNEL);
 }
 
 static void pidlist_free(void *p)


[PATCH v4 2/2] fs/dcache.c: fix spin lockup issue on nlru->lock

2017-06-28 Thread Sahitya Tummala
__list_lru_walk_one() acquires the nlru spin lock (nlru->lock) for a
long duration if there are a large number of items in the lru list.
As per the current code, it can hold the spin lock while walking up to
UINT_MAX entries at a time. So if there are a large number of items in
the lru list, then "BUG: spinlock lockup suspected" is observed in
the below path -

[] spin_bug+0x90
[] do_raw_spin_lock+0xfc
[] _raw_spin_lock+0x28
[] list_lru_add+0x28
[] dput+0x1c8
[] path_put+0x20
[] terminate_walk+0x3c
[] path_lookupat+0x100
[] filename_lookup+0x6c
[] user_path_at_empty+0x54
[] SyS_faccessat+0xd0
[] el0_svc_naked+0x24

This nlru->lock is acquired by another CPU in this path -

[] d_lru_shrink_move+0x34
[] dentry_lru_isolate_shrink+0x48
[] __list_lru_walk_one.isra.10+0x94
[] list_lru_walk_node+0x40
[] shrink_dcache_sb+0x60
[] do_remount_sb+0xbc
[] do_emergency_remount+0xb0
[] process_one_work+0x228
[] worker_thread+0x2e0
[] kthread+0xf4
[] ret_from_fork+0x10

Fix this lockup by reducing the number of entries to be shrunk
from the lru list to 1024 at once. Also, add cond_resched() before
processing the lru list again.

Link: http://marc.info/?t=14972286491=1=2
Fix-suggested-by: Jan Kara 
Fix-suggested-by: Vladimir Davydov 
Signed-off-by: Sahitya Tummala 
---
 fs/dcache.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index a9f995f..1161390 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1133,11 +1133,12 @@ void shrink_dcache_sb(struct super_block *sb)
LIST_HEAD(dispose);
 
	freed = list_lru_walk(&sb->s_dentry_lru,
-   dentry_lru_isolate_shrink, &dispose, UINT_MAX);
+   dentry_lru_isolate_shrink, &dispose, 1024);

	this_cpu_sub(nr_dentry_unused, freed);
	shrink_dentry_list(&dispose);
-   } while (freed > 0);
+   cond_resched();
+   } while (list_lru_count(&sb->s_dentry_lru) > 0);
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
 
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH v4 1/2] mm/list_lru.c: fix list_lru_count_node() to be race free

2017-06-28 Thread Sahitya Tummala
list_lru_count_node() iterates over all memcgs to get
the total number of entries on the node but it can race with
memcg_drain_all_list_lrus(), which migrates the entries from
a dead cgroup to another. This can cause list_lru_count_node() to
return an incorrect number of entries.

Fix this by keeping track of the number of entries per node and simply
returning it in list_lru_count_node().

Signed-off-by: Sahitya Tummala 
---
 include/linux/list_lru.h |  1 +
 mm/list_lru.c| 14 ++
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index cb0ba9f..fa7fd03 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -44,6 +44,7 @@ struct list_lru_node {
/* for cgroup aware lrus points to per cgroup lists, otherwise NULL */
struct list_lru_memcg   *memcg_lrus;
 #endif
+   long nr_items;
 } cacheline_aligned_in_smp;
 
 struct list_lru {
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 234676e..7a40fa2 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -117,6 +117,7 @@ bool list_lru_add(struct list_lru *lru, struct list_head *item)
l = list_lru_from_kmem(nlru, item);
	list_add_tail(item, &l->list);
	l->nr_items++;
+   nlru->nr_items++;
	spin_unlock(&nlru->lock);
return true;
}
@@ -136,6 +137,7 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item)
l = list_lru_from_kmem(nlru, item);
list_del_init(item);
l->nr_items--;
+   nlru->nr_items--;
	spin_unlock(&nlru->lock);
return true;
}
@@ -183,15 +185,10 @@ unsigned long list_lru_count_one(struct list_lru *lru,
 
 unsigned long list_lru_count_node(struct list_lru *lru, int nid)
 {
-   long count = 0;
-   int memcg_idx;
+   struct list_lru_node *nlru;
 
-   count += __list_lru_count_one(lru, nid, -1);
-   if (list_lru_memcg_aware(lru)) {
-   for_each_memcg_cache_index(memcg_idx)
-   count += __list_lru_count_one(lru, nid, memcg_idx);
-   }
-   return count;
+   nlru = &lru->node[nid];
+   return nlru->nr_items;
 }
 EXPORT_SYMBOL_GPL(list_lru_count_node);
 
@@ -226,6 +223,7 @@ unsigned long list_lru_count_node(struct list_lru *lru, int nid)
	assert_spin_locked(&nlru->lock);
case LRU_REMOVED:
isolated++;
+   nlru->nr_items--;
/*
 * If the lru lock has been dropped, our list
 * traversal is now invalid and so we have to
-- 
Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



Re: [PATCH] [net-next] net/mlx5e: select CONFIG_MLXFW

2017-06-28 Thread Or Gerlitz
On Wed, Jun 28, 2017 at 11:10 PM, Arnd Bergmann  wrote:
> With the introduction of mlx5 firmware flash support, we get a link
> error with CONFIG_MLXFW=m and CONFIG_MLX5_CORE=y:
>
> drivers/net/ethernet/mellanox/mlx5/core/fw.o: In function 
> `mlx5_firmware_flash':
> fw.c:(.text+0x9d4): undefined reference to `mlxfw_firmware_flash'

Thanks Arnd, I got a report on that from Jakub but you were before me here..

> We could have a more elaborate method to force MLX5 to be a loadable
> module in this case, but the easiest fix seems to be to always enable
> MLXFW as well, like we do for CONFIG_MLXSW_SPECTRUM, which is the other
> user of mlxfw_firmware_flash.

We would not want to force mlx5 users to build mlxfw.

So let's either use the more elaborate method, or maybe instead of using
IS_ENABLED in mlxfw.h use IS_REACHABLE (this was suggested by Jakub).

Or.
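
For reference, a rough sketch of the IS_REACHABLE() variant mentioned above,
assuming a declaration/stub pair in mlxfw.h (the exact prototype and stub
return value here are guesses for illustration, not taken from the actual
header):

	#if IS_REACHABLE(CONFIG_MLXFW)
	int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
				 const struct firmware *firmware);
	#else
	static inline int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev,
					       const struct firmware *firmware)
	{
		/* flashing support is not reachable from this caller */
		return -EOPNOTSUPP;
	}
	#endif

IS_REACHABLE(CONFIG_MLXFW) evaluates to false when MLXFW=m but the caller is
built-in, so a built-in mlx5 would pick up the inline stub instead of emitting
the call that currently fails to link.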


Re: [PATCH v3 net-next 03/12] nfp: change bpf verifier hooks to match new verifier data structures

2017-06-28 Thread Jakub Kicinski
On Tue, 27 Jun 2017 13:57:34 +0100, Edward Cree wrote:
> Signed-off-by: Edward Cree 

Acked-by: Jakub Kicinski 

Sorry about the delay.


Re: [PATCH v3 2/3] dt-bindings: input: Add R_LRADC support for A83T

2017-06-28 Thread Ziping Chen
2017-06-28 1:31 GMT+08:00 Maxime Ripard :
> On Tue, Jun 27, 2017 at 11:18:17PM +0800, Ziping Chen wrote:
>> 2017-06-27 1:15 GMT+08:00 Maxime Ripard :
>> > Hi,
>> >
>> > On Sat, Jun 24, 2017 at 10:45:14AM +0800, Ziping Chen wrote:
>> >> From: Ziping Chen 
>> >>
>> >> Allwinner A83T SoC has a low res adc like the one
>> >> in Allwinner A10 SoC.
>> >>
>> >> Add binding for it.
>> >>
>> >> Signed-off-by: Ziping Chen 
>> >> Acked-by: Rob Herring 
>> >> ---
>> >>  Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt | 6 --
>> >>  1 file changed, 4 insertions(+), 2 deletions(-)
>> >>
>> >> diff --git a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt 
>> >> b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
>> >> index 4357e498ef04..525d85e3043f 100644
>> >> --- a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
>> >> +++ b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
>> >> @@ -2,12 +2,14 @@ Allwinner sun4i low res adc attached tablet keys
>> >>  
>> >>
>> >>  Required properties:
>> >> - - compatible: "allwinner,sun4i-a10-lradc-keys"
>> >> + - compatible: should be one of the following string:
>> >> + "allwinner,sun4i-a10-lradc-keys"
>> >> + "allwinner,sun8i-a83t-r-lradc-keys"
>> >
>> > This doesn't really have anything related to keys, and can be used
>> > purely as an ADC.
>> >
>> > I know this is the compatible that was used for the A10, but I'd
>> > rather drop the keys for the the new compatible.
>> >
>>
>> LRADC is named KEYADC in some other SoCs' user manuals (such as the R40
>> User Manual V1.0), thus LRADC is related to keys.
>
> It's still called LRADC on the A83T, so we'll use that name. If we
> ever need another compatible for the R40, then yes, keyadc would make
> sense in that case.

Hi,

So... should we use "sun4i-lradc" to replace "sun4i-lradc-keys"?

I think they should be separated: one belongs to the input subsystem, the
other to the IIO subsystem.

>
> Maxime
>
> --
> Maxime Ripard, Free Electrons
> Embedded Linux and Kernel engineering
> http://free-electrons.com


Re: [PATCH] futex: avoid undefined behaviour when shift exponent is negative

2017-06-28 Thread hpa
On June 28, 2017 7:12:04 PM PDT, zhong jiang  wrote:
>On 2017/6/29 5:43, h...@zytor.com wrote:
>> On June 27, 2017 9:35:10 PM PDT, zhong jiang 
>wrote:
>>> Hi,  Ingo
>>>
>>> Thank you for the comment.
>>> On 2017/6/22 0:40, Ingo Molnar wrote:
 * zhong jiang  wrote:

> when the shift exponent is negative, the left shift is always zero.
> Therefore, we modify the logic to avoid the warning.
>
> Signed-off-by: zhong jiang 
> ---
>  arch/x86/include/asm/futex.h | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/futex.h
>>> b/arch/x86/include/asm/futex.h
> index b4c1f54..2425fca 100644
> --- a/arch/x86/include/asm/futex.h
> +++ b/arch/x86/include/asm/futex.h
> @@ -49,8 +49,12 @@ static inline int futex_atomic_op_inuser(int
>>> encoded_op, u32 __user *uaddr)
>   int cmparg = (encoded_op << 20) >> 20;
>   int oldval = 0, ret, tem;
>  
> - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
> - oparg = 1 << oparg;
> + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) {
> + if (oparg >= 0)
> + oparg = 1 << oparg;
> + else
> + oparg = 0;
> + }
 Could we avoid all these complications by using an unsigned type?
>>> I think it is not feasible.  A negative shift exponent is likely to
>>> exist and is reasonable.
>>>  As in the above case, a negative oparg is common.
>>>
>>> I think it can be avoided by following change. 
>>>
>>> diff --git a/arch/x86/include/asm/futex.h
>>> b/arch/x86/include/asm/futex.h
>>> index b4c1f54..3205e86 100644
>>> --- a/arch/x86/include/asm/futex.h
>>> +++ b/arch/x86/include/asm/futex.h
>>> @@ -50,7 +50,7 @@ static inline int futex_atomic_op_inuser(int
>>> encoded_op, u32 __user *uaddr)
>>>int oldval = 0, ret, tem;
>>>
>>>if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
>>> -   oparg = 1 << oparg;
>>> +   oparg = safe_shift(1, oparg);
>>>
>>>if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
>>>return -EFAULT;
>>> diff --git a/drivers/video/fbdev/core/fbmem.c
>>> b/drivers/video/fbdev/core/fbmem.c
>>> index 069fe79..b4edda3 100644
>>> --- a/drivers/video/fbdev/core/fbmem.c
>>> +++ b/drivers/video/fbdev/core/fbmem.c
>>> @@ -190,11 +190,6 @@ char* fb_get_buffer_offset(struct fb_info
>*info,
>>> struct fb_pixmap *buf, u32 size
>>>
>>> #ifdef CONFIG_LOGO
>>>
>>> -static inline unsigned safe_shift(unsigned d, int n)
>>> -{
>>> -   return n < 0 ? d >> -n : d << n;
>>> -}
>>> -
>>> static void fb_set_logocmap(struct fb_info *info,
>>>   const struct linux_logo *logo)
>>> {
>>> diff --git a/include/linux/kernel.h b/include/linux/kernel.h
>>> index d043ada..f3b8856 100644
>>> --- a/include/linux/kernel.h
>>> +++ b/include/linux/kernel.h
>>> @@ -841,6 +841,10 @@ static inline void ftrace_dump(enum
>>> ftrace_dump_mode oops_dump_mode) { }
>>>  */
>>> #define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
>>>
>>> +static inline unsigned safe_shift(unsigned d, int n)
>>> +{
>>> +   return n < 0 ? d >> -n : d << n;
>>> +}
>>>
>>> Thansk
>>> zhongjiang
>>>
 Thanks,

Ingo

 .

>> What makes it reasonable?  It is totally ill-defined and doesn't do
>anything useful now?
> Thank you for the comments.
> 
>Maybe I mistook the meaning. I tested the negative cases on x86, and all
>cases are zero, so I came to that conclusion.
> 
>zj.c:15:8: warning: left shift count is negative
>[-Wshift-count-negative]
>  j = 1 << -2048;
>^
>[root@localhost zhongjiang]# ./zj
>j = 0
>j.c:15:8: warning: left shift count is negative
>[-Wshift-count-negative]
>  j = 1 << -2047;
>^
>[root@localhost zhongjiang]# ./zj
>j = 0
>
>I loaded a module into the kernel to test the test cases; all of the
>results are zero.
>
>I wonder whether I am missing some point. Could you point it out to me,
>please?
>
>Thanks
>zhongjiang
> 
> 

When you use compile-time constants, the compiler generates the value at 
compile time, which can be totally different.
-- 
Sent from my Android device with K-9 Mail. Please excuse my brevity.
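
A small user-space sketch of that point, assuming gcc on x86-64; the run-time
case is undefined behaviour, so the values noted in the comments are only what
one particular build happened to produce:

	#include <stdio.h>

	static int lshift(int x, int n)		/* n only known at run time */
	{
		return x << n;			/* undefined when n < 0 */
	}

	int main(void)
	{
		volatile int n = -2047;

		/*
		 * Folded at compile time (gcc warns with
		 * -Wshift-count-negative); the tests quoted above observed 0.
		 */
		printf("constant: %d\n", 1 << -2047);

		/*
		 * Run-time shift: x86 masks the shift count, so this often
		 * prints a non-zero value, i.e. a different result from the
		 * constant-folded case.
		 */
		printf("runtime : %d\n", lshift(1, n));
		return 0;
	}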


<    6   7   8   9   10   11   12   13   14   15   >