[PATCH] KVM: hyperv: split lock to protect struct kvm_hv

2016-12-09 Thread Paolo Bonzini
Otherwise, there is an AB-BA deadlock between kvm->lock and
vcpu->mutex.

Reported-by: Dmitry Vyukov 
Signed-off-by: Paolo Bonzini 
---
Compile-tested only.

 Documentation/virtual/kvm/locking.txt |  2 ++
 arch/x86/include/asm/kvm_host.h   |  1 +
 arch/x86/kvm/hyperv.c | 10 +-
 arch/x86/kvm/x86.c|  1 +
 4 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/Documentation/virtual/kvm/locking.txt 
b/Documentation/virtual/kvm/locking.txt
index e5dd9f4d6100..5dd06289ce59 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -16,6 +16,8 @@ The acquisition orders for mutexes are as follows:
 For spinlocks, kvm_lock is taken outside kvm->mmu_lock.  Everything
 else is a leaf: no other lock is taken inside the critical sections.
 
+In particular, on x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
+
 2: Exception
 
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7892530cbacf..2e25038dbd93 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -704,6 +704,7 @@ struct kvm_apic_map {
 
 /* Hyper-V emulation context */
 struct kvm_hv {
+   struct mutex hv_lock;
u64 hv_guest_os_id;
u64 hv_hypercall;
u64 hv_tsc_page;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 99cde5220e07..021abafabc12 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1142,9 +1142,9 @@ int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, 
u64 data, bool host)
if (kvm_hv_msr_partition_wide(msr)) {
int r;
 
-   mutex_lock(&vcpu->kvm->lock);
+   mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
-   mutex_unlock(&vcpu->kvm->lock);
+   mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r;
} else
return kvm_hv_set_msr(vcpu, msr, data, host);
@@ -1155,9 +1155,9 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, 
u64 *pdata)
if (kvm_hv_msr_partition_wide(msr)) {
int r;
 
-   mutex_lock(&vcpu->kvm->lock);
+   mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock);
r = kvm_hv_get_msr_pw(vcpu, msr, pdata);
-   mutex_unlock(&vcpu->kvm->lock);
+   mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock);
return r;
} else
return kvm_hv_get_msr(vcpu, msr, pdata);
@@ -1165,7 +1165,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, 
u64 *pdata)
 
 bool kvm_hv_hypercall_enabled(struct kvm *kvm)
 {
-   return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE;
+   return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & 
HV_X64_MSR_HYPERCALL_ENABLE;
 }
 
 static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f0aee98e7492..30fc403df802 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7861,6 +7861,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
+   mutex_init(&kvm->arch.hyperv.hv_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
 
kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
-- 
2.9.3



Re: netlink: GPF in sock_sndtimeo

2016-12-09 Thread Cong Wang
On Fri, Dec 9, 2016 at 8:13 PM, Cong Wang  wrote:
> On Fri, Dec 9, 2016 at 3:01 AM, Richard Guy Briggs  wrote:
>> On 2016-12-08 22:57, Cong Wang wrote:
>>> On Thu, Dec 8, 2016 at 10:02 PM, Richard Guy Briggs  wrote:
>>> > I also tried to extend Cong Wang's idea to attempt to proactively respond 
>>> > to a
>>> > NETLINK_URELEASE on the audit_sock and reset it, but ran into a locking 
>>> > error
>>> > stack dump using mutex_lock(&audit_cmd_mutex) in the notifier callback.
>>> > Eliminating the lock since the sock is dead anyways eliminates the error.
>>> >
>>> > Is it safe?  I'll resubmit if this looks remotely sane.  Meanwhile I'll 
>>> > try to
>>> > get the test case to compile.
>>>
>>> It doesn't look safe, because 'audit_sock', 'audit_nlk_portid' and 
>>> 'audit_pid'
>>> are updated as a whole and race between audit_receive_msg() and
>>> NETLINK_URELEASE.
>>
>> This is what I expected and why I originally added the mutex lock in the
>> callback...  The dumps I got were bare with no wrapper identifying the
>> process context or specific error, so I'm at a bit of a loss how to
>> solve this (without thinking more about it) other than instinctively
>> removing the mutex.
>
> Netlink notifier can safely be converted to blocking one, I will send
> a patch.
>
> But I seriously doubt you really need NETLINK_URELEASE here,
> it adds nothing but overhead, b/c the netlink notifier is called on
> every netlink socket in the system, but for net exit path, that is
> relatively a slow path.
>
> Also, kauditd_send_skb() needs audit_cmd_mutex too.

Please let me know what you think about the attached patch?

Thanks!
commit a12b43ee814625933ff155c20dc863c59cfcf240
Author: Cong Wang 
Date:   Fri Dec 9 17:56:42 2016 -0800

audit: close a race condition on audit_sock

Signed-off-by: Cong Wang 

diff --git a/kernel/audit.c b/kernel/audit.c
index f1ca116..ab947d8 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -423,6 +423,8 @@ static void kauditd_send_skb(struct sk_buff *skb)
snprintf(s, sizeof(s), "audit_pid=%d reset", 
audit_pid);
audit_log_lost(s);
audit_pid = 0;
+   audit_nlk_portid = 0;
+   sock_put(audit_sock);
audit_sock = NULL;
} else {
pr_warn("re-scheduling(#%d) write to 
audit_pid=%d\n",
@@ -899,6 +901,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct 
nlmsghdr *nlh)
audit_log_config_change("audit_pid", new_pid, 
audit_pid, 1);
audit_pid = new_pid;
audit_nlk_portid = NETLINK_CB(skb).portid;
+   sock_hold(skb->sk);
+   if (audit_sock)
+   sock_put(audit_sock);
audit_sock = skb->sk;
}
if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
@@ -1167,10 +1172,6 @@ static void __net_exit audit_net_exit(struct net *net)
 {
struct audit_net *aunet = net_generic(net, audit_net_id);
struct sock *sock = aunet->nlsk;
-   if (sock == audit_sock) {
-   audit_pid = 0;
-   audit_sock = NULL;
-   }
 
RCU_INIT_POINTER(aunet->nlsk, NULL);
synchronize_net();


Re: [PATCH] f2fs: fix to determine start_cp_addr by sbi->cur_cp_pack

2016-12-09 Thread Chao Yu
Hi Jaegeuk,

Let me try to understand this, in some cases, we can write a checkpoint pack
which has wrong cp_ver, like in 1st cp pack which has even version number or 2nd
cp pack which has odd version number, so if we load that kind of cp pack during
fill_super, we may load wrong summary data from another older cp pack which cp
version pointed to, is that right?

Thanks,

On 2016/11/25 10:32, Jaegeuk Kim wrote:
> We don't guarantee cp_addr is fixed by cp_version.
> This is to sync with f2fs-tools.
> 
> Cc: sta...@vger.kernel.org
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/checkpoint.c |  5 +
>  fs/f2fs/f2fs.h   | 14 +++---
>  2 files changed, 8 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 54cc6a9..bf2f44c 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -789,6 +789,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
>   f2fs_put_page(cur_page, 1);
>   }
>  done:
> + if (cur_page == cp1)
> + sbi->cur_cp_pack = 1;
> + else
> + sbi->cur_cp_pack = 2;
> +
>   f2fs_put_page(cp1, 1);
>   f2fs_put_page(cp2, 1);
>   return 0;
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 62383d2..e22e7e1 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -792,6 +792,7 @@ struct f2fs_sb_info {
>  
>   /* for checkpoint */
>   struct f2fs_checkpoint *ckpt;   /* raw checkpoint pointer */
> + int cur_cp_pack;/* remain current cp pack */
>   spinlock_t cp_lock; /* for flag in ckpt */
>   struct inode *meta_inode;   /* cache meta blocks */
>   struct mutex cp_mutex;  /* checkpoint procedure lock */
> @@ -1352,19 +1353,10 @@ static inline void *__bitmap_ptr(struct f2fs_sb_info 
> *sbi, int flag)
>  
>  static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
>  {
> - block_t start_addr;
> - struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> - unsigned long long ckpt_version = cur_cp_version(ckpt);
> -
> - start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
> + block_t start_addr = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr);
>  
> - /*
> -  * odd numbered checkpoint should at cp segment 0
> -  * and even segment must be at cp segment 1
> -  */
> - if (!(ckpt_version & 1))
> + if (sbi->cur_cp_pack == 2)
>   start_addr += sbi->blocks_per_seg;
> -
>   return start_addr;
>  }
>  
> 



Re: [PATCH] driver core: flush async calls before testing driver removal

2016-12-09 Thread Greg Kroah-Hartman
On Sat, Dec 10, 2016 at 02:15:19AM +0200, Vladimir Zapolskiy wrote:
> If CONFIG_DEBUG_TEST_DRIVER_REMOVE option is enabled a number of false
> positives are reported for ATA controller drivers, because ATA port
> probes are done asynchronously, and the same problem may also touch
> other asynchronously probed drivers.
> 
> To reduce the rate of false reports on boot call async_synchronize_full()
> before attempting to remove a driver, the same is done in delete_module()
> syscall for all possible drivers and in __device_release_driver() function
> for asynchronously probed drivers.

__device_release_driver() already calls this function, why call it
again?

thanks,

greg k-h


Re: [regression ?] kbuild: fix building bzImage with CONFIG_TRIM_UNUSED_KSYMS enabled

2016-12-09 Thread Sergey Senozhatsky
On (12/09/16 13:07), Nicolas Pitre wrote:
[..]
> > build:
> > make -j4 > build_log 2>&1
> > 
> > package:
> > make -j4 INSTALL_MOD_PATH="${pkgdir}" modules_install >> build_log 2>&1
> 
> Weird.

it is. sorry for long reply, it took me some time to track it down.
turned out, the script also does `prepare' and `kernelrelease'. so
the sequence of commands in my build script is

make prepare
make kernelrelease
# function build
make -j4
# function package
make -j4 INSTALL_MOD_PATH= modules_install


now. the problem here is that, apparently, and I didn't know that,
"make prepare" and "make kernelrelease" are executed twice.

- first time when I build the kernel
 make prepare
 make kernelrelease
 make -j4

- second time when I install the modules
 make prepare
 make kernelrelease
 make -j4 INSTALL_MOD_PATH= modules_install


so this will not install modules:
 make prepare; make kernelrelease; make -j4; make prepare; make kernelrelease; 
make -j4 INSTALL_MOD_PATH=/tmp/MODULES modules_install

and this will:
 make prepare; make kernelrelease; make -j4; make kernelrelease; make -j4 
INSTALL_MOD_PATH=/tmp/MODULES modules_install


> You must have CONFIG_TRIM_UNUSED_KSYMS=y in your .config, right?

yes.

> What if you set it to n instead without reverting 865563924022d8. Do you 
> still have the same issue?

!CONFIG_TRIM_UNUSED_KSYMS or !865563924022d8 builds just fine with extra
`make prepare'.
no `extra make prepare' builds ok regardless the state of Makefile/config
files.

I guess you don't need my .config any more.

-ss


[RFC PATCH] arm64: change from CONT_PMD_SHIFT to CONT_PTE_SHIFT

2016-12-09 Thread zhongjiang
From: zhong jiang 

I think that CONT_PTE_SHIFT is more reasonable even if they are the same
value, and the patch does not make any functional change.

Signed-off-by: zhong jiang 
---
 arch/arm64/mm/hugetlbpage.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 2e49bd2..0a4c97b 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -323,7 +323,7 @@ static __init int setup_hugepagesz(char *opt)
 static __init int add_default_hugepagesz(void)
 {
if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
-   hugetlb_add_hstate(CONT_PMD_SHIFT);
+   hugetlb_add_hstate(CONT_PTE_SHIFT);
return 0;
 }
 arch_initcall(add_default_hugepagesz);
-- 
1.8.3.1



[PATCH 1/1 linux-next] xfs: remove unnecessary return

2016-12-09 Thread Fabian Frederick
Commit f7a136aee3c1
("xfs: several xattr functions can be void")

changed two end-of-function "return 0" statements into bare "return"
statements in functions that became void. Remove them.

Signed-off-by: Fabian Frederick 
---
 fs/xfs/xfs_attr_list.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 97c45b6..5aa46c3 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -433,7 +433,6 @@ xfs_attr3_leaf_list_int(
cursor->offset++;
}
trace_xfs_attr_list_leaf_end(context);
-   return;
 }
 
 /*
@@ -543,7 +542,6 @@ xfs_attr_put_listent(
alist->al_offset[context->count++] = context->firstu;
alist->al_count = context->count;
trace_xfs_attr_list_add(context);
-   return;
 }
 
 /*
-- 
2.7.4



Re: [kernel-hardening] Re: Remaining crypto API regressions with CONFIG_VMAP_STACK

2016-12-09 Thread Eric Biggers
On Sat, Dec 10, 2016 at 01:37:12PM +0800, Herbert Xu wrote:
> On Fri, Dec 09, 2016 at 09:25:38PM -0800, Andy Lutomirski wrote:
> >
> > Herbert, how hard would it be to teach the crypto code to use a more
> > sensible data structure than scatterlist and to use coccinelle to fix
> > this stuff for real?
> 
> First of all we already have a sync non-SG hash interface, it's
> called shash.
> 
> If we had enough sync-only users of skcipher then I'll consider
> adding an interface for it.  However, at this point in time it
> appears to make more sense to convert such users over to the async
> interface rather than the other way around.
> 
> As for AEAD we never had a sync interface to begin with and I
> don't think I'm going to add one.
> 

Isn't the question of "should the API use physical or virtual addresses"
independent of the question of "should the API support asynchronous requests"?
You can already choose, via the flags and mask arguments when allocating a
crypto transform, whether you want it to be synchronous or asynchronous or
whether you don't care.  I don't see what that says about whether the API should
take in physical memory (e.g. scatterlists or struct pages) or virtual memory
(e.g. iov_iters or just regular pointers).

And while it's true that asynchronous algorithms are often provided by hardware
drivers that operate on physical memory, it's not always the case.  For example
some of the AES-NI algorithms are asynchronous only because they use the SSE
registers which aren't always available to kernel code, so the request may need
to be processed by another thread.

Eric


Re: [kernel-hardening] Re: Remaining crypto API regressions with CONFIG_VMAP_STACK

2016-12-09 Thread Eric Biggers
On Sat, Dec 10, 2016 at 01:32:08PM +0800, Herbert Xu wrote:
> On Fri, Dec 09, 2016 at 09:25:38PM -0800, Andy Lutomirski wrote:
> >
> > > The following crypto drivers initialize a scatterlist to point into an
> > > ablkcipher_request, which may have been allocated on the stack with
> > > SKCIPHER_REQUEST_ON_STACK():
> > >
> > > drivers/crypto/ccp/ccp-crypto-aes-xts.c:162
> > > drivers/crypto/ccp/ccp-crypto-aes.c:94
> > 
> > These are real, and I wish I'd known about them sooner.
> 
> Are you sure? Any instance of *_ON_STACK must only be used with
> sync algorithms and most drivers under drivers/crypto declare
> themselves as async.
> 

Why exactly is that?  Obviously, it wouldn't work if you returned from the stack
frame before the request completed, but does anything stop someone from using an
*_ON_STACK() request and then waiting for the request to complete before
returning from the stack frame?

Eric


Crypto Fixes for 4.9

2016-12-09 Thread Herbert Xu
Hi Linus:

This push fixes the following issues:

- Fix pointer size when caam is used with AArch64 boot loader on
  AArch32 kernel.
- Fix ahash state corruption in marvell driver.
- Fix buggy algif_aead tag handling.
- Prevent mcryptd from being used with incompatible algorithms
  which can cause crashes.


Please pull from

git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6.git linus


Horia Geantă (1):
  crypto: caam - fix pointer size for AArch64 boot loader, AArch32 kernel

Romain Perier (2):
  crypto: marvell - Don't copy hash operation twice into the SRAM
  crypto: marvell - Don't corrupt state of an STD req for re-stepped ahash

Stephan Mueller (2):
  crypto: algif_aead - fix AEAD tag memory handling
  crypto: algif_aead - fix uninitialized variable warning

tim (1):
  crypto: mcryptd - Check mcryptd algorithm compatibility

 crypto/algif_aead.c   |   59 ++---
 crypto/mcryptd.c  |   19 -
 drivers/crypto/caam/ctrl.c|5 ++--
 drivers/crypto/marvell/hash.c |   11 
 4 files changed, 57 insertions(+), 37 deletions(-)

Thanks,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


Re: Remaining crypto API regressions with CONFIG_VMAP_STACK

2016-12-09 Thread Eric Biggers
On Fri, Dec 09, 2016 at 09:25:38PM -0800, Andy Lutomirski wrote:
> > The following crypto drivers initialize a scatterlist to point into an
> > ahash_request, which may have been allocated on the stack with
> > AHASH_REQUEST_ON_STACK():
> >
> > drivers/crypto/bfin_crc.c:351
> > drivers/crypto/qce/sha.c:299
> > drivers/crypto/sahara.c:973,988
> > drivers/crypto/talitos.c:1910
> 
> These are impossible or highly unlikely on x86.
> 
> > drivers/crypto/ccp/ccp-crypto-aes-cmac.c:105,119,142
> > drivers/crypto/ccp/ccp-crypto-sha.c:95,109,124
> 
> These
> 
> > drivers/crypto/qce/sha.c:325
> 
> This is impossible on x86.
> 

Thanks for looking into these.  I didn't investigate who/what is likely to be
using each driver.

Of course I would not be surprised to see people want to start supporting
virtually mapped stacks on other architectures too.

> >
> > The "good" news with these bugs is that on x86_64 without CONFIG_DEBUG_SG=y 
> > or
> > CONFIG_DEBUG_VIRTUAL=y, you can still do virt_to_page() and then 
> > page_address()
> > on a vmalloc address and get back the same address, even though you aren't
> > *supposed* to be able to do this.  This will make things still work for most
> > people.  The bad news is that if you happen to have consumed just about 1 
> > page
> > (or N pages) of your stack at the time you call the crypto API, your stack
> > buffer may actually span physically non-contiguous pages, so the crypto
> > algorithm will scribble over some unrelated page.
> 
> Are you sure?  If it round-trips to the same virtual address, it
> doesn't matter if the buffer is contiguous.

You may be right, I didn't test this.  The hash_walk and blkcipher_walk code do
go page by page, but I suppose on x86_64 it would just step from one bogus
"struct page" to the adjacent one and still map it to the original virtual
address.

Eric


Re: [PATCH] scsi/qla2xxx: label endian-ness for many fields

2016-12-09 Thread Joe Perches
On Fri, 2016-12-09 at 22:45 +0200, Michael S. Tsirkin wrote:
> This adds endian-ness labels for lots of qla structs.
> Doing this cuts down number of sparse warnings from ~1700 to ~1400.
> Will help find and resolve some of real issues down the road.
> 
> Signed-off-by: Michael S. Tsirkin 
> 
> ---
> 
> Compile-tested only.
> 
> diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
> index 73b12e4..a4d3071 100644
> --- a/drivers/scsi/qla2xxx/qla_def.h
> +++ b/drivers/scsi/qla2xxx/qla_def.h
> @@ -1159,28 +1159,28 @@ typedef struct {
>*/
>   uint8_t  firmware_options[2];
>  
> - uint16_t frame_payload_size;
> - uint16_t max_iocb_allocation;
> - uint16_t execution_throttle;
> + __le16 frame_payload_size;
> + __le16 max_iocb_allocation;
> + __le16 execution_throttle;

Shouldn't all these _not_ have the leading __?
Perhaps the uint8_t uses should be converted to u8 as well.

[etc...]



Re: [PATCH v3 00/15] livepatch: hybrid consistency model

2016-12-09 Thread Balbir Singh
On Thu, 2016-12-08 at 12:08 -0600, Josh Poimboeuf wrote:
> Dusting the cobwebs off the consistency model again.  This is based on
> linux-next/master.
> 
> v1 was posted on 2015-02-09:
> 
>   https://lkml.kernel.org/r/cover.1423499826.git.jpoim...@redhat.com
> 
> v2 was posted on 2016-04-28:
> 
>   https://lkml.kernel.org/r/cover.1461875890.git.jpoim...@redhat.com
> 
> The biggest issue from v2 was finding a decent way to detect preemption
> and page faults on the stack of a sleeping task.  

Could you please elaborate on this? Preemption of a sleeping task and
faults as in the future (time) preemption and faults?

Balbir Singh.



Re: Remaining crypto API regressions with CONFIG_VMAP_STACK

2016-12-09 Thread Herbert Xu
On Fri, Dec 09, 2016 at 09:25:38PM -0800, Andy Lutomirski wrote:
>
> Herbert, how hard would it be to teach the crypto code to use a more
> sensible data structure than scatterlist and to use coccinelle to fix
> this stuff for real?

First of all we already have a sync non-SG hash interface, it's
called shash.

If we had enough sync-only users of skcipher then I'll consider
adding an interface for it.  However, at this point in time it
appears to make more sense to convert such users over to the async
interface rather than the other way around.

As for AEAD we never had a sync interface to begin with and I
don't think I'm going to add one.

Cheers,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


Re: [PATCH 7/7] hwrng: core: Remove two unused include

2016-12-09 Thread kbuild test robot
Hi Corentin,

[auto build test ERROR on char-misc/char-misc-testing]
[also build test ERROR on v4.9-rc8 next-20161209]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Corentin-Labbe/hwrng-core-do-not-use-multiple-blank-lines/20161210-072632
config: i386-randconfig-i0-201649 (attached as .config)
compiler: gcc-4.8 (Debian 4.8.4-1) 4.8.4
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

   drivers/char/hw_random/core.c: In function 'rng_dev_open':
>> drivers/char/hw_random/core.c:169:11: error: dereferencing pointer to 
>> incomplete type
 if ((filp->f_mode & FMODE_READ) == 0)
  ^
   drivers/char/hw_random/core.c:169:22: error: 'FMODE_READ' undeclared (first 
use in this function)
 if ((filp->f_mode & FMODE_READ) == 0)
 ^
   drivers/char/hw_random/core.c:169:22: note: each undeclared identifier is 
reported only once for each function it appears in
   drivers/char/hw_random/core.c:171:10: error: dereferencing pointer to 
incomplete type
 if (filp->f_mode & FMODE_WRITE)
 ^
   drivers/char/hw_random/core.c:171:21: error: 'FMODE_WRITE' undeclared (first 
use in this function)
 if (filp->f_mode & FMODE_WRITE)
^
   drivers/char/hw_random/core.c: In function 'rng_dev_read':
   drivers/char/hw_random/core.c:221:11: error: dereferencing pointer to 
incomplete type
!(filp->f_flags & O_NONBLOCK));
  ^
   drivers/char/hw_random/core.c:221:23: error: 'O_NONBLOCK' undeclared (first 
use in this function)
!(filp->f_flags & O_NONBLOCK));
  ^
   drivers/char/hw_random/core.c:230:12: error: dereferencing pointer to 
incomplete type
   if (filp->f_flags & O_NONBLOCK) {
   ^
   drivers/char/hw_random/core.c: At top level:
   drivers/char/hw_random/core.c:272:21: error: variable 'rng_chrdev_ops' has 
initializer but incomplete type
static const struct file_operations rng_chrdev_ops = {
^
   drivers/char/hw_random/core.c:273:2: error: unknown field 'owner' specified 
in initializer
 .owner  = THIS_MODULE,
 ^
   In file included from include/linux/linkage.h:6:0,
from include/linux/kernel.h:6,
from include/linux/delay.h:10,
from drivers/char/hw_random/core.c:13:
   include/linux/export.h:37:30: warning: excess elements in struct initializer 
[enabled by default]
#define THIS_MODULE ((struct module *)0)
 ^
   drivers/char/hw_random/core.c:273:12: note: in expansion of macro 
'THIS_MODULE'
 .owner  = THIS_MODULE,
   ^
   include/linux/export.h:37:30: warning: (near initialization for 
'rng_chrdev_ops') [enabled by default]
#define THIS_MODULE ((struct module *)0)
 ^
   drivers/char/hw_random/core.c:273:12: note: in expansion of macro 
'THIS_MODULE'
 .owner  = THIS_MODULE,
   ^
   drivers/char/hw_random/core.c:274:2: error: unknown field 'open' specified 
in initializer
 .open  = rng_dev_open,
 ^
   drivers/char/hw_random/core.c:274:2: warning: excess elements in struct 
initializer [enabled by default]
   drivers/char/hw_random/core.c:274:2: warning: (near initialization for 
'rng_chrdev_ops') [enabled by default]
   drivers/char/hw_random/core.c:275:2: error: unknown field 'read' specified 
in initializer
 .read  = rng_dev_read,
 ^
   drivers/char/hw_random/core.c:275:2: warning: excess elements in struct 
initializer [enabled by default]
   drivers/char/hw_random/core.c:275:2: warning: (near initialization for 
'rng_chrdev_ops') [enabled by default]
   drivers/char/hw_random/core.c:276:2: error: unknown field 'llseek' specified 
in initializer
 .llseek  = noop_llseek,
 ^
   drivers/char/hw_random/core.c:276:13: error: 'noop_llseek' undeclared here 
(not in a function)
 .llseek  = noop_llseek,
^
   drivers/char/hw_random/core.c:276:2: warning: excess elements in struct 
initializer [enabled by default]
 .llseek  = noop_llseek,
 ^
   drivers/char/hw_random/core.c:276:2: warning: (near initialization for 
'rng_chrdev_ops') [enabled by default]

vim +169 drivers/char/hw_random/core.c

844dd05f Michael Buesch 2006-06-26  163 return 0;
844dd05f Michael Buesch 2006-06-26  164  }
844dd05f Michael Buesch 2006-06-26  165  
844dd05f Michael Buesch 2006-06-26  166  static int rng_dev_open(struct inode 
*inode, struct file *filp)
844dd05f Michael Buesch 2006-06-26  167  {
844dd05f Michael Buesch 2006-06-26  168 

Re: Remaining crypto API regressions with CONFIG_VMAP_STACK

2016-12-09 Thread Herbert Xu
On Fri, Dec 09, 2016 at 09:25:38PM -0800, Andy Lutomirski wrote:
>
> > The following crypto drivers initialize a scatterlist to point into an
> > ablkcipher_request, which may have been allocated on the stack with
> > SKCIPHER_REQUEST_ON_STACK():
> >
> > drivers/crypto/ccp/ccp-crypto-aes-xts.c:162
> > drivers/crypto/ccp/ccp-crypto-aes.c:94
> 
> These are real, and I wish I'd known about them sooner.

Are you sure? Any instance of *_ON_STACK must only be used with
sync algorithms and most drivers under drivers/crypto declare
themselves as async.

Cheers,
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


Re: Remaining crypto API regressions with CONFIG_VMAP_STACK

2016-12-09 Thread Andy Lutomirski
On Fri, Dec 9, 2016 at 3:08 PM, Eric Biggers  wrote:
> In the 4.9 kernel, virtually-mapped stacks will be supported and enabled by
> default on x86_64.  This has been exposing a number of problems in which
> on-stack buffers are being passed into the crypto API, which to support crypto
> accelerators operates on 'struct page' rather than on virtual memory.
>
> Some of these problems have already been fixed, but I was wondering how many
> problems remain, so I briefly looked through all the callers of sg_set_buf() 
> and
> sg_init_one().  Overall I found quite a few remaining problems, detailed 
> below.
>
> The following crypto drivers initialize a scatterlist to point into an
> ahash_request, which may have been allocated on the stack with
> AHASH_REQUEST_ON_STACK():
>
> drivers/crypto/bfin_crc.c:351
> drivers/crypto/qce/sha.c:299
> drivers/crypto/sahara.c:973,988
> drivers/crypto/talitos.c:1910

These are impossible or highly unlikely on x86.

> drivers/crypto/ccp/ccp-crypto-aes-cmac.c:105,119,142
> drivers/crypto/ccp/ccp-crypto-sha.c:95,109,124

These

> drivers/crypto/qce/sha.c:325

This is impossible on x86.

>
> The following crypto drivers initialize a scatterlist to point into an
> ablkcipher_request, which may have been allocated on the stack with
> SKCIPHER_REQUEST_ON_STACK():
>
> drivers/crypto/ccp/ccp-crypto-aes-xts.c:162
> drivers/crypto/ccp/ccp-crypto-aes.c:94

These are real, and I wish I'd known about them sooner.

>
> And these other places do crypto operations on buffers clearly on the stack:
>
> drivers/net/wireless/intersil/orinoco/mic.c:72

Ick.

> drivers/usb/wusbcore/crypto.c:264

Well, crud.  I thought I had fixed this driver but I missed one case.
Will send a fix tomorrow.  But I'm still unconvinced that this
hardware ever shipped.

> net/ceph/crypto.c:182

Ick.

> net/rxrpc/rxkad.c:737,1000

Well, crud.  This was supposed to have been fixed in:

commit a263629da519b2064588377416e067727e2cbdf9
Author: Herbert Xu 
Date:   Sun Jun 26 14:55:24 2016 -0700

rxrpc: Avoid using stack memory in SG lists in rxkad


> security/keys/encrypted-keys/encrypted.c:500

That's a trivial one-liner.  Patch coming tomorrow.

> fs/cifs/smbencrypt.c:96

Ick.

>
> Note: I almost certainly missed some, since I excluded places where the use 
> of a
> stack buffer was not obvious to me.  I also excluded AEAD algorithms since 
> there
> isn't an AEAD_REQUEST_ON_STACK() macro (yet).
>
> The "good" news with these bugs is that on x86_64 without CONFIG_DEBUG_SG=y or
> CONFIG_DEBUG_VIRTUAL=y, you can still do virt_to_page() and then 
> page_address()
> on a vmalloc address and get back the same address, even though you aren't
> *supposed* to be able to do this.  This will make things still work for most
> people.  The bad news is that if you happen to have consumed just about 1 page
> (or N pages) of your stack at the time you call the crypto API, your stack
> buffer may actually span physically non-contiguous pages, so the crypto
> algorithm will scribble over some unrelated page.

Are you sure?  If it round-trips to the same virtual address, it
doesn't matter if the buffer is contiguous.

>  Also, hardware crypto drivers
> which actually do operate on physical memory will break too.

Those were already broken.  DMA has been illegal on the stack for
years and DMA debugging would have caught it.

>
> So I am wondering: is the best solution really to make all these crypto API
> algorithms and users use heap buffers, as opposed to something like 
> maintaining
> a lowmem alias for the stack, or introducing a more general function to 
> convert
> buffers (possibly in the vmalloc space) into scatterlists?  And if the current
> solution is desired, who is going to fix all of these bugs and when?

The *right* solution IMO is to fix crypto to stop using scatterlists.
Scatterlists are for DMA using physical addresses, and they're
inappropriate for almost every user of them that's using them for crypto.
kiov would be much better -- it would make sense and it would be
faster.

I have a hack to make scatterlists pointing to the stack work (as long
as they're only one element), but that's seriously gross.

Herbert, how hard would it be to teach the crypto code to use a more
sensible data structure than scatterlist and to use coccinelle to fix
this stuff for real?

In the mean time, we should patch the handful of drivers that matter.


Re: [bug report] perf jit: add source line info support

2016-12-09 Thread Stephane Eranian
On Fri, Dec 9, 2016 at 9:16 PM, Stephane Eranian  wrote:
> Hi Dan,
>
> On Wed, Nov 30, 2016 at 10:48 AM, Dan Carpenter
>  wrote:
>> Hello Stephane Eranian,
>>
>> The patch 598b7c6919c7: "perf jit: add source line info support" from
>> Nov 30, 2015, leads to the following static checker warning:
>>
>> ./tools/perf/util/genelf_debug.c:211 emit_signed_LEB128()
>> warn: potential left shift more than type allows '57'
>>
>> ./tools/perf/util/genelf_debug.c
>>202  static void emit_signed_LEB128(struct buffer_ext *be, long data)
>>203  {
>>204  int more = 1;
>>205  int negative = data < 0;
>>206  int size = sizeof(long) * CHAR_BIT;
>>207  while (more) {
>>208  ubyte cur = data & 0x7F;
>>209  data >>= 7;
>>210  if (negative)
>>211  data |= - (1 << (size - 7));
>> ^^^
>> This is a no-op on 64 bit systems.  I suspect it's not intentional?
>>
> I suspect size should be: size = sizeof(int) * CHAR_BIT;
> I will test that.
Sorry, I think the issue is on the 1 shift, should be 1UL instead.
>
>>212  if ((data == 0 && !(cur & 0x40)) ||
>>213  (data == -1l && (cur & 0x40)))
>>214  more = 0;
>>215  else
>>216  cur |= 0x80;
>>217  buffer_ext_add(be, &cur, 1);
>>218  }
>>219  }
>>
>> regards,
>> dan carpenter


Re: [PATCH] x86/tsc: RFC: re-synchronize TSCs to boot cpu TSC

2016-12-09 Thread Roland Scheidegger
Am 10.12.2016 um 02:55 schrieb Roland Scheidegger:
> Am 09.12.2016 um 23:59 schrieb Thomas Gleixner:
>> On Fri, 9 Dec 2016, Roland Scheidegger wrote:
>>
>> Cc'ed someone from Dell. 
>>
>>> Am 09.12.2016 um 18:33 schrieb Thomas Gleixner:
>>>> Can you add the patch below to gather more information? There is a hunk in
>>>> there with an '#if 0' which sets the TSC ADJUST to 0 on boot, which you can
>>>> turn on as second step.
>>>
>>> Ok, here's the results:
>>> ...
>>> TSC ADJUST synchronize: Reference CPU0: -2820267100 CPU1: -2822498296
>>> TSC target sync skipped
>>> smpboot: Vector locked
>>> smpboot: Vector setup done
>>> smpboot: Clock setup
>>> TSC source sync skipped
>>> smpboot: Target CPU is online
>>
>> I did not expect that to happen. Now I'm puzzled and curious where the
>> machine gets lost after that. See below.
>>
>>> With the #if 0 block activated, it boots up fine, the output was:
>>
>> That does not make any sense at all, but yes, nothing in this context makes
>> sense.
>>
>>> [1.038892] x86: Booting SMP configuration:
>>> [1.038930]  node  #0, CPUs:#1
>>> [0.171851] TSC ADJUST: CPU1: -2830353064 218577682002
>>> [1.117495] TSC source sync 0 -> 1 runs 3
>>> [0.171852] TSC ADJUST differs: Reference CPU0: -2828600940 CPU1:
>>> -2830353064
>>> [0.171853] TSC ADJUST synchronize: Reference CPU0: 0 CPU1: -2830353064
>>> [1.117497] TSC target sync skip
>>
>>> (And fwiw with my quick hack the lockups disappear too when I change that
>>> back to blast a zero into TSC_ADJ for all cpus.)
>>
>> Right, That's what that hunk does as well.
>>
>> Now what's interesting is that the adjustement of CPU1 in the non write to
>> zero case results in the following:
>>
>> TSC ADJUST: CPU1: -2830353064 218577682002 <-- TSC value
>> TSC ADJUST differs: Reference CPU0: -2828600940 CPU1: -2830353064
>>
>> We write CPU1 adjust register to -2828600940 which makes the TSC on CPU1
>> jump forwards by -2828600940 - -2830353064 = 1752124 cycles.
>>
>> In the write to zero case the jump is forward as well, but this time it's
>> huge, i.e. 2830353064 cycles.
>>
>> I tried to wreckage the TSC by writing similar values to the adjust MSR on
>> early boot, but independent of the values and independent of the write to
>> zero part the machine comes up happily all the time.
>>
>> The only difference is that my machine has a somewhat saner BIOS. So the
>> thing might just die in the value add SMM crap, but who knows.
>>
>> In the patch below is another bunch of debug prints which emit the state
>> information of CPU1 during bringup. Maybe that gives a hint where the
>> system gets stuck when you disable the 'write to zero' magic again.
>>
>> The NMI watchdog does not catch anything, right?
> Nope. (Though as mentioned earlier, with my hack when not writing zero
> it did - but the lockup there was later after all 16 cpus were online,
> and I only really tried that with the ubuntu 4.4 kernel. I never got to
> see the full output from that NMI though due to limited screen space, my
> attempts to try anything different than text mode were met with a blank
> screen, and from the parts I did see I didn't really see anything
> interesting albeit that's not saying much as I really have no idea about
> that code...)
> 
> With the new patch here's the output (albeit the typing gets a bit
> annoying...)
> ...
> Invoking state 32 CB replay_prepare_cpu+0x0/0xe0
> CB returned 0
> Invoking state 35 CB rcutree_prepare_cpu+0x0/0x50
> CB returned 0
> Invoking state 41 CB notify_prepare+0x0/0xa0
> CB returned 0
> Invoking state 48 CB bringup_cpu+0x0/0x90
> x86: Booting SMP configuration:
>  node  #0, CPUs:   #1
> Invoking state 51 CB sched_cpu_starting+0x0/0x60
> CB returned 0
> Invoking state 62 CB x86_pmu_starting_cpu+0x0/0x20
> CB returned 0
> TSC ADJUST: CPU1: -2846131604 175264328618
> TSC ADJUST differs: Reference CPU0: -2843967660 CPU1: -2846131604
> TSC ADJUST synchronize: Reference CPU0: -2843967660 CPU1: -2846131604
> TSC target sync skip
> smpboot: Vector locked
> TSC source sync 0 -> 1 runs 3
> smpboot: Vector setup done
> smpboot: Clock setup
> TSC source sync skipped
> smpboot: Target CPU is online


Ok I did some more digging. Since it appeared it never returned from
x86_cpuinit.setup_percpu_clockev() I followed that a bit more. This is
using the tsc deadline timer, ending up in clockevents_register_device()
finally. This executes all well except the raw_spin_unlock_irqrestore()
at the end which we never get past.

I disabled the tsc deadline timer (lapic=notscdeadline) and indeed, no
more lockups!
So could there be something wrong with setting this up? Warping past
some event due to resynchronization or something?
Or hitting some bugs with TSC deadline interrupts?
Anyway, that's definitely out of my area of knowledge, hope it helps...

Roland



Re: [PATCH] Add +~800M crashkernel explaination

2016-12-09 Thread Robert LeBlanc
On Fri, Dec 9, 2016 at 7:49 PM, Baoquan He  wrote:
> On 12/09/16 at 05:22pm, Robert LeBlanc wrote:
>> When trying to configure crashkernel greater than about 800 MB, the
>> kernel fails to allocate memory on x86 and x86_64. This is due to an
>> undocumented limit that the crashkernel and other low memory items must
>> be allocated below 896 MB unless the ",high" option is given. This
>> updates the documentation to explain this and what I understand the
>> limitations to be on the option.
>
> This is true, but not very accurate. You found it's about 800M, it's
> because usually the current kernel needs about 40M space to run, and some
> extra reservation before reserve_crashkernel invocation, another ~10M.
> However it's normal case, people may build modules into or have some
> special code to bloat kernel. This patch makes sense to address the
> low|high issue, it might be not good so determined to say ~800M.

My testing showed that I could go anywhere from about 830M to 880M,
depending on distro, kernel version, and stuff that you mentioned. I
just thought some rule of thumb of when to consider using high would
be good. People may not think that 800 MB is 'large' when you have 512
GB of RAM for instance. I thought about making 512 MB be the rule of
thumb, but you can do a lot with ~300 MB.

I'm happy to adjust the wording, what would you recommend? Also, I'm
not 100% sure that I got the cases covered correctly. I was surprised
that I could not get it to work with the "new" format with the
multiple ranges, and that specifying an offset wouldn't work either,
although the offset kind of makes sense. Do you know for sure that it
doesn't work with ranges?

I tried,

crashkernel=256M-1G:128M,high,1G-4G:256M,high,4G-:512M,high

and

crashkernel=256M-1G:128M,1G-4G:256M,4G-:512M,high

and neither worked. It seems that a better separator would be ';'
instead of ',' for ranges, then you could specify options better. Kind
of hard to change now.

>
>>
>> Signed-off-by: Robert LeBlanc 
>> ---
>>  Documentation/kdump/kdump.txt | 22 +-
>>  1 file changed, 17 insertions(+), 5 deletions(-)
>>
>> diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
>> index b0eb27b..aa3efa8 100644
>> --- a/Documentation/kdump/kdump.txt
>> +++ b/Documentation/kdump/kdump.txt
>> @@ -256,7 +256,9 @@ While the "crashkernel=size[@offset]" syntax is 
>> sufficient for most
>>  configurations, sometimes it's handy to have the reserved memory dependent
>>  on the value of System RAM -- that's mostly for distributors that pre-setup
>>  the kernel command line to avoid a unbootable system after some memory has
>> -been removed from the machine.
>> +been removed from the machine. If you need to allocate more than ~800M
>> +for x86 or x86_64 then you must use the simple format as the format
>> +',high' conflicts with the separators of ranges.
>>
>>  The syntax is:
>>
>> @@ -282,11 +284,21 @@ Boot into System Kernel
>>  1) Update the boot loader (such as grub, yaboot, or lilo) configuration
>> files as necessary.
>>
>> -2) Boot the system kernel with the boot parameter "crashkernel=Y@X",
>> +2) Boot the system kernel with the boot parameter "crashkernel=Y[@X | 
>> ,high]",
>> where Y specifies how much memory to reserve for the dump-capture kernel
>> -   and X specifies the beginning of this reserved memory. For example,
>> -   "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory
>> -   starting at physical address 0x01000000 (16MB) for the dump-capture 
>> kernel.
>> +   and X specifies the beginning of this reserved memory or ',high' to load 
>> in
>> +   high memory. For example, "crashkernel=64M@16M" tells the system
>> +   kernel to reserve 64 MB of memory starting at physical address
>> +   0x01000000 (16MB) for the dump-capture kernel.
>> +
>> +   Specifying "crashkernel=1G,high" tells the system kernel to reserve 1 GB
>> +   of memory using high memory for the dump-capture kernel, there may also
>> +   be some low memory allocated as well. If you need more than ~800M for
>> +   the crash kernel to operate (volumes on FC/iSCSI, large volumes, systemd
>> +   added to the previous, etc), you need to specify ',high' since without
>> +   it crashkernel has to try and fit under 896M along with some other
>> +   items and will fail to allocate memory. High memory may only be relevant
>> +   on x86 and x86_64.
>>
>> On x86 and x86_64, use "crashkernel=64M@16M".
>>
>> --
>> 2.10.2
>>
>>
>> ___
>> kexec mailing list
>> ke...@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/kexec


Robert LeBlanc
PGP Fingerprint 79A2 9CA4 6CC4 45DD A904  C70E E654 3BB2 FA62 B9F1


Re: [PATCH 1/1] arm64: mm: add config options for page table configuration

2016-12-09 Thread Scott Branden

Hi Will,

On 16-12-09 02:57 AM, Will Deacon wrote:

On Thu, Dec 08, 2016 at 11:33:39AM -0800, Scott Branden wrote:

Since I currently have your attention:  I do think there is fundamental bug
in the ARM64 mm implementation.  If you look at /sys/devices/system/memory
it only shows the last memoryX section created after init.


That directory doesn't seem to exist on my arm64 systems. Do I have to
enable something specific in the .config?
I looked in the /sys/devices/system/memory and it doesn't look like it 
appears until memory hotplug is enabled in the system.  This is another 
patch I'm trying to work through at the same time:

https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1284943.html

The internals of the memory management subsystem are not something I'm 
too familiar with at this point.


Will


Regards,
Scott


Re: [bug report] perf jit: add source line info support

2016-12-09 Thread Stephane Eranian
Hi Dan,

On Wed, Nov 30, 2016 at 10:48 AM, Dan Carpenter
 wrote:
> Hello Stephane Eranian,
>
> The patch 598b7c6919c7: "perf jit: add source line info support" from
> Nov 30, 2015, leads to the following static checker warning:
>
> ./tools/perf/util/genelf_debug.c:211 emit_signed_LEB128()
> warn: potential left shift more than type allows '57'
>
> ./tools/perf/util/genelf_debug.c
>202  static void emit_signed_LEB128(struct buffer_ext *be, long data)
>203  {
>204  int more = 1;
>205  int negative = data < 0;
>206  int size = sizeof(long) * CHAR_BIT;
>207  while (more) {
>208  ubyte cur = data & 0x7F;
>209  data >>= 7;
>210  if (negative)
>211  data |= - (1 << (size - 7));
> ^^^
> This is a no-op on 64 bit systems.  I suspect it's not intentional?
>
I suspect size should be: size = sizeof(int) * CHAR_BIT;
I will test that.

>212  if ((data == 0 && !(cur & 0x40)) ||
>213  (data == -1l && (cur & 0x40)))
>214  more = 0;
>215  else
>216  cur |= 0x80;
>217  buffer_ext_add(be, &cur, 1);
>218  }
>219  }
>
> regards,
> dan carpenter


perf/jit doesn't cope well with mprotect() to jit containing pages

2016-12-09 Thread Andres Freund
Hi,

While working on optionally jit-compiling parts of postgres using llvm
(MCJIT currently, but Orc would have the same issue afaics), I'm trying
to use perf jit support to make profiling of those JITed parts easier.

Turns out the current jit support in perf doesn't work that well for
LLVM - but it doesn't primarily look like LLVM's fault. Syscall-wise
llvm does (heavily filtered):

mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
0x7efd3866e000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 
0x7efd3866d000
mprotect(0x7efd3866e000, 4096, PROT_READ|PROT_EXEC) = 0
mprotect(0x7efd3866d000, 4096, PROT_READ|PROT_EXEC) = 0
write(2, "Function loaded: evalexpr0 at 139626038091776 0x7efd3866e000 len 69", 
68) = 68

mmap(0x7efd3866f000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 
0) = 0x7efd3866c000
mmap(0x7efd3866e000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 
0) = 0x7efd3866b000
mprotect(0x7efd3866c000, 4096, PROT_READ|PROT_EXEC) = 0
mprotect(0x7efd3866b000, 4096, PROT_READ|PROT_EXEC) = 0
write(2, "Function loaded: evalexpr1 at 139626038083584 0x7efd3866c000 len 69", 
68) = 68

...

i.e. it mmaps single pages for each JITed function's sections. Which
makes sense, because the first function is JITed independently from the
second one.

The corresponding MMAP2 records according to perf script
--show-mmap-events are:
postgres  4107 595444.867737: PERF_RECORD_MMAP2 4107/4107: 
[0x7efd3866e000(0x1000) @ 0x7efd3866e000 00:00 0 0]: ---p //anon
postgres  4107 595444.867825: PERF_RECORD_MMAP2 4107/4107: 
[0x7efd3866d000(0x2000) @ 0x7efd3866d000 00:00 0 0]: ---p //anon
postgres  4107 595444.884090: PERF_RECORD_MMAP2 4107/4107: 
[0x7efd3866c000(0x3000) @ 0x7efd3866c000 00:00 0 0]: ---p //anon
postgres  4107 595444.884113: PERF_RECORD_MMAP2 4107/4107: 
[0x7efd3866b000(0x4000) @ 0x7efd3866b000 00:00 0 0]: ---p //anon
Note how the size of the mapping continually increases, so that each
MMAP2 record covers previous sections.

If one perf inject --jit into that it looks like:
postgres  4107 595444.867737: PERF_RECORD_MMAP2 4107/4107: 
[0x7efd3866e000(0x1000) @ 0x7efd3866e000 00:00 0 0]: ---p //anon
postgres  4107 595444.867825: PERF_RECORD_MMAP2 4107/4107: 
[0x7efd3866d000(0x2000) @ 0x7efd3866d000 00:00 0 0]: ---p //anon
postgres  4107 595444.868140: PERF_RECORD_MMAP2 4107/4107: 
[0x7efd3866e000(0x45) @ 0x40 fd:02 33434534 1]: --xs 
/home/andres/.debug/jit/llvm-IR-jit-20161209.XXfN0K3O/jitted-4107-1.so
postgres  4107 595444.884090: PERF_RECORD_MMAP2 4107/4107: 
[0x7efd3866c000(0x3000) @ 0x7efd3866c000 00:00 0 0]: ---p //anon
postgres  4107 595444.884113: PERF_RECORD_MMAP2 4107/4107: 
[0x7efd3866b000(0x4000) @ 0x7efd3866b000 00:00 0 0]: ---p //anon
postgres  4107 595444.884232: PERF_RECORD_MMAP2 4107/4107: 
[0x7efd3866c000(0x45) @ 0x40 fd:02 33434599 1]: --xs 
/home/andres/.debug/jit/llvm-IR-jit-20161209.XXfN0K3O/jitted-4107-2.so

Note how the first injected record is also covered by the following
"//anon" event.  This leads to the curious effect that samples for
the first function (evalexpr0) are associated with the right generated
.so, until the second function is JITed.

I hacked up perf inject to omit such MMAP2 records by adding
if (event->mmap2.prot == 0)
return 0;
to perf_event__jit_repipe_mmap2() and suddenly things work.

I presume the increasing MMAP2 size is triggered by the consecutive
pages being represented as a single page-range in the kernel?

If I, to work around such consecutive pages, force another page to be
mmap()ed inbetween, and avoid using MAP_ANONYMOUS, the problem also goes
away.

Am I doing something wrong, or is there a bug here?

FWIW, this is on linux 4.8.8, with perf from master
(v4.9-rc8-108-g810ac7b7558d).

BTW, it's also a bit weird that those MMAP2 records triggered by
mprotect/mmap, have prot set to 0...

Regards,

Andres



[GIT] Networking

2016-12-09 Thread David Miller

1) Limit the number of can filters to avoid > MAX_ORDER allocations.
   Fix from Marc Kleine-Budde.

2) Limit GSO max size in netvsc driver to avoid problems with
   NVGRE configurations.  From Stephen Hemminger.

3) Return proper error when memory allocation fails in
   ser_gigaset_init(), from Dan Carpenter.

4) Missing linkage undo in error paths of ipvlan_link_new(), from Gao
   Feng.

5) Missing necessary SET_NETDEV_DEV in lantiq and cpmac drivers,
   from Florian Fainelli.

6) Handle probe deferral properly in smsc911x driver.

Please pull, thanks a lot!

The following changes since commit bc3913a5378cd0ddefd1dfec6917cc12eb23a946:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc (2016-12-06 
09:24:11 -0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git 

for you to fetch changes up to d33695fbfab73a4a6550fa5c2d0bacc68d7c5901:

  net: mlx5: Fix Kconfig help text (2016-12-09 23:08:32 -0500)


Alex (1):
  drivers: net: cpsw-phy-sel: Clear RGMII_IDMODE on "rgmii" links

Arjun V (1):
  cxgb4/cxgb4vf: Assign netdev->dev_port with port ID

Christopher Covington (1):
  net: mlx5: Fix Kconfig help text

Dan Carpenter (1):
  ser_gigaset: return -ENOMEM on error instead of success

Daniele Palmas (1):
  NET: usb: cdc_mbim: add quirk for supporting Telit LE922A

David S. Miller (3):
  Merge tag 'linux-can-fixes-for-4.9-20161207' of 
git://git.kernel.org/.../mkl/linux-can
  Merge tag 'linux-can-fixes-for-4.9-20161208' of 
git://git.kernel.org/.../mkl/linux-can
  Merge branch 'ethernet-missing-netdev-parent'

Florian Fainelli (3):
  phy: Don't increment MDIO bus refcount unless it's a different owner
  net: ethernet: lantiq_etop: Call SET_NETDEV_DEV()
  net: ethernet: cpmac: Call SET_NETDEV_DEV()

Gao Feng (1):
  driver: ipvlan: Unlink the upper dev when ipvlan_link_new failed

Linus Walleij (1):
  net: smsc911x: back out silently on probe deferrals

Marc Kleine-Budde (1):
  can: raw: raw_setsockopt: limit number of can_filter that can be set

Peng Tao (1):
  vhost-vsock: fix orphan connection reset

Thomas Falcon (1):
  ibmveth: set correct gso_size and gso_type

stephen hemminger (1):
  netvsc: reduce maximum GSO size

추지호 (1):
  can: peak: fix bad memory access and free sequence

 drivers/isdn/gigaset/ser-gigaset.c  |  4 +++-
 drivers/net/can/usb/peak_usb/pcan_usb_core.c|  6 --
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |  1 +
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c  |  1 -
 drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c |  1 +
 drivers/net/ethernet/ibm/ibmveth.c  | 65 
+++--
 drivers/net/ethernet/ibm/ibmveth.h  |  1 +
 drivers/net/ethernet/lantiq_etop.c  |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig |  2 --
 drivers/net/ethernet/smsc/smsc911x.c|  9 -
 drivers/net/ethernet/ti/cpmac.c |  1 +
 drivers/net/ethernet/ti/cpsw-phy-sel.c  |  1 +
 drivers/net/hyperv/netvsc_drv.c |  5 +
 drivers/net/ipvlan/ipvlan_main.c|  4 +++-
 drivers/net/phy/phy_device.c| 16 +---
 drivers/net/usb/cdc_mbim.c  | 21 +
 drivers/net/usb/cdc_ncm.c   | 14 +-
 drivers/vhost/vsock.c   |  2 +-
 include/linux/usb/cdc_ncm.h |  3 ++-
 include/uapi/linux/can.h|  1 +
 net/can/raw.c   |  3 +++
 21 files changed, 142 insertions(+), 20 deletions(-)


Re: [PATCH 1/1] of: of_reserved_mem: Ensure cma reserved region not cross the low/high memory

2016-12-09 Thread Jason Liu
2016-11-23 19:37 GMT+08:00 Jason Liu :
> Need ensure the cma reserved region not cross the low/high memory boundary
> when using the dynamic allocation methond through device-tree, otherwise,
> kernel will fail to boot up when cma reserved region cross how/high mem.
>
> Signed-off-by: Jason Liu 
> Cc: Laura Abbott 
> Cc: Frank Rowand 
> Cc: Rob Herring 
> Cc: sta...@vger.kernel.org
> ---
>  drivers/of/of_reserved_mem.c| 42 
> +++--
>  include/linux/of_reserved_mem.h |  3 ++-
>  2 files changed, 34 insertions(+), 11 deletions(-)


Rob, any comments about this patch?


Jason Liu

>
> diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
> index 366d8c3..852345a 100644
> --- a/drivers/of/of_reserved_mem.c
> +++ b/drivers/of/of_reserved_mem.c
> @@ -31,11 +31,15 @@
>
>  #if defined(CONFIG_HAVE_MEMBLOCK)
>  #include 
> -int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
> -   phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap,
> -   phys_addr_t *res_base)
> +int __init __weak early_init_dt_alloc_reserved_memory_arch(unsigned long 
> node,
> +   phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t 
> end,
> +   bool nomap, phys_addr_t *res_base)
>  {
> phys_addr_t base;
> +   phys_addr_t highmem_start;
> +
> +   highmem_start = __pa(high_memory - 1) + 1;
> +
> /*
>  * We use __memblock_alloc_base() because memblock_alloc_base()
>  * panic()s on allocation failure.
> @@ -53,15 +57,33 @@ int __init __weak 
> early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
> return -ENOMEM;
> }
>
> +   /*
> +* Sanity check for the cma reserved region:If the reserved region
> +* crosses the low/high memory boundary, try to fix it up and then
> +* fall back to allocate the cma region from the low memory space.
> +*/
> +
> +   if (IS_ENABLED(CONFIG_CMA)
> +   && of_flat_dt_is_compatible(node, "shared-dma-pool")
> +   && of_get_flat_dt_prop(node, "reusable", NULL) && !nomap) {
> +   if (base < highmem_start && (base + size) > highmem_start) {
> +   memblock_free(base, size);
> +   base = memblock_alloc_range(size, align, start,
> +   highmem_start, MEMBLOCK_NONE);
> +   if (!base)
> +   return -ENOMEM;
> +   }
> +   }
> +
> *res_base = base;
> if (nomap)
> return memblock_remove(base, size);
> return 0;
>  }
>  #else
> -int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
> -   phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap,
> -   phys_addr_t *res_base)
> +int __init __weak early_init_dt_alloc_reserved_memory_arch(unsigned long 
> node,
> +   phys_addr_t size, phys_addr_t align, phys_addr_t start, phys_addr_t 
> end,
> +   bool nomap, phys_addr_t *res_base)
>  {
> pr_err("Reserved memory not supported, ignoring region 0x%llx%s\n",
>   size, nomap ? " (nomap)" : "");
> @@ -155,8 +177,8 @@ static int __init __reserved_mem_alloc_size(unsigned long 
> node,
> end = start + dt_mem_next_cell(dt_root_size_cells,
>&prop);
>
> -   ret = early_init_dt_alloc_reserved_memory_arch(size,
> -   align, start, end, nomap, &base);
> +   ret = early_init_dt_alloc_reserved_memory_arch(node,
> +   size, align, start, end, nomap, 
> &base);
> if (ret == 0) {
> pr_debug("allocated memory for '%s' node: 
> base %pa, size %ld MiB\n",
> uname, &base,
> @@ -167,8 +189,8 @@ static int __init __reserved_mem_alloc_size(unsigned long 
> node,
> }
>
> } else {
> -   ret = early_init_dt_alloc_reserved_memory_arch(size, align,
> -   0, 0, nomap, &base);
> +   ret = early_init_dt_alloc_reserved_memory_arch(node,
> +   size, align, 0, 0, nomap, &base);
> if (ret == 0)
> pr_debug("allocated memory for '%s' node: base %pa, 
> size %ld MiB\n",
> uname, &base, (unsigned long)size / SZ_1M);
> diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
> index f8e1992..a6ee451 100644
> --- a/include/linux/of_reserved_mem.h
> +++ b/include/linux/of_reserved_mem.h
> @@ -34,7 +34,8 @@ int of_reserved_mem_device_init_by_idx(struct device *dev,
>struct device_node *np, int idx);
>  void of_reserved_mem

Re: [RFC 0/5] rcu: Introduce leaf_node_for_each_mask_possible_cpu() and its friend

2016-12-09 Thread Paul E. McKenney
On Sat, Dec 10, 2016 at 08:45:38AM +0800, Boqun Feng wrote:
> On Fri, Dec 09, 2016 at 03:49:45PM -0800, Paul E. McKenney wrote:
> > On Fri, Dec 09, 2016 at 04:48:22PM +0800, Boqun Feng wrote:
> > > Hi Paul,
> > > 
> > > While reading the discussion at:
> > > 
> > > https://marc.info/?l=linux-kernel&m=148044253400769
> > 
> > This discussion was for stalls specifically, rather than for routine
> > scans of the bitmasks.
> > 
> > But it does look to save some code, so worth looking into.
> > 
> > > I figured we might use this fact to save some extra checks in RCU core 
> > > code,
> > > currently we iterate over all the possible CPUs on a leaf node, check 
> > > whether
> > > they were masked in a certain mask and do something. However, given the 
> > > fact
> > > that the masks on a leaf node should always be sparse than the 
> > > corresponding
> > > part of cpu_possible_mask, we'd better iterate over all bits in a mask and
> > > check whether the corresponding CPU is possible or not.
> > > 
> > > So I made this RFC, I did a simple build/boot/rcutorture test on my box 
> > > with
> > > SMP=4, nothing bad happens. Currently I'm waiting for the 0day and trying 
> > > to
> > > > test this on a bigger system, in the meanwhile, looking forward to any
> > > comment and suggestion.
> > > 
> > > So thoughts?
> > 
> > By analogy with for_each_cpu() and for_each_possible_cpu(), the name
> > should instead be for_each_leaf_node_cpu(), the tradition of excessively
> > long names in RCU notwithstanding.  ;-)
> > 
> 
> Make sense ;-)
> 
> I think it's more appropriate to call it for_each_leaf_node_mask_cpu(),
> because we don't iterate all cpus of a leaf node. The word "possible"
> could be dropped because obviously we won't iterate over "impossible"
> cpus in a leaf node ;-)

C'mon, Boqun!  The for_each_leaf_node_cpu() is not only consistent
with the for_each_cpu() family, it is shorter!  ;-)

Thanx, Paul

> Will modify that in next version.
> 
> Regards,
> Boqun
> 
> > Thanx, Paul
> > 




Re: netlink: GPF in sock_sndtimeo

2016-12-09 Thread Cong Wang
On Fri, Dec 9, 2016 at 3:01 AM, Richard Guy Briggs  wrote:
> On 2016-12-08 22:57, Cong Wang wrote:
>> On Thu, Dec 8, 2016 at 10:02 PM, Richard Guy Briggs  wrote:
>> > I also tried to extend Cong Wang's idea to attempt to proactively respond 
>> > to a
>> > NETLINK_URELEASE on the audit_sock and reset it, but ran into a locking 
>> > error
>> > stack dump using mutex_lock(&audit_cmd_mutex) in the notifier callback.
>> > Eliminating the lock since the sock is dead anyway eliminates the error.
>> >
>> > Is it safe?  I'll resubmit if this looks remotely sane.  Meanwhile I'll 
>> > try to
>> > get the test case to compile.
>>
>> It doesn't look safe, because 'audit_sock', 'audit_nlk_portid' and 
>> 'audit_pid'
>> are updated as a whole and race between audit_receive_msg() and
>> NETLINK_URELEASE.
>
> This is what I expected and why I originally added the mutex lock in the
> callback...  The dumps I got were bare with no wrapper identifying the
> process context or specific error, so I'm at a bit of a loss how to
> solve this (without thinking more about it) other than instinctively
> removing the mutex.

Netlink notifier can safely be converted to blocking one, I will send
a patch.

But I seriously doubt you really need NETLINK_URELEASE here,
it adds nothing but overhead, b/c the netlink notifier is called on
every netlink socket in the system, but for net exit path, that is
relatively a slow path.

Also, kauditd_send_skb() needs audit_cmd_mutex too.

I will send a formal patch.

Thanks.


[PATCH] llist: Clarify comments about when locking is needed

2016-12-09 Thread Joel Fernandes
llist.h comments are a bit confusing about when locking is needed versus when
it isn't. Clarify these comments a bit more and be a bit more descriptive about
why locking is needed for llist_del_first.

Cc: Huang Ying 
Cc: Ingo Molnar 
Cc: Will Deacon 
Cc: Paul McKenney 
Cc: Mathieu Desnoyers 
Signed-off-by: Joel Fernandes 
---
 include/linux/llist.h | 37 +
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/include/linux/llist.h b/include/linux/llist.h
index fd4ca0b..31822bb 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -3,28 +3,33 @@
 /*
  * Lock-less NULL terminated single linked list
  *
- * If there are multiple producers and multiple consumers, llist_add
- * can be used in producers and llist_del_all can be used in
- * consumers.  They can work simultaneously without lock.  But
- * llist_del_first can not be used here.  Because llist_del_first
- * depends on list->first->next does not changed if list->first is not
- * changed during its operation, but llist_del_first, llist_add,
- * llist_add (or llist_del_all, llist_add, llist_add) sequence in
- * another consumer may violate that.
- *
- * If there are multiple producers and one consumer, llist_add can be
- * used in producers and llist_del_all or llist_del_first can be used
- * in the consumer.
- *
- * This can be summarized as follow:
+ * Cases where locking is not needed:
+ * If there are multiple producers and multiple consumers, llist_add can be
+ * used in producers and llist_del_all can be used in consumers simultaneously
+ * without locking. Also a single consumer can use llist_del_first while multiple
+ * producers simultaneously use llist_add, without any locking.
+ *
+ * Cases where locking is needed:
+ * If we have multiple consumers with llist_del_first used in one consumer, and
+ * llist_del_first or llist_del_all used in other consumers, then a lock is
+ * needed.  This is because llist_del_first depends on list->first->next not
+ * changing, but without lock protection, there's no way to be sure about that
+ * if a preemption happens in the middle of the delete operation and on being
+ * preempted back, the list->first is the same as before causing the cmpxchg in
+ * llist_del_first to succeed. For example, while a llist_del_first operation
+ * is in progress in one consumer, then - a llist_del_first, llist_add,
+ * llist_add (or llist_del_all, llist_add, llist_add) sequence in another
+ * consumer may cause violations.
+ *
+ * This can be summarized as follows:
  *
  *   |   add| del_first |  del_all
  * add   |- | - | -
  * del_first |  | L | L
  * del_all   |  |   | -
  *
- * Where "-" stands for no lock is needed, while "L" stands for lock
- * is needed.
+ * Where, a particular row's operation can happen concurrently with a column's
+ * operation, with "-" being no lock needed, while "L" being lock is needed.
  *
  * The list entries deleted via llist_del_all can be traversed with
  * traversing function such as llist_for_each etc.  But the list
-- 
2.8.0.rc3.226.g39d4020



Re: [PATCH] net: mlx5: Fix Kconfig help text

2016-12-09 Thread David Miller
From: Christopher Covington 
Date: Fri,  9 Dec 2016 16:53:05 -0500

> Since the following commit, Infiniband and Ethernet have not been
> mutually exclusive.
> 
> Fixes: 4aa17b28 mlx5: Enable mutual support for IB and Ethernet
> 
> Signed-off-by: Christopher Covington 

Applied.


Re: [PATCH net-next] net: macb: Added PCI wrapper for Platform Driver.

2016-12-09 Thread David Miller
From: Bartosz Folta 
Date: Fri, 9 Dec 2016 10:05:46 +

> There are hardware PCI implementations of Cadence GEM network controller. 
> This patch will allow to use such hardware with reuse of existing Platform 
> Driver.

Please properly format your commit message text to 80 columns.

> 
> Signed-off-by: Bartosz Folta 
> ---
>  drivers/net/ethernet/cadence/Kconfig|   9 ++
>  drivers/net/ethernet/cadence/Makefile   |   1 +
>  drivers/net/ethernet/cadence/macb.c |  31 +--
>  drivers/net/ethernet/cadence/macb_pci.c | 152 
> 
>  include/linux/platform_data/macb.h  |   6 ++
>  5 files changed, 194 insertions(+), 5 deletions(-)  create mode 100644 
> drivers/net/ethernet/cadence/macb_pci.c

This patch doesn't apply to net-next, please respin.


Re: [PATCH] x86/smpboot: Make logical package management more robust

2016-12-09 Thread Boris Ostrovsky



On 12/09/2016 06:00 PM, Thomas Gleixner wrote:

On Fri, 9 Dec 2016, Boris Ostrovsky wrote:

On 12/09/2016 05:06 PM, Thomas Gleixner wrote:

On Thu, 8 Dec 2016, Thomas Gleixner wrote:

Boris, can you please verify if that makes the
topology_update_package_map() call which you placed into the Xen cpu
starting code obsolete ?


Will do. I did test your patch but without removing
topology_update_package_map() call. It complained about package IDs
being wrong, but that's expected until I fix Xen part.


That should no longer be the case as I changed the approach to that
management thing.



I didn't notice this email before I sent the earlier message.

Is there anything else besides this patch that I should use? I applied 
it to Linus tree and it didn't apply cleanly (there was some fuzz and 
such) so I wonder whether I am missing something.


-boris


Re: [PATCH] x86/smpboot: Make logical package management more robust

2016-12-09 Thread Boris Ostrovsky



On 12/09/2016 06:02 PM, Boris Ostrovsky wrote:

On 12/09/2016 05:06 PM, Thomas Gleixner wrote:

On Thu, 8 Dec 2016, Thomas Gleixner wrote:

Boris, can you please verify if that makes the
topology_update_package_map() call which you placed into the Xen cpu
starting code obsolete ?


Will do. I did test your patch but without removing
topology_update_package_map() call. It complained about package IDs
being wrong, but that's expected until I fix Xen part.


Ignore my statement about earlier testing --- it was all on single-node 
machines.


Something is broken with multi-node on Intel, but failure modes are 
different. Prior to this patch build_sched_domain() reports an error and 
pretty soon we crash in scheduler (don't remember off the top of my 
head). With patch applied I crash much later, when one of the drivers 
does kmalloc_node(.., cpu_to_node(cpu)) and cpu_to_node() returns 1, 
which should never happen ("x86: Booted up 1 node, 32 CPUs" is reported, 
for example).


2-node AMD box doesn't have these problems.

I haven't upgraded the Intel machine for about a month but this all must 
have happened in 4.9 timeframe.


So I can't answer your question since we clearly have other problems on 
Xen. I will be looking into this.


-boris


Re: [RFC][PATCHv5 3/7] printk: introduce per-cpu safe_print seq buffer

2016-12-09 Thread Sergey Senozhatsky
On (12/09/16 17:46), Petr Mladek wrote:
> > -/*
> > - * Safe printk() for NMI context. It uses a per-CPU buffer to
> > - * store the message. NMIs are not nested, so there is always only
> > - * one writer running. But the buffer might get flushed from another
> > - * CPU, so we need to be careful.
> > - */
> 
> We should keep/create a good description here because the function
> has a non-trivial code. What about something like?
> 

which is really not related to this patch set.


> >  * Make sure that all old data have been read before the buffer was
> > @@ -261,14 +263,95 @@ void printk_safe_flush_on_panic(void)
> > printk_safe_flush();
> >  }
> >  
> > +#ifdef CONFIG_PRINTK_NMI
> > +/*
> > + * Safe printk() for NMI context. It uses a per-CPU buffer to
> > + * store the message. NMIs are not nested, so there is always only
> > + * one writer running. But the buffer might get flushed from another
> > + * CPU, so we need to be careful.
> > + */
> 
> Hmm, I wanted to describe why we need another per-CPU buffer in NMI
> and I am not sure that we really need it.

NMI-printk can interrupt safe-printk's vsnprintf() in the middle of
the "while (*fmt)" loop: safe-priNMI-PRINTK


-ss


[PATCH] drm/bridge: analogix_dp: set the DPCD600 during disabling the psr

2016-12-09 Thread Caesar Wang
It looks like the BOE panel FW didn't ack the DPCD600 signal from the host
device, which causes the panel to hang on the startup display.
The root cause is that we use the fast link mode when entering and exiting
the psr; this issue is gone if switching the fast link to main link mode.

Signed-off-by: Caesar Wang 
---

 drivers/gpu/drm/bridge/analogix/analogix_dp_core.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c 
b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
index 6e0447f..6a5347b 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
@@ -133,6 +133,7 @@ int analogix_dp_disable_psr(struct device *dev)
 {
struct analogix_dp_device *dp = dev_get_drvdata(dev);
struct edp_vsc_psr psr_vsc;
+   int ret;
 
if (!dp->psr_support)
return -EINVAL;
@@ -147,6 +148,10 @@ int analogix_dp_disable_psr(struct device *dev)
psr_vsc.DB0 = 0;
psr_vsc.DB1 = 0;
 
+   ret = drm_dp_dpcd_writeb(&dp->aux, DP_SET_POWER, DP_SET_POWER_D0);
+   if (ret != 1)
+   dev_err(dp->dev, "Failed to set DP Power0 %d\n", ret);
+
analogix_dp_send_psr_spd(dp, &psr_vsc);
return 0;
 }
-- 
2.7.4



Re: [PATCH] Add +~800M crashkernel explaination

2016-12-09 Thread Baoquan He
On 12/09/16 at 05:22pm, Robert LeBlanc wrote:
> When trying to configure crashkernel greater than about 800 MB, the
> kernel fails to allocate memory on x86 and x86_64. This is due to an
> undocumented limit that the crashkernel and other low memory items must
> be allocated below 896 MB unless the ",high" option is given. This
> updates the documentation to explain this and what I understand the
> limitations to be on the option.

This is true, but not very accurate. You found it's about 800M, it's
because usually the current kernel needs about 40M space to run, and some
extra reservation before reserve_crashkernel invocation, another ~10M.
However it's normal case, people may build modules into or have some
special code to bloat kernel. This patch makes sense to address the
low|high issue, it might be not good so determined to say ~800M.

> 
> Signed-off-by: Robert LeBlanc 
> ---
>  Documentation/kdump/kdump.txt | 22 +-
>  1 file changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
> index b0eb27b..aa3efa8 100644
> --- a/Documentation/kdump/kdump.txt
> +++ b/Documentation/kdump/kdump.txt
> @@ -256,7 +256,9 @@ While the "crashkernel=size[@offset]" syntax is 
> sufficient for most
>  configurations, sometimes it's handy to have the reserved memory dependent
>  on the value of System RAM -- that's mostly for distributors that pre-setup
>  the kernel command line to avoid a unbootable system after some memory has
> -been removed from the machine.
> +been removed from the machine. If you need to allocate more than ~800M
> +for x86 or x86_64 then you must use the simple format as the format
> +',high' conflicts with the separators of ranges.
>  
>  The syntax is:
>  
> @@ -282,11 +284,21 @@ Boot into System Kernel
>  1) Update the boot loader (such as grub, yaboot, or lilo) configuration
> files as necessary.
>  
> -2) Boot the system kernel with the boot parameter "crashkernel=Y@X",
> +2) Boot the system kernel with the boot parameter "crashkernel=Y[@X | 
> ,high]",
> where Y specifies how much memory to reserve for the dump-capture kernel
> -   and X specifies the beginning of this reserved memory. For example,
> -   "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory
> -   starting at physical address 0x01000000 (16MB) for the dump-capture 
> kernel.
> +   and X specifies the beginning of this reserved memory or ',high' to load 
> in
> +   high memory. For example, "crashkernel=64M@16M" tells the system
> +   kernel to reserve 64 MB of memory starting at physical address
> +   0x01000000 (16MB) for the dump-capture kernel.
> +
> +   Specifying "crashkernel=1G,high" tells the system kernel to reserve 1 GB
> +   of memory using high memory for the dump-capture kernel, there may also
> +   be some low memory allocated as well. If you need more than ~800M for
> +   the crash kernel to operate (volumes on FC/iSCSI, large volumes, systemd
> +   added to the previous, etc), you need to specify ',high' since without
> +   it crashkernel has to try and fit under 896M along with some other
> +   items and will fail to allocate memory. High memory may only be relevant
> +   on x86 and x86_64.
>  
> On x86 and x86_64, use "crashkernel=64M@16M".
>  
> -- 
> 2.10.2
> 
> 
> ___
> kexec mailing list
> ke...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec


Re: [PATCH 1/2] net: ethernet: sxgbe: remove private tx queue lock

2016-12-09 Thread Lino Sanfilippo
Hi,

On 09.12.2016 12:21, Pavel Machek wrote:
> On Fri 2016-12-09 00:19:43, Francois Romieu wrote:
>> Lino Sanfilippo  :
>> [...]
>> > OTOH Pavel said that he actually could produce a deadlock. Now I wonder if
>> > this is caused by that locking scheme (in a way I have not figured out yet)
>> > or if it is a different issue.
>> 
>> stmmac_tx_err races with stmmac_xmit.
> 
> Umm, yes, that looks real.
> 
> And that means that removing tx_lock will not be completely trivial
> :-(. Lino, any ideas there?
> 

Ok, the race is there but it looks like a problem that is not related to 
the use or removal of the private lock.
By a glimpse into other drivers (e.g sky2 or e1000), a possible way to handle a 
tx error is to start a separate task and restart the tx path in that task 
instead of
the irq handler (or timer in case of the watchdog).

In that task we could do:
1. deactivate napi
2. deactivate irqs
3. wait for running napi/irqs to complete (_sync)
4. call stmmac_tx_err()
5. reenable napi
6. reenable irqs

We have to ensure that no xmit() is executing while stmmac_tx_err() does the 
cleanup,
so stmmac_tx_err() should IMO rather call netif_tx_disable() instead of 
netif_stop_queue()
(the former grabs the xmit lock before it sets __QUEUE_STATE_DRV_XOFF to disable
the queue).

Regards,
Lino


Re: [PATCH net-next 1/2] net: phy: add extension of phy-mode for XLGMII

2016-12-09 Thread Jie Deng


On 2016/12/10 0:39, Andrew Lunn wrote:
> On Fri, Dec 09, 2016 at 01:19:07PM +0800, Jie Deng wrote:
>>
>> On 2016/12/9 6:15, Florian Fainelli wrote:
>>> On 12/06/2016 07:57 PM, Jie Deng wrote:
>>>> This patch adds phy-mode support for Synopsys XLGMAC
>>> The functional changes look good, but I would like to see some
>>> description of what the XL part stands for here.
>>>
>>> While you are modifying this, do you also mind submitting a Device Tree
>>> specification change:
>>>
>>> https://www.devicetree.org/specifications/
>>>
>>> Thanks!
>> Thank you for the information.
>>
>> Currently, the XLGMAC is a new IP from Synopsys.
> I think Florian wants to know about the IEEE standard or what ever
> which defines what the phy-mode XLGMAC is, in the same way there are
> standards for RGMII, SGMII, etc.
>
> Andrew
Understood! Thank you !


Re: [PATCH v2 2/2] staging: iio: ad7606: move out of staging

2016-12-09 Thread kbuild test robot
Hi Eva,

[auto build test WARNING on iio/togreg]
[also build test WARNING on next-20161209]
[cannot apply to v4.9-rc8]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Eva-Rachel-Retuya/staging-iio-ad7606-move-driver-out-of-staging/20161210-041408
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio.git togreg
config: xtensa-allyesconfig (attached as .config)
compiler: xtensa-linux-gcc (GCC) 4.9.0
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=xtensa 

All warnings (new ones prefixed by >>):

   In file included from ./arch/xtensa/include/generated/asm/div64.h:1:0,
from include/linux/kernel.h:142,
from include/linux/interrupt.h:5,
from drivers/iio/adc/ad7606.c:9:
   drivers/iio/adc/ad7606.c: In function 'ad7606_probe':
   include/asm-generic/div64.h:207:28: warning: comparison of distinct pointer 
types lacks a cast
 (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
   ^
   drivers/iio/adc/ad7606.c:440:27: note: in expansion of macro 'do_div'
  st->scale_avail[i][1] = do_div(scale, 1) * 10;
  ^
>> drivers/iio/adc/ad7606.c:440:3: warning: right shift count >= width of type
  st->scale_avail[i][1] = do_div(scale, 1) * 10;
  ^
   In file included from ./arch/xtensa/include/generated/asm/div64.h:1:0,
from include/linux/kernel.h:142,
from include/linux/interrupt.h:5,
from drivers/iio/adc/ad7606.c:9:
   include/asm-generic/div64.h:224:11: warning: passing argument 1 of 
'__div64_32' from incompatible pointer type
  __rem = __div64_32(&(n), __base); \
  ^
   drivers/iio/adc/ad7606.c:440:27: note: in expansion of macro 'do_div'
  st->scale_avail[i][1] = do_div(scale, 1) * 10;
  ^
   include/asm-generic/div64.h:198:17: note: expected 'uint64_t *' but argument 
is of type 'unsigned int *'
extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
^

vim +440 drivers/iio/adc/ad7606.c

b9618c0c drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-02-22  
424  
e61181d0 drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-05-18  
425   st = iio_priv(indio_dev);
e61181d0 drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-05-18  
426  
b9618c0c drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-02-22  
427   st->dev = dev;
b9618c0c drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-02-22  
428   st->bops = bops;
b9618c0c drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-02-22  
429   st->base_address = base_address;
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
430   /* tied to logic low, analog input range is +/- 5V */
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
431   st->range = 0;
e79e8027 drivers/staging/iio/adc/ad7606_core.c Lars-Peter Clausen 2016-10-19  
432   st->oversampling = 1;
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
433   /* Populate the scales, 2.5/2**16 then 5/2**16 */
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
434   range = 5000;
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
435   for (i = 0, j = 1; i < ARRAY_SIZE(st->scale_avail); i++, j--) {
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
436   scale = ((u64)range * 1) >>
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
437   ad7606_channels[1].scan_type.realbits;
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
438   scale >>= j;
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
439  
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09 
@440   st->scale_avail[i][1] = do_div(scale, 1) * 10;
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
441   st->scale_avail[i][0] = scale;
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
442   }
fa23105f drivers/staging/iio/adc/ad7606.c  Lars-Peter Clausen 2016-10-19  
443   INIT_WORK(&st->poll_work, &ad7606_poll_bh_to_ring);
b9618c0c drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-02-22  
444  
12

Re: [PATCH] Input: i8042-x86ia64io.h - Comment else/endif of CONFIG_PNP

2016-12-09 Thread Dmitry Torokhov
On Fri, Dec 09, 2016 at 09:55:09PM -0200, Marcos Paulo de Souza wrote:
> As this define check is huge, this makes it easier to read the code.
> 
> Signed-off-by: Marcos Paulo de Souza 

Applied, thank you.

> ---
>  While reviewing patches from Dmitry about presence of 8042, it makes it
>  much easier to understand the ifdefs...
> 
>  drivers/input/serio/i8042-x86ia64io.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/input/serio/i8042-x86ia64io.h 
> b/drivers/input/serio/i8042-x86ia64io.h
> index 073246c..ddd3132 100644
> --- a/drivers/input/serio/i8042-x86ia64io.h
> +++ b/drivers/input/serio/i8042-x86ia64io.h
> @@ -1131,10 +1131,10 @@ static int __init i8042_pnp_init(void)
>   return 0;
>  }
>  
> -#else
> +#else  /* !CONFIG_PNP */
>  static inline int i8042_pnp_init(void) { return 0; }
>  static inline void i8042_pnp_exit(void) { }
> -#endif
> +#endif /* CONFIG_PNP */
>  
>  static int __init i8042_platform_init(void)
>  {
> -- 
> 2.9.3
> 

-- 
Dmitry


Re: [PATCH] driver core: flush async calls before testing driver removal

2016-12-09 Thread Dmitry Torokhov
On Fri, Dec 9, 2016 at 4:15 PM, Vladimir Zapolskiy  wrote:
> If CONFIG_DEBUG_TEST_DRIVER_REMOVE option is enabled a number of false
> positives are reported for ATA controller drivers, because ATA port
> probes are done asynchronously, and the same problem may also touch
> other asynchronously probed drivers.
>
> To reduce the rate of false reports on boot call async_synchronize_full()
> before attempting to remove a driver, the same is done in delete_module()
> syscall for all possible drivers and in __device_release_driver() function
> for asynchronously probed drivers.

I'd say CONFIG_DEBUG_TEST_DRIVER_REMOVE did what it was supposed to do
and uncovered a bug in ATA drivers. Since driver core did not
asynchronously schedule those actions it should not wait for their
completion either, but either ATA core or drivers should wait for
probing to complete before allowing remove() methods to run.

>
> Fixes: bea5b158ff0d ("driver core: add test of driver remove calls during 
> probe")
> Suggested-by: Tejun Heo 
> Signed-off-by: Vladimir Zapolskiy 
> ---
> Some time ago the issue was discussed on the linux-ide mailing list, see
>
>   https://www.spinics.net/lists/linux-ide/msg53481.html
>
>  drivers/base/dd.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index d76cd97..a4feecf 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -384,6 +384,8 @@ static int really_probe(struct device *dev, struct 
> device_driver *drv)
> if (test_remove) {
> test_remove = false;
>
> +   async_synchronize_full();
> +
> if (dev->bus->remove)
> dev->bus->remove(dev);
> else if (drv->remove)
> --
> 2.10.2
>

Thanks.

-- 
Dmitry


Re: [PATCH] x86/tsc: RFC: re-synchronize TSCs to boot cpu TSC

2016-12-09 Thread Roland Scheidegger
Am 09.12.2016 um 23:59 schrieb Thomas Gleixner:
> On Fri, 9 Dec 2016, Roland Scheidegger wrote:
> 
> Cc'ed someone from Dell. 
> 
>> Am 09.12.2016 um 18:33 schrieb Thomas Gleixner:
>>> Can you add the patch below to gather more information? There is a hunk in
>>> there with an '#if 0' which sets the TSC ADJUST to 0 on boot, which you can
>>> turn on as second step.
>>
>> Ok, here's the results:
>> ...
>> TSC ADJUST synchronize: Reference CPU0: -2820267100 CPU1: -2822498296
>> TSC target sync skipped
>> smpboot: Vector locked
>> smpboot: Vector setup done
>> smpboot: Clock setup
>> TSC source sync skipped
>> smpboot: Target CPU is online
> 
> I did not expect that to happen. Now I'm puzzled and curious where the
> machine gets lost after that. See below.
> 
>> With the #if 0 block activated, it boots up fine, the output was:
> 
> That does not make any sense at all, but yes, nothing in this context makes
> sense.
> 
>> [1.038892] x86: Booting SMP configuration:
>> [1.038930]  node  #0, CPUs:#1
>> [0.171851] TSC ADJUST: CPU1: -2830353064 218577682002
>> [1.117495] TSC source sync 0 -> 1 runs 3
>> [0.171852] TSC ADJUST differs: Reference CPU0: -2828600940 CPU1:
>> -2830353064
>> [0.171853] TSC ADJUST synchronize: Reference CPU0: 0 CPU1: -2830353064
>> [1.117497] TSC target sync skip
> 
>> (And fwiw with my quick hack the lockups disappear too when I change that
>> back to blast a zero into TSC_ADJ for all cpus.)
> 
> Right, That's what that hunk does as well.
> 
> Now what's interesting is that the adjustement of CPU1 in the non write to
> zero case results in the following:
> 
> TSC ADJUST: CPU1: -2830353064 218577682002 <-- TSC value
> TSC ADJUST differs: Reference CPU0: -2828600940 CPU1: -2830353064
> 
> We write CPU1 adjust register to -2828600940 which makes the TSC on CPU1
> jump forwards by -2828600940 - -2830353064 = 1752124 cycles.
> 
> In the write to zero case the jump is forward as well, but this time it's
> huge, i.e. 2830353064 cycles.
> 
> I tried to wreckage the TSC by writing similar values to the adjust MSR on
> early boot, but independent of the values and independent of the write to
> zero part the machine comes up happily all the time.
> 
> The only difference is that my machine has a somewhat saner BIOS. So the
> thing might just die in the value add SMM crap, but who knows.
> 
> In the patch below is another bunch of debug prints which emit the state
> information of CPU1 during bringup. Maybe that gives a hint where the
> system gets stuck when you disable the 'write to zero' magic again.
> 
> The NMI watchdog does not catch anything, right?
Nope. (Though as mentioned earlier, with my hack when not writing zero
it did - but the lockup there was later after all 16 cpus were online,
and I only really tried that with the ubuntu 4.4 kernel. I never got to
see the full output from that NMI though due to limited screen space, my
attempts to try anything different than text mode were met with a blank
screen, and from the parts I did see I didn't really see anything
interesting albeit that's not saying much as I really have no idea about
that code...)

With the new patch here's the output (albeit the typing gets a bit
annoying...)
...
Invoking state 32 CB replay_prepare_cpu+0x0/0xe0
CB returned 0
Invoking state 35 CB rcutree_prepare_cpu+0x0/0x50
CB returned 0
Invoking state 41 CB notify_prepare+0x0/0xa0
CB returned 0
Invoking state 48 CB bringup_cpu+0x0/0x90
x86: Booting SMP configuration:
 node  #0, CPUs:   #1
Invoking state 51 CB sched_cpu_starting+0x0/0x60
CB returned 0
Invoking state 62 CB x86_pmu_starting_cpu+0x0/0x20
CB returned 0
TSC ADJUST: CPU1: -2846131604 175264328618
TSC ADJUST differs: Reference CPU0: -2843967660 CPU1: -2846131604
TSC ADJUST synchronize: Reference CPU0: -2843967660 CPU1: -2846131604
TSC target sync skip
smpboot: Vector locked
TSC source sync 0 -> 1 runs 3
smpboot: Vector setup done
smpboot: Clock setup
TSC source sync skipped
smpboot: Target CPU is online


> 
>> The system also came back up fine from suspend with this (well - still
>> minus graphics...), however disabled tsc clocksource:
>>
>> [  579.931739] Enabling non-boot CPUs ...
>> [  579.943107] smpboot: Booting Node 0 Processor 1 APIC 0x2
>> [  579.943189] TSC ADJUST: CPU1: -1504429974 21601834126
> 
> Fun, yet another adjust value. Are they set by a random number generator?
> 
>> [  579.944093] CPU1 is up
> 
>> [  580.458983] clocksource: timekeeping watchdog on CPU1: Marking
>> clocksource 'tsc' as unstable because the skew is too large:
>> [  580.458985] clocksource:   'hpet' wd_now: 587c1
>> wd_last: 437c7 mask: 
>> [  580.458986] clocksource:   'tsc' cs_now:
>> 563963cd8 cs_last: 508f5a02a mask: 
> 
> Ok, that's caused by the fact that we do not sanitize the TSC adjust
> register on the boot CPU in the resume path.
Yep. (Which is why my hack hacked the restore path as well - I am still
really

Re: [PATCH 4/5] 9p: introduce async read requests

2016-12-09 Thread Al Viro
On Thu, Dec 08, 2016 at 12:59:05PM -0800, Stefano Stabellini wrote:


> + } else {
> + req = p9_client_get_req(clnt, P9_TREAD, "dqd", 
> fid->fid, offset, rsize);
> + if (IS_ERR(req)) {
> + *err = PTR_ERR(req);
> + break;
> + }
> + req->rsize = iov_iter_get_pages_alloc(to, 
> &req->pagevec, 
> + (size_t)rsize, &req->offset);
> + req->kiocb = iocb;
> + for (i = 0; i < req->rsize; i += PAGE_SIZE)
> + 
> page_cache_get_speculative(req->pagevec[i/PAGE_SIZE]);
> + req->callback = p9_client_read_complete;
> +
> + *err = clnt->trans_mod->request(clnt, req);
> + if (*err < 0) {
> + clnt->status = Disconnected;
> + release_pages(req->pagevec,
> + (req->rsize + PAGE_SIZE - 1) / 
> PAGE_SIZE,
> + true);
> + kvfree(req->pagevec);
> + p9_free_req(clnt, req);
> + break;
> + }
> +
> + *err = -EIOCBQUEUED;

IDGI.  AFAICS, your code will result in shitloads of short reads - every
time when you give it a multi-iovec array, only the first one will be
issued and the rest won't be even looked at.  Sure, it is technically
legal, but I very much doubt that aio users will be happy with that.

What am I missing here?


Re: [PATCH 09/10] s390/cputime: delayed accounting of system time

2016-12-09 Thread Frederic Weisbecker
On Tue, Dec 06, 2016 at 03:32:22AM +0100, Frederic Weisbecker wrote:
> From: Martin Schwidefsky 
> 
> The account_system_time() function is called with a cputime that
> occurred while running in the kernel. The function detects which
> context the CPU is currently running in and accounts the time to
> the correct bucket. This forces the arch code to account the
> cputime for hardirq and softirq immediately.
> 
> Such accounting function can be costly and perform unwelcome divisions
> and multiplications, among others.
> 
> The arch code can delay the accounting for system time. For s390
> the accounting is done once per timer tick and for each task switch.
> 
> Signed-off-by: Martin Schwidefsky 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: Heiko Carstens 
> Cc: Martin Schwidefsky 
> Cc: Tony Luck 
> Cc: Fenghua Yu 
> Cc: Peter Zijlstra 
> Cc: Rik van Riel 
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: Stanislaw Gruszka 
> Cc: Wanpeng Li 
> [rebase against latest cputime tree, massaged changelog accordingly]
> Signed-off-by: Frederic Weisbecker 

Looking at this patch again, I think I need to do another pass on it.
Comments below:

>  /*
>   * Update process times based on virtual cpu times stored by entry.S
>   * to the lowcore fields user_timer, system_timer & steal_clock.
>   */
>  static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
>  {
> - u64 timer, clock, user, system, steal;
> - u64 user_scaled, system_scaled;
> + u64 timer, clock, user, guest, system, hardirq, softirq, steal;
>  
>   timer = S390_lowcore.last_update_timer;
>   clock = S390_lowcore.last_update_clock;
> @@ -110,36 +119,57 @@ static int do_account_vtime(struct task_struct *tsk, 
> int hardirq_offset)
>  #endif
>   : "=m" (S390_lowcore.last_update_timer),
> "=m" (S390_lowcore.last_update_clock));
> - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
> - S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
> + clock = S390_lowcore.last_update_clock - clock;
> + timer -= S390_lowcore.last_update_timer;
> +
> + if ((tsk->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0))
> + S390_lowcore.guest_timer += timer;
> + else if (hardirq_count() - hardirq_offset)
> + S390_lowcore.hardirq_timer += timer;

We should get rid of the hardirq_offset argument, it doesn't really make sense
anymore. Also it makes the accounting buggy now. It's called from the tick
through account_user_time() with hardirq_offset=1, so the irq time is 
incorrectly
accumulated as system time. Guest time may be incorrect too.

In fact it may have been buggy even before this patchset because 
vtime_account_user()
isn't only called from the tick but also from task switch, and hardirq_offset 
remains 1
for those two cases. Not good.

> + else if (in_serving_softirq())
> + S390_lowcore.softirq_timer += timer;
> + else
> + S390_lowcore.system_timer += timer;
>  
>   /* Update MT utilization calculation */
>   if (smp_cpu_mtid &&
>   time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies)))
>   update_mt_scaling();
>  
> + /* Calculate cputime delta */
>   user = S390_lowcore.user_timer - tsk->thread.user_timer;
> - S390_lowcore.steal_timer -= user;
>   tsk->thread.user_timer = S390_lowcore.user_timer;
> -
> + guest = S390_lowcore.guest_timer - tsk->thread.guest_timer;
> + tsk->thread.guest_timer = S390_lowcore.guest_timer;
>   system = S390_lowcore.system_timer - tsk->thread.system_timer;
> - S390_lowcore.steal_timer -= system;
>   tsk->thread.system_timer = S390_lowcore.system_timer;
> + hardirq = S390_lowcore.hardirq_timer - tsk->thread.hardirq_timer;
> + tsk->thread.hardirq_timer = S390_lowcore.hardirq_timer;
> + softirq = S390_lowcore.softirq_timer - tsk->thread.softirq_timer;
> + tsk->thread.softirq_timer = S390_lowcore.softirq_timer;
> + S390_lowcore.steal_timer +=
> + clock - user - guest - system - hardirq - softirq;
>  
> - user_scaled = user;
> - system_scaled = system;
> - /* Do MT utilization scaling */
> - if (smp_cpu_mtid) {
> - u64 mult = __this_cpu_read(mt_scaling_mult);
> - u64 div = __this_cpu_read(mt_scaling_div);
> + /* Push account value */
> + if (user) {
> + account_user_time(tsk, user);
> + tsk->utimescaled += scale_vtime(user);
> + }
>  
> - user_scaled = (user_scaled * mult) / div;
> - system_scaled = (system_scaled * mult) / div;
> + if (guest) {
> + account_guest_time(tsk, guest);
> + tsk->utimescaled += scale_vtime(guest);
>   }
> - account_user_time(tsk, user);
> - tsk->utimescaled += user_scaled;
> - account_system_time(tsk, hardirq_offset, system);
> - tsk->stimescaled += system_scal

Re: [PATCH 0/2] Determine kernel text mapping size at runtime for x86_64

2016-12-09 Thread Baoquan He
On 12/08/16 at 02:00pm, Dave Anderson wrote:
> 
> 
> - Original Message -
> > On Wed, Dec 7, 2016 at 11:56 PM, Baoquan He  wrote:
> > > Dave Anderson ever told in Crash utility he makes judgement whether it's
> > > a kaslr kernel by size of KERNEL_IMAGE_SIZE. As long as it's 1G, it's
> > > recognized as kaslr. Then the current upstream kernel has a wrong 
> > > behaviour,
> > > it sets KERNEL_IMAGE_SIZE as 1G as long as CONFIG_RANDOMIZE_BASE is 
> > > enabled,
> > > though people specify "nokaslr" into cmdline to disable kaslr explicitly.
> > 
> > I'm not sure that's the correct solution to the Crash utility -- the
> > kaslr-ness of a kernel should be already exposed in the dump with the
> > kaslr_enabled variable yes?
> 
> The crash utility doesn't use KERNEL_IMAGE_SIZE to determine whether
> KASLR is in play, but rather to determine the base of the modules virtual
> address space (i.e, the same way the kernel does).  And then it uses that
> value in a couple other places.

Then I got it wrong.

The current code makes it the same:

#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)

With this change, Crash doesn't need to change.

Thanks
Baoquan

> 
> 
> > 
> > > So in this patchset, made changes to determine the size of kernel text
> > > mapping
> > > area at runtime. If "nokaslr" specified, kernel mapping size is 512M 
> > > though
> > > CONFIG_RANDOMIZE_BASE is enabled.
> > 
> > This seems to make the non-KASLR case more consistent, so I'm fine
> > with the idea. Once the build-bots are happy with everything, consider
> > the series:
> > 
> > Acked-by: Kees Cook 
> > 
> > Thanks!
> > 
> > -Kees
> > 
> > >
> > > Baoquan He (2):
> > >   x86/64: Make kernel text mapping always take one whole page table in
> > > early boot code
> > >   x86/KASLR/64: Determine kernel text mapping size at runtime
> > >
> > >  arch/x86/boot/compressed/kaslr.c| 15 ++-
> > >  arch/x86/include/asm/kaslr.h|  1 +
> > >  arch/x86/include/asm/page_64_types.h| 20 
> > >  arch/x86/include/asm/pgtable_64_types.h |  2 +-
> > >  arch/x86/kernel/head64.c| 11 ++-
> > >  arch/x86/kernel/head_64.S   | 16 +---
> > >  arch/x86/mm/dump_pagetables.c   |  3 ++-
> > >  arch/x86/mm/init_64.c   |  2 +-
> > >  arch/x86/mm/physaddr.c  |  6 +++---
> > >  9 files changed, 45 insertions(+), 31 deletions(-)
> > >
> > > --
> > > 2.5.5
> > >
> > 
> > 
> > 
> > --
> > Kees Cook
> > Nexus Security
> > 


Re: [PATCH 7/7] hwrng: core: Remove two unused include

2016-12-09 Thread kbuild test robot
Hi Corentin,

[auto build test ERROR on char-misc/char-misc-testing]
[also build test ERROR on v4.9-rc8 next-20161209]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Corentin-Labbe/hwrng-core-do-not-use-multiple-blank-lines/20161210-072632
config: i386-randconfig-x007-201649 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All error/warnings (new ones prefixed by >>):

   In file included from include/linux/linkage.h:4:0,
from include/linux/kernel.h:6,
from include/linux/delay.h:10,
from drivers/char/hw_random/core.c:13:
   drivers/char/hw_random/core.c: In function 'rng_dev_open':
>> drivers/char/hw_random/core.c:169:11: error: dereferencing pointer to 
>> incomplete type 'struct file'
 if ((filp->f_mode & FMODE_READ) == 0)
  ^
   include/linux/compiler.h:149:30: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^~~~
>> drivers/char/hw_random/core.c:169:2: note: in expansion of macro 'if'
 if ((filp->f_mode & FMODE_READ) == 0)
 ^~
>> drivers/char/hw_random/core.c:169:22: error: 'FMODE_READ' undeclared (first 
>> use in this function)
 if ((filp->f_mode & FMODE_READ) == 0)
 ^
   include/linux/compiler.h:149:30: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^~~~
>> drivers/char/hw_random/core.c:169:2: note: in expansion of macro 'if'
 if ((filp->f_mode & FMODE_READ) == 0)
 ^~
   drivers/char/hw_random/core.c:169:22: note: each undeclared identifier is 
reported only once for each function it appears in
 if ((filp->f_mode & FMODE_READ) == 0)
 ^
   include/linux/compiler.h:149:30: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^~~~
>> drivers/char/hw_random/core.c:169:2: note: in expansion of macro 'if'
 if ((filp->f_mode & FMODE_READ) == 0)
 ^~
>> drivers/char/hw_random/core.c:171:21: error: 'FMODE_WRITE' undeclared (first 
>> use in this function)
 if (filp->f_mode & FMODE_WRITE)
^
   include/linux/compiler.h:149:30: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^~~~
   drivers/char/hw_random/core.c:171:2: note: in expansion of macro 'if'
 if (filp->f_mode & FMODE_WRITE)
 ^~
   drivers/char/hw_random/core.c: In function 'rng_dev_read':
>> drivers/char/hw_random/core.c:221:23: error: 'O_NONBLOCK' undeclared (first 
>> use in this function)
!(filp->f_flags & O_NONBLOCK));
  ^~
   drivers/char/hw_random/core.c: At top level:
>> drivers/char/hw_random/core.c:272:21: error: variable 'rng_chrdev_ops' has 
>> initializer but incomplete type
static const struct file_operations rng_chrdev_ops = {
^~~
>> drivers/char/hw_random/core.c:273:2: error: unknown field 'owner' specified 
>> in initializer
 .owner  = THIS_MODULE,
 ^
   In file included from include/linux/linkage.h:6:0,
from include/linux/kernel.h:6,
from include/linux/delay.h:10,
from drivers/char/hw_random/core.c:13:
   include/linux/export.h:37:21: warning: excess elements in struct initializer
#define THIS_MODULE ((struct module *)0)
^
>> drivers/char/hw_random/core.c:273:12: note: in expansion of macro 
>> 'THIS_MODULE'
 .owner  = THIS_MODULE,
   ^~~
   include/linux/export.h:37:21: note: (near initialization for 
'rng_chrdev_ops')
#define THIS_MODULE ((struct module *)0)
^
>> drivers/char/hw_random/core.c:273:12: note: in expansion of macro 
>> 'THIS_MODULE'
 .owner  = THIS_MODULE,
   ^~~
>> drivers/char/hw_random/core.c:274:2: error: unknown field 'open' specified 
>> in initializer
 .open  = rng_dev_open,
 ^
>> drivers/char/hw_random/core.c:274:11: warning: excess elements in struct 
>> initializer
 .open  = rng_dev_open,
  ^~~~
   drivers/char/hw_random/core.c:274:11: note: (near initialization for 
'rng_chrdev_ops')
>> d

[PATCH] jump label: pass kbuild_cflags when checking for asm goto support

2016-12-09 Thread David Lin
Some versions of ARM GCC compiler such as Android toolchain throws in a
'-fpic' flag by default. This causes the gcc-goto check script to fail
although some config would have '-fno-pic' flag in the KBUILD_CFLAGS.

This patch passes the KBUILD_CFLAGS to the check script so that the
script does not rely on the default config from different compilers.

Signed-off-by: David Lin 
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 694111b..f667daa 100644
--- a/Makefile
+++ b/Makefile
@@ -790,7 +790,7 @@ KBUILD_CFLAGS   += $(call 
cc-option,-Werror=incompatible-pointer-types)
 KBUILD_ARFLAGS := $(call ar-option,D)
 
 # check for 'asm goto'
-ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
+ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) 
$(KBUILD_CFLAGS)), y)
KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
 endif
-- 
2.10.2



Re: [RFC 0/5] rcu: Introduce leaf_node_for_each_mask_possible_cpu() and its friend

2016-12-09 Thread Boqun Feng
On Fri, Dec 09, 2016 at 03:49:45PM -0800, Paul E. McKenney wrote:
> On Fri, Dec 09, 2016 at 04:48:22PM +0800, Boqun Feng wrote:
> > Hi Paul,
> > 
> > While reading the discussion at:
> > 
> > https://marc.info/?l=linux-kernel&m=148044253400769
> 
> This discussion was for stalls specifically, rather than for routine
> scans of the bitmasks.
> 
> But it does look to save some code, so worth looking into.
> 
> > I figured we might use this fact to save some extra checks in RCU core code,
> > currently we iterate over all the possible CPUs on a leaf node, check 
> > whether
> > they were masked in a certain mask and do something. However, given the fact
> > that the masks on a leaf node should always be sparser than the corresponding
> > part of cpu_possible_mask, we'd better iterate over all bits in a mask and
> > check whether the corresponding CPU is possible or not.
> > 
> > So I made this RFC, I did a simple build/boot/rcutorture test on my box with
> > SMP=4, nothing bad happens. Currently I'm waiting for the 0day and trying to
> > test this on a bigger system; in the meantime, looking forward to any
> > comment and suggestion.
> > 
> > So thoughts?
> 
> By analogy with for_each_cpu() and for_each_possible_cpu(), the name
> should instead be for_each_leaf_node_cpu(), the tradition of excessively
> long names in RCU notwithstanding.  ;-)
> 

Make sense ;-)

I think it's more appropriate to call it for_each_leaf_node_mask_cpu(),
because we don't iterate all cpus of a leaf node. The word "possible"
could be dropped because obviously we won't iterate over "impossible"
cpus in a leaf node ;-)

Will modify that in next version.

Regards,
Boqun

>   Thanx, Paul
> 


signature.asc
Description: PGP signature


Re: [PATCH 01/22] m68k/atari: Modernize printing of kernel messages

2016-12-09 Thread Michael Schmitz
Hi Geert,

Am 09.12.2016 um 01:22 schrieb Geert Uytterhoeven:
> On Wed, Dec 7, 2016 at 11:36 PM, Finn Thain  
> wrote:
>> On Wed, 7 Dec 2016, Geert Uytterhoeven wrote:
>>>   - Convert from printk() to pr_*(),
>>>   - Add missing continuations, to fix user-visible breakage,
>>>   - Drop useless WARNING prefix,
>>>   - Move trailing spaces to start of continuations.
>>>
>>> Fixes: 4bcc595ccd80decb ("printk: reinstate KERN_CONT for printing 
>>> continuation lines")
>>> Signed-off-by: Geert Uytterhoeven 
>>> ---
>>>  arch/m68k/atari/atakeyb.c | 14 ++--
>>>  arch/m68k/atari/config.c  | 56 
>>> +++
>>>  2 files changed, 35 insertions(+), 35 deletions(-)
>>>
>>> diff --git a/arch/m68k/atari/atakeyb.c b/arch/m68k/atari/atakeyb.c
>>> index 264db11268039329..37091898adb3d3b5 100644
>>> --- a/arch/m68k/atari/atakeyb.c
>>> +++ b/arch/m68k/atari/atakeyb.c
>>> @@ -149,7 +149,7 @@ static irqreturn_t atari_keyboard_interrupt(int irq, 
>>> void *dummy)
>>>   if (acia_stat & ACIA_OVRN) {
>>>   /* a very fast typist or a slow system, give a warning */
>>>   /* ...happens often if interrupts were disabled for too long 
>>> */
>>> - printk(KERN_DEBUG "Keyboard overrun\n");
>>> + pr_debug("Keyboard overrun\n");
>>>   scancode = acia.key_data;
>>>   if (ikbd_self_test)
>>>   /* During self test, don't do resyncing, just process 
>>> the code */
>>
>> This is not equivalent (unless there is a DEBUG macro definition hiding
>> in a header file somewhere). Since the changelog doesn't mention
>> suppressing any output, perhaps you were deceived by the questionable API,
>> as I have been in the past (see 16b9d870a0 and d61c5427f6).
> 
> This is an actual message people want to see in the kernel log, even

No, it's not something people want to see - clutters up the screen, and
causes even more interrupt hogging disk IO from syslogd so exacerbates
the problem on slow systems.

But Finn is right in that output is now suppressed instead of given a
particular log level. IMO stating that the message will now only be
generated when the kernel has been compiled for debugging would be
perfectly fine.

Cheers,

Michael


> when not debugging?
> 
> Gr{oetje,eeting}s,
> 
> Geert
> 
> --
> Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- 
> ge...@linux-m68k.org
> 
> In personal conversations with technical people, I call myself a hacker. But
> when I'm talking to journalists I just say "programmer" or something like 
> that.
> -- Linus Torvalds
> --
> To unsubscribe from this list: send the line "unsubscribe linux-m68k" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


Re: [PATCH] PCI: pciehp: Optimize PCIe root resume time

2016-12-09 Thread Bjorn Helgaas
[+cc Yinghai, author of 2f5d8e4ff947]

On Fri, Dec 09, 2016 at 02:43:26PM -0800, Vaibhav Shankar wrote:
> On Apollolake platforms, PCIe rootport takes a long time to resume
> from S3. With 100ms delay before read pci conf, rootport takes
> ~200ms during resume.
> 
> commit 2f5d8e4ff947 ("PCI: pciehp: replace unconditional sleep with
> config space access check") is the one that added the 100ms delay
> before reading pci conf.
> 
> This patch removes the 100ms delay. By removing the delay, the
> PCIe root port takes ~16ms during resume. As per PCIe spec, we
> only require 1000ms delay. This delay is provided by the
> pci_bus_check_dev() function.
> 
> With 100ms delay:
> [  155.102713] calling  :00:14.0+ @ 70, parent: pci:00, cb: 
> pci_pm_resume_noirq
> [  155.119337] call :00:14.0+ returned 0 after 16231 usecs
> [  155.119467] calling  :01:00.0+ @ 5845, parent: :00:14.0, cb: 
> pci_pm_resume_noirq
> [  155.321670] call :00:14.0+ returned 0 after 185327 usecs
> [  155.321743] calling  :01:00.0+ @ 5849, parent: :00:14.0, cb: 
> pci_pm_resume
> 
> After removing 100ms delay:
> [   36.624709] calling  :00:14.0+ @ 4434, parent: pci:00, cb: 
> pci_pm_resume_noirq
> [   36.641367] call :00:14.0+ returned 0 after 16263 usecs
> [   36.652458] calling  :00:14.0+ @ 4443, parent: pci:00, cb: 
> pci_pm_resume
> [   36.652673] call :00:14.0+ returned 0 after 208 usecs
> [   36.652863] calling  :01:00.0+ @ 4442, parent: :00:14.0, cb: 
> pci_pm_resume
> 
> Signed-off-by: Vaibhav Shankar 
> ---
>  drivers/pci/hotplug/pciehp_hpc.c |2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/drivers/pci/hotplug/pciehp_hpc.c 
> b/drivers/pci/hotplug/pciehp_hpc.c
> index 5c24e93..08357e7 100644
> --- a/drivers/pci/hotplug/pciehp_hpc.c
> +++ b/drivers/pci/hotplug/pciehp_hpc.c
> @@ -311,8 +311,6 @@ int pciehp_check_link_status(struct controller *ctrl)
>   else
>   msleep(1000);
>  
> - /* wait 100ms before read pci conf, and try in 1s */
> - msleep(100);
>   found = pci_bus_check_dev(ctrl->pcie->port->subordinate,
>   PCI_DEVFN(0, 0));
>  
> -- 
> 1.7.9.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Should xhci_irq() call usb_hc_died()?

2016-12-09 Thread Bjorn Helgaas
Hi Mathias,

ehci_irq(), ohci_irq(), fotg210_irq(), and oxu210_hcd_irq() contain code
equivalent to this:

  status = ehci_readl(...);
  if (status == ~(u32) 0) {
...
usb_hc_died(hcd);
...
return IRQ_HANDLED;
  }

xhci_irq() has a similar check, but does not call usb_hc_died():

  status = readl(...);
  if (status == 0xffffffff) {
...
return IRQ_HANDLED;
  }

Should xhci_irq() also call usb_hc_died()?  Maybe there's some reason
for it to be different than the others, but it wasn't obvious to this
casual observer :)

Bjorn


[PATCH] Add +~800M crashkernel explanation

2016-12-09 Thread Robert LeBlanc
When trying to configure crashkernel greater than about 800 MB, the
kernel fails to allocate memory on x86 and x86_64. This is due to an
undocumented limit that the crashkernel and other low memory items must
be allocated below 896 MB unless the ",high" option is given. This
updates the documentation to explain this and what I understand the
limitations to be on the option.

Signed-off-by: Robert LeBlanc 
---
 Documentation/kdump/kdump.txt | 22 +-
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
index b0eb27b..aa3efa8 100644
--- a/Documentation/kdump/kdump.txt
+++ b/Documentation/kdump/kdump.txt
@@ -256,7 +256,9 @@ While the "crashkernel=size[@offset]" syntax is sufficient 
for most
 configurations, sometimes it's handy to have the reserved memory dependent
 on the value of System RAM -- that's mostly for distributors that pre-setup
 the kernel command line to avoid a unbootable system after some memory has
-been removed from the machine.
+been removed from the machine. If you need to allocate more than ~800M
+for x86 or x86_64 then you must use the simple format as the format
+',high' conflicts with the separators of ranges.
 
 The syntax is:
 
@@ -282,11 +284,21 @@ Boot into System Kernel
 1) Update the boot loader (such as grub, yaboot, or lilo) configuration
files as necessary.
 
-2) Boot the system kernel with the boot parameter "crashkernel=Y@X",
+2) Boot the system kernel with the boot parameter "crashkernel=Y[@X | ,high]",
where Y specifies how much memory to reserve for the dump-capture kernel
-   and X specifies the beginning of this reserved memory. For example,
-   "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory
-   starting at physical address 0x01000000 (16MB) for the dump-capture kernel.
+   and X specifies the beginning of this reserved memory or ',high' to load in
+   high memory. For example, "crashkernel=64M@16M" tells the system
+   kernel to reserve 64 MB of memory starting at physical address
+   0x01000000 (16MB) for the dump-capture kernel.
+
+   Specifying "crashkernel=1G,high" tells the system kernel to reserve 1 GB
+   of memory using high memory for the dump-capture kernel, there may also
+   be some low memory allocated as well. If you need more than ~800M for
+   the crash kernel to operate (volumes on FC/iSCSI, large volumes, systemd
+   added to the previous, etc), you need to specify ',high' since without
+   it crashkernel has to try and fit under 896M along with some other
+   items and will fail to allocate memory. High memory may only be relevant
+   on x86 and x86_64.
 
On x86 and x86_64, use "crashkernel=64M@16M".
 
-- 
2.10.2



Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers

2016-12-09 Thread Stephen Hemminger
On Fri, 9 Dec 2016 22:35:05 +
Haiyang Zhang  wrote:

> > > >
> > > > Emulated NIC is already excluded in start of netvc notifier handler.
> > > >
> > > > static int netvsc_netdev_event(struct notifier_block *this,
> > > >unsigned long event, void *ptr)
> > > > {
> > > > struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
> > > >
> > > > /* Skip our own events */
> > > > if (event_dev->netdev_ops == &device_ops)
> > > > return NOTIFY_DONE;
> > > >  
> > >
> > > Emulated device is not based on netvsc. It's the native Linux  
> > (dec100M?)  
> > > Driver. So this line doesn't exclude it. And how about other NIC type
> > > may be added in the future?  
> > 
> > Sorry, forgot about that haven't used emulated device in years.
> > The emulated device should appear to be on a PCI bus, but the serial
> > would not match??  
> 
> It's not a vmbus device, not a hv_pci device either. Hv_PCI is a subset
> of vmbus devices. So emulated NIC won't have hv_pci serial number.
> 
> In my patch, the following code ensure, we only try to get serial number
> after confirming it's vmbus and hv_pci device:
> 
> +   if (!dev_is_vmbus(dev))
> +   continue;
> +
> +   hdev = device_to_hv_device(dev);
> +   if (hdev->device_id != HV_PCIE)
> +   continue;

Ok, the walk back up the device tree is logically ok, but I don't
know enough about PCI device tree to be assured that it is safe.
Also, you could short circuit away most of the unwanted devices
by making sure the vf_netdev->dev.parent is a PCI device.

Also the loop to look for serial number in the devices on the
hv_pci bus could be made a separate function and have a short circuit
return (although it probably doesn't matter since there will only
be one PCI VF device per bus there).


[PATCH] driver core: flush async calls before testing driver removal

2016-12-09 Thread Vladimir Zapolskiy
If CONFIG_DEBUG_TEST_DRIVER_REMOVE option is enabled a number of false
positives are reported for ATA controller drivers, because ATA port
probes are done asynchronously, and the same problem may also touch
other asynchronously probed drivers.

To reduce the rate of false reports on boot call async_synchronize_full()
before attempting to remove a driver, the same is done in delete_module()
syscall for all possible drivers and in __device_release_driver() function
for asynchronously probed drivers.

Fixes: bea5b158ff0d ("driver core: add test of driver remove calls during 
probe")
Suggested-by: Tejun Heo 
Signed-off-by: Vladimir Zapolskiy 
---
Some time ago the issue was discussed on the linux-ide mailing list, see

  https://www.spinics.net/lists/linux-ide/msg53481.html

 drivers/base/dd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index d76cd97..a4feecf 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -384,6 +384,8 @@ static int really_probe(struct device *dev, struct 
device_driver *drv)
if (test_remove) {
test_remove = false;
 
+   async_synchronize_full();
+
if (dev->bus->remove)
dev->bus->remove(dev);
else if (drv->remove)
-- 
2.10.2



[GIT PULL] drm-vc4-next-2016-12-09

2016-12-09 Thread Eric Anholt
I just got the ack on the DT bindings for VEC, so I'd like to get it
pulled if we can.  If it's too late for 4.10, I'm fine waiting.

Adding VEC support should have a low chance of regressions because it
doesn't get a mode configured by default.  This is due to it reporting
unknown connector state (the HW doesn't have hotplug detect).

I'd say the only dubious change here is the DRM_MODE_SUBCONNECTOR_xx
enum.  I would have left it for danvet to pull, but the TV connector
states patch ends up depending on it and he suggested I pull that one.


The following changes since commit c778cc5df944291dcdb1ca7a6bb781fbc22550c5:

  drm/vc4: Add fragment shader threading support (2016-11-16 13:25:26 -0800)

are available in the git repository at:

  https://github.com/anholt/linux tags/drm-vc4-next-2016-12-09

for you to fetch changes up to c167df443b4a8d97d25a8e69bd9f490a1e3fe646:

  drm/vc4: Don't use drm_put_dev (2016-12-09 15:28:42 -0800)


This pull request brings in VEC (TV-out) support for vc4, along with a
pageflipping race fix.


Boris Brezillon (5):
  drm/vc4: Fix ->clock_select setting for the VEC encoder
  drm: Turn DRM_MODE_SUBCONNECTOR_xx definitions into an enum
  drm: Add TV connector states to drm_connector_state
  drm/vc4: Add support for the VEC (Video Encoder) IP
  drm/vc4: Document VEC DT binding

Daniel Vetter (1):
  drm/vc4: Don't use drm_put_dev

Derek Foreman (1):
  drm/vc4: Fix race between page flip completion event and clean-up

 .../devicetree/bindings/display/brcm,bcm-vc4.txt   |  14 +
 drivers/gpu/drm/drm_atomic.c   |  50 ++
 drivers/gpu/drm/vc4/Makefile   |   1 +
 drivers/gpu/drm/vc4/vc4_crtc.c |  46 +-
 drivers/gpu/drm/vc4/vc4_debugfs.c  |   1 +
 drivers/gpu/drm/vc4/vc4_drv.c  |   5 +-
 drivers/gpu/drm/vc4/vc4_drv.h  |   7 +
 drivers/gpu/drm/vc4/vc4_kms.c  |  33 +-
 drivers/gpu/drm/vc4/vc4_regs.h |   3 +-
 drivers/gpu/drm/vc4/vc4_vec.c  | 657 +
 include/drm/drm_connector.h|  32 +
 include/uapi/drm/drm_mode.h|  18 +-
 12 files changed, 834 insertions(+), 33 deletions(-)
 create mode 100644 drivers/gpu/drm/vc4/vc4_vec.c


Re: [PATCH] perf annotate: check that objdump correctly works

2016-12-09 Thread Alexis Berlemont
Arnaldo Carvalho de Melo wrote:
> Em Thu, Dec 01, 2016 at 01:04:36AM +0100, Alexis Berlemont escreveu:
> > Before disassembling, the tool objdump is called just to be sure:
> > * objdump is available in the path;
> > * objdump is an executable binary;
> > * objdump has no dependency issue or anything else.
> > 
> > This objdump "pre-"command is only necessary because the real objdump
> > command is followed by some " | grep ..."; this prevents the shell
> > from returning the exit code of objdump execution.
> > 
> > Signed-off-by: Alexis Berlemont 
> > ---
> >  tools/perf/util/annotate.c | 79 
> > +-
> >  tools/perf/util/annotate.h |  3 ++
> >  2 files changed, 81 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> > index 3e34ee0..9d6c3a0 100644
> > --- a/tools/perf/util/annotate.c
>   
> > +static int annotate__check_objdump(void)
> > +{
> > +   char command[PATH_MAX * 2];
> > +   int wstatus, err;
> > +   pid_t pid;
> > +
> > +   snprintf(command, sizeof(command),
> > +   "%s -v > /dev/null 2>&1",
> > +   objdump_path ? objdump_path : "objdump");
> > +
> > +   pid = fork();
> > +   if (pid < 0) {
> > +   pr_err("Failure forking to run %s\n", command);
> > +   return -1;
> > +   }
> > +
> > +   if (pid == 0) {
> > +   execl("/bin/sh", "sh", "-c", command, NULL);
> > +   exit(-1);
> > +   }
> > +
> > +   err = waitpid(pid, &wstatus, 0);
> > +   if (err < 0) {
> > +   pr_err("Failure calling waitpid: %s: (%s)\n",
> > +   strerror(errno), command);
> > +   return -1;
> > +   }
> > +
> > +   pr_err("%s: %d %d\n", command, pid, WEXITSTATUS(wstatus));
> 
> So this will appear in all cases, no need for that, i.e. in the success
> case we don't need to get that flashing on the screen, on the last line.
>

Many thanks for your answer and your time.

Sorry for the late answer and such an obvious error.

> > +   switch (WEXITSTATUS(wstatus)) {
> > +   case 0:
> > +   /* Success */
> > +   err = 0;
> > +   break;
> 
> So probably you want to return 0;  here instead and then at the error
> case, i.e. when you set err to !0 you do that pr_err() call above, but I
> think it would be better to use pr_debug(), the warning on the popup box
> is what by default is more polished to tell the user, the details are
> for developers or people wanting to dig in.
> 
> But while doing this I thought that you could instead call this only
> after objdump fails, i.e. if all is right, no need for checking what
> went wrong.
> 
> I.e. you would do the grep step separately, after checking objdump's
> error.
> 
> If you think that is too much work, then please just do the
> pr_err->pr_debug conversion, which would remove the flashing for the
> success case.

I will do the grep separately; no problem.

Alexis.

> 
> I tested it, btw, using:
> 
>   perf annotate --objdump /dev/null page_fault
> 
> Which produced a better output than what we have now (nothing):
> 
>
> ??Error:
>???Couldn't annotate page_fault: ???
>???The objdump tool found in $PATH cannot be executed???
>???  ???
>???  ???
>???Press any key...  ???
>
> 
> 
> 
> 
> /dev/null -v > /dev/null 2>&1: 10336 126
> 
> 
> ---
> 
> summary: make that last line appear only when -v is used (pr_debug) and
> consider covering the case where --objdump was used, where talking about $PATH
> is misleading.
> 
> 
> > +   case 127:
> > +   /* The shell did not find objdump in the path */
> > +   err = SYMBOL_ANNOTATE_ERRNO__NO_OBJDUMP;
> > +   break;
> > +   default:
> > +   /*
> > +* In the default case, we consider that objdump
> > +* cannot be executed; so it gathers many fault
> > +* scenarii:
> > +* - objdump is not an executable (126);
> > +* - objdump has some dependency issue;
> > +* - ...
> > +*/
> > +   err = SYMBOL_ANNOTATE_ERRNO__NO_EXEC_OBJDUMP;
> > +   break;
> > +   }
> > +
> > +   return err;
> > +}
> > +
> >  static const char *annotate__norm_arch(const char *arch_name)
> >  {
> > struct utsname uts;
> > @@ -1351,6 +1424,10 @@ int symbol__disassemble(struct symbol *sym, struct 
> > map *map, const char *arch_na
> > if (err)
> > return err;
> >  
> > +   err = annotate__check_objdump();
> > +   if (err)
> > +   return err;
> > +
> > arch_nam

[PATCH] ACPI / CPPC: Fix per-CPU pointers management

2016-12-09 Thread Rafael J. Wysocki
From: Rafael J. Wysocki 

Enabling ACPI CPPC on x86 causes a NULL pointer dereference to occur
(on boot on a "default" KVM setup) in acpi_cppc_processor_exit() due
to a missing check against NULL in there:

|BUG: unable to handle kernel NULL pointer dereference at   (null)
|IP: [] acpi_cppc_processor_exit+0x40/0x60
|PGD 0 [0.577616]
|Oops:  [#1] SMP
|Modules linked in:
|CPU: 3 PID: 1 Comm: swapper/0 Not tainted 4.9.0-rc6-00146-g17669006adf6 #51
|task: 88003f878000 task.stack: c9008000
|RIP: 0010:[]  [] 
acpi_cppc_processor_exit+0x40/0x60
|RSP: :c900bd48  EFLAGS: 00010296
|RAX: 000137e0 RBX:  RCX: 0001
|RDX: 88003fc0 RSI:  RDI: 88003fbca130
|RBP: c900bd60 R08: 0514 R09: 
|R10: 0001 R11:  R12: 0002
|R13: 0020 R14: 8167cb00 R15: 
|FS:  () GS:88003fcc() knlGS:
|CS:  0010 DS:  ES:  CR0: 80050033
|CR2:  CR3: 01618000 CR4: 000406e0
|Stack:
| 88003f939848 88003fbca130 0001 c900bd80
| 812a4ccb 88003fc0cee8  c900bdb8
| 812dc20d 88003fc0cee8 8167cb00 88003fc0cf48
|Call Trace:
| [] acpi_processor_stop+0xb2/0xc5
| [] driver_probe_device+0x14d/0x2f0
| [] __driver_attach+0x6e/0x90
| [] bus_for_each_dev+0x54/0x90
| [] driver_attach+0x19/0x20
| [] bus_add_driver+0xe6/0x200
| [] driver_register+0x83/0xc0
| [] acpi_processor_driver_init+0x20/0x94
| [] do_one_initcall+0x97/0x180
| [] kernel_init_freeable+0x112/0x1a6
| [] kernel_init+0x9/0xf0
| [] ret_from_fork+0x25/0x30
|Code: 02 00 00 00 48 8b 14 d5 e0 c3 55 81 48 8b 1c 02 4c 8d 6b 20 eb 15 49 8b 
7d 00 48 85 ff 74 05 e8 39 8c d9 ff 41 ff c4 49 83 c5 20 <44> 3b 23 72 e6 48 8d 
bb a0 02 00 00 e8 b1 6f f9 ff 48 89 df e8
|RIP  [] acpi_cppc_processor_exit+0x40/0x60
| RSP 
|CR2: 

Fix that and while at it, fix a possible use-after-free scenario in
acpi_cppc_processor_probe() that can happen if the function returns
without cleaning up the per-CPU pointer set by it previously.

Reported-by: Sebastian Andrzej Siewior 
Original-by: Sebastian Andrzej Siewior 
Signed-off-by: Rafael J. Wysocki 
---

Hi Thomas,

The crash fixed by this is exposed by the ITMT (asymmetric packing) series
(which involves using ACPI CPPC on x86), so IMO it would be good to route it
through tip along with that series.

Thanks,
Rafael

---
 drivers/acpi/cppc_acpi.c |   12 
 1 file changed, 8 insertions(+), 4 deletions(-)

Index: linux-pm/drivers/acpi/cppc_acpi.c
===
--- linux-pm.orig/drivers/acpi/cppc_acpi.c
+++ linux-pm/drivers/acpi/cppc_acpi.c
@@ -776,9 +776,6 @@ int acpi_cppc_processor_probe(struct acp
init_waitqueue_head(&pcc_data.pcc_write_wait_q);
}
 
-   /* Plug PSD data into this CPUs CPC descriptor. */
-   per_cpu(cpc_desc_ptr, pr->id) = cpc_ptr;
-
/* Everything looks okay */
pr_debug("Parsed CPC struct for CPU: %d\n", pr->id);
 
@@ -789,10 +786,15 @@ int acpi_cppc_processor_probe(struct acp
goto out_free;
}
 
+   /* Plug PSD data into this CPUs CPC descriptor. */
+   per_cpu(cpc_desc_ptr, pr->id) = cpc_ptr;
+
ret = kobject_init_and_add(&cpc_ptr->kobj, &cppc_ktype, &cpu_dev->kobj,
"acpi_cppc");
-   if (ret)
+   if (ret) {
+   per_cpu(cpc_desc_ptr, pr->id) = NULL;
goto out_free;
+   }
 
kfree(output.pointer);
return 0;
@@ -826,6 +828,8 @@ void acpi_cppc_processor_exit(struct acp
void __iomem *addr;
 
cpc_ptr = per_cpu(cpc_desc_ptr, pr->id);
+   if (!cpc_ptr)
+   return;
 
/* Free all the mapped sys mem areas for this CPU */
for (i = 2; i < cpc_ptr->num_entries; i++) {



Re: [PATCH 1/2] of: base: add support to get machine model name

2016-12-09 Thread Frank Rowand
On 12/09/16 08:03, Rob Herring wrote:
> On Wed, Nov 23, 2016 at 4:25 AM, Sudeep Holla  wrote:
>>
>>
>> On 22/11/16 21:35, Rob Herring wrote:
>>>
>>> On Tue, Nov 22, 2016 at 12:44 PM, Frank Rowand 
>>> wrote:
>>
>>
>> [...]
>>

 This patch adds a function that leads to conflating the "model" property
 and the "compatible" property. This leads to opaque, confusing and
 unclear
 code where ever it is used.   I think it is not good for the device tree
 framework to contribute to writing unclear code.

 Further, only two of the proposed users of this new function appear to
 be proper usage.  I do not think that the small amount of reduced lines
 of code is a good trade off for the reduced code clarity and for the
 potential for future mis-use of this function.

 Can I convince you to revert this patch?
>>>
>>>
>>> Yes, I will revert.
> 
> I looked at this again and the users. They are all informational, so

A comment in the function docbook header stating that the intent of the
returned value is for informational use only would make me happy.

There is at least one proposed use in patch 2/2 that is not just
informational.  init_octeon_system_type() sometimes uses the value of
the model property to create the value of variable octeon_system_type.
octeon_pcie_pcibios_map_irq() checks the value of octeon_system_type
(via the function octeon_board_type_string()) to determine whether
to apply a fixup:

int __init octeon_pcie_pcibios_map_irq(const struct pci_dev *dev,
   u8 slot, u8 pin)
{
/*
 * The EBH5600 board with the PCI to PCIe bridge mistakenly
 * wires the first slot for both device id 2 and interrupt
 * A. According to the PCI spec, device id 2 should be C. The
 * following kludge attempts to fix this.
 */
if (strstr(octeon_board_type_string(), "EBH5600") &&
dev->bus && dev->bus->parent) {


> I'm not worried if a compatible string could be returned with this
> change. The function returns the best name for the machine and having
> consistency is a good thing.

> 
> I was considering not reverting (as I'd not yet gotten around to it),
> but I'm still going to revert for the naming.
> 
>>>
 If not, will you accept a patch to change the function name to more
 clearly indicate what it does?  (One possible name would be
 of_model_or_1st_compatible().)
>>>
>>>
>>> I took it as there's already the FDT equivalent function.
>>
>>
>> Yes it was mainly for non of_flat_* replacement for
>> of_flat_dt_get_machine_name
> 
> I would suggest just of_get_machine_name().
> 
> You might also add a fallback to return "unknown", and drop some of
> the custom strings. I don't think anyone should care about the actual
> string. However, it's an error to have a DT with no model or top level
> compatible, so maybe a WARN.

The name and other suggestions sound fine to me.

-Frank

> 
> Rob
> 



[PATCH] Input: i8042-x86ia64io.h - Comment else/endif of CONFIG_PNP

2016-12-09 Thread Marcos Paulo de Souza
As this #ifdef block is huge, commenting its #else/#endif makes the code easier to read.

Signed-off-by: Marcos Paulo de Souza 
---
 While reviewing patches from Dmitry about presence of 8042, it makes it
 much easier to understand the ifdefs...

 drivers/input/serio/i8042-x86ia64io.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/input/serio/i8042-x86ia64io.h 
b/drivers/input/serio/i8042-x86ia64io.h
index 073246c..ddd3132 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -1131,10 +1131,10 @@ static int __init i8042_pnp_init(void)
return 0;
 }
 
-#else
+#else  /* !CONFIG_PNP */
 static inline int i8042_pnp_init(void) { return 0; }
 static inline void i8042_pnp_exit(void) { }
-#endif
+#endif /* CONFIG_PNP */
 
 static int __init i8042_platform_init(void)
 {
-- 
2.9.3



Re: [PATCH 0/4] x86: Trust firmware a bit more about presence of 8042

2016-12-09 Thread Marcos Paulo de Souza
Hi Dmitry,

On Fri, Dec 09, 2016 at 12:57:37PM -0800, Dmitry Torokhov wrote:
> Hi,
> 
> Historically we did not trust PNP data regarding keyboard controllers on
> X86, but more and more boards get upset with us if they try to tell us that
> there is no keyboard controller and we still go and try to poke at where we
> think it might be. To work around this issue let's have a bit more faith in
> BIOS data, and if [lack] of PNP devices for mouse and keyboard matches what
> firmware (basically ACPI FADT) tells us, let's abort i8042 probe.
> 
> We add a new flag (enum) to x86_platform.legacy structure so we can
> distinguish between cases where platform/subarch never has 8042 (such as
> MID platform) and cases where firmware says that it is not there, so that
> i8042 driver can either abort immediately or go and check for presence of
> PNP devices. We also remove x86_platform.i8042_detect() as it is no longer
> used (platforms can set value of x86_platform.legacy.i8042 as needed in
> quirks).
> 
> If you are OK with arch/x86 changes please apply together with the input
> part.

Looks very good, better than my initial idea of adding more code into 
i8042_detect.

Acked-by: Marcos Paulo de Souza 

> 
> Thanks,
> Dmitry
> 
> Dmitry Torokhov (4):
>   x86/init: add i8042 state to the platform data
>   Input: i8042 - trust firmware a bit more when probing on X86
>   x86/init: remove i8042_detect() from platform ops
>   x86/init: fix a couple typos in arch/x86/include/asm/x86_init.h
> 
>  arch/x86/include/asm/x86_init.h | 26 +-
>  arch/x86/kernel/acpi/boot.c |  7 +++
>  arch/x86/kernel/platform-quirks.c   |  5 +
>  arch/x86/kernel/x86_init.c  |  2 --
>  arch/x86/platform/ce4100/ce4100.c   |  6 --
>  arch/x86/platform/intel-mid/intel-mid.c |  7 ---
>  drivers/input/serio/i8042-x86ia64io.h   | 10 +++---
>  7 files changed, 40 insertions(+), 23 deletions(-)
> 
> -- 
> 2.8.0.rc3.226.g39d4020
> 


Re: [RFC 0/5] rcu: Introduce leaf_node_for_each_mask_possible_cpu() and its friend

2016-12-09 Thread Paul E. McKenney
On Fri, Dec 09, 2016 at 04:48:22PM +0800, Boqun Feng wrote:
> Hi Paul,
> 
> While reading the discussion at:
> 
> https://marc.info/?l=linux-kernel&m=148044253400769

This discussion was for stalls specifically, rather than for routine
scans of the bitmasks.

But it does look to save some code, so worth looking into.

> I figured we might use this fact to save some extra checks in RCU core code,
> currently we iterate over all the possible CPUs on a leaf node, check whether
> they were masked in a certain mask and do something. However, given the fact
> that the masks on a leaf node should always be sparser than the corresponding
> part of cpu_possible_mask, we'd better iterate over all bits in a mask and
> check whether the corresponding CPU is possible or not.
> 
> So I made this RFC, I did a simple build/boot/rcutorture test on my box with
> SMP=4, nothing bad happens. Currently I'm waiting for the 0day and trying to
> test this on a bigger system, in the meanwhile, looking forward to any
> comment and suggestion.
> 
> So thoughts?

By analogy with for_each_cpu() and for_each_possible_cpu(), the name
should instead be for_each_leaf_node_cpu(), the tradition of excessively
long names in RCU notwithstanding.  ;-)

Thanx, Paul



[PATCH v3 4/4] ARM: treewide: Replace uses of virt_to_phys with __pa_symbol

2016-12-09 Thread Florian Fainelli
All low-level PM/SMP code using virt_to_phys() should actually use
__pa_symbol() against kernel symbols. Update code where relevant to move
away from virt_to_phys().

Signed-off-by: Florian Fainelli 
---
 arch/arm/common/mcpm_entry.c  | 12 ++--
 arch/arm/mach-alpine/platsmp.c|  2 +-
 arch/arm/mach-axxia/platsmp.c |  2 +-
 arch/arm/mach-bcm/bcm63xx_smp.c   |  2 +-
 arch/arm/mach-bcm/platsmp-brcmstb.c   |  2 +-
 arch/arm/mach-bcm/platsmp.c   |  4 ++--
 arch/arm/mach-berlin/platsmp.c|  2 +-
 arch/arm/mach-exynos/firmware.c   |  4 ++--
 arch/arm/mach-exynos/mcpm-exynos.c|  2 +-
 arch/arm/mach-exynos/platsmp.c|  4 ++--
 arch/arm/mach-exynos/pm.c |  6 +++---
 arch/arm/mach-exynos/suspend.c|  6 +++---
 arch/arm/mach-hisi/platmcpm.c |  2 +-
 arch/arm/mach-hisi/platsmp.c  |  6 +++---
 arch/arm/mach-imx/platsmp.c   |  2 +-
 arch/arm/mach-imx/pm-imx6.c   |  2 +-
 arch/arm/mach-imx/src.c   |  2 +-
 arch/arm/mach-mediatek/platsmp.c  |  2 +-
 arch/arm/mach-mvebu/pm.c  |  2 +-
 arch/arm/mach-mvebu/pmsu.c|  2 +-
 arch/arm/mach-mvebu/system-controller.c   |  2 +-
 arch/arm/mach-omap2/control.c |  8 
 arch/arm/mach-omap2/omap-mpuss-lowpower.c |  8 
 arch/arm/mach-omap2/omap-smp.c|  4 ++--
 arch/arm/mach-prima2/platsmp.c|  2 +-
 arch/arm/mach-prima2/pm.c |  2 +-
 arch/arm/mach-pxa/palmz72.c   |  2 +-
 arch/arm/mach-pxa/pxa25x.c|  2 +-
 arch/arm/mach-pxa/pxa27x.c|  2 +-
 arch/arm/mach-pxa/pxa3xx.c|  2 +-
 arch/arm/mach-realview/platsmp-dt.c   |  2 +-
 arch/arm/mach-rockchip/platsmp.c  |  4 ++--
 arch/arm/mach-rockchip/pm.c   |  2 +-
 arch/arm/mach-s3c24xx/mach-jive.c |  2 +-
 arch/arm/mach-s3c24xx/pm-s3c2410.c|  2 +-
 arch/arm/mach-s3c24xx/pm-s3c2416.c|  2 +-
 arch/arm/mach-s3c64xx/pm.c|  2 +-
 arch/arm/mach-s5pv210/pm.c|  2 +-
 arch/arm/mach-sa1100/pm.c |  2 +-
 arch/arm/mach-shmobile/platsmp-apmu.c |  6 +++---
 arch/arm/mach-shmobile/platsmp-scu.c  |  4 ++--
 arch/arm/mach-socfpga/platsmp.c   |  4 ++--
 arch/arm/mach-spear/platsmp.c |  2 +-
 arch/arm/mach-sti/platsmp.c   |  2 +-
 arch/arm/mach-sunxi/platsmp.c |  4 ++--
 arch/arm/mach-tango/platsmp.c |  2 +-
 arch/arm/mach-tango/pm.c  |  2 +-
 arch/arm/mach-tegra/reset.c   |  4 ++--
 arch/arm/mach-ux500/platsmp.c |  2 +-
 arch/arm/mach-vexpress/dcscb.c|  2 +-
 arch/arm/mach-vexpress/platsmp.c  |  2 +-
 arch/arm/mach-vexpress/tc2_pm.c   |  4 ++--
 arch/arm/mach-zx/platsmp.c|  4 ++--
 arch/arm/mach-zynq/platsmp.c  |  2 +-
 54 files changed, 84 insertions(+), 84 deletions(-)

diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index a923524d1040..cf062472e07b 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -144,7 +144,7 @@ extern unsigned long 
mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];
 
 void mcpm_set_entry_vector(unsigned cpu, unsigned cluster, void *ptr)
 {
-   unsigned long val = ptr ? virt_to_phys(ptr) : 0;
+   unsigned long val = ptr ? __pa_symbol(ptr) : 0;
mcpm_entry_vectors[cluster][cpu] = val;
sync_cache_w(&mcpm_entry_vectors[cluster][cpu]);
 }
@@ -299,8 +299,8 @@ void mcpm_cpu_power_down(void)
 * the kernel as if the power_up method just had deasserted reset
 * on the CPU.
 */
-   phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
-   phys_reset(virt_to_phys(mcpm_entry_point));
+   phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
+   phys_reset(__pa_symbol(mcpm_entry_point));
 
/* should never get here */
BUG();
@@ -388,8 +388,8 @@ static int __init nocache_trampoline(unsigned long _arg)
__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
__mcpm_cpu_down(cpu, cluster);
 
-   phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
-   phys_reset(virt_to_phys(mcpm_entry_point));
+   phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
+   phys_reset(__pa_symbol(mcpm_entry_point));
BUG();
 }
 
@@ -449,7 +449,7 @@ int __init mcpm_sync_init(
sync_cache_w(&mcpm_sync);
 
if (power_up_setup) {
-   mcpm_power_up_setup_phys = virt_to_phys(power_up_setup);
+   mcpm_power_up_setup_phys = __pa_symbol(power_up_setup);
sync_cache_w(&mcpm_power_up_setup_phys);
}
 
diff --git a/arch/arm/mach-alpine/platsmp.c b/arch/arm/mach-alpine/platsmp.c
index dd77ea25e7ca..6dc6

[PATCH v3 3/4] ARM: Add support for CONFIG_DEBUG_VIRTUAL

2016-12-09 Thread Florian Fainelli
x86 has an option: CONFIG_DEBUG_VIRTUAL to do additional checks on
virt_to_phys calls. The goal is to catch users who are calling
virt_to_phys on non-linear addresses immediately. This includes callers
using __virt_to_phys() on image addresses instead of __pa_symbol(). This
is a generally useful debug feature to spot bad code (particularly in
drivers).

Signed-off-by: Florian Fainelli 
---
 arch/arm/Kconfig  |  1 +
 arch/arm/include/asm/memory.h | 16 --
 arch/arm/mm/Makefile  |  1 +
 arch/arm/mm/physaddr.c| 51 +++
 4 files changed, 67 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm/mm/physaddr.c

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b5d529fdffab..5e66173c5787 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2,6 +2,7 @@ config ARM
bool
default y
select ARCH_CLOCKSOURCE_DATA
+   select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index bee7511c5098..d90300193adf 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -213,7 +213,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
: "r" (x), "I" (__PV_BITS_31_24)\
: "cc")
 
-static inline phys_addr_t __virt_to_phys(unsigned long x)
+static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 {
phys_addr_t t;
 
@@ -245,7 +245,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x)
 #define PHYS_OFFSETPLAT_PHYS_OFFSET
 #define PHYS_PFN_OFFSET((unsigned long)(PHYS_OFFSET >> PAGE_SHIFT))
 
-static inline phys_addr_t __virt_to_phys(unsigned long x)
+static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 {
return (phys_addr_t)x - PAGE_OFFSET + PHYS_OFFSET;
 }
@@ -261,6 +261,16 @@ static inline unsigned long __phys_to_virt(phys_addr_t x)
unsigned long)(kaddr) - PAGE_OFFSET) >> PAGE_SHIFT) + \
 PHYS_PFN_OFFSET)
 
+#define __pa_symbol_nodebug(x) __virt_to_phys_nodebug((x))
+
+#ifdef CONFIG_DEBUG_VIRTUAL
+extern phys_addr_t __virt_to_phys(unsigned long x);
+extern phys_addr_t __phys_addr_symbol(unsigned long x);
+#else
+#define __virt_to_phys(x)  __virt_to_phys_nodebug(x)
+#define __phys_addr_symbol(x)  __pa_symbol_nodebug(x)
+#endif
+
 /*
  * These are *only* valid on the kernel direct mapped RAM memory.
  * Note: Drivers should NOT use these.  They are the wrong
@@ -283,9 +293,11 @@ static inline void *phys_to_virt(phys_addr_t x)
  * Drivers should NOT use these either.
  */
 #define __pa(x)__virt_to_phys((unsigned long)(x))
+#define __pa_symbol(x) __phys_addr_symbol(RELOC_HIDE((unsigned 
long)(x), 0))
 #define __va(x)((void 
*)__phys_to_virt((phys_addr_t)(x)))
 #define pfn_to_kaddr(pfn)  __va((phys_addr_t)(pfn) << PAGE_SHIFT)
 
+
 extern long long arch_phys_to_idmap_offset;
 
 /*
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index e8698241ece9..b3dea80715b4 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -14,6 +14,7 @@ endif
 
 obj-$(CONFIG_ARM_PTDUMP)   += dump.o
 obj-$(CONFIG_MODULES)  += proc-syms.o
+obj-$(CONFIG_DEBUG_VIRTUAL)+= physaddr.o
 
 obj-$(CONFIG_ALIGNMENT_TRAP)   += alignment.o
 obj-$(CONFIG_HIGHMEM)  += highmem.o
diff --git a/arch/arm/mm/physaddr.c b/arch/arm/mm/physaddr.c
new file mode 100644
index ..0288760306ce
--- /dev/null
+++ b/arch/arm/mm/physaddr.c
@@ -0,0 +1,51 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+
+#include "mm.h"
+
+static inline bool __virt_addr_valid(unsigned long x)
+{
+   /* high_memory does not get immediately defined, and there
+* are early callers of __pa() against PAGE_OFFSET, just catch
+* these here, then do normal checks, with the exception of
+* MAX_DMA_ADDRESS.
+*/
+   if ((x >= PAGE_OFFSET && !high_memory) ||
+  (x >= PAGE_OFFSET &&
+   high_memory && x < (unsigned long)high_memory) ||
+   x == MAX_DMA_ADDRESS)
+   return true;
+
+   return false;
+}
+
+phys_addr_t __virt_to_phys(unsigned long x)
+{
+   WARN(!__virt_addr_valid(x),
+"virt_to_phys used for non-linear address: %pK (%pS)\n",
+(void *)x,
+(void *)x);
+
+   return __virt_to_phys_nodebug(x);
+}
+EXPORT_SYMBOL(__virt_to_phys);
+
+phys_addr_t __phys_addr_symbol(unsigned long x)
+{
+   /* This is bounds checking against the kernel image only.
+* __pa_symbol should only be used on kernel symbol addresses.
+*/
+   VIRTUAL_BUG_ON(x < (unsigned long)KERNEL_START ||
+  x > (unsigned long)KERNEL_END);
+
+   return __pa_symbol_nodebug(x);

[PATCH v3 1/4] mtd: lart: Rename partition defines to be prefixed with PART_

2016-12-09 Thread Florian Fainelli
In preparation for defining KERNEL_START on ARM, rename KERNEL_START to
PART_KERNEL_START, and to be consistent, do this for all
partition-related constants.

Signed-off-by: Florian Fainelli 
---
 drivers/mtd/devices/lart.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/mtd/devices/lart.c b/drivers/mtd/devices/lart.c
index 82bd00af5cc3..268aae45b514 100644
--- a/drivers/mtd/devices/lart.c
+++ b/drivers/mtd/devices/lart.c
@@ -75,18 +75,18 @@ static char module_name[] = "lart";
 
 /* blob */
 #define NUM_BLOB_BLOCKSFLASH_NUMBLOCKS_16m_PARAM
-#define BLOB_START 0x
-#define BLOB_LEN   (NUM_BLOB_BLOCKS * 
FLASH_BLOCKSIZE_PARAM)
+#define PART_BLOB_START0x
+#define PART_BLOB_LEN  (NUM_BLOB_BLOCKS * FLASH_BLOCKSIZE_PARAM)
 
 /* kernel */
 #define NUM_KERNEL_BLOCKS  7
-#define KERNEL_START   (BLOB_START + BLOB_LEN)
-#define KERNEL_LEN (NUM_KERNEL_BLOCKS * 
FLASH_BLOCKSIZE_MAIN)
+#define PART_KERNEL_START  (PART_BLOB_START + PART_BLOB_LEN)
+#define PART_KERNEL_LEN(NUM_KERNEL_BLOCKS * 
FLASH_BLOCKSIZE_MAIN)
 
 /* initial ramdisk */
 #define NUM_INITRD_BLOCKS  24
-#define INITRD_START   (KERNEL_START + KERNEL_LEN)
-#define INITRD_LEN (NUM_INITRD_BLOCKS * 
FLASH_BLOCKSIZE_MAIN)
+#define PART_INITRD_START  (PART_KERNEL_START + PART_KERNEL_LEN)
+#define PART_INITRD_LEN(NUM_INITRD_BLOCKS * 
FLASH_BLOCKSIZE_MAIN)
 
 /*
  * See section 4.0 in "3 Volt Fast Boot Block Flash Memory" Intel Datasheet
@@ -587,20 +587,20 @@ static struct mtd_partition lart_partitions[] = {
/* blob */
{
.name   = "blob",
-   .offset = BLOB_START,
-   .size   = BLOB_LEN,
+   .offset = PART_BLOB_START,
+   .size   = PART_BLOB_LEN,
},
/* kernel */
{
.name   = "kernel",
-   .offset = KERNEL_START, /* MTDPART_OFS_APPEND */
-   .size   = KERNEL_LEN,
+   .offset = PART_KERNEL_START,/* MTDPART_OFS_APPEND */
+   .size   = PART_KERNEL_LEN,
},
/* initial ramdisk / file system */
{
.name   = "file system",
-   .offset = INITRD_START, /* MTDPART_OFS_APPEND */
-   .size   = INITRD_LEN,   /* MTDPART_SIZ_FULL */
+   .offset = PART_INITRD_START,/* MTDPART_OFS_APPEND */
+   .size   = PART_INITRD_LEN,  /* MTDPART_SIZ_FULL */
}
 };
 #define NUM_PARTITIONS ARRAY_SIZE(lart_partitions)
-- 
2.9.3



[PATCH v3 2/4] ARM: Define KERNEL_START and KERNEL_END

2016-12-09 Thread Florian Fainelli
In preparation for adding CONFIG_DEBUG_VIRTUAL support, define a set of
common constants: KERNEL_START and KERNEL_END which abstract
CONFIG_XIP_KERNEL vs. !CONFIG_XIP_KERNEL. Update the code where
relevant.

Signed-off-by: Florian Fainelli 
---
 arch/arm/include/asm/memory.h | 7 +++
 arch/arm/mm/init.c| 7 ++-
 arch/arm/mm/mmu.c | 6 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 76cbd9c674df..bee7511c5098 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -111,6 +111,13 @@
 
 #endif /* !CONFIG_MMU */
 
+#ifdef CONFIG_XIP_KERNEL
+#define KERNEL_START   _sdata
+#else
+#define KERNEL_START   _stext
+#endif
+#define KERNEL_END _end
+
 /*
  * We fix the TCM memories max 32 KiB ITCM resp DTCM at these
  * locations
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 370581aeb871..c87d0d5b65f2 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -230,11 +230,8 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, 
phys_addr_t align)
 void __init arm_memblock_init(const struct machine_desc *mdesc)
 {
/* Register the kernel text, kernel data and initrd with memblock. */
-#ifdef CONFIG_XIP_KERNEL
-   memblock_reserve(__pa(_sdata), _end - _sdata);
-#else
-   memblock_reserve(__pa(_stext), _end - _stext);
-#endif
+   memblock_reserve(__pa(KERNEL_START), _end - KERNEL_START);
+
 #ifdef CONFIG_BLK_DEV_INITRD
/* FDT scan will populate initrd_start */
if (initrd_start && !phys_initrd_size) {
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4001dd15818d..f0fd1a2db036 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1437,11 +1437,7 @@ static void __init kmap_init(void)
 static void __init map_lowmem(void)
 {
struct memblock_region *reg;
-#ifdef CONFIG_XIP_KERNEL
-   phys_addr_t kernel_x_start = round_down(__pa(_sdata), SECTION_SIZE);
-#else
-   phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
-#endif
+   phys_addr_t kernel_x_start = round_down(__pa(KERNEL_START), 
SECTION_SIZE);
phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
 
/* Map all the lowmem memory banks. */
-- 
2.9.3



[PATCH v3 0/4] ARM: Add support for CONFIG_DEBUG_VIRTUAL

2016-12-09 Thread Florian Fainelli
This patch series builds on top of Laura's [PATCHv5 00/10] CONFIG_DEBUG_VIRTUAL
for arm64 to add support for CONFIG_DEBUG_VIRTUAL for ARM.

This was tested on a Brahma B15 platform (ARMv7 + HIGHMEM + LPAE).

Note that the treewide changes would involve a huge CC list, which
is why it has been purposely trimmed to just focusing on the DEBUG_VIRTUAL
aspect.

Changes in v3:

- fix build failures reported by Kbuild test robot

Changes in v2:

- Modified MTD LART driver not to create symbol conflicts with
  KERNEL_START
- Fixed patch that defines and uses KERNEL_START/END
- Fixed __pa_symbol()'s definition
- Inline __pa_symbol() check within the VIRTUAL_BUG_ON statement
- Simplified check for virtual addresses
- Added a tree-wide patch changing SMP/PM implementations to use
  __pa_symbol(), build tested against multi_v{5,7}_defconfig

Thanks!

Florian Fainelli (4):
  mtd: lart: Rename partition defines to be prefixed with PART_
  ARM: Define KERNEL_START and KERNEL_END
  ARM: Add support for CONFIG_DEBUG_VIRTUAL
  ARM: treewide: Replace uses of virt_to_phys with __pa_symbol

 arch/arm/Kconfig  |   1 +
 arch/arm/boot/compressed/piggy.xzkern | Bin 0 -> 2998584 bytes
 arch/arm/common/mcpm_entry.c  |  12 +++
 arch/arm/include/asm/memory.h |  23 --
 arch/arm/mach-alpine/platsmp.c|   2 +-
 arch/arm/mach-axxia/platsmp.c |   2 +-
 arch/arm/mach-bcm/bcm63xx_smp.c   |   2 +-
 arch/arm/mach-bcm/platsmp-brcmstb.c   |   2 +-
 arch/arm/mach-bcm/platsmp.c   |   4 +--
 arch/arm/mach-berlin/platsmp.c|   2 +-
 arch/arm/mach-exynos/firmware.c   |   4 +--
 arch/arm/mach-exynos/mcpm-exynos.c|   2 +-
 arch/arm/mach-exynos/platsmp.c|   4 +--
 arch/arm/mach-exynos/pm.c |   6 ++--
 arch/arm/mach-exynos/suspend.c|   6 ++--
 arch/arm/mach-hisi/platmcpm.c |   2 +-
 arch/arm/mach-hisi/platsmp.c  |   6 ++--
 arch/arm/mach-imx/platsmp.c   |   2 +-
 arch/arm/mach-imx/pm-imx6.c   |   2 +-
 arch/arm/mach-imx/src.c   |   2 +-
 arch/arm/mach-mediatek/platsmp.c  |   2 +-
 arch/arm/mach-mvebu/pm.c  |   2 +-
 arch/arm/mach-mvebu/pmsu.c|   2 +-
 arch/arm/mach-mvebu/system-controller.c   |   2 +-
 arch/arm/mach-omap2/control.c |   8 ++---
 arch/arm/mach-omap2/omap-mpuss-lowpower.c |   8 ++---
 arch/arm/mach-omap2/omap-smp.c|   4 +--
 arch/arm/mach-prima2/platsmp.c|   2 +-
 arch/arm/mach-prima2/pm.c |   2 +-
 arch/arm/mach-pxa/palmz72.c   |   2 +-
 arch/arm/mach-pxa/pxa25x.c|   2 +-
 arch/arm/mach-pxa/pxa27x.c|   2 +-
 arch/arm/mach-pxa/pxa3xx.c|   2 +-
 arch/arm/mach-realview/platsmp-dt.c   |   2 +-
 arch/arm/mach-rockchip/platsmp.c  |   4 +--
 arch/arm/mach-rockchip/pm.c   |   2 +-
 arch/arm/mach-s3c24xx/mach-jive.c |   2 +-
 arch/arm/mach-s3c24xx/pm-s3c2410.c|   2 +-
 arch/arm/mach-s3c24xx/pm-s3c2416.c|   2 +-
 arch/arm/mach-s3c64xx/pm.c|   2 +-
 arch/arm/mach-s5pv210/pm.c|   2 +-
 arch/arm/mach-sa1100/pm.c |   2 +-
 arch/arm/mach-shmobile/platsmp-apmu.c |   6 ++--
 arch/arm/mach-shmobile/platsmp-scu.c  |   4 +--
 arch/arm/mach-socfpga/platsmp.c   |   4 +--
 arch/arm/mach-spear/platsmp.c |   2 +-
 arch/arm/mach-sti/platsmp.c   |   2 +-
 arch/arm/mach-sunxi/platsmp.c |   4 +--
 arch/arm/mach-tango/platsmp.c |   2 +-
 arch/arm/mach-tango/pm.c  |   2 +-
 arch/arm/mach-tegra/reset.c   |   4 +--
 arch/arm/mach-ux500/platsmp.c |   2 +-
 arch/arm/mach-vexpress/dcscb.c|   2 +-
 arch/arm/mach-vexpress/platsmp.c  |   2 +-
 arch/arm/mach-vexpress/tc2_pm.c   |   4 +--
 arch/arm/mach-zx/platsmp.c|   4 +--
 arch/arm/mach-zynq/platsmp.c  |   2 +-
 arch/arm/mm/Makefile  |   1 +
 arch/arm/mm/init.c|   7 ++--
 arch/arm/mm/mmu.c |   6 +---
 arch/arm/mm/physaddr.c|  51 ++
 drivers/mtd/devices/lart.c|  24 +++---
 62 files changed, 173 insertions(+), 108 deletions(-)
 create mode 100644 arch/arm/boot/compressed/piggy.xzkern
 create mode 100644 arch/arm/mm/physaddr.c

-- 
2.9.3



Re: [PATCH RFC] user-namespaced file capabilities - now with even more magic

2016-12-09 Thread Eric W. Biederman
ebied...@xmission.com (Eric W. Biederman) writes:

> "Serge E. Hallyn"  writes:
>
>> Quoting Eric W. Biederman (ebied...@xmission.com):
>>> "Serge E. Hallyn"  writes:
>>> 
>>> > On Thu, Dec 08, 2016 at 05:43:09PM +1300, Eric W. Biederman wrote:
>>> >> "Serge E. Hallyn"  writes:
>>> 
>>> >> Any chance of a signed-off-by?
>>> >
>>> > Yes, sorry, Stéphane had pointed out that I'd apparently forgotten to do
>>> > -s.  Do you want me to resend the whole shebang, or does
>>> >
>>> > Signed-off-by: Serge Hallyn 
>>> >
>>> > suffice?  (My previous iterations did have it fwiw so I don't think I 
>>> > could
>>> > legally disavow it now :)
>>> 
>>> I was really hoping to get this in this for 4.10, but I am seeing a couple
>>> of little things in my review.  Comments referring to a non-existent v4
>>> and a few other niggling little things so I am going to target this for
>>> the next kernel release so there is time review.  With a little luck I
>>> can place this patch in my for-next tree just after the merge window
>>> closes and 4.10-rc1 ships.
>>
>> Ok, thanks.  This is not something I'd want to rush :)
>
> Sure.  This is just something we get merged.

By which I meant to say this is something we need to get merged, and
hopefully before all of the developers forget what is going on.  Not
having this is clearly a pain point for people working with file
capabilities.

Eric



Re: [PATCH 2/6] net: ethernet: ti: cpts: add support for ext rftclk selection

2016-12-09 Thread Grygorii Strashko


On 12/08/2016 06:47 PM, Stephen Boyd wrote:
> On 12/06, Grygorii Strashko wrote:
>> Subject: [PATCH] cpts refclk sel
>>
>> Signed-off-by: Grygorii Strashko 
>> ---
>>  arch/arm/boot/dts/keystone-k2e-netcp.dtsi | 10 +-
>>  drivers/net/ethernet/ti/cpts.c| 52 
>> ++-
>>  2 files changed, 60 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm/boot/dts/keystone-k2e-netcp.dtsi 
>> b/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
>> index 919e655..b27aa22 100644
>> --- a/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
>> +++ b/arch/arm/boot/dts/keystone-k2e-netcp.dtsi
>> @@ -138,7 +138,7 @@ netcp: netcp@2400 {
>>  /* NetCP address range */
>>  ranges = <0 0x2400 0x100>;
>>
>> -clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>;
>> +clocks = <&clkpa>, <&clkcpgmac>, <&cpts_mux>;

^^ mux clock used here

>>  clock-names = "pa_clk", "ethss_clk", "cpts";
>>  dma-coherent;
>>
>> @@ -162,6 +162,14 @@ netcp: netcp@2400 {
>>  cpts-ext-ts-inputs = <6>;
>>  cpts-ts-comp-length;
>>
>> +cpts_mux: cpts_refclk_mux {
>> +#clock-cells = <0>;
>> +clocks = <&chipclk12>, <&chipclk13>;
>> +cpts-mux-tbl = <0>, <1>;
>> +assigned-clocks = <&cpts_mux>;
>> +assigned-clock-parents = <&chipclk12>;
> 
> Is there a binding update?
 
this was pure RFC-DEV patch just to check the possibility of modeling 
CPTS_RFTCLK_SEL register as mux clock. 
Original patch:
https://lkml.org/lkml/2016/11/28/780

I plan to resend it using the clk framework.

 Why the subnode? 

Sorry, I did not get this question - is there another way to pass a phandle to a clock
in the clocks list property? Am I missing something?

Sry, this is my first clock :)

> Why not have it as part of the netcp node?

cpts is part of gbe ethss, which is part of netcp.

Only netcp is modeled as DD - cpts and gbe ethss implemented without using DD 
model,
so generic resources acquired by netcp and then passed to cpts and gbe ethss.

CPTS has register to control an external multiplexer that selects
one of up to 32 clocks for time sync reference (RFTCLK)

> Does the cpts-mux-tbl property change?

On Keystone 2 66AK2e (as example) the following list of clocks can be selected 
as ref clocks (list is different for other SoCs):
0000 = SYSCLK2
0001 = SYSCLK3
0010 = TIMI0
0011 = TIMI1
0100 = TSIPCLKA
1000 = TSREFCLK
1100 = TSIPCLKB
Others = Reserved

and only 0 and 1 are internal, other external and board specific
(parameters unknown and corresponding inputs can be used for other purposes),
so I can't define all parent clocks, only internal:

clocks = <&chipclk12>, <&chipclk13>;
cpts-mux-tbl = <0>, <1>;

to use another, external, clock - it should be explicitly defined in the board file

timi1clk: timi1clk {
#clock-cells = <0>;
compatible = "fixed-clock";
...

&cpts_mux {
clocks = <&chipclk12>, <&chipclk13>, <&timi1clk>;
^^^ i can't predict value here
cpts-mux-tbl = <0>, <1>, <3>;
^^i can't predict value here
assigned-clocks = <&cpts_mux>;
assigned-clock-parents = <&timi1clk>;
};

or I understood your question wrongly?

> 
>> +};
>> +
>>  interfaces {
>>  gbe0: interface-0 {
>>  slave-port = <0>;
>> diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
>> index 938de22..ef94316 100644
>> --- a/drivers/net/ethernet/ti/cpts.c
>> +++ b/drivers/net/ethernet/ti/cpts.c
>> @@ -17,6 +17,7 @@
>>   * along with this program; if not, write to the Free Software
>>   * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
>>   */
>> +#include 
>>  #include 
>>  #include 
>>  #include 
>> @@ -672,6 +673,7 @@ int cpts_register(struct cpts *cpts)
>>  cpts->phc_index = ptp_clock_index(cpts->clock);
>>
>>  schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
>> +
> 
> Maybe in another patch.
> 

sure

>>  return 0;
>>
>>  err_ptp:
>> @@ -741,6 +743,54 @@ static void cpts_calc_mult_shift(struct cpts *cpts)
>>   freq, cpts->cc_mult, cpts->cc.shift, (ns - NSEC_PER_SEC));
>>  }
>>

...

>> +
>> +reg = &cpts->reg->rftclk_sel;
>> +
>> +clk = clk_register_mux_table(cpts->dev, refclk_np->name,
>> + parent_names, num_parents,
>> + 0, reg, 0, 0x1F, 0, mux_table, NULL);
>> +if (IS_ERR(clk))
>> +return PTR_ERR(clk);
>> +
>> +return of_clk_add_provider(refclk_np, of_clk_src_simple_get, clk);
> 
> Can you please use the clk_hw APIs instead?
> 

ok

-- 
regards,
-grygorii


Re: [V9fs-developer] [PATCH 2/5] 9p: store req details and callback in struct p9_req_t

2016-12-09 Thread Stefano Stabellini
On Fri, 9 Dec 2016, Dominique Martinet wrote:
> Nice. I like the idea of async I/Os :)
> 
> Stefano Stabellini wrote on Thu, Dec 08, 2016:
> > Add a few fields to struct p9_req_t. Callback is the function which will
> > be called upon request completion. offset, rsize, pagevec and kiocb
> > store important information regarding the read or write request,
> > essential to complete the request.
> > 
> > Currently not utilized, but they will be used in a later patch.
> > 
> > Signed-off-by: Stefano Stabellini 
> > ---
> >  include/net/9p/client.h | 8 
> >  net/9p/client.c | 9 -
> >  2 files changed, 16 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/net/9p/client.h b/include/net/9p/client.h
> > index aef19c6..69fc2f0 100644
> > --- a/include/net/9p/client.h
> > +++ b/include/net/9p/client.h
> > @@ -110,6 +110,7 @@ enum p9_req_status_t {
> >   *
> >   */
> >  
> > +struct p9_client;
> >  struct p9_req_t {
> > int status;
> > int t_err;
> > @@ -118,6 +119,13 @@ struct p9_req_t {
> > struct p9_fcall *rc;
> > void *aux;
> >  
> > +/* Used for async requests */
> > +   void (*callback)(struct p9_client *c, struct p9_req_t *req, int status);
> > +   size_t offset;
> > +   u64 rsize;
> > +   struct page **pagevec;
> > +   struct kiocb *kiocb;
> > +
> > struct list_head req_list;
> >  };
> >  
> > diff --git a/net/9p/client.c b/net/9p/client.c
> > index b5ea9a3..bfe1715 100644
> > --- a/net/9p/client.c
> > +++ b/net/9p/client.c
> > @@ -405,6 +405,10 @@ static void p9_free_req(struct p9_client *c, struct 
> > p9_req_t *r)
> > int tag = r->tc->tag;
> > p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);
> >  
> > +   r->offset = 0;
> > +   r->rsize = 0;
> > +   r->kiocb = NULL;
> > +   r->callback = NULL;
> 
> Probably want to cleanup r->pagevec here too, even if that doesn't seem
> to have any implication short-term (e.g. only looked at if callback is
> not empty from what I've seen)

Thanks, I missed it.


> > r->status = REQ_STATUS_IDLE;
> > if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool))
> > p9_idpool_put(tag, c->tagpool);
> > @@ -427,7 +431,10 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t 
> > *req, int status)
> > smp_wmb();
> > req->status = status;
> >  
> > -   wake_up(req->wq);
> > +   if (req->callback != NULL)
> > +   req->callback(c, req, status);
> > +   else
> > +   wake_up(req->wq);
> > p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
> >  }
> >  EXPORT_SYMBOL(p9_client_cb);
> 
> Mostly a warning here, but p9_client_cb is called from an interrupt
> context in 9P/RDMA.
> This has been working up till now because we only do a wake_up and
> there's no waiting, but (looking at later patches),
> p9_client_read_complete for example does allocations and possibly other
> unsafe operations from an interrupt context.
> 
> I don't know if the way forward is to move p9_client_cb from that
> context or to have the callback be scheduled in a work queue instead;
> but we'll need to fix that later.

Either would work. It might be simpler to have the callback run as a
work queue. I'll make the change. Maybe I'll use kiocb to figure out if
we have to schedule_work.


Re: [PATCH 1/1] dri: vc4: set error code on failure

2016-12-09 Thread Eric Anholt
Pan Bian  writes:

> Function vc4_cl_lookup_bos() does not set the error code when
> drm_malloc_ab() returns a NULL pointer, and will return 0 (indicates
> success). This patch fixes the bug, assigning "-ENOMEM" to the return
> variable ret on the path that memory allocation fails.
>
> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188631
> Signed-off-by: Pan Bian 

This one was already fixed in b2cdeb19f16ad984eb5bb9193f793d05a8101511


signature.asc
Description: PGP signature


Re: [PATCH] proc: mm: export PTE sizes directly in smaps (v3)

2016-12-09 Thread Dave Hansen
On 12/01/2016 06:50 AM, Andy Shevchenko wrote:
>> > +static int size_shift(unsigned long long nr)
>> > +{
>> > +   if (nr < (1ULL<<10))
>> > +   return 0;
>> > +   if (nr < (1ULL<<20))
>> > +   return 10;
>> > +   if (nr < (1ULL<<30))
>> > +   return 20;
>> > +   if (nr < (1ULL<<40))
>> > +   return 30;
>> > +   if (nr < (1ULL<<50))
>> > +   return 40;
>> > +   if (nr < (1ULL<<60))
>> > +   return 50;
>> > +   return 60;
>> > +}
>> > +
> New copy of string_get_size() ?

Not really.  That prints to a buffer, so we'll need to allocate stack
space for a buffer, which we also have to size properly.  We also want
to be consistent with other parts of smaps that mean kB==1024 bytes, so
we want string_get_size()'s STRING_UNITS_10 strings, but
STRING_UNITS_2's divisor.

Also, guaranteeing that we have a power-of-2 'block size' lets us cheat
and do things much faster than using real division.  Not that it
matters, but we could do it thousands of times for a large smaps file.

Being defined locally, this stuff also gets inlined pretty aggressively.

Given all that, I'm not sure I want to modify string_get_size() to do
exactly what we need here.


Remaining crypto API regressions with CONFIG_VMAP_STACK

2016-12-09 Thread Eric Biggers
In the 4.9 kernel, virtually-mapped stacks will be supported and enabled by
default on x86_64.  This has been exposing a number of problems in which
on-stack buffers are being passed into the crypto API, which to support crypto
accelerators operates on 'struct page' rather than on virtual memory.

Some of these problems have already been fixed, but I was wondering how many
problems remain, so I briefly looked through all the callers of sg_set_buf() and
sg_init_one().  Overall I found quite a few remaining problems, detailed below.

The following crypto drivers initialize a scatterlist to point into an
ahash_request, which may have been allocated on the stack with
AHASH_REQUEST_ON_STACK():

drivers/crypto/bfin_crc.c:351
drivers/crypto/qce/sha.c:299
drivers/crypto/sahara.c:973,988
drivers/crypto/talitos.c:1910
drivers/crypto/ccp/ccp-crypto-aes-cmac.c:105,119,142
drivers/crypto/ccp/ccp-crypto-sha.c:95,109,124
drivers/crypto/qce/sha.c:325

The following crypto drivers initialize a scatterlist to point into an
ablkcipher_request, which may have been allocated on the stack with
SKCIPHER_REQUEST_ON_STACK():

drivers/crypto/ccp/ccp-crypto-aes-xts.c:162
drivers/crypto/ccp/ccp-crypto-aes.c:94

And these other places do crypto operations on buffers clearly on the stack:

drivers/net/wireless/intersil/orinoco/mic.c:72
drivers/usb/wusbcore/crypto.c:264
net/ceph/crypto.c:182
net/rxrpc/rxkad.c:737,1000
security/keys/encrypted-keys/encrypted.c:500
fs/cifs/smbencrypt.c:96

Note: I almost certainly missed some, since I excluded places where the use of a
stack buffer was not obvious to me.  I also excluded AEAD algorithms since there
isn't an AEAD_REQUEST_ON_STACK() macro (yet).

The "good" news with these bugs is that on x86_64 without CONFIG_DEBUG_SG=y or
CONFIG_DEBUG_VIRTUAL=y, you can still do virt_to_page() and then page_address()
on a vmalloc address and get back the same address, even though you aren't
*supposed* to be able to do this.  This will make things still work for most
people.  The bad news is that if you happen to have consumed just about 1 page
(or N pages) of your stack at the time you call the crypto API, your stack
buffer may actually span physically non-contiguous pages, so the crypto
algorithm will scribble over some unrelated page.  Also, hardware crypto drivers
which actually do operate on physical memory will break too.

So I am wondering: is the best solution really to make all these crypto API
algorithms and users use heap buffers, as opposed to something like maintaining
a lowmem alias for the stack, or introducing a more general function to convert
buffers (possibly in the vmalloc space) into scatterlists?  And if the current
solution is desired, who is going to fix all of these bugs and when?

Eric


Re: [PATCH] x86/smpboot: Make logical package management more robust

2016-12-09 Thread Thomas Gleixner
On Fri, 9 Dec 2016, Boris Ostrovsky wrote:
> On 12/09/2016 05:06 PM, Thomas Gleixner wrote:
> > On Thu, 8 Dec 2016, Thomas Gleixner wrote:
> >
> > Boris, can you please verify if that makes the
> > topology_update_package_map() call which you placed into the Xen cpu
> > starting code obsolete ?
> 
> Will do. I did test your patch but without removing
> topology_update_package_map() call. It complained about package IDs
> being wrong, but that's expected until I fix Xen part.

That should no longer be the case as I changed the approach to that
management thing.

Thanks,

tglx


Re: [PATCH] x86/smpboot: Make logical package management more robust

2016-12-09 Thread Boris Ostrovsky
On 12/09/2016 05:06 PM, Thomas Gleixner wrote:
> On Thu, 8 Dec 2016, Thomas Gleixner wrote:
>
> Boris, can you please verify if that makes the
> topology_update_package_map() call which you placed into the Xen cpu
> starting code obsolete ?

Will do. I did test your patch but without removing
topology_update_package_map() call. It complained about package IDs
being wrong, but that's expected until I fix Xen part.

-boris


Re: [PATCH] x86/tsc: RFC: re-synchronize TSCs to boot cpu TSC

2016-12-09 Thread Thomas Gleixner
On Fri, 9 Dec 2016, Roland Scheidegger wrote:

Cc'ed someone from Dell. 

> Am 09.12.2016 um 18:33 schrieb Thomas Gleixner:
> > Can you add the patch below to gather more information? There is a hunk in
> > there with an '#if 0' which sets the TSC ADJUST to 0 on boot, which you can
> > turn on as second step.
> 
> Ok, here's the results:
> ...
> TSC ADJUST synchronize: Reference CPU0: -2820267100 CPU1: -2822498296
> TSC target sync skipped
> smpboot: Vector locked
> smpboot: Vector setup done
> smpboot: Clock setup
> TSC source sync skipped
> smpboot: Target CPU is online

I did not expect that to happen. Now I'm puzzled and curious where the
machine gets lost after that. See below.

> With the #if 0 block activated, it boots up fine, the output was:

That does not make any sense at all, but yes, nothing in this context makes
sense.

> [1.038892] x86: Booting SMP configuration:
> [1.038930]  node  #0, CPUs:#1
> [0.171851] TSC ADJUST: CPU1: -2830353064 218577682002
> [1.117495] TSC source sync 0 -> 1 runs 3
> [0.171852] TSC ADJUST differs: Reference CPU0: -2828600940 CPU1:
> -2830353064
> [0.171853] TSC ADJUST synchronize: Reference CPU0: 0 CPU1: -2830353064
> [1.117497] TSC target sync skip

> (And fwiw with my quick hack the lockups disappear too when I change that
> back to blast a zero into TSC_ADJ for all cpus.)

Right, That's what that hunk does as well.

Now what's interesting is that the adjustment of CPU1 in the non write to
zero case results in the following:

TSC ADJUST: CPU1: -2830353064 218577682002 <-- TSC value
TSC ADJUST differs: Reference CPU0: -2828600940 CPU1: -2830353064

We write CPU1 adjust register to -2828600940 which makes the TSC on CPU1
jump forwards by -2828600940 - -2830353064 = 1752124 cycles.

In the write to zero case the jump is forward as well, but this time it's
huge, i.e. 2830353064 cycles.

I tried to wreck the TSC by writing similar values to the adjust MSR on
early boot, but independent of the values and independent of the write to
zero part the machine comes up happily all the time.

The only difference is that my machine has a somewhat saner BIOS. So the
thing might just die in the value add SMM crap, but who knows.

In the patch below is another bunch of debug prints which emit the state
information of CPU1 during bringup. Maybe that gives a hint where the
system gets stuck when you disable the 'write to zero' magic again.

The NMI watchdog does not catch anything, right?

> The system also came back up fine from suspend with this (well - still
> minus graphics...), however disabled tsc clocksource:
> 
> [  579.931739] Enabling non-boot CPUs ...
> [  579.943107] smpboot: Booting Node 0 Processor 1 APIC 0x2
> [  579.943189] TSC ADJUST: CPU1: -1504429974 21601834126

Fun, yet another adjust value. Are they set by a random number generator?

> [  579.944093] CPU1 is up

> [  580.458983] clocksource: timekeeping watchdog on CPU1: Marking
> clocksource 'tsc' as unstable because the skew is too large:
> [  580.458985] clocksource:   'hpet' wd_now: 587c1
> wd_last: 437c7 mask: 
> [  580.458986] clocksource:   'tsc' cs_now:
> 563963cd8 cs_last: 508f5a02a mask: 

Ok, that's caused by the fact that we do not sanitize the TSC adjust
register on the boot CPU in the resume path.

> [  581.006760] [Firmware Bug]: TSC ADJUST differs: CPU0 0 -->
> -1502494750. Restoring

We only detect it later and correct it, but that's too late. The untested
patch below should cure that.

> > This BIOS seems to be rather strange ...
> Don't tell me...
> 
> No idea what it's doing, but I think it's safe to say whatever it's
> trying to do, it's doing it wrong...

Amen to that. I've seen a lot of Value Add BIOSes which broke things left
and right, but this one takes it to a new level of insanity.

Did you report that back to DELL already?

Thanks,

tglx

8<
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -47,7 +47,7 @@ extern int tsc_clocksource_reliable;
  */
 #ifdef CONFIG_X86_TSC
 extern bool tsc_store_and_check_tsc_adjust(void);
-extern void tsc_verify_tsc_adjust(void);
+extern void tsc_verify_tsc_adjust(bool resume);
 extern void check_tsc_sync_source(int cpu);
 extern void check_tsc_sync_target(void);
 #else
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -277,7 +277,7 @@ void exit_idle(void)
 
 void arch_cpu_idle_enter(void)
 {
-   tsc_verify_tsc_adjust();
+   tsc_verify_tsc_adjust(false);
local_touch_nmi();
enter_idle();
 }
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1080,6 +1080,11 @@ static void detect_art(void)
 
 static struct clocksource clocksource_tsc;
 
+static void tsc_resume(struct clocksource *cs)
+{
+   tsc_verify_tsc_adjust(true);
+}
+
 /*
  * We used to compare the TSC to the cycle_last value in the clocksource
  * structure to avoid a nasty time-warp. This can 

Re: [PATCH] x86/kbuild: enable modversions for symbols exported from asm

2016-12-09 Thread Dodji Seketeli
Hello,

Nicholas Piggin  a écrit:

[...]

> That said, a dwarf based checker tool should be able to do as good a job
> (maybe a bit better because report is very informative and it may pick up
> compiler alignments or padding options).

So, Nicholas was kind enough to send me the two Linux Kernel binaries
that he built with the tiny little interface change that we were
discussing earlier.  Here is what the abidiff[1] tool says about that
interface change:

$ time ~/git/libabigail/kabidiff/build/tools/abidiff vmlinux.abi1.abi 
vmlinux.abi2.abi
Functions changes summary: 0 Removed, 1 Changed, 0 Added function
Variables changes summary: 0 Removed, 0 Changed, 0 Added variable

1 function with some indirect sub-type change:

  [C]'function int foo(blah*)' at memory.c:82:1 has some indirect sub-type 
changes:
parameter 1 of type 'blah*' has sub-type changes:
  in pointed to type 'struct blah' at memory.c:78:1:
type size changed from 32 to 64 bits
1 data member insertion:
  'int blah::y', at offset 0 (in bits) at memory.c:79:1
1 data member change:
 'int blah::x' offset changed from 0 to 32 (in bits) (by +32 bits)



real0m2.595s
user0m2.489s
sys 0m0.108s
$ 

I kept the timing information to give you an idea of the time it takes
on a non-optimized build of abidiff.

One could for instance want that types that are not defined in header
files be kept out of the change report.  In that case it's possible to
write a little suppression specification file like this one:

$ cat vmlinux.abignore 
[suppress_type]
  source_location_not_regexp = .*\\.h
$

You can then pass that suppression file to the tool:

$ ~/git/libabigail/kabidiff/build/tools/abidiff --suppr vmlinux.abignore 
vmlinux.abi1.abi vmlinux.abi2.abi
Functions changes summary: 0 Removed, 0 Changed (1 filtered out), 0 Added 
function
Variables changes summary: 0 Removed, 0 Changed, 0 Added variable


real0m2.574s
user0m2.473s
sys 0m0.102s
$

So this is the kind of interface change analysis tool we are working on
at the moment.

One could also imagine a tool that would compute a CRC that takes the
very same suppression specification files into account, letting people
decide that some interface changes are OK.  That CRC would thus be
added to the special ELF sections we already have today.  We could keep
the modversion machinery, but with a greater dose of flexibility.
Whenever modversion detects a change, abidiff would tell people what the
change is exactly.

What do you guys think?

[1]: https://sourceware.org/libabigail/manual/abidiff.html
 Okay, the abidiff I used in this message is one that is not yet
 released.  Its source code is in the dodji/kabidiff branch of the
 Git repository at https://sourceware.org/git/?p=libabigail.git;a=summary


Cheers,

-- 
Dodji


Re: [PATCH v3 2/6] mfd: dt: ranges, #address-cells and #size-cells as optional properties

2016-12-09 Thread Andrew Jeffery
On Fri, 2016-12-09 at 16:49 -0600, Rob Herring wrote:
> On Tue, Dec 06, 2016 at 01:53:17PM +1100, Andrew Jeffery wrote:
> > Whilst describing a device and not a bus, simple-mfd is modelled on
> > simple-bus where child nodes are iterated and registered as platform
> > devices. Some complex devices, e.g. the Aspeed LPC controller, can
> > benefit from address space mapping such that child nodes can use the
> > regs property to describe their resources within the multi-function
> > device.
> > 
> > > > Signed-off-by: Andrew Jeffery 
> > ---
> >  Documentation/devicetree/bindings/mfd/mfd.txt | 10 ++
> >  1 file changed, 10 insertions(+)
> 
> No objections to this, but this is all implied by having a reg property.

Thanks for clarifying. I wasn't sure so I wrote the patch with the
thought that we could drop it if it wasn't necessary. Regardless, I
think being explicit about the properties is nice.

> 
> Acked-by: Rob Herring 

Thanks,

Andrew

> 
> Rob

signature.asc
Description: This is a digitally signed message part


RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers

2016-12-09 Thread Haiyang Zhang


> -Original Message-
> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Friday, December 9, 2016 5:05 PM
> To: Haiyang Zhang 
> Cc: Greg KH ; KY Srinivasan
> ; o...@aepfle.de; linux-kernel@vger.kernel.org;
> bjorn.helg...@gmail.com; a...@canonical.com; de...@linuxdriverproject.org;
> leann.ogasaw...@canonical.com; jasow...@redhat.com
> Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> serial numbers
> 
> On Fri, 9 Dec 2016 21:53:49 +
> Haiyang Zhang  wrote:
> 
> > > -Original Message-
> > > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > > Sent: Friday, December 9, 2016 4:45 PM
> > > To: Haiyang Zhang 
> > > Cc: Greg KH ; KY Srinivasan
> > > ; o...@aepfle.de; linux-kernel@vger.kernel.org;
> > > bjorn.helg...@gmail.com; a...@canonical.com;
> de...@linuxdriverproject.org;
> > > leann.ogasaw...@canonical.com; jasow...@redhat.com
> > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > > serial numbers
> > >
> > > On Fri, 9 Dec 2016 21:31:25 +
> > > Haiyang Zhang  wrote:
> > >
> > > > > -Original Message-
> > > > > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > > > > Sent: Friday, December 9, 2016 3:30 PM
> > > > > To: Haiyang Zhang 
> > > > > Cc: Greg KH ; KY Srinivasan
> > > > > ; o...@aepfle.de; linux-
> ker...@vger.kernel.org;
> > > > > bjorn.helg...@gmail.com; a...@canonical.com;
> > > de...@linuxdriverproject.org;
> > > > > leann.ogasaw...@canonical.com; jasow...@redhat.com
> > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based
> on
> > > > > serial numbers
> > > > >
> > > > > On Fri, 9 Dec 2016 20:09:49 +
> > > > > Haiyang Zhang  wrote:
> > > > >
> > > > > > > -Original Message-
> > > > > > > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > > > > > > Sent: Friday, December 9, 2016 1:21 PM
> > > > > > > To: Greg KH 
> > > > > > > Cc: KY Srinivasan ; o...@aepfle.de;
> Haiyang
> > > Zhang
> > > > > > > ; linux-kernel@vger.kernel.org;
> > > > > > > bjorn.helg...@gmail.com; a...@canonical.com;
> > > > > de...@linuxdriverproject.org;
> > > > > > > leann.ogasaw...@canonical.com; jasow...@redhat.com
> > > > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching
> based
> > > on
> > > > > > > serial numbers
> > > > > > >
> > > > > > > On Fri, 9 Dec 2016 08:31:22 +0100
> > > > > > > Greg KH  wrote:
> > > > > > >
> > > > > > > > On Fri, Dec 09, 2016 at 12:05:53AM +, KY Srinivasan
> wrote:
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > -Original Message-
> > > > > > > > > > From: Greg KH [mailto:gre...@linuxfoundation.org]
> > > > > > > > > > Sent: Thursday, December 8, 2016 7:56 AM
> > > > > > > > > > To: KY Srinivasan 
> > > > > > > > > > Cc: linux-kernel@vger.kernel.org;
> > > de...@linuxdriverproject.org;
> > > > > > > > > > o...@aepfle.de; a...@canonical.com; vkuzn...@redhat.com;
> > > > > > > > > > jasow...@redhat.com; leann.ogasaw...@canonical.com;
> > > > > > > > > > bjorn.helg...@gmail.com; Haiyang Zhang
> > > 
> > > > > > > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF
> matching
> > > > > based on
> > > > > > > serial
> > > > > > > > > > numbers
> > > > > > > > > >
> > > > > > > > > > On Thu, Dec 08, 2016 at 12:33:43AM -0800,
> > > > > > > k...@exchange.microsoft.com
> > > > > > > > > > wrote:
> > > > > > > > > > > From: Haiyang Zhang 
> > > > > > > > > > >
> > > > > > > > > > > We currently use MAC address to match VF and
> synthetic
> > > NICs.
> > > > > > > Hyper-V
> > > > > > > > > > > provides a serial number to both devices for this
> > > purpose.
> > > > > This
> > > > > > > patch
> > > > > > > > > > > implements the matching based on VF serial numbers.
> This
> > > is
> > > > > the
> > > > > > > way
> > > > > > > > > > > specified by the protocol and more reliable.
> > > > > > > > > > >
> > > > > > > > > > > Signed-off-by: Haiyang Zhang 
> > > > > > > > > > > Signed-off-by: K. Y. Srinivasan 
> > > > > > > > > > > ---
> > > > > > > > > > >  drivers/net/hyperv/netvsc_drv.c |   55
> > > > > > > > > > ---
> > > > > > > > > > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > > > > > > > > > >
> > > > > > > > > > > diff --git a/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > > b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > > > index 9522763..c5778cf 100644
> > > > > > > > > > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > > > @@ -1165,9 +1165,10 @@ static void
> > > netvsc_free_netdev(struct
> > > > > > > > > > net_device *netdev)
> > > > > > > > > > >   free_netdev(netdev);
> > > > > > > > > > >  }
> > > > > > > > > > >
> > > > > > > > > > > -static struct net_device *get_netvsc_bymac(const u8
> > > *mac)
> > > > > > > > > > > +static struct net_device *get_netvsc_byvfser(u32
> vfser)
> > > > > > > > > > >  {
> > > > > > > > > > >   struct net_device *dev;
> > > > > > > > > > 

Re: [PATCH 18/22] m68k/mm: kmap - Modernize printing of kernel messages

2016-12-09 Thread Finn Thain

On Thu, 8 Dec 2016, I wrote:

> 
> On Wed, 7 Dec 2016, Geert Uytterhoeven wrote:
> 
> >   - Convert from printk() to pr_*(),
> >   - Add missing continuations,
> >   - Remove #undef DEBUG.
> > 
> > Note that "#ifdef DEBUG" is sometimes retained because pr_cont() is 
> > not optimized away when debugging is disabled.
> > 
> 
> I think that argues for using printk(KERN_DEBUG ...) and printk(KERN_CONT 
> ...) inside #ifdef DEBUG, which would need no explanation.
> 
> If instead you use a combination of pr_debug and pr_cont and #ifdef 
> DEBUG, perhaps the explanation should be moved from the commit log to a 
> comment in the code?
> 

Perhaps a better solution than these alternatives would be,

#if defined(DEBUG)
#define pr_debug_cont pr_cont
#else
#define pr_debug_cont no_printk
#endif

But this API is still surprising and ugly. It doesn't work with 
CONFIG_DYNAMIC_DEBUG but that's not so important.

IMO, a far better linux/printk.h would have provided us with these 
definitions:

#define pr_emerg(fmt, ...) \
printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
/* ... */
#define pr_debug(fmt, ...) \
printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#define pr_cont(fmt, ...) \
printk(KERN_CONT fmt, ##__VA_ARGS__)

#if defined(CONFIG_DYNAMIC_DEBUG)
#define pr_debug_cond(fmt, ...) \
dynamic_pr_debug(fmt, ##__VA_ARGS__)
#define pr_cont_cond(fmt, ...) \
dynamic_pr_cont(fmt, ##__VA_ARGS__)
#elif defined(DEBUG)
#define pr_debug_cond(fmt, ...) \
printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#define pr_cont_cond(fmt, ...) \
no_printk(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__)
#else
#define pr_debug_cond(fmt, ...) \
no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#define pr_cont_cond(fmt, ...) \
no_printk(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__)
#endif

Which have the virtues of symmetry and least surprise.

-- 


Re: [PATCH v3 2/6] mfd: dt: ranges, #address-cells and #size-cells as optional properties

2016-12-09 Thread Rob Herring
On Tue, Dec 06, 2016 at 01:53:17PM +1100, Andrew Jeffery wrote:
> Whilst describing a device and not a bus, simple-mfd is modelled on
> simple-bus where child nodes are iterated and registered as platform
> devices. Some complex devices, e.g. the Aspeed LPC controller, can
> benefit from address space mapping such that child nodes can use the
> regs property to describe their resources within the multi-function
> device.
> 
> Signed-off-by: Andrew Jeffery 
> ---
>  Documentation/devicetree/bindings/mfd/mfd.txt | 10 ++
>  1 file changed, 10 insertions(+)

No objections to this, but this is all implied by having a reg property.

Acked-by: Rob Herring 

Rob


[PATCH] PCI: pciehp: Optimize PCIe root resume time

2016-12-09 Thread Vaibhav Shankar
On Apollolake platforms, PCIe rootport takes a long time to resume
from S3. With 100ms delay before read pci conf, rootport takes
~200ms during resume.

commit 2f5d8e4ff947 ("PCI: pciehp: replace unconditional sleep with
config space access check") is the one that added the 100ms delay
before reading pci conf.

This patch removes the 100ms delay. By removing the delay, the
PCIe root port takes ~16ms during resume. As per the PCIe spec, we
only require a 1000ms delay. This delay is provided by the
pci_bus_check_dev() function.

With 100ms delay:
[  155.102713] calling  :00:14.0+ @ 70, parent: pci:00, cb: 
pci_pm_resume_noirq
[  155.119337] call :00:14.0+ returned 0 after 16231 usecs
[  155.119467] calling  :01:00.0+ @ 5845, parent: :00:14.0, cb: 
pci_pm_resume_noirq
[  155.321670] call :00:14.0+ returned 0 after 185327 usecs
[  155.321743] calling  :01:00.0+ @ 5849, parent: :00:14.0, cb: 
pci_pm_resume

After removing 100ms delay:
[   36.624709] calling  :00:14.0+ @ 4434, parent: pci:00, cb: 
pci_pm_resume_noirq
[   36.641367] call :00:14.0+ returned 0 after 16263 usecs
[   36.652458] calling  :00:14.0+ @ 4443, parent: pci:00, cb: 
pci_pm_resume
[   36.652673] call :00:14.0+ returned 0 after 208 usecs
[   36.652863] calling  :01:00.0+ @ 4442, parent: :00:14.0, cb: 
pci_pm_resume

Signed-off-by: Vaibhav Shankar 
---
 drivers/pci/hotplug/pciehp_hpc.c |2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 5c24e93..08357e7 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -311,8 +311,6 @@ int pciehp_check_link_status(struct controller *ctrl)
else
msleep(1000);
 
-   /* wait 100ms before read pci conf, and try in 1s */
-   msleep(100);
found = pci_bus_check_dev(ctrl->pcie->port->subordinate,
PCI_DEVFN(0, 0));
 
-- 
1.7.9.5



Re: [PATCH v3 1/6] mfd: dt: Fix "indicates" typo in mfd bindings document

2016-12-09 Thread Rob Herring
On Tue, Dec 06, 2016 at 01:53:16PM +1100, Andrew Jeffery wrote:
> Signed-off-by: Andrew Jeffery 
> ---
>  Documentation/devicetree/bindings/mfd/mfd.txt | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Acked-by: Rob Herring 


RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers

2016-12-09 Thread Haiyang Zhang


> -Original Message-
> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Friday, December 9, 2016 1:21 PM
> To: Greg KH 
> Cc: KY Srinivasan ; o...@aepfle.de; Haiyang Zhang
> ; linux-kernel@vger.kernel.org;
> bjorn.helg...@gmail.com; a...@canonical.com; de...@linuxdriverproject.org;
> leann.ogasaw...@canonical.com; jasow...@redhat.com
> Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> serial numbers
> 
> On Fri, 9 Dec 2016 08:31:22 +0100
> Greg KH  wrote:
> 
> > On Fri, Dec 09, 2016 at 12:05:53AM +, KY Srinivasan wrote:
> > >
> > >
> > > > -Original Message-
> > > > From: Greg KH [mailto:gre...@linuxfoundation.org]
> > > > Sent: Thursday, December 8, 2016 7:56 AM
> > > > To: KY Srinivasan 
> > > > Cc: linux-kernel@vger.kernel.org; de...@linuxdriverproject.org;
> > > > o...@aepfle.de; a...@canonical.com; vkuzn...@redhat.com;
> > > > jasow...@redhat.com; leann.ogasaw...@canonical.com;
> > > > bjorn.helg...@gmail.com; Haiyang Zhang 
> > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> serial
> > > > numbers
> > > >
> > > > On Thu, Dec 08, 2016 at 12:33:43AM -0800,
> k...@exchange.microsoft.com
> > > > wrote:
> > > > > From: Haiyang Zhang 
> > > > >
> > > > > We currently use MAC address to match VF and synthetic NICs.
> Hyper-V
> > > > > provides a serial number to both devices for this purpose. This
> patch
> > > > > implements the matching based on VF serial numbers. This is the
> way
> > > > > specified by the protocol and more reliable.
> > > > >
> > > > > Signed-off-by: Haiyang Zhang 
> > > > > Signed-off-by: K. Y. Srinivasan 
> > > > > ---
> > > > >  drivers/net/hyperv/netvsc_drv.c |   55
> > > > ---
> > > > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/drivers/net/hyperv/netvsc_drv.c
> > > > b/drivers/net/hyperv/netvsc_drv.c
> > > > > index 9522763..c5778cf 100644
> > > > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > > > @@ -1165,9 +1165,10 @@ static void netvsc_free_netdev(struct
> > > > net_device *netdev)
> > > > >   free_netdev(netdev);
> > > > >  }
> > > > >
> > > > > -static struct net_device *get_netvsc_bymac(const u8 *mac)
> > > > > +static struct net_device *get_netvsc_byvfser(u32 vfser)
> > > > >  {
> > > > >   struct net_device *dev;
> > > > > + struct net_device_context *ndev_ctx;
> > > > >
> > > > >   ASSERT_RTNL();
> > > > >
> > > > > @@ -1175,7 +1176,8 @@ static void netvsc_free_netdev(struct
> net_device
> > > > *netdev)
> > > > >   if (dev->netdev_ops != &device_ops)
> > > > >   continue;   /* not a netvsc device */
> > > > >
> > > > > - if (ether_addr_equal(mac, dev->perm_addr))
> > > > > + ndev_ctx = netdev_priv(dev);
> > > > > + if (ndev_ctx->vf_serial == vfser)
> > > > >   return dev;
> > > > >   }
> > > > >
> > > > > @@ -1205,21 +1207,66 @@ static void netvsc_free_netdev(struct
> > > > net_device *netdev)
> > > > >   return NULL;
> > > > >  }
> > > > >
> > > > > +static u32 netvsc_get_vfser(struct net_device *vf_netdev)
> > > > > +{
> > > > > + struct device *dev;
> > > > > + struct hv_device *hdev;
> > > > > + struct hv_pcibus_device *hbus = NULL;
> > > > > + struct list_head *iter;
> > > > > + struct hv_pci_dev *hpdev;
> > > > > + unsigned long flags;
> > > > > + u32 vfser = 0;
> > > > > + u32 count = 0;
> > > > > +
> > > > > + for (dev = &vf_netdev->dev; dev; dev = dev->parent) {
> > > >
> > > > You are going to walk the whole device tree backwards?  That's
> crazy.
> > > > And foolish.  And racy and broken (what happens if the tree
> changes
> > > > while you do this?)  Where is the lock being grabbed while this
> happens?
> > > > What about reference counts?  Do you see other drivers ever doing
> this
> > > > (if you do, point them out and I'll go yell at them too...)
> > >
> > > Greg,
> > >
> > > We are registering for netdev events. Coming into this function, the
> caller
> > > guarantees that the list of netdevs does not change - we assert this
> on entry:
> > > ASSERT_RTNL(). We are only walking up the device tree for the
> netdevs whose
> > > state change is being notified to us - the device tree being walked
> here is limited to
> > > netdevs under question.
> >
> > But a netdev is a child of some type of "real" device, and you are now
> > walking the tree of all devices up to the "root" parent device, which
> > means you will hit PCI bridges, USB controllers, and all sorts of fun
> > things if you are a child of those types of devices.
> >
> > And can't you tell if the netdev for this event, really is "your"
> > netdev?  Or are you getting called this for "all" netdevs?  Sorry, I
> > don't know this api, any pointers to it would be appreciated.
> >
> > > We have a reference to the device and we know the device is no

Re: [PATCH 4.8 00/45] 4.8.14-stable review

2016-12-09 Thread Guenter Roeck
On Fri, Dec 09, 2016 at 05:20:29PM +0100, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.8.14 release.
> There are 45 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sun Dec 11 16:17:38 UTC 2016.
> Anything received after that time might be too late.
> 
Build results:
total: 149 pass: 149 fail: 0
Qemu test results:
total: 122 pass: 122 fail: 0

Details are available at http://kerneltests.org/builders.

Guenter


Re: [PATCH v2 1/4] net: hix5hd2_gmac: add generic compatible string

2016-12-09 Thread Rob Herring
On Mon, Dec 05, 2016 at 09:27:58PM +0800, Dongpo Li wrote:
> The "hix5hd2" is SoC name, add the generic ethernet driver name.
> The "hisi-gemac-v1" is the basic version and "hisi-gemac-v2" adds
> the SG/TXCSUM/TSO/UFO features.
> 
> Signed-off-by: Dongpo Li 
> ---
>  .../devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt|  9 +++--
>  drivers/net/ethernet/hisilicon/hix5hd2_gmac.c | 15 
> +++
>  2 files changed, 18 insertions(+), 6 deletions(-)
> 
> diff --git a/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt 
> b/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt
> index 75d398b..75920f0 100644
> --- a/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt
> +++ b/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt
> @@ -1,7 +1,12 @@
>  Hisilicon hix5hd2 gmac controller
>  
>  Required properties:
> -- compatible: should be "hisilicon,hix5hd2-gmac".
> +- compatible: should contain one of the following SoC strings:
> + * "hisilicon,hix5hd2-gemac"
> + * "hisilicon,hi3798cv200-gemac"
> + and one of the following version string:
> + * "hisilicon,hisi-gemac-v1"
> + * "hisilicon,hisi-gemac-v2"

What combinations are valid? I assume both chips don't have both v1 and 
v2. 2 SoCs and 2 versions so far, I don't think there is much point to 
have the v1 and v2 compatible strings.

>  - reg: specifies base physical address(s) and size of the device registers.
>The first region is the MAC register base and size.
>The second region is external interface control register.
> @@ -20,7 +25,7 @@ Required properties:
>  
>  Example:
>   gmac0: ethernet@f984 {
> - compatible = "hisilicon,hix5hd2-gmac";
> + compatible = "hisilicon,hix5hd2-gemac", 
> "hisilicon,hisi-gemac-v1";

You can't just change compatible strings.

>   reg = <0xf984 0x1000>,<0xf984300c 0x4>;
>   interrupts = <0 71 4>;
>   #address-cells = <1>;


Re: [PATCH 4.4 00/28] 4.4.38-stable review

2016-12-09 Thread Guenter Roeck
On Fri, Dec 09, 2016 at 05:17:42PM +0100, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.4.38 release.
> There are 28 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sun Dec 11 16:17:32 UTC 2016.
> Anything received after that time might be too late.
> 

Build results:
total: 149 pass: 149 fail: 0
Qemu test results:
total: 115 pass: 115 fail: 0

Details are available at http://kerneltests.org/builders.

Guenter


[PATCH] tty: serial: fsl_lpuart: potential NULL dereference

2016-12-09 Thread Alexey Khoroshilov
tty_port_tty_get() might return a tty which is NULL
if the port is not associated with a tty
(e.g. due to close or hangup).
But lpuart_start_rx_dma() dereferences tty without any check.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov 
---
 drivers/tty/serial/fsl_lpuart.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 76103f2c4a80..9945b37c914a 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -833,9 +833,16 @@ static inline int lpuart_start_rx_dma(struct lpuart_port 
*sport)
struct circ_buf *ring = &sport->rx_ring;
int ret, nent;
int bits, baud;
-   struct tty_struct *tty = tty_port_tty_get(&sport->port.state->port);
-   struct ktermios *termios = &tty->termios;
+   struct tty_struct *tty;
+   struct ktermios *termios;
 
+   tty = tty_port_tty_get(&sport->port.state->port);
+   if (!tty) {
+   dev_err(sport->port.dev, "Port is not associated with a tty\n");
+   return -ENODEV;
+   }
+
+   termios = &tty->termios;
baud = tty_get_baud_rate(tty);
 
bits = (termios->c_cflag & CSIZE) == CS7 ? 9 : 10;
-- 
2.7.4



Re: [PATCH] sched/pid fix use-after free in task_tgid_vnr

2016-12-09 Thread Eric W. Biederman
Oleg Nesterov  writes:

> On 12/09, EunTaik Lee wrote:
>>
>> There is a use-after-free case with below call stack.
>>
>> pid_nr_ns+0x10/0x38
>> cgroup_pidlist_start+0x144/0x400
>> cgroup_seqfile_start+0x1c/0x24
>> kernfs_seq_start+0x54/0x90
>> seq_read+0x15c/0x3a8
>> kernfs_fop_read+0x38/0x160
>> __vfs_read+0x28/0xc8
>> vfs_read+0x84/0xfc

How is this a use after free?  The function pid_nr_ns should take a NULL pointer
as input and return 0?

Certainly if the addition of pid_alive fixes it, pid_vnr(task_tgid(tsk))
is fine.  Are we perhaps missing rcu locking?

Or is the problem simply that in task_tgid we are accessing
task->group_leader which may already be dead?  If so the fix needs to be
in task_tgid.

> This reminds about perf_event_pid() which is equally buggy...
>
>>  static inline pid_t task_tgid_vnr(struct task_struct *tsk)
>>  {
>> -return pid_vnr(task_tgid(tsk));
>> +pid_t pid = 0;
>> +
>> +rcu_read_lock();
>> +if (pid_alive(tsk))
>> +pid = pid_vnr(task_tgid(tsk));
>> +rcu_read_unlock();
>> +
>> +return pid;
>>  }
>
> Eric, EunTaik, what do you think about the patch below?
>
> I can't decide whether it is too ugly or not, but it would be nice
> to avoid the code duplication.

I think it can be beaten into shape but I am not certain it addresses the
core issue.

>
> Oleg.
>
>
> --- x/include/linux/pid.h
> +++ x/include/linux/pid.h
> @@ -8,7 +8,8 @@ enum pid_type
>   PIDTYPE_PID,
>   PIDTYPE_PGID,
>   PIDTYPE_SID,
> - PIDTYPE_MAX
> + PIDTYPE_MAX,
> + PIDTYPE_TGID/* do not use */


I would do:

/* __PIDTYPE_TGID is only valid to __task_pid_nr_ns */
#define __PIDTYPE_TGID PIDTYPE_MAX

Prefixing __PIDTYPE_TGID  with __ should help make it clear
this is a special use define.

I am also curious why pid_alive is the proper check to see if
task->group_leader is valid?  That feels like it could get us into
trouble later.

Especially as that is the real problem child here.

>  };
>  
>  /*
> --- x/kernel/pid.c
> +++ x/kernel/pid.c
> @@ -526,8 +526,11 @@ pid_t __task_pid_nr_ns(struct task_struc
>   if (!ns)
>   ns = task_active_pid_ns(current);
>   if (likely(pid_alive(task))) {
> - if (type != PIDTYPE_PID)
> + if (type != PIDTYPE_PID) {
> + if (type == PIDTYPE_TGID)
> + type = PIDTYPE_PID;
>   task = task->group_leader;
> + }
>   nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
>   }
>   rcu_read_unlock();
> @@ -538,7 +541,7 @@ EXPORT_SYMBOL(__task_pid_nr_ns);
>  
>  pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
>  {
> - return pid_nr_ns(task_tgid(tsk), ns);
> + return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns);
>  }
>  EXPORT_SYMBOL(task_tgid_nr_ns);
>  


[GIT PULL] VFIO updates for v4.10-rc1

2016-12-09 Thread Alex Williamson
 virt/kvm/vfio.c   |   18 +
 23 files changed, 4486 insertions(+), 265 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-vfio-mdev
 create mode 100644 Documentation/vfio-mediated-device.txt
 create mode 100644 drivers/vfio/mdev/Kconfig
 create mode 100644 drivers/vfio/mdev/Makefile
 create mode 100644 drivers/vfio/mdev/mdev_core.c
 create mode 100644 drivers/vfio/mdev/mdev_driver.c
 create mode 100644 drivers/vfio/mdev/mdev_private.h
 create mode 100644 drivers/vfio/mdev/mdev_sysfs.c
 create mode 100644 drivers/vfio/mdev/vfio_mdev.c
 create mode 100644 include/linux/mdev.h
 create mode 100644 samples/vfio-mdev/Makefile
 create mode 100644 samples/vfio-mdev/mtty.c

---
Merge note: Stephen Rothwell has identified an API change conflict in
linux-next for get_user_pages_remote().  Depending on the ordering of
merges versus akpm's branches, a fixup may be necessary as Stephen has
provided in linux-next 20161209 as:

commit 53fa4eed56cb11d8cea7e15a299cd1054ddc0425
Author: Stephen Rothwell 
Date:   Wed Dec 7 11:02:15 2016 +1100

vfio iommu type1: merge fix for get_user_pages_remote API change

Link: http://lkml.kernel.org/r/20161122210511.024ec...@canb.auug.org.au
Signed-off-by: Stephen Rothwell 
Cc: Lorenzo Stoakes 
Cc: Michal Hocko 
Signed-off-by: Andrew Morton 

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 9815e45..f3726ba 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -362,7 +362,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned 
long vaddr,
 
down_read(&mm->mmap_sem);
ret = get_user_pages_remote(NULL, mm, vaddr, 1, flags, page,
-   NULL);
+   NULL, NULL);
up_read(&mm->mmap_sem);
}
 

I'm not sure how these are typically handled, but I'm hoping to queue
these changes towards the front of the merge window to allow Intel to
get in their dependent changes.  No merge issues vs current mainline as
of v4.9-rc8+ (a37102dcd7ec). Thanks,

Alex


Re: [V9fs-developer] [PATCH 4/5] 9p: introduce async read requests

2016-12-09 Thread Stefano Stabellini
On Fri, 9 Dec 2016, Dominique Martinet wrote:
> Stefano Stabellini wrote on Thu, Dec 08, 2016:
> > If the read is an async operation, send a 9p request and return
> > EIOCBQUEUED. Do not wait for completion.
> > 
> > Complete the read operation from a callback instead.
> > 
> > Signed-off-by: Stefano Stabellini 
> > ---
> >  net/9p/client.c | 88 
> > +++--
> >  1 file changed, 86 insertions(+), 2 deletions(-)
> > 
> > diff --git a/net/9p/client.c b/net/9p/client.c
> > index eb589ef..f9f09db 100644
> > --- a/net/9p/client.c
> > +++ b/net/9p/client.c
> > @@ -28,6 +28,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -1554,13 +1555,68 @@ int p9_client_unlinkat(struct p9_fid *dfid, const 
> > char *name, int flags)
> >  }
> >  EXPORT_SYMBOL(p9_client_unlinkat);
> >  
> > +static void
> > +p9_client_read_complete(struct p9_client *clnt, struct p9_req_t *req, int 
> > status)
> > +{
> > +   int err, count, n, i, total = 0;
> > +   char *dataptr, *to;
> > +
> > +   if (req->status == REQ_STATUS_ERROR) {
> > +   p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
> > +   err = req->t_err;
> > +   goto out;
> > +   }
> > +   err = p9_check_errors(clnt, req);
> > +   if (err)
> > +   goto out;
> > +
> > +   err = p9pdu_readf(req->rc, clnt->proto_version,
> > +   "D", &count, &dataptr);
> > +   if (err) {
> > +   trace_9p_protocol_dump(clnt, req->rc);
> > +   goto out;
> > +   }
> > +   if (!count) {
> > +   p9_debug(P9_DEBUG_ERROR, "count=%d\n", count);
> > +   err = 0;
> > +   goto out;
> > +   }
> > +
> > +   p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
> > +   if (count > req->rsize)
> > +   count = req->rsize;
> > +
> > +   for (i = 0; i < ((req->rsize + PAGE_SIZE - 1) / PAGE_SIZE); i++) {
> > +   to = kmap(req->pagevec[i]);
> > +   to += req->offset;
> > +   n = PAGE_SIZE - req->offset;
> > +   if (n > count)
> > +   n = count;
> > +   memcpy(to, dataptr, n);
> > +   kunmap(req->pagevec[i]);
> > +   req->offset = 0;
> > +   count -= n;
> > +   total += n;
> > +   }
> > +
> > +   err = total;
> > +   req->kiocb->ki_pos += total;
> > +
> > +out:
> > +   req->kiocb->ki_complete(req->kiocb, err, 0);
> > +
> > +   release_pages(req->pagevec, (req->rsize + PAGE_SIZE - 1) / PAGE_SIZE, 
> > false);
> > +   kvfree(req->pagevec);
> > +   p9_free_req(clnt, req);
> > +}
> > +
> >  int
> >  p9_client_read(struct p9_fid *fid, struct kiocb *iocb, u64 offset,
> > struct iov_iter *to, int *err)
> >  {
> > struct p9_client *clnt = fid->clnt;
> > struct p9_req_t *req;
> > -   int total = 0;
> > +   int total = 0, i;
> > *err = 0;
> >  
> > p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n",
> > @@ -1587,10 +1643,38 @@ int p9_client_unlinkat(struct p9_fid *dfid, const 
> > char *name, int flags)
> > req = p9_client_zc_rpc(clnt, P9_TREAD, to, NULL, rsize,
> >0, 11, "dqd", fid->fid,
> >offset, rsize);
> > -   } else {
> > +   /* sync request */
> > +   } else if(iocb == NULL || is_sync_kiocb(iocb)) {
> > non_zc = 1;
> > req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, 
> > offset,
> > rsize);
> > +   /* async request */
> > +   } else {
> 
> I'm not too familiar with iocb/how async IOs should work, but a logic
> question just to make sure that has been thought out:
> We prefer zc here to async, even if zc can be slow?
> 
> Ideally at some point zc and async aren't exclusive so we'll have async
> zc and async normal, but for now I'd say async comes before zc - yes
> there will be an extra copy in memory, but it will be done
> asynchronously.
> Was it intentional to prefer zc here?

I wasn't sure what to do about zc. The backends I am testing with don't
support zc, so I didn't feel confident in changing its behavior. I think
whether zc is faster than async+copy depends on the specific benchmark.
iodepth and blocksize parameters in fio, for example. With iodepth=1, zc
would be faster, the higher the iodepth, the faster async+copy would
become in comparison. At some point async+copy will be faster than zc,
but I am not sure where the threshold is; it would probably be storage
backend dependent too. Maybe around iodepth=3. This is a reasonable
guess but I haven't run any numbers to confirm it.

That said, I am happy to follow any strategy you suggest in regards to zc.


> > +   req = p9_client_get_req(clnt, P9_TREAD, "dqd", 
> > fid->fid, offset, rsize);
> > +   if (IS_ERR(req)) {
> > +  

Re: [PATCH] x86/smpboot: Make logical package management more robust

2016-12-09 Thread Thomas Gleixner
On Thu, 8 Dec 2016, Thomas Gleixner wrote:

Boris, can you please verify if that makes the
topology_update_package_map() call which you placed into the Xen cpu
starting code obsolete ?

Thanks,

tglx


RE: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers

2016-12-09 Thread Haiyang Zhang


> -Original Message-
> From: Stephen Hemminger [mailto:step...@networkplumber.org]
> Sent: Friday, December 9, 2016 4:45 PM
> To: Haiyang Zhang 
> Cc: Greg KH ; KY Srinivasan
> ; o...@aepfle.de; linux-kernel@vger.kernel.org;
> bjorn.helg...@gmail.com; a...@canonical.com; de...@linuxdriverproject.org;
> leann.ogasaw...@canonical.com; jasow...@redhat.com
> Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> serial numbers
> 
> On Fri, 9 Dec 2016 21:31:25 +
> Haiyang Zhang  wrote:
> 
> > > -Original Message-
> > > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > > Sent: Friday, December 9, 2016 3:30 PM
> > > To: Haiyang Zhang 
> > > Cc: Greg KH ; KY Srinivasan
> > > ; o...@aepfle.de; linux-kernel@vger.kernel.org;
> > > bjorn.helg...@gmail.com; a...@canonical.com;
> de...@linuxdriverproject.org;
> > > leann.ogasaw...@canonical.com; jasow...@redhat.com
> > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > > serial numbers
> > >
> > > On Fri, 9 Dec 2016 20:09:49 +
> > > Haiyang Zhang  wrote:
> > >
> > > > > -Original Message-
> > > > > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > > > > Sent: Friday, December 9, 2016 1:21 PM
> > > > > To: Greg KH 
> > > > > Cc: KY Srinivasan ; o...@aepfle.de; Haiyang
> Zhang
> > > > > ; linux-kernel@vger.kernel.org;
> > > > > bjorn.helg...@gmail.com; a...@canonical.com;
> > > de...@linuxdriverproject.org;
> > > > > leann.ogasaw...@canonical.com; jasow...@redhat.com
> > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based
> on
> > > > > serial numbers
> > > > >
> > > > > On Fri, 9 Dec 2016 08:31:22 +0100
> > > > > Greg KH  wrote:
> > > > >
> > > > > > On Fri, Dec 09, 2016 at 12:05:53AM +, KY Srinivasan wrote:
> > > > > > >
> > > > > > >
> > > > > > > > -Original Message-
> > > > > > > > From: Greg KH [mailto:gre...@linuxfoundation.org]
> > > > > > > > Sent: Thursday, December 8, 2016 7:56 AM
> > > > > > > > To: KY Srinivasan 
> > > > > > > > Cc: linux-kernel@vger.kernel.org;
> de...@linuxdriverproject.org;
> > > > > > > > o...@aepfle.de; a...@canonical.com; vkuzn...@redhat.com;
> > > > > > > > jasow...@redhat.com; leann.ogasaw...@canonical.com;
> > > > > > > > bjorn.helg...@gmail.com; Haiyang Zhang
> 
> > > > > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching
> > > based on
> > > > > serial
> > > > > > > > numbers
> > > > > > > >
> > > > > > > > On Thu, Dec 08, 2016 at 12:33:43AM -0800,
> > > > > k...@exchange.microsoft.com
> > > > > > > > wrote:
> > > > > > > > > From: Haiyang Zhang 
> > > > > > > > >
> > > > > > > > > We currently use MAC address to match VF and synthetic
> NICs.
> > > > > Hyper-V
> > > > > > > > > provides a serial number to both devices for this
> purpose.
> > > This
> > > > > patch
> > > > > > > > > implements the matching based on VF serial numbers. This
> is
> > > the
> > > > > way
> > > > > > > > > specified by the protocol and more reliable.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Haiyang Zhang 
> > > > > > > > > Signed-off-by: K. Y. Srinivasan 
> > > > > > > > > ---
> > > > > > > > >  drivers/net/hyperv/netvsc_drv.c |   55
> > > > > > > > ---
> > > > > > > > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > index 9522763..c5778cf 100644
> > > > > > > > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > @@ -1165,9 +1165,10 @@ static void
> netvsc_free_netdev(struct
> > > > > > > > net_device *netdev)
> > > > > > > > >   free_netdev(netdev);
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > -static struct net_device *get_netvsc_bymac(const u8
> *mac)
> > > > > > > > > +static struct net_device *get_netvsc_byvfser(u32 vfser)
> > > > > > > > >  {
> > > > > > > > >   struct net_device *dev;
> > > > > > > > > + struct net_device_context *ndev_ctx;
> > > > > > > > >
> > > > > > > > >   ASSERT_RTNL();
> > > > > > > > >
> > > > > > > > > @@ -1175,7 +1176,8 @@ static void
> netvsc_free_netdev(struct
> > > > > net_device
> > > > > > > > *netdev)
> > > > > > > > >   if (dev->netdev_ops != &device_ops)
> > > > > > > > >   continue;   /* not a netvsc device 
> > > > > > > > > */
> > > > > > > > >
> > > > > > > > > - if (ether_addr_equal(mac, dev->perm_addr))
> > > > > > > > > + ndev_ctx = netdev_priv(dev);
> > > > > > > > > + if (ndev_ctx->vf_serial == vfser)
> > > > > > > > >   return dev;
> > > > > > > > >   }
> > > > > > > > >
> > > > > > > > > @@ -1205,21 +1207,66 @@ static void
> > > netvsc_free_netdev(struct
> > > > > > > > net_device *netdev)
> > > > > > > > >   return NULL;
> > > > > > > > >  }
> > > > > > > > >
> > > > > > > > > +st

Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on serial numbers

2016-12-09 Thread Stephen Hemminger
On Fri, 9 Dec 2016 21:53:49 +
Haiyang Zhang  wrote:

> > -Original Message-
> > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > Sent: Friday, December 9, 2016 4:45 PM
> > To: Haiyang Zhang 
> > Cc: Greg KH ; KY Srinivasan
> > ; o...@aepfle.de; linux-kernel@vger.kernel.org;
> > bjorn.helg...@gmail.com; a...@canonical.com; de...@linuxdriverproject.org;
> > leann.ogasaw...@canonical.com; jasow...@redhat.com
> > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > serial numbers
> > 
> > On Fri, 9 Dec 2016 21:31:25 +
> > Haiyang Zhang  wrote:
> >   
> > > > -Original Message-
> > > > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > > > Sent: Friday, December 9, 2016 3:30 PM
> > > > To: Haiyang Zhang 
> > > > Cc: Greg KH ; KY Srinivasan
> > > > ; o...@aepfle.de; linux-kernel@vger.kernel.org;
> > > > bjorn.helg...@gmail.com; a...@canonical.com;  
> > de...@linuxdriverproject.org;  
> > > > leann.ogasaw...@canonical.com; jasow...@redhat.com
> > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based on
> > > > serial numbers
> > > >
> > > > On Fri, 9 Dec 2016 20:09:49 +
> > > > Haiyang Zhang  wrote:
> > > >  
> > > > > > -Original Message-
> > > > > > From: Stephen Hemminger [mailto:step...@networkplumber.org]
> > > > > > Sent: Friday, December 9, 2016 1:21 PM
> > > > > > To: Greg KH 
> > > > > > Cc: KY Srinivasan ; o...@aepfle.de; Haiyang  
> > Zhang  
> > > > > > ; linux-kernel@vger.kernel.org;
> > > > > > bjorn.helg...@gmail.com; a...@canonical.com;  
> > > > de...@linuxdriverproject.org;  
> > > > > > leann.ogasaw...@canonical.com; jasow...@redhat.com
> > > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching based  
> > on  
> > > > > > serial numbers
> > > > > >
> > > > > > On Fri, 9 Dec 2016 08:31:22 +0100
> > > > > > Greg KH  wrote:
> > > > > >  
> > > > > > > On Fri, Dec 09, 2016 at 12:05:53AM +, KY Srinivasan wrote:  
> > > > > > > >
> > > > > > > >  
> > > > > > > > > -Original Message-
> > > > > > > > > From: Greg KH [mailto:gre...@linuxfoundation.org]
> > > > > > > > > Sent: Thursday, December 8, 2016 7:56 AM
> > > > > > > > > To: KY Srinivasan 
> > > > > > > > > Cc: linux-kernel@vger.kernel.org;  
> > de...@linuxdriverproject.org;  
> > > > > > > > > o...@aepfle.de; a...@canonical.com; vkuzn...@redhat.com;
> > > > > > > > > jasow...@redhat.com; leann.ogasaw...@canonical.com;
> > > > > > > > > bjorn.helg...@gmail.com; Haiyang Zhang  
> >   
> > > > > > > > > Subject: Re: [PATCH 3/3] hv_netvsc: Implement VF matching  
> > > > based on  
> > > > > > serial  
> > > > > > > > > numbers
> > > > > > > > >
> > > > > > > > > On Thu, Dec 08, 2016 at 12:33:43AM -0800,  
> > > > > > k...@exchange.microsoft.com  
> > > > > > > > > wrote:  
> > > > > > > > > > From: Haiyang Zhang 
> > > > > > > > > >
> > > > > > > > > > We currently use MAC address to match VF and synthetic  
> > NICs.  
> > > > > > Hyper-V  
> > > > > > > > > > provides a serial number to both devices for this  
> > purpose.  
> > > > This  
> > > > > > patch  
> > > > > > > > > > implements the matching based on VF serial numbers. This  
> > is  
> > > > the  
> > > > > > way  
> > > > > > > > > > specified by the protocol and more reliable.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Haiyang Zhang 
> > > > > > > > > > Signed-off-by: K. Y. Srinivasan 
> > > > > > > > > > ---
> > > > > > > > > >  drivers/net/hyperv/netvsc_drv.c |   55  
> > > > > > > > > ---  
> > > > > > > > > >  1 files changed, 51 insertions(+), 4 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/net/hyperv/netvsc_drv.c  
> > > > > > > > > b/drivers/net/hyperv/netvsc_drv.c  
> > > > > > > > > > index 9522763..c5778cf 100644
> > > > > > > > > > --- a/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > > +++ b/drivers/net/hyperv/netvsc_drv.c
> > > > > > > > > > @@ -1165,9 +1165,10 @@ static void  
> > netvsc_free_netdev(struct  
> > > > > > > > > net_device *netdev)  
> > > > > > > > > > free_netdev(netdev);
> > > > > > > > > >  }
> > > > > > > > > >
> > > > > > > > > > -static struct net_device *get_netvsc_bymac(const u8  
> > *mac)  
> > > > > > > > > > +static struct net_device *get_netvsc_byvfser(u32 vfser)
> > > > > > > > > >  {
> > > > > > > > > > struct net_device *dev;
> > > > > > > > > > +   struct net_device_context *ndev_ctx;
> > > > > > > > > >
> > > > > > > > > > ASSERT_RTNL();
> > > > > > > > > >
> > > > > > > > > > @@ -1175,7 +1176,8 @@ static void  
> > netvsc_free_netdev(struct  
> > > > > > net_device  
> > > > > > > > > *netdev)  
> > > > > > > > > > if (dev->netdev_ops != &device_ops)
> > > > > > > > > > continue;   /* not a netvsc device 
> > > > > > > > > > */
> > > > > > > > > >
> > > > > > > > > > -   if (ether_addr_equal(mac, dev->perm_addr))
> > > > > > > > > > +   ndev_ctx = netdev_priv(dev);
> > > > > 

Re: md: Combine two kmalloc() calls into one in sb_equal()

2016-12-09 Thread Bernd Schubert



On 09.12.2016 22:58, SF Markus Elfring wrote:

Irrelevant, the variable is not used before checking it.


* Will it be more appropriate to attempt another memory allocation only if
  the previous one succeeded already?

* Can it be a bit more efficient to duplicate only the required data
  in a single function call before?


How many memory allocations do you expect to fail?



Re: [RFC 00/10] implement alternative and much simpler id allocator

2016-12-09 Thread Andrew Morton
On Thu,  8 Dec 2016 02:22:55 +0100 Rasmus Villemoes  
wrote:

> TL;DR: these patches save 250 KB of memory, with more low-hanging
> fruit ready to pick.
> 
> While browsing through the lib/idr.c code, I noticed that the code at
> the end of ida_get_new_above() probably doesn't work as intended: Most
> users of ida use it via ida_simple_get(), and that starts by
> unconditionally calling ida_pre_get(), ensuring that ida->idr has
> 8==MAX_IDR_FREE idr_layers in its free list id_free. In the common
> case, none (or at most one) of these get used during
> ida_get_new_above(), and we only free one, leaving at least 6 (usually
> 7) idr_layers in the free list.

Please be aware of

http://ozlabs.org/~akpm/mmots/broken-out/reimplement-idr-and-ida-using-the-radix-tree.patch
http://lkml.kernel.org/r/1480369871-5271-68-git-send-email-mawil...@linuxonhyperv.com

I expect we'll be merging patches 1-32 of that series into 4.10-rc1 and
the above patch (#33) into 4.11-rc1.


Re: [PATCH v6 1/2] sparc: fix a building error reported by kbuild

2016-12-09 Thread Sam Ravnborg
Hi Gonglei.

On Thu, Dec 08, 2016 at 12:37:08PM +0800, Gonglei wrote:
> >> arch/sparc/include/asm/topology_64.h:44:44:
> error: implicit declaration of function 'cpu_data'
> [-Werror=implicit-function-declaration]
> 
>  #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id)
>^
> Let's include cpudata.h in topology_64.h.
> 
> Cc: Sam Ravnborg 
> Cc: David S. Miller 
> Cc: sparcli...@vger.kernel.org
> Suggested-by: Sam Ravnborg 
> Signed-off-by: Gonglei 
Acked-by: Sam Ravnborg 

> ---
>  arch/sparc/include/asm/topology_64.h | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/sparc/include/asm/topology_64.h 
> b/arch/sparc/include/asm/topology_64.h
> index 7b4898a..2255430 100644
> --- a/arch/sparc/include/asm/topology_64.h
> +++ b/arch/sparc/include/asm/topology_64.h
> @@ -4,6 +4,7 @@
>  #ifdef CONFIG_NUMA
>  
>  #include 
> +#include 

Nitpick - if you are going to resend this patch, then please
order the two includes in alphabetic order.

For two includes this looks like bikeshedding, but when we add
more having them in a defined order prevents merge conflicts.
And makes it readable too.

We also sometimes order the includes with the longest lines topmost,
and lines with the same length are ordered alphabetically.
But this is not seen so often.

Sam


Re: md: Combine two kmalloc() calls into one in sb_equal()

2016-12-09 Thread SF Markus Elfring
> Irrelevant, the variable is not used before checking it.

* Will it be more appropriate to attempt another memory allocation only if
  the previous one succeeded already?

* Can it be a bit more efficient to duplicate only the required data
  in a single function call before?

Regards,
Markus


Re: [PATCH] md: Combine two kmalloc() calls into one in sb_equal()

2016-12-09 Thread Joe Perches
On Fri, 2016-12-09 at 21:30 +0000, Al Viro wrote:
> On Fri, Dec 09, 2016 at 11:05:14AM -0800, Joe Perches wrote:
> > On Fri, 2016-12-09 at 19:30 +0100, SF Markus Elfring wrote:
> > > From: Markus Elfring 
> > > Date: Fri, 9 Dec 2016 19:09:13 +0100
> > > 
> > > The function "kmalloc" was called in one case by the function "sb_equal"
> > > without checking immediately if it failed.
> > > This issue was detected by using the Coccinelle software.
> > > 
> > > Perform the desired memory allocation (and release at the end)
> > > by a single function call instead.
> > > 
> > > Fixes: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 ("Linux-2.6.12-rc2")
> > 
> > Making a change does not mean fixes.
> > 
> > There's nothing particularly _wrong_ with the code as-is.
> > 
> > 2 kmemdup calls might make the code more obvious.
> > 
> > There's a small optimization possible in that only the
> > > first MD_SB_GENERIC_CONSTANT_WORDS of the struct are
> > actually compared.  Alloc and copy of both entire structs
> > is inefficient and unnecessary.
> > 
> > Perhaps something like the below would be marginally
> > better/faster, but the whole thing is dubious.
> > 
> > static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
> > {
> > int ret;
> > void *tmp1, *tmp2;
> > 
> > tmp1 = kmemdup(sb1, MD_SB_GENERIC_CONSTANT_WORDS * sizeof(__u32), 
> > GFP_KERNEL);
> > tmp2 = kmemdup(sb2, MD_SB_GENERIC_CONSTANT_WORDS * sizeof(__u32), 
> > GFP_KERNEL);
> > 
> > if (!tmp1 || !tmp2) {
> > ret = 0;
> > goto out;
> > }
> > 
> > /*
> >  * nr_disks is not constant
> >  */
> > ((mdp_super_t *)tmp1)->nr_disks = 0;
> > ((mdp_super_t *)tmp2)->nr_disks = 0;
> > 
> > ret = memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * sizeof(__u32)) 
> > == 0;
> > 
> > out:
> > kfree(tmp1);
> > kfree(tmp2);
> > return ret;
> > }
> 
> May I politely inquire if either of you has actually bothered to read the
> code and figure out what it does?  This is grotesque...
> 
> For really slow: we have two objects.  We want to check if anything in the
> 128-byte chunks in their beginnings other than one 32bit field happens to be
> different.  For that we
>   * allocate two 128-byte pieces of memory
>   * *copy* our objects into those
>   * forcibly zero the field in question in both of those copies
>   * compare the fuckers
>   * free them
> 
> And you two are discussing whether it's better to combine allocations of those
> copies into a single 256-byte allocation?  Really?

No.  May I suggest you read my suggestion?
At no point did I suggest a single allocation.

I think the single allocation is silly and just
makes the code harder to read.

>   _IF_ it is a hot path,
> the obvious optimization would be to avoid copying that crap in the first
> place - simply by
>   return memcmp(sb1, sb2, offsetof(mdp_super_t, nr_disks)) ||
>  memcmp(&sb1->nr_disks + 1, &sb2->nr_disks + 1,
>   MD_SB_GENERIC_CONSTANT_WORDS * sizeof(__u32) -
>   offsetof(mdp_super_t, nr_disks) - 4);

That's all true, but Markus has enough trouble reading simple
code without trying to explain to him what offsetof does.

btw:  the "- 4" should be " - sizeof(__u32)" just for consistency
with the line above it.

> If it is _not_ a hot path, why bother with it at all?

exactly.



[PATCH] net: mlx5: Fix Kconfig help text

2016-12-09 Thread Christopher Covington
Since the following commit, Infiniband and Ethernet have not been
mutually exclusive.

Fixes: 4aa17b28 mlx5: Enable mutual support for IB and Ethernet

Signed-off-by: Christopher Covington 
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig 
b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index aae4688..521cfdb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -18,8 +18,6 @@ config MLX5_CORE_EN
default n
---help---
  Ethernet support in Mellanox Technologies ConnectX-4 NIC.
- Ethernet and Infiniband support in ConnectX-4 are currently mutually
- exclusive.
 
 config MLX5_CORE_EN_DCB
bool "Data Center Bridging (DCB) Support"
-- 
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm
Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora
Forum, a Linux Foundation Collaborative Project.



Re: [PATCH v2 2/2] staging: iio: ad7606: move out of staging

2016-12-09 Thread kbuild test robot
Hi Eva,

[auto build test WARNING on iio/togreg]
[also build test WARNING on next-20161209]
[cannot apply to v4.9-rc8]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Eva-Rachel-Retuya/staging-iio-ad7606-move-driver-out-of-staging/20161210-041408
base:   https://git.kernel.org/pub/scm/linux/kernel/git/jic23/iio.git togreg
config: blackfin-allmodconfig (attached as .config)
compiler: bfin-uclinux-gcc (GCC) 6.2.0
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=blackfin 

All warnings (new ones prefixed by >>):

   In file included from ./arch/blackfin/include/generated/asm/div64.h:1:0,
from include/linux/kernel.h:142,
from include/linux/interrupt.h:5,
from drivers/iio/adc/ad7606.c:9:
   drivers/iio/adc/ad7606.c: In function 'ad7606_probe':
   include/asm-generic/div64.h:207:28: warning: comparison of distinct pointer 
types lacks a cast
 (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \
   ^
>> drivers/iio/adc/ad7606.c:440:27: note: in expansion of macro 'do_div'
  st->scale_avail[i][1] = do_div(scale, 1) * 10;
  ^~
   In file included from include/linux/linkage.h:4:0,
from include/linux/kernel.h:6,
from include/linux/interrupt.h:5,
from drivers/iio/adc/ad7606.c:9:
   include/asm-generic/div64.h:220:25: warning: right shift count >= width of 
type [-Wshift-count-overflow]
 } else if (likely(((n) >> 32) == 0)) {  \
^
   include/linux/compiler.h:167:40: note: in definition of macro 'likely'
# define likely(x) __builtin_expect(!!(x), 1)
   ^
>> drivers/iio/adc/ad7606.c:440:27: note: in expansion of macro 'do_div'
  st->scale_avail[i][1] = do_div(scale, 1) * 10;
  ^~
   In file included from ./arch/blackfin/include/generated/asm/div64.h:1:0,
from include/linux/kernel.h:142,
from include/linux/interrupt.h:5,
from drivers/iio/adc/ad7606.c:9:
   include/asm-generic/div64.h:224:22: error: passing argument 1 of 
'__div64_32' from incompatible pointer type [-Werror=incompatible-pointer-types]
  __rem = __div64_32(&(n), __base); \
 ^
>> drivers/iio/adc/ad7606.c:440:27: note: in expansion of macro 'do_div'
  st->scale_avail[i][1] = do_div(scale, 1) * 10;
  ^~
   include/asm-generic/div64.h:198:17: note: expected 'uint64_t * {aka long 
long unsigned int *}' but argument is of type 'unsigned int *'
extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
^~
   cc1: some warnings being treated as errors

vim +/do_div +440 drivers/iio/adc/ad7606.c

b9618c0c drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-02-22  
424  
e61181d0 drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-05-18  
425   st = iio_priv(indio_dev);
e61181d0 drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-05-18  
426  
b9618c0c drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-02-22  
427   st->dev = dev;
b9618c0c drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-02-22  
428   st->bops = bops;
b9618c0c drivers/staging/iio/adc/ad7606_core.c Michael Hennerich  2011-02-22  
429   st->base_address = base_address;
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
430   /* tied to logic low, analog input range is +/- 5V */
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
431   st->range = 0;
e79e8027 drivers/staging/iio/adc/ad7606_core.c Lars-Peter Clausen 2016-10-19  
432   st->oversampling = 1;
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
433   /* Populate the scales, 2.5/2**16 then 5/2**16 */
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
434   range = 5000;
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
435   for (i = 0, j = 1; i < ARRAY_SIZE(st->scale_avail); i++, j--) {
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
436   scale = ((u64)range * 1) >>
c22bfdb9 drivers/staging/iio/adc/ad7606.c  Eva Rachel Retuya  2016-12-09  
437   ad7606_channels[1].scan_type.realbits;
c22bfdb9 drivers/staging/iio/adc/

[PATCH v2] drivers: Update drv260x driver

2016-12-09 Thread Jingkui Wang
Update driver drv260x to use generic device properties
Remove platform data and corresponding header file

Signed-off-by: Jingkui Wang 
---
Changes in v2:
- Delete unused header file

 drivers/input/misc/drv260x.c| 43 ++---
 include/linux/platform_data/drv260x-pdata.h | 28 ---
 2 files changed, 8 insertions(+), 63 deletions(-)
 delete mode 100644 include/linux/platform_data/drv260x-pdata.h

diff --git a/drivers/input/misc/drv260x.c b/drivers/input/misc/drv260x.c
index 2adfd86c..4f448ba 100644
--- a/drivers/input/misc/drv260x.c
+++ b/drivers/input/misc/drv260x.c
@@ -19,7 +19,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -27,7 +26,6 @@
 #include 
 
 #include 
-#include 
 
 #define DRV260X_STATUS 0x0
 #define DRV260X_MODE   0x1
@@ -468,52 +466,39 @@ static const struct regmap_config drv260x_regmap_config = 
{
.cache_type = REGCACHE_NONE,
 };
 
-#ifdef CONFIG_OF
-static int drv260x_parse_dt(struct device *dev,
+static int drv260x_read_device_property(struct device *dev,
struct drv260x_data *haptics)
 {
-   struct device_node *np = dev->of_node;
unsigned int voltage;
int error;
 
-   error = of_property_read_u32(np, "mode", &haptics->mode);
+   error = device_property_read_u32(dev, "mode", &haptics->mode);
if (error) {
dev_err(dev, "%s: No entry for mode\n", __func__);
return error;
}
 
-   error = of_property_read_u32(np, "library-sel", &haptics->library);
+   error = device_property_read_u32(dev, "library-sel", &haptics->library);
if (error) {
dev_err(dev, "%s: No entry for library selection\n",
__func__);
return error;
}
 
-   error = of_property_read_u32(np, "vib-rated-mv", &voltage);
+   error = device_property_read_u32(dev, "vib-rated-mv", &voltage);
if (!error)
haptics->rated_voltage = drv260x_calculate_voltage(voltage);
 
-
-   error = of_property_read_u32(np, "vib-overdrive-mv", &voltage);
+   error = device_property_read_u32(dev, "vib-overdrive-mv", &voltage);
if (!error)
haptics->overdrive_voltage = drv260x_calculate_voltage(voltage);
 
return 0;
 }
-#else
-static inline int drv260x_parse_dt(struct device *dev,
-  struct drv260x_data *haptics)
-{
-   dev_err(dev, "no platform data defined\n");
-
-   return -EINVAL;
-}
-#endif
 
 static int drv260x_probe(struct i2c_client *client,
 const struct i2c_device_id *id)
 {
-   const struct drv260x_platform_data *pdata = 
dev_get_platdata(&client->dev);
struct drv260x_data *haptics;
int error;
 
@@ -524,21 +509,9 @@ static int drv260x_probe(struct i2c_client *client,
haptics->rated_voltage = DRV260X_DEF_OD_CLAMP_VOLT;
haptics->rated_voltage = DRV260X_DEF_RATED_VOLT;
 
-   if (pdata) {
-   haptics->mode = pdata->mode;
-   haptics->library = pdata->library_selection;
-   if (pdata->vib_overdrive_voltage)
-   haptics->overdrive_voltage = 
drv260x_calculate_voltage(pdata->vib_overdrive_voltage);
-   if (pdata->vib_rated_voltage)
-   haptics->rated_voltage = 
drv260x_calculate_voltage(pdata->vib_rated_voltage);
-   } else if (client->dev.of_node) {
-   error = drv260x_parse_dt(&client->dev, haptics);
-   if (error)
-   return error;
-   } else {
-   dev_err(&client->dev, "Platform data not set\n");
-   return -ENODEV;
-   }
+   error = drv260x_read_device_property(&client->dev, haptics);
+   if (error)
+   return error;
 
 
if (haptics->mode < DRV260X_LRA_MODE ||
diff --git a/include/linux/platform_data/drv260x-pdata.h 
b/include/linux/platform_data/drv260x-pdata.h
deleted file mode 100644
index 0a03b09..0000000
--- a/include/linux/platform_data/drv260x-pdata.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Platform data for DRV260X haptics driver family
- *
- * Author: Dan Murphy 
- *
- * Copyright:   (C) 2014 Texas Instruments, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-
-#ifndef _LINUX_DRV260X_PDATA_H
-#define _LINUX_DRV260X_PDATA_H
-
-struct drv260x_platform_data {
-   u32 library_selection;
-   u32 mode;
-   u32 vib_rated_voltage;
-   u32 vib_overdrive_voltage;
-};
-
-#endif
-- 
2.

  1   2   3   4   5   6   7   >