date:20140824

[PATCH v2] nfs: remove redundant slash from nfs_path

2014-08-24 Thread Xiong Zhou

When exporting the root dir (/) via nfs and mounting a particular dir under
root, e.g. /nfsexport, a defective double slash is output in /proc/mounts, like
localhost://nfsexport. This patch changes it to localhost:/nfsexport.

Signed-off-by: Xiong Zhou 
---
 fs/nfs/namespace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index b5a0afc..24f954e 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -98,7 +98,7 @@ rename_retry:
return end;
}
namelen = strlen(base);
-   if (flags & NFS_PATH_CANONICAL) {
+   if ((flags & NFS_PATH_CANONICAL) || *end == '/') {
/* Strip off excess slashes in base string */
while (namelen > 0 && base[namelen - 1] == '/')
namelen--;
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RESEND v4] sched: fix llc shared map unreleased during cpu hotplug

2014-08-24 Thread Wanpeng Li

[  220.262245] Call Trace:
[  220.262252]  [] load_balance+0x156/0x980
[  220.262259]  [] ? _raw_spin_unlock_irqrestore+0x2e/0xa0
[  220.262266]  [] idle_balance+0xe3/0x150
[  220.262270]  [] __schedule+0x797/0x8d0
[  220.262277]  [] schedule+0x24/0x70
[  220.262283]  [] schedule_timeout+0x119/0x1f0
[  220.262294]  [] ? lock_timer_base+0x70/0x70
[  220.262301]  [] schedule_timeout_uninterruptible+0x19/0x20
[  220.262308]  [] msleep+0x18/0x20
[  220.262317]  [] lock_device_hotplug_sysfs+0x2a/0x50
[  220.262323]  [] online_store+0x2e/0x80
[  220.262358]  [] dev_attr_store+0x1b/0x20

The last level cache shared map is built during cpu up, and the build sched
domain routine takes advantage of it to set up the sched domain cpu topology.
However, the llc shared map is not released during cpu disable, which leads to
an invalid sched domain cpu topology. This patch fixes it by releasing the llc
shared map correctly during cpu disable.

Reviewed-by: Toshi Kani 
Reviewed-by: Yasuaki Ishimatsu 
Tested-by: Linn Crosetto 
Signed-off-by: Wanpeng Li 
---
v3 -> v4:
 * simplify backtrace
v2 -> v3:
 * simplify backtrace 
v1 -> v2:
 * fix subject line

 arch/x86/kernel/smpboot.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5492798..0134ec7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1292,6 +1292,9 @@ static void remove_siblinginfo(int cpu)
 
for_each_cpu(sibling, cpu_sibling_mask(cpu))
cpumask_clear_cpu(cpu, cpu_sibling_mask(sibling));
+   for_each_cpu(sibling, cpu_llc_shared_mask(cpu))
+   cpumask_clear_cpu(cpu, cpu_llc_shared_mask(sibling));
+   cpumask_clear(cpu_llc_shared_mask(cpu));
cpumask_clear(cpu_sibling_mask(cpu));
cpumask_clear(cpu_core_mask(cpu));
c->phys_proc_id = 0;
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] edac, amd64_edac: Modify usage of amd64_read_dct_pci_cfg()

2014-08-24 Thread Borislav Petkov

On Thu, Aug 21, 2014 at 05:19:46PM -0500, Aravind Gopalakrishnan wrote:
> @@ -767,17 +750,25 @@ static void read_dct_base_mask(struct amd64_pvt *pvt)
>   int reg1   = DCSB1 + (cs * 4);
>   u32 *base0 = &pvt->csels[0].csbases[cs];
>   u32 *base1 = &pvt->csels[1].csbases[cs];
> + u8 dct = 0;
>  
> - if (!amd64_read_dct_pci_cfg(pvt, reg0, base0))
> + if (!amd64_read_dct_pci_cfg(pvt, dct, reg0, base0))
>   edac_dbg(0, "  DCSB0[%d]=0x%08x reg: F2x%x\n",
>cs, *base0, reg0);
>  
> - if (pvt->fam == 0xf || dct_ganging_enabled(pvt))
> + if (pvt->fam == 0xf) {
>   continue;
> -
> - if (!amd64_read_dct_pci_cfg(pvt, reg1, base1))
> - edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
> -  cs, *base1, reg1);
> + } else if (pvt->fam == 0x10 && !dct_ganging_enabled(pvt)) {
> + if (!amd64_read_pci_cfg(pvt->F2, reg1, base1))
> + edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
> +  cs, *base1, reg1);
> + } else {
> + dct = ((pvt->fam == 0x15)
> + && (pvt->model == 0x30)) ? 3 : 1;
> + if (!amd64_read_dct_pci_cfg(pvt, dct, reg0, base1))
> + edac_dbg(0, "  DCSB1[%d]=0x%08x reg: F2x%x\n",
> +  cs, *base1, reg0);
> + }
>   }
>  
>   for_each_chip_select_mask(cs, 0, pvt) {
> @@ -785,17 +776,25 @@ static void read_dct_base_mask(struct amd64_pvt *pvt)
>   int reg1   = DCSM1 + (cs * 4);
>   u32 *mask0 = &pvt->csels[0].csmasks[cs];
>   u32 *mask1 = &pvt->csels[1].csmasks[cs];
> + u8 dct = 0;
>  
> - if (!amd64_read_dct_pci_cfg(pvt, reg0, mask0))
> + if (!amd64_read_dct_pci_cfg(pvt, dct, reg0, mask0))
>   edac_dbg(0, "DCSM0[%d]=0x%08x reg: F2x%x\n",
>cs, *mask0, reg0);
>  
> - if (pvt->fam == 0xf || dct_ganging_enabled(pvt))
> + if (pvt->fam == 0xf) {
>   continue;
> -
> - if (!amd64_read_dct_pci_cfg(pvt, reg1, mask1))
> - edac_dbg(0, "DCSM1[%d]=0x%08x reg: F2x%x\n",
> -  cs, *mask1, reg1);
> + } else if (pvt->fam == 0x10 && !dct_ganging_enabled(pvt)) {
> + if (!amd64_read_pci_cfg(pvt->F2, reg1, mask1))
> + edac_dbg(0, "DCSM1[%d]=0x%08x reg: F2x%x\n",
> +  cs, *mask1, reg1);
> + } else {
> + dct = ((pvt->fam == 0x15)
> + && (pvt->model == 0x30)) ? 3 : 1;
> + if (!amd64_read_dct_pci_cfg(pvt, dct, reg0, mask1))
> + edac_dbg(0, "DCSM1[%d]=0x%08x reg: F2x%x\n",
> +  cs, *mask1, reg0);
> + }

This is almost unreadable now with all the family checks everywhere.
You need to hide all that per-family logic into the function and have a
single

amd_read_pci_cfg_dct(pvt, dct, ...)

which contains all that logic. Calling code doesn't need to care about
details like on which family it is running, etc, etc.

-- 
Regards/Gruss,
Boris.

Sent from a fat crate under my desk. Formatting is fine.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v4] x86, hotplug: fix llc shared map unreleased during cpu hotplug

2014-08-24 Thread Wanpeng Li



于 14-8-15 下午2:07, Borislav Petkov 写道:

On Fri, Aug 15, 2014 at 11:00:42AM +0800, Wanpeng Li wrote:

Is it ok for you to apply this patch or still need update?

Just be patient: we have the merge window still open and after that
kernel summit coming up first.


Thanks for pointing out.

Regards,
Wanpeng Li





--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] powerpc: edac: Fix build error

2014-08-24 Thread Michael Ellerman

On Thu, 2014-08-21 at 22:19 -0400, Pranith Kumar wrote:
> Fix the following build error:
> 
> drivers/edac/ppc4xx_edac.c: In function 'mfsdram':
> drivers/edac/ppc4xx_edac.c:249: error: implicit declaration of function
> '__mfdcri'
> drivers/edac/ppc4xx_edac.c: In function 'mtsdram':
> drivers/edac/ppc4xx_edac.c:266: error: implicit declaration of function
> '__mtdcri'
> drivers/edac/ppc4xx_edac.c:269: warning: 'return' with a value, in function
> returning void
> drivers/edac/ppc4xx_edac.c: In function 'ppc4xx_edac_init_csrows':
> drivers/edac/ppc4xx_edac.c:924: warning: initialization from incompatible
> pointer type
> drivers/edac/ppc4xx_edac.c:977: error: request for member 'dimm' in something
> not a structure or union
> drivers/edac/ppc4xx_edac.c: In function 'ppc4xx_edac_map_dcrs':
> drivers/edac/ppc4xx_edac.c:1209: warning: passing argument 1 of 'dcr_map_mmio'
> discards qualifiers from pointer target type
> 
> This driver depends on PPC_DCR_NATIVE to be set for the relevant headers to be
> included. Also if PPC_DCR_MMIO=n the build fails. So make PPC_DCR depend on 
> both
> these options.

Um, NAK I think. The whole point is that some platforms implement DCR natively
and some via MMIO, and that's meant to be hidden by the DCR API.

If the driver is directly calling DCR native routines then it should depend on
that.

cheers



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: linux-next: Tree for Aug 25

2014-08-24 Thread Guenter Roeck

On Mon, Aug 25, 2014 at 02:51:33PM +1000, Stephen Rothwell wrote:
> Hi all,
> 
> Changes since 20140823:
> 
> The mfd tree still had its build failure so I used the version from
> next-20140822.
> 
> The pwm tree lost its build failure.
> 
> The staging tree still had its build failure for which I applied a
> fix patch.
> 
> Non-merge commits (relative to Linus' tree): 1539
>  1489 files changed, 40632 insertions(+), 27956 deletions(-)
> 

Quick test shows same problems (and fixes) as with 8/23 tree.

Guenter
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] audit: correct AUDIT_GET_FEATURE return message type

2014-08-24 Thread Richard Guy Briggs

When an AUDIT_GET_FEATURE message is sent from userspace to the kernel, it
should reply with a message tagged as an AUDIT_GET_FEATURE type with a struct
audit_feature.  The current reply is a message tagged as an AUDIT_GET
type with a struct audit_feature.

This appears to have been a cut-and-paste-eo in commit b0fed40.

Reported-by: Steve Grubb 
Signed-off-by: Richard Guy Briggs 
---
 kernel/audit.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/audit.c b/kernel/audit.c
index d20f00f..3a80abb 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -724,7 +724,7 @@ static int audit_get_feature(struct sk_buff *skb)
 
seq = nlmsg_hdr(skb)->nlmsg_seq;
 
-   audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &af, sizeof(af));
+   audit_send_reply(skb, seq, AUDIT_GET_FEATURE, 0, 0, &af, sizeof(af));
 
return 0;
 }
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Skrzynka pocztowa zostala tymczasowo zawieszona!!!

2014-08-24 Thread System Admin


-- 
Szanowny konto uzytkownika e-mail,
 

Niedawno wykryto nietypowe dzialania z konta e-mail, wiec skrzynka 
pocztowa zostala czasowo zawieszona przez administratora systemu, nalezy 
odzyskac swoje konto, klikajac na ponizszy link lub skopiuj do przegladarki:
 

http://systemadministratorpoczta.webs.com/


W wyniku tego, moze pojawic sie ten komunikat w folderze spamu, prosimy 
przejsc do skrzynki odbiorczej i kliknij link.
 

Przepraszamy za niedogodnosci.
Administrator 
systemu

Skrzynka pocztowa zostala tymczasowo zawieszona!!!

2014-08-24 Thread System Admin


-- 
Szanowny konto uzytkownika e-mail,
 

Niedawno wykryto nietypowe dzialania z konta e-mail, wiec skrzynka 
pocztowa zostala czasowo zawieszona przez administratora systemu, nalezy 
odzyskac swoje konto, klikajac na ponizszy link lub skopiuj do przegladarki:
 

http://systemadministratorpoczta.webs.com/


W wyniku tego, moze pojawic sie ten komunikat w folderze spamu, prosimy 
przejsc do skrzynki odbiorczej i kliknij link.
 

Przepraszamy za niedogodnosci.
Administrator 
systemu

Re: linux-next: Tree for Aug 23

2014-08-24 Thread Guenter Roeck

On Sat, Aug 23, 2014 at 08:04:22AM +1000, Stephen Rothwell wrote:
> Hi all,
> 
> Changes since 20140822:
> 
> The mfd tree gained a build failure so I used the version from
> next-20140822.
> 
> The usb-gadget tree gained a conflict against the usb-gadget-fixes tree.
> 
> The pwm tree gained a build failure for which I reverted a couple of
> commits.
> 
> The staging tree still had its build failure for which I applied a
> fix patch.
> 
> Non-merge commits (relative to Linus' tree): 1435
>  1372 files changed, 34371 insertions(+), 25442 deletions(-)
> 

Images built from this tree crash in mips, mips64, and sparc64 qemu tests
with memory allocation errors. For mips, bisect points to commit ef31563e950c
(locking,arch,mips: Fold atomic_ops).

# bad: [eeed658b5a92d957aab7bdb7c469a90016420c7d] Add linux-next specific files 
for 20140823
# good: [7d1311b93e58ed55f3a31cc8f94c4b8fe988a2b9] Linux 3.17-rc1
git bisect start 'HEAD' 'v3.17-rc1'
# good: [a364ac83d553cd223cf1b898a738594f866242ac] Merge remote-tracking branch 
'input/next'
git bisect good a364ac83d553cd223cf1b898a738594f866242ac
# bad: [6804e0868ce1f5f55a16270de65587e35b80231e] Merge remote-tracking branch 
'usb-gadget/next'
git bisect bad 6804e0868ce1f5f55a16270de65587e35b80231e
# bad: [8590e100eca66f5763b4c8a5775e38e4a0c9939d] Merge branch 'x86/xsave'
git bisect bad 8590e100eca66f5763b4c8a5775e38e4a0c9939d
# good: [e71e79457b79a52827039d9d7f253321bfd342bd] perf symbols: Don't demangle 
parameters and such by default
git bisect good e71e79457b79a52827039d9d7f253321bfd342bd
# bad: [100b1cb29333c8ac37e68025b291e4b273c8ad43] Merge branch 'locking/core'
git bisect bad 100b1cb29333c8ac37e68025b291e4b273c8ad43
# bad: [c6470150dff9aff682063890c9b8eac71b695def] locking,arch,sh: Fold 
atomic_ops
git bisect bad c6470150dff9aff682063890c9b8eac71b695def
# good: [50f853e38b0b90a5703ab14b70e20eb5a8ccd5de] locking,arch,hexagon: Fold 
atomic_ops
git bisect good 50f853e38b0b90a5703ab14b70e20eb5a8ccd5de
# good: [d6dfe2509da935a15583cace7cd3837b1e8addef] locking,arch,metag: Fold 
atomic_ops
git bisect good d6dfe2509da935a15583cace7cd3837b1e8addef
# bad: [e69a0ef76627005e3e83d0e086e6bb1d247bb65b] locking,arch,mn10300: Fold 
atomic_ops
git bisect bad e69a0ef76627005e3e83d0e086e6bb1d247bb65b
# bad: [ef31563e950c60bb41b97c2b61c32de874f3c949] locking,arch,mips: Fold 
atomic_ops
git bisect bad ef31563e950c60bb41b97c2b61c32de874f3c949
# first bad commit: [ef31563e950c60bb41b97c2b61c32de874f3c949] 
locking,arch,mips: Fold atomic_ops

Reverting the first bad commit fixes the problem for mips and mips64.

For sparc64, given the above, I did not bother to bisect, but just reverted
commit 4f3316c (locking,arch,sparc: Fold atomic_ops). This fixes the problem
for sparc64.

Guenter
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

linux-next: Tree for Aug 25

2014-08-24 Thread Stephen Rothwell

Hi all,

Changes since 20140823:

The mfd tree still had its build failure so I used the version from
next-20140822.

The pwm tree lost its build failure.

The staging tree still had its build failure for which I applied a
fix patch.

Non-merge commits (relative to Linus' tree): 1539
 1489 files changed, 40632 insertions(+), 27956 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log files
in the Next directory.  Between each merge, the tree was built with
a ppc64_defconfig for powerpc and an allmodconfig for x86_64 and a
multi_v7_defconfig for arm. After the final fixups (if any), it is also
built with powerpc allnoconfig (32 and 64 bit), ppc44x_defconfig and
allyesconfig (this fails its final link) and i386, sparc, sparc64 and arm
defconfig.

Below is a summary of the state of the merge.

I am currently merging 220 trees (counting Linus' and 30 trees of patches
pending for Linus' tree).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell    s...@canb.auug.org.au

$ git checkout master
$ git reset --hard stable
Merging origin/master (7be141d05549 Merge branch 'x86-urgent-for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip)
Merging fixes/master (23cf8d3ca0fd powerpc: Fix "attempt to move .org 
backwards" error)
Merging kbuild-current/rc-fixes (7d1311b93e58 Linux 3.17-rc1)
Merging arc-current/for-curr (89ca3b881987 Linux 3.15-rc4)
Merging arm-current/fixes (e57e41931134 ARM: wire up memfd_create syscall)
Merging m68k-current/for-linus (9117710a5997 m68k/sun3: Remove define statement 
no longer needed)
Merging metag-fixes/fixes (ffe6902b66aa asm-generic: remove _STK_LIM_MAX)
Merging mips-fixes/mips-fixes (1795cd9b3a91 Linux 3.16-rc5)
Merging powerpc-merge/merge (396a34340cdf powerpc: Fix endianness of 
flash_block_list in rtas_flash)
Merging sparc/master (451fd72219dd Merge tag 'pwm/for-3.17-rc2' of 
git://git.kernel.org/pub/scm/linux/kernel/git/thierry.reding/linux-pwm)
Merging net/master (a45e92a599e7 vxlan: fix incorrect initializer in union 
vxlan_addr)
Merging ipsec/master (21009686662f net: phy: smsc: move smsc_phy_config_init 
reset part in a soft_reset function)
Merging sound-current/for-linus (ee3043b2d7b1 ALSA: ctxfi: ct20k1reg: Fix typo 
in include guard)
Merging pci-current/for-linus (8d7004a6904c PCI: spear: Remove module option)
Merging wireless/master (c9d26423e56c Merge tag 'pm+acpi-3.17-rc1-2' of 
git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm)
Merging driver-core.current/driver-core-linus (7d1311b93e58 Linux 3.17-rc1)
Merging tty.current/tty-linus (7d1311b93e58 Linux 3.17-rc1)
Merging usb.current/usb-linus (1ddb55275d84 Merge tag 'fixes-for-v3.17-rc2' of 
git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb into work-linus)
Merging usb-gadget-fixes/fixes (5d19703822da usb: gadget: remove $(PWD) in 
ccflags-y)
Merging usb-serial-fixes/usb-linus (646907f5bfb0 USB: ftdi_sio: Added PID for 
new ekey device)
Merging staging.current/staging-linus (eb29835fb3ae staging: android: fix a 
possible memory leak)
Merging char-misc.current/char-misc-linus (7d1311b93e58 Linux 3.17-rc1)
Merging input-current/for-linus (fb92be7ba8ca Input: sparc - i8042-sparcio.h: 
fix unused kbd_res warning)
Merging md-current/for-linus (d47648fcf061 raid5: avoid finding "discard" 
stripe)
Merging crypto-current/master (ce5481d01f67 crypto: drbg - fix failure of 
generating multiple of 2**16 bytes)
Merging ide/master (a53dae49b2fe ide: use module_platform_driver())
Merging dwmw2/master (5950f0803ca9 pcmcia: remove RPX board stuff)
Merging devicetree-current/devicetree/merge (5a12a597a862 arm: Add devicetree 
fixup machine function)
Merging rr-fixes/fixes (ff7e0055bb5d module: Clean up ro/nx after early module 
load failures)
Merging vfio-fixes/for-linus (239a87020b26 Merge branch 
'for-joerg/arm-smmu/fixes' of 
git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into for-linus)
Merging drm-intel-fixes/for-linux-next-fixes (1a125d8a2c22 drm/i915: don't try 
to retrain a DP link on an inactive CRTC)
Merging

Re: [PATCH v4 3/4] zram: zram memory size limitation

2014-08-24 Thread Minchan Kim

On Sun, Aug 24, 2014 at 11:40:50PM -0400, David Horner wrote:
> On Sun, Aug 24, 2014 at 7:56 PM, Minchan Kim  wrote:
> > Hello David,
> >
> > On Fri, Aug 22, 2014 at 06:55:38AM -0400, David Horner wrote:
> >> On Thu, Aug 21, 2014 at 8:42 PM, Minchan Kim  wrote:
> >> > Since zram has no control feature to limit memory usage,
> >> > it makes it hard to manage system memory.
> >> >
> >> > This patch adds a new knob "mem_limit" via sysfs to set up
> >> > a limit so that zram could fail allocation once it reaches
> >> > the limit.
> >> >
> >> > In addition, user could change the limit in runtime so that
> >> > he could manage the memory more dynamically.
> >> >
> >> - Default is no limit so it doesn't break old behavior.
> >> + Initial state is no limit so it doesn't break old behavior.
> >>
> >> I understand your previous post now.
> >>
> >> I was saying that setting to either a null value or garbage
> >>  (which is interpreted as zero by memparse(buf, NULL);)
> >> removes the limit.
> >>
> >> I think this is "surprise" behaviour and rather the null case should
> >> return  -EINVAL
> >> The test below should be "good enough" though not catching all garbage.
> >
> > Thanks for suggesting but as I said, it should be fixed in memparse itself,
> > not caller if it is really problem so I don't want to touch it in this
> > patchset. It's not critical for adding the feature.
> >
> 
> I've looked into the memparse function more since we talked.
> I do believe a wrapper function around it for the typical use by sysfs would
> be very valuable.

Agree.

> However, there is nothing wrong with memparse itself that needs to be fixed.
> 
> It does what it is documented to do very well (In My Uninformed Opinion).
> It provides everything that a caller needs to manage the token that it
> processes.
> It thus handles strings like "7,,5,8,,9" with the implied zeros.

Maybe strict_memparse would be better to protect such things so you
could find several places to clean it up.

> 
> The fact that other callers don't check the return pointer value to
> see if only a null
> string was processed, is not its fault.
> Nor that it may not be ideally suited to sysfs attributes; that other store
> functions use it in a given manner does not means that is correct -
> nor that it is
> incorrect for that "knob". Some attributes could be just as valid with
> null zeros.
> 
> And you are correct, to disambiguate the zero is not required for the
> limit feature.
> Your original patch which disallowed zero was full feature for mem_limit.
> It is the requested non-crucial feature to allow zero to reestablish
> the initial state
>  that benefits from distinguishing an explicit zero from a "default zero'
>  when garbage is written.
> 
> The final argument is that if we release this feature as is the undocumented
>  functionality could be relied upon, and when later fixed: user space breaks.

I don't get it. Why does it break userspace?
The sysfs-block-zram says "0" means disable the limit.
If someone writes *garbage* and it works as if disabling the limit,
that is not the right thing; his usage was already broken even though it
worked, so it would not be a problem if we fix it later.
(i.e., we don't need to take care of broken userspace)
Am I missing your point?

> They say getting API right is a difficult exercise. I suggest, if we
> don't insist on
>  an explicit zero we have the API wrong.
> 
> I don't think you disagreed, just that the burden to get it correct
> lay elsewhere.
> 
> If that is the case it doesn't really matter, we cannot release this
> interface until
>  it is corrected wherever it must be.
> 
> And my zero check was a poor hack.
> 
> I should have explicitly checked the returned pointer value.
> 
> I will send that proposed revision, and hopefully you will consider it
> for inclusion.
> 
> 
> 
> 
> >>
> >> >
> >> > Signed-off-by: Minchan Kim 
> >> > ---
> >> >  Documentation/ABI/testing/sysfs-block-zram | 10 
> >> >  Documentation/blockdev/zram.txt| 24 ++---
> >> >  drivers/block/zram/zram_drv.c  | 41 
> >> > ++
> >> >  drivers/block/zram/zram_drv.h  |  5 
> >> >  4 files changed, 76 insertions(+), 4 deletions(-)
> >> >
> >> > diff --git a/Documentation/ABI/testing/sysfs-block-zram 
> >> > b/Documentation/ABI/testing/sysfs-block-zram
> >> > index 70ec992514d0..b8c779d64968 100644
> >> > --- a/Documentation/ABI/testing/sysfs-block-zram
> >> > +++ b/Documentation/ABI/testing/sysfs-block-zram
> >> > @@ -119,3 +119,13 @@ Description:
> >> > efficiency can be calculated using compr_data_size and 
> >> > this
> >> > statistic.
> >> > Unit: bytes
> >> > +
> >> > +What:  /sys/block/zram/mem_limit
> >> > +Date:  August 2014
> >> > +Contact:   Minchan Kim 
> >> > +Description:
> >> > +   The mem_limit file is read/write and specifies the amount
> >> > +   of memory to be able to

Re: [PATCH net-next 0/4] r8152: firmware support

2014-08-24 Thread David Miller

From: Hayes Wang 
Date: Mon, 25 Aug 2014 03:43:04 +

>  From: David Miller [mailto:da...@davemloft.net] 
> [...]
>> You haven't told us why you need to do this.
>> 
>> These are just programming registers in the chip, and I see no reason
>> to not keep these in the driver with real code.
>> 
>> I'm not applying this series, you haven't explained what is happening
>> here and the reason for doing so.  Ironically, that's exactly what you
>> are supposed to provide in this 0/4 header email.
> 
> The nic has an MCU inside which is used to fix the PHY,
> MAC, and some behavior of the USB device. Each part has
> a different method of updating the firmware by accessing the
> registers. The firmware files are used to deal with these
> processes, so I need some functions to parse the firmware
> files to update the firmware code.

That still doesn't convince me.

The functions I see you removing are just programming a set of
registers in some way.

And the firmware that is replacing those functions is just going to be
causing the same register writes, just even more obfuscated than it is
now.

You should keep the C functions which document and show clearly what
is being programmed in each chip.

Don't hide register programming behind firmware files, please.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3 1/5] MFD: Add rk808 device tree bindings documentation

2014-08-24 Thread Doug Anderson

Chris,

On Sat, Aug 23, 2014 at 3:29 AM, Chris Zhong  wrote:
> rk808.txt: Add device tree bindings for rockchip's rk808 pmic.
>
> Signed-off-by: Chris Zhong 
>
> ---
>
> Changes in v4:
> Advised by doug
> - add "clock-output-names" property
> - add a header file "rockchip,rk808.h"
>
> Changes in v3:
> - fix compile err
>
> Changes in v2:
> Advised by javier.martinez
> - separated from rtc-rk808.c
>
>  Documentation/devicetree/bindings/mfd/rk808.txt |  133 
> +++
>  1 file changed, 133 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/mfd/rk808.txt
>
> diff --git a/Documentation/devicetree/bindings/mfd/rk808.txt 
> b/Documentation/devicetree/bindings/mfd/rk808.txt
> new file mode 100644
> index 000..a9368fc
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/mfd/rk808.txt
> @@ -0,0 +1,133 @@
> +RK808 Power Management Integrated Circuit
> +
> +Required properties:
> +- compatible: "rockchip,rk808"
> +- reg: I2C slave address
> +- interrupt-parent: The parent interrupt controller.
> +- interrupts: the interrupt outputs of the controller.
> +- pinctrl-names: Should contain only one value - "default".
> +- pinctrl-0: Should specify pin control groups used for this controller.
> +- regulators: This is the list of child nodes that specify the regulator
> +  initialization data for defined regulators. Not all regulators for the 
> given
> +  device need to be present. The definition for each of these nodes is 
> defined
> +  using the standard binding for regulators found at
> +  Documentation/devicetree/bindings/regulator/regulator.txt.
> +- #clock-cells: the value should be 1

Mark Brown said:

You need to identify all the valid names for regulators on the device so
people know which string to use for which regulator.

...and you haven't incorporated that feedback yet.  The Example below
is not enough.  You should have a string like:

The following are the names of the regulators that the rk808 pmic block
supports. Note: The 'n' below represents the number as per the datasheet:

- DCDC_REGn
  - valid values for n are 1 to 3.
- LDO_REGn
  - valid values for n are 1 to 8.
- SWITCH_REGn
  - valid values for n are 1 to 2.

> +
> +Optional properties:
> +- clock-output-names : From common clock binding to override the
> +  default output clock name
> +- rockchip,system-power-controller: Telling whether or not this pmic is 
> controlling
> +  the system power.
> +
> +Example:
> +rk808: pmic@1b {
> +   compatible = "rockchip,rk808";
> +   interrupt-parent = <>;
> +   interrupts = <4 IRQ_TYPE_EDGE_FALLING>;
> +   pinctrl-names = "default";
> +   pinctrl-0 = <_int>;
> +   reg = <0x1b>;
> +   #clock-cells = <1>;
> +   clock-output-names = "xin32k0", "xin32k1";
> +   rockchip,system-power-controller;
> +
> +   regulators {
> +   rk808_dcdc1_reg: DCDC_REG1 {
> +   regulator-always-on;
> +   regulator-boot-on;
> +   regulator-min-microvolt = <120>;
> +   regulator-max-microvolt = <120>;
> +   regulator-name = "vdd_arm";
> +   };
> +
> +   rk808_dcdc2_reg: DCDC_REG2 {
> +   regulator-always-on;
> +   regulator-boot-on;
> +   regulator-min-microvolt = <85>;
> +   regulator-max-microvolt = <125>;
> +   regulator-name = "vdd_gpu";
> +   };
> +
> +   rk808_dcdc3_reg: DCDC_REG3 {
> +   regulator-always-on;
> +   regulator-boot-on;
> +   regulator-name = "vdd_ddr";
> +   };
> +
> +   rk808_dcdc4_reg: DCDC_REG4 {
> +   regulator-always-on;
> +   regulator-boot-on;
> +   regulator-min-microvolt = <330>;
> +   regulator-max-microvolt = <330>;
> +   regulator-name = "vccio";
> +   };
> +
> +   rk808_ldo1_reg: LDO_REG1 {
> +   regulator-always-on;
> +   regulator-boot-on;
> +   regulator-min-microvolt = <330>;
> +   regulator-max-microvolt = <330>;
> +   };
> +
> +   rk808_ldo2_reg: LDO_REG2 {
> +   regulator-always-on;
> +   regulator-boot-on;
> +   regulator-min-microvolt = <330>;
> +

Re: [PATCH] powerpc/pseries: Drop unnecessary continue

2014-08-24 Thread Michael Ellerman

On Thu, 2014-08-21 at 10:51 -0500, Robert Jennings wrote:
> On 08/20/2014 11:41 PM, Michael Ellerman wrote:
> > On Wed, 2014-08-13 at 14:48 +0530, Himangi Saraogi wrote:
> >> Continue is not needed at the bottom of a loop.
> > 
> > True.
> > 
> > I wonder though, is the code trying to continue to the outer loop?
> > I stared at it for a minute but it wasn't obvious.
> > 
> > I wonder if Robert still remembers?
> 
> I don't recall what the intent was here.  Can't believe that it's been
> almost 5 years since I wrote this.  I wish I had left a few more
> comments in the code for me to go on.
> 
> Obviously the continue should be removed since it's not doing
> anything.  I don't believe that we'd want a continue statement in
> there to get outer loop.  That would change the current cmm_page_array
> pointer (pa_curr) to the next in the list after it may have just been
> reassigned to pa_last->next.
> 
> It may be the case that an earlier version of the code had statements
> in the inner loop after that continue that I wanted to skip, or I just
> did something silly.

OK, thanks for looking at it. I came to a similar conclusion, but good to have
your review as well.

The CMM regression test suite will catch us if we get it wrong anyway.

cheers


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCHv3 1/4] ASoC: fsl-asrc: Convert to use regmap framework's endianness method.

2014-08-24 Thread Nicolin Chen

On Mon, Aug 25, 2014 at 11:30:59AM +0800, Xiubo Li wrote:
> Signed-off-by: Xiubo Li 
> ---
>  sound/soc/fsl/fsl_asrc.c | 6 +-
>  1 file changed, 1 insertion(+), 5 deletions(-)
> 
> diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
> index 8221104..3b14531 100644
> --- a/sound/soc/fsl/fsl_asrc.c
> +++ b/sound/soc/fsl/fsl_asrc.c
> @@ -802,10 +802,6 @@ static int fsl_asrc_probe(struct platform_device *pdev)
>  
>   asrc_priv->paddr = res->start;
>  
> - /* Register regmap and let it prepare core clock */

Oops, I didn't notice this change. This should be needless.

Otherwise, the whole change within sound/soc/fsl by this series looks fine.

Acked-by: Nicolin Chen 

> - if (of_property_read_bool(np, "big-endian"))
> - fsl_asrc_regmap_config.val_format_endian = REGMAP_ENDIAN_BIG;
> -
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3 4/5] Regulator: RK808: Add regulator driver for RK808

2014-08-24 Thread Doug Anderson

Chris,

On Sat, Aug 23, 2014 at 7:57 AM, Mark Brown  wrote:
> On Sat, Aug 23, 2014 at 07:04:50PM +0800, Chris Zhong wrote:
>> The regulator module consists of 4 DCDCs, 8 LDOs and 2 switches.
>> The output voltages are configurable and are meant to supply power
>> to the main processor and other components
>
> To repeat what I said on your previous posting: this driver has already
> been applied, if there are any changes required please send incremental
> patches against what is in the tree.

In your next version, you should include your fixup to the regulator
driver in the series and you shouldn't include the regulator patch.
So you'd have:

1. Regulator: RK808: modify for struct rk808 change
2. MFD: Add rk808 device tree bindings documentation
3. MFD: RK808: Add new mfd driver for RK808
4. RTC: RK808: add RTC driver for RK808
5. Clk: RK808: Add clkout driver for RK808

Also please check your capitalization.  "Clk" should be "clk" and you
should be consistent about whether RK808 is caps or lower case.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v5 2/4] zsmalloc: change return value unit of zs_get_total_size_bytes

2014-08-24 Thread David Horner

On Sun, Aug 24, 2014 at 8:05 PM, Minchan Kim  wrote:
> zs_get_total_size_bytes returns a amount of memory zsmalloc
> consumed with *byte unit* but zsmalloc operates *page unit*
> rather than byte unit so let's change the API so benefit
> we could get is that reduce unnecessary overhead
> (ie, change page unit with byte unit) in zsmalloc.
>
> Since return type is pages, "zs_get_total_pages" is better than
> "zs_get_total_size_bytes".
>
> Reviewed-by: Dan Streetman 
Reviewed-by: David Horner 
> Signed-off-by: Minchan Kim 
> ---
>  drivers/block/zram/zram_drv.c | 4 ++--
>  include/linux/zsmalloc.h  | 2 +-
>  mm/zsmalloc.c | 9 -
>  3 files changed, 7 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index d00831c3d731..f0b8b30a7128 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -103,10 +103,10 @@ static ssize_t mem_used_total_show(struct device *dev,
>
> down_read(>init_lock);
> if (init_done(zram))
> -   val = zs_get_total_size_bytes(meta->mem_pool);
> +   val = zs_get_total_pages(meta->mem_pool);
> up_read(>init_lock);
>
> -   return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
> +   return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
>  }
>
>  static ssize_t max_comp_streams_show(struct device *dev,
> diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
> index e44d634e7fb7..05c214760977 100644
> --- a/include/linux/zsmalloc.h
> +++ b/include/linux/zsmalloc.h
> @@ -46,6 +46,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long 
> handle,
> enum zs_mapmode mm);
>  void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
>
> -u64 zs_get_total_size_bytes(struct zs_pool *pool);
> +unsigned long zs_get_total_pages(struct zs_pool *pool);
>
>  #endif
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 2a4acf400846..c4a91578dc96 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -297,7 +297,7 @@ static void zs_zpool_unmap(void *pool, unsigned long 
> handle)
>
>  static u64 zs_zpool_total_size(void *pool)
>  {
> -   return zs_get_total_size_bytes(pool);
> +   return zs_get_total_pages(pool) << PAGE_SHIFT;
>  }
>
>  static struct zpool_driver zs_zpool_driver = {
> @@ -1181,12 +1181,11 @@ void zs_unmap_object(struct zs_pool *pool, unsigned 
> long handle)
>  }
>  EXPORT_SYMBOL_GPL(zs_unmap_object);
>
> -u64 zs_get_total_size_bytes(struct zs_pool *pool)
> +unsigned long zs_get_total_pages(struct zs_pool *pool)
>  {
> -   u64 npages = atomic_long_read(>pages_allocated);
> -   return npages << PAGE_SHIFT;
> +   return atomic_long_read(>pages_allocated);
>  }
> -EXPORT_SYMBOL_GPL(zs_get_total_size_bytes);
> +EXPORT_SYMBOL_GPL(zs_get_total_pages);
>
>  module_init(zs_init);
>  module_exit(zs_exit);
> --
> 2.0.0
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v5 4/4] zram: report maximum used memory

2014-08-24 Thread David Horner

On Sun, Aug 24, 2014 at 8:05 PM, Minchan Kim  wrote:
> Normally, zram user could get maximum memory usage zram consumed
> via polling mem_used_total with sysfs in userspace.
>
> But it has a critical problem because user can miss peak memory
> usage during update interval of polling. For avoiding that,
> user should poll it with shorter interval(ie, 0.01s)
> with mlocking to avoid page fault delay when memory pressure
> is heavy. It would be troublesome.
>
> This patch adds new knob "mem_used_max" so user could see
> the maximum memory usage easily via reading the knob and reset
> it via "echo 0 > /sys/block/zram0/mem_used_max".
>
> Reviewed-by: Dan Streetman 
Reviewed-by: David Horner 
> Signed-off-by: Minchan Kim 
> ---
>  Documentation/ABI/testing/sysfs-block-zram | 10 +
>  Documentation/blockdev/zram.txt|  1 +
>  drivers/block/zram/zram_drv.c  | 60 
> +-
>  drivers/block/zram/zram_drv.h  |  1 +
>  4 files changed, 70 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/ABI/testing/sysfs-block-zram 
> b/Documentation/ABI/testing/sysfs-block-zram
> index dbe643775ec1..01a38eaf1552 100644
> --- a/Documentation/ABI/testing/sysfs-block-zram
> +++ b/Documentation/ABI/testing/sysfs-block-zram
> @@ -120,6 +120,16 @@ Description:
> statistic.
> Unit: bytes
>
> +What:  /sys/block/zram/mem_used_max
> +Date:  August 2014
> +Contact:   Minchan Kim 
> +Description:
> +   The mem_used_max file is read/write and specifies the amount
> +   of maximum memory zram have consumed to store compressed data.
> +   For resetting the value, you should write "0". Otherwise,
> +   you could see -EINVAL.
> +   Unit: bytes
> +
>  What:  /sys/block/zram/mem_limit
>  Date:  August 2014
>  Contact:   Minchan Kim 
> diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
> index 82c6a41116db..7fcf9c6592ec 100644
> --- a/Documentation/blockdev/zram.txt
> +++ b/Documentation/blockdev/zram.txt
> @@ -111,6 +111,7 @@ size of the disk when not in use so a huge zram is 
> wasteful.
> orig_data_size
> compr_data_size
> mem_used_total
> +   mem_used_max
>
>  8) Deactivate:
> swapoff /dev/zram0
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index 370c355eb127..1a2b3e320ea5 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -149,6 +149,41 @@ static ssize_t mem_limit_store(struct device *dev,
> return len;
>  }
>
> +static ssize_t mem_used_max_show(struct device *dev,
> +   struct device_attribute *attr, char *buf)
> +{
> +   u64 val = 0;
> +   struct zram *zram = dev_to_zram(dev);
> +
> +   down_read(>init_lock);
> +   if (init_done(zram))
> +   val = atomic_long_read(>stats.max_used_pages);
> +   up_read(>init_lock);
> +
> +   return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
> +}
> +
> +static ssize_t mem_used_max_store(struct device *dev,
> +   struct device_attribute *attr, const char *buf, size_t len)
> +{
> +   int err;
> +   unsigned long val;
> +   struct zram *zram = dev_to_zram(dev);
> +   struct zram_meta *meta = zram->meta;
> +
> +   err = kstrtoul(buf, 10, );
> +   if (err || val != 0)
> +   return -EINVAL;
> +
> +   down_read(>init_lock);
> +   if (init_done(zram))
> +   atomic_long_set(>stats.max_used_pages,
> +   zs_get_total_pages(meta->mem_pool));
> +   up_read(>init_lock);
> +
> +   return len;
> +}
> +
>  static ssize_t max_comp_streams_store(struct device *dev,
> struct device_attribute *attr, const char *buf, size_t len)
>  {
> @@ -461,6 +496,21 @@ out_cleanup:
> return ret;
>  }
>
> +static inline void update_used_max(struct zram *zram,
> +   const unsigned long pages)
> +{
> +   int old_max, cur_max;
> +
> +   old_max = atomic_long_read(>stats.max_used_pages);
> +
> +   do {
> +   cur_max = old_max;
> +   if (pages > cur_max)
> +   old_max = atomic_long_cmpxchg(
> +   >stats.max_used_pages, cur_max, pages);
> +   } while (old_max != cur_max);
> +}
> +
>  static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 
> index,
>int offset)
>  {
> @@ -472,6 +522,7 @@ static int zram_bvec_write(struct zram *zram, struct 
> bio_vec *bvec, u32 index,
> struct zram_meta *meta = zram->meta;
> struct zcomp_strm *zstrm;
> bool locked = false;
> +   unsigned long alloced_pages;
>
> page = bvec->bv_page;
> if (is_partial_io(bvec)) {
> @@ -541,13 +592,15 @@ static int

Re: [PATCH v3 5/5] Clk: RK808: Add clkout driver for RK808

2014-08-24 Thread Doug Anderson

Chris,

On Sat, Aug 23, 2014 at 4:07 AM, Chris Zhong  wrote:
> Signed-off-by: Chris Zhong 
>
> ---
>
> Changes in v4:

Wait, is this v4 or v3?  The subject line still says v3, which is confusing.


> Advices by Doug
> - add a "#clock-cells" property
> - update the example
>
> Changes in v3: None
> Changes in v2: None
>
>  drivers/clk/Kconfig|9 ++
>  drivers/clk/Makefile   |1 +
>  drivers/clk/clk-rk808.c|  162 
> 
>  include/dt-bindings/clock/rockchip,rk808.h |   11 ++
>  4 files changed, 183 insertions(+)
>  create mode 100644 drivers/clk/clk-rk808.c
>  create mode 100644 include/dt-bindings/clock/rockchip,rk808.h
>
> diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
> index cfd3af7..84e0590 100644
> --- a/drivers/clk/Kconfig
> +++ b/drivers/clk/Kconfig
> @@ -38,6 +38,15 @@ config COMMON_CLK_MAX77686
> ---help---
>   This driver supports Maxim 77686 crystal oscillator clock.
>
> +config COMMON_CLK_RK808
> +   tristate "Clock driver for RK808"
> +   depends on MFD_RK808
> +   ---help---
> + This driver supports RK808 crystal oscillator clock. These
> + multi-function devices have two fixed-rate oscillators,
> + clocked at 32KHz each. Clkout1 is always on, Clkout2 can off
> + by control register.
> +
>  config COMMON_CLK_SI5351
> tristate "Clock driver for SiLabs 5351A/B/C"
> depends on I2C
> diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
> index f537a0b..99f53d5 100644
> --- a/drivers/clk/Makefile
> +++ b/drivers/clk/Makefile
> @@ -28,6 +28,7 @@ obj-$(CONFIG_ARCH_NOMADIK)+= clk-nomadik.o
>  obj-$(CONFIG_ARCH_NSPIRE)  += clk-nspire.o
>  obj-$(CONFIG_COMMON_CLK_PALMAS)+= clk-palmas.o
>  obj-$(CONFIG_CLK_PPC_CORENET)  += clk-ppc-corenet.o
> +obj-$(CONFIG_COMMON_CLK_RK808) += clk-rk808.o
>  obj-$(CONFIG_COMMON_CLK_S2MPS11)   += clk-s2mps11.o
>  obj-$(CONFIG_COMMON_CLK_SI5351)+= clk-si5351.o
>  obj-$(CONFIG_COMMON_CLK_SI570) += clk-si570.o
> diff --git a/drivers/clk/clk-rk808.c b/drivers/clk/clk-rk808.c
> new file mode 100644
> index 000..50a25fc
> --- /dev/null
> +++ b/drivers/clk/clk-rk808.c
> @@ -0,0 +1,162 @@
> +/*
> + * Clkout driver for Rockchip RK808
> + *
> + * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd
> + *
> + * Author: Chris Zhong 
> + * Author: Zhang Qing 

You have author below.  Why is it here, too?


> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +struct rk808_clkout {
> +   struct rk808 *rk808;
> +   struct clk_onecell_data clk_data;
> +   struct clk_hw   clkout1_hw;
> +   struct clk_hw   clkout2_hw;
> +};
> +
> +static unsigned long rk808_clkout_recalc_rate(struct clk_hw *hw,
> + unsigned long parent_rate)
> +{
> +   return 32768;
> +}
> +
> +static int rk808_clkout1_is_prepared(struct clk_hw *hw)
> +{
> +   return 1;
> +}
> +
> +static int rk808_clkout2_control(struct clk_hw *hw, bool enable)
> +{
> +   struct rk808_clkout *rk808_clkout = container_of(hw,
> +struct rk808_clkout,
> +clkout2_hw);
> +   struct rk808 *rk808 = rk808_clkout->rk808;
> +
> +   return regmap_update_bits(rk808->regmap, RK808_CLK32OUT_REG,
> + CLK32KOUT2_EN, enable ? CLK32KOUT2_EN : 0);
> +}
> +
> +static int rk808_clkout2_prepare(struct clk_hw *hw)
> +{
> +   return rk808_clkout2_control(hw, 1);
> +}
> +
> +static void rk808_clkout2_unprepare(struct clk_hw *hw)
> +{
> +   rk808_clkout2_control(hw, 0);
> +}
> +
> +static int rk808_clkout2_is_prepared(struct clk_hw *hw)
> +{
> +   struct rk808_clkout *rk808_clkout = container_of(hw,
> +struct rk808_clkout,
> +clkout2_hw);
> +   struct rk808 *rk808 = rk808_clkout->rk808;
> +   uint32_t val;
> +
> +   int ret = regmap_read(rk808->regmap, RK808_CLK32OUT_REG, );
> +
> +   if (ret < 0)
> +   return ret;
> +
> +   return (val & CLK32KOUT2_EN) ? 1 : 0;
> +}
> +
> +static const struct clk_ops rk808_clkout1_ops = {
> +   .is_prepared = rk808_clkout1_is_prepared,
> +

Re: [Bug 3.14.17] inconsistent lock state

2014-08-24 Thread Lan Tianyu

On 2014年08月25日 11:13, Linus Torvalds wrote:
> On Sun, Aug 24, 2014 at 7:53 PM, Lan Tianyu  wrote:
>>
>> Sorry about this. We are resolving the issue in the other bug
>> report(https://lkml.org/lkml/2014/8/21/606) and I have proposed a fix
patch(http://marc.info/?l=linux-acpi&m=140869309231199&w=2).
> 
> Ahh. Good. That patch looks fine to me, and while it makes me worry a
> bit that some codepath expects the power/sleep button to be handled
> immediately in interrupt context, I guess the actual callbacks have
> never actually done anything but schedule other things to happen (ie
> add events to some queue), and making the context be the same as the
> other notify callbacks would seem to be a good thing regardless of
> this particular bug.

Yes, I have the same opinion and the callback just reports power/sleep
button event to user space via input layer or ACPI netlink routines.

The button devices enumerated from ACPI namespace and FADT table share
the same notify callback and do the same things while they are running
in different contexts. This does not seem to make sense.

> 
> Knut - can you please test the patch Lan pointed at? I realize it
> doesn't seem to be entirely consistent for you (which is a bit
> surprising, I wonder why lockdep doesn't trigger it consistently), but
> it would be good to have more testing. Even if that patch looks
> "obviously good" (tm) at a quick glance.

BTW, this bug only takes place on the machines with fixed button device.
This can be identified via check whether there are LNXPWRBN:00 or
LNXSLPBN:00 device nodes under /sys/bus/acpi/devices.

> 
>   Linus
> 


-- 
Best regards
Tianyu Lan
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: [PATCH net-next 0/4] r8152: firmware support

2014-08-24 Thread Hayes Wang

 From: David Miller [mailto:da...@davemloft.net] 
[...]
> You haven't told us why you need to do this.
> 
> These are just programming registers in the chip, and I see no reason
> to not keep these in the driver with real code.
> 
> I'm not applying this series, you haven't explained what is happening
> here and the reason for doing so.  Ironically, that's exactly what you
> are supposed to provide in this 0/4 header email.

The nic has the MCU inside which is used to fix the PHY,
MAC, and some behavior of the USB device. Each parts have
different methods of updating the firmware by accessing the
registers. The firmware files are used to deal with the
processes, so I need some functions to parse the firmware
files to update the fimrware code.

I would resend these. Sorry.
 
Best Regards,
Hayes
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v4 3/4] zram: zram memory size limitation

2014-08-24 Thread David Horner

On Sun, Aug 24, 2014 at 7:56 PM, Minchan Kim  wrote:
> Hello David,
>
> On Fri, Aug 22, 2014 at 06:55:38AM -0400, David Horner wrote:
>> On Thu, Aug 21, 2014 at 8:42 PM, Minchan Kim  wrote:
>> > Since zram has no control feature to limit memory usage,
>> > it makes it hard to manage system memory.
>> >
>> > This patch adds new knob "mem_limit" via sysfs to set up the
>> > a limit so that zram could fail allocation once it reaches
>> > the limit.
>> >
>> > In addition, user could change the limit in runtime so that
>> > he could manage the memory more dynamically.
>> >
>> - Default is no limit so it doesn't break old behavior.
>> + Initial state is no limit so it doesn't break old behavior.
>>
>> I understand your previous post now.
>>
>> I was saying that setting to either a null value or garbage
>>  (which is interpreted as zero by memparse(buf, NULL);)
>> removes the limit.
>>
>> I think this is "surprise" behaviour and rather the null case should
>> return  -EINVAL
>> The test below should be "good enough" though not catching all garbage.
>
> Thanks for suggesting but as I said, it should be fixed in memparse itself,
> not caller if it is really problem so I don't want to touch it in this
> patchset. It's not critical for adding the feature.
>

I've looked into the memparse function more since we talked.
I do believe a wrapper function around it for the typical use by sysfs would
be very valuable.
However, there is nothing wrong with memparse itself that needs to be fixed.

It does what it is documented to do very well (In My Uninformed Opinion).
It provides everything that a caller needs to manage the token that it
processes.
It thus handles strings like "7,,5,8,,9" with the implied zeros.

The fact that other callers don't check the return pointer value to
see if only a null
string was processed, is not its fault.
Nor that it may not be ideally suited to sysfs attributes; that other store
functions use it in a given manner does not mean that it is correct -
nor that it is
incorrect for that "knob". Some attributes could be just as valid with
null zeros.

And you are correct, to disambiguate the zero is not required for the
limit feature.
Your original patch which disallowed zero was full feature for mem_limit.
It is the requested non-crucial feature to allow zero to reestablish
the initial state
 that benefits from distinguishing an explicit zero from a "default zero"
 when garbage is written.

The final argument is that if we release this feature as is the undocumented
 functionality could be relied upon, and when later fixed: user space breaks.
They say getting API right is a difficult exercise. I suggest, if we
don't insist on
 an explicit zero we have the API wrong.

I don't think you disagreed, just that the burden to get it correct
lay elsewhere.

If that is the case it doesn't really matter, we cannot release this
interface until
 it is corrected wherever it must be.

And my zero check was a poor hack.

I should have explicitly checked the returned pointer value.

I will send that proposed revision, and hopefully you will consider it
for inclusion.

>>
>> >
>> > Signed-off-by: Minchan Kim 
>> > ---
>> >  Documentation/ABI/testing/sysfs-block-zram | 10 
>> >  Documentation/blockdev/zram.txt| 24 ++---
>> >  drivers/block/zram/zram_drv.c  | 41 
>> > ++
>> >  drivers/block/zram/zram_drv.h  |  5 
>> >  4 files changed, 76 insertions(+), 4 deletions(-)
>> >
>> > diff --git a/Documentation/ABI/testing/sysfs-block-zram 
>> > b/Documentation/ABI/testing/sysfs-block-zram
>> > index 70ec992514d0..b8c779d64968 100644
>> > --- a/Documentation/ABI/testing/sysfs-block-zram
>> > +++ b/Documentation/ABI/testing/sysfs-block-zram
>> > @@ -119,3 +119,13 @@ Description:
>> > efficiency can be calculated using compr_data_size and this
>> > statistic.
>> > Unit: bytes
>> > +
>> > +What:  /sys/block/zram/mem_limit
>> > +Date:  August 2014
>> > +Contact:   Minchan Kim 
>> > +Description:
>> > +   The mem_limit file is read/write and specifies the amount
>> > +   of memory to be able to consume memory to store store
>> > +   compressed data. The limit could be changed in run time
>> > -   and "0" is default which means disable the limit.
>> > +   and "0" means disable the limit. No limit is the initial 
>> > state.
>>
>> there should be no default in the API.
>
> Thanks.
>
>>
>> > +   Unit: bytes
>> > diff --git a/Documentation/blockdev/zram.txt 
>> > b/Documentation/blockdev/zram.txt
>> > index 0595c3f56ccf..82c6a41116db 100644
>> > --- a/Documentation/blockdev/zram.txt
>> > +++ b/Documentation/blockdev/zram.txt
>> > @@ -74,14 +74,30 @@ There is little point creating a zram of greater than 
>> > twice the size of memory
>> >  since we expect a 2:1 compression ratio. Note that zram uses about

[PATCHv3 4/4] ASoC: fsl-sai: Convert to use regmap framework's endianness method.

2014-08-24 Thread Xiubo Li

Signed-off-by: Xiubo Li 
---
 Documentation/devicetree/bindings/sound/fsl-sai.txt | 7 +++
 sound/soc/fsl/fsl_sai.c | 6 +-
 sound/soc/fsl/fsl_sai.h | 1 -
 3 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/Documentation/devicetree/bindings/sound/fsl-sai.txt 
b/Documentation/devicetree/bindings/sound/fsl-sai.txt
index dc9f9c3..06a405e 100644
--- a/Documentation/devicetree/bindings/sound/fsl-sai.txt
+++ b/Documentation/devicetree/bindings/sound/fsl-sai.txt
@@ -18,9 +18,8 @@ Required properties:
 - pinctrl-names: Must contain a "default" entry.
 - pinctrl-NNN: One property must exist for each entry in pinctrl-names.
   See ../pinctrl/pinctrl-bindings.txt for details of the property values.
-- big-endian-regs: If this property is absent, the little endian mode will
-  be in use as default, or the big endian mode will be in use for all the
-  device registers.
+- big-endian: Boolean property, required if all the FTM_PWM registers
+  are big-endian rather than little-endian.
 - big-endian-data: If this property is absent, the little endian mode will
   be in use as default, or the big endian mode will be in use for all the
   fifo data.
@@ -53,6 +52,6 @@ sai2: sai@40031000 {
  dma-names = "tx", "rx";
  dmas = < 0 VF610_EDMA_MUXID0_SAI2_TX>,
   < 0 VF610_EDMA_MUXID0_SAI2_RX>;
- big-endian-regs;
+ big-endian;
  big-endian-data;
 };
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index 60fe7c7..a6eb784 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -544,7 +544,7 @@ static bool fsl_sai_writeable_reg(struct device *dev, 
unsigned int reg)
}
 }
 
-static struct regmap_config fsl_sai_regmap_config = {
+static const struct regmap_config fsl_sai_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
@@ -573,10 +573,6 @@ static int fsl_sai_probe(struct platform_device *pdev)
if (of_device_is_compatible(pdev->dev.of_node, "fsl,imx6sx-sai"))
sai->sai_on_imx = true;
 
-   sai->big_endian_regs = of_property_read_bool(np, "big-endian-regs");
-   if (sai->big_endian_regs)
-   fsl_sai_regmap_config.val_format_endian = REGMAP_ENDIAN_BIG;
-
sai->big_endian_data = of_property_read_bool(np, "big-endian-data");
 
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h
index b3d8864..2cded44 100644
--- a/sound/soc/fsl/fsl_sai.h
+++ b/sound/soc/fsl/fsl_sai.h
@@ -132,7 +132,6 @@ struct fsl_sai {
struct clk *bus_clk;
struct clk *mclk_clk[FSL_SAI_MCLK_MAX];
 
-   bool big_endian_regs;
bool big_endian_data;
bool is_dsp_mode;
bool sai_on_imx;
-- 
1.8.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCHv3 3/4] ASoC: fsl-spdif: Convert to use regmap framework's endianness method.

2014-08-24 Thread Xiubo Li

Signed-off-by: Xiubo Li 
---
 sound/soc/fsl/fsl_spdif.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c
index 70acfe4..ae4e408 100644
--- a/sound/soc/fsl/fsl_spdif.c
+++ b/sound/soc/fsl/fsl_spdif.c
@@ -1040,7 +1040,7 @@ static bool fsl_spdif_writeable_reg(struct device *dev, 
unsigned int reg)
}
 }
 
-static struct regmap_config fsl_spdif_regmap_config = {
+static const struct regmap_config fsl_spdif_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
@@ -1184,9 +1184,6 @@ static int fsl_spdif_probe(struct platform_device *pdev)
memcpy(_priv->cpu_dai_drv, _spdif_dai, sizeof(fsl_spdif_dai));
spdif_priv->cpu_dai_drv.name = spdif_priv->name;
 
-   if (of_property_read_bool(np, "big-endian"))
-   fsl_spdif_regmap_config.val_format_endian = REGMAP_ENDIAN_BIG;
-
/* Get the addresses and IRQ */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
regs = devm_ioremap_resource(>dev, res);
-- 
1.8.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCHv3 2/4] ASoC: fsl-esai: Convert to use regmap framework's endianness method.

2014-08-24 Thread Xiubo Li

Signed-off-by: Xiubo Li 
---
 sound/soc/fsl/fsl_esai.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c
index b2f6b3e..8bcdfda 100644
--- a/sound/soc/fsl/fsl_esai.c
+++ b/sound/soc/fsl/fsl_esai.c
@@ -710,7 +710,7 @@ static bool fsl_esai_writeable_reg(struct device *dev, 
unsigned int reg)
}
 }
 
-static struct regmap_config fsl_esai_regmap_config = {
+static const struct regmap_config fsl_esai_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
@@ -736,9 +736,6 @@ static int fsl_esai_probe(struct platform_device *pdev)
esai_priv->pdev = pdev;
strcpy(esai_priv->name, np->name);
 
-   if (of_property_read_bool(np, "big-endian"))
-   fsl_esai_regmap_config.val_format_endian = REGMAP_ENDIAN_BIG;
-
/* Get the addresses and IRQ */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
regs = devm_ioremap_resource(>dev, res);
-- 
1.8.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCHv3 1/4] ASoC: fsl-asrc: Convert to use regmap framework's endianness method.

2014-08-24 Thread Xiubo Li

Signed-off-by: Xiubo Li 
---
 sound/soc/fsl/fsl_asrc.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
index 8221104..3b14531 100644
--- a/sound/soc/fsl/fsl_asrc.c
+++ b/sound/soc/fsl/fsl_asrc.c
@@ -684,7 +684,7 @@ static bool fsl_asrc_writeable_reg(struct device *dev, 
unsigned int reg)
}
 }
 
-static struct regmap_config fsl_asrc_regmap_config = {
+static const struct regmap_config fsl_asrc_regmap_config = {
.reg_bits = 32,
.reg_stride = 4,
.val_bits = 32,
@@ -802,10 +802,6 @@ static int fsl_asrc_probe(struct platform_device *pdev)
 
asrc_priv->paddr = res->start;
 
-   /* Register regmap and let it prepare core clock */
-   if (of_property_read_bool(np, "big-endian"))
-   fsl_asrc_regmap_config.val_format_endian = REGMAP_ENDIAN_BIG;
-
asrc_priv->regmap = devm_regmap_init_mmio_clk(>dev, "mem", regs,
  _asrc_regmap_config);
if (IS_ERR(asrc_priv->regmap)) {
-- 
1.8.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCHv3 0/4] ASoC: Convert to use regmap framework's endianness method.

2014-08-24 Thread Xiubo Li

Changes in V3:
- Delete 'ASoC: fsl-sai: rename big_endian_data to is_msb_first.' patch.


Changes in V2:
- Modified the regmap config to const type.
- Added ASRC patch.
- Followed Rutland's advice.




Xiubo Li (4):
  ASoC: fsl-asrc: Convert to use regmap framework's endianness method.
  ASoC: fsl-esai: Convert to use regmap framework's endianness method.
  ASoC: fsl-spdif: Convert to use regmap framework's endianness method.
  ASoC: fsl-sai: Convert to use regmap framework's endianness method.

 Documentation/devicetree/bindings/sound/fsl-sai.txt | 7 +++
 sound/soc/fsl/fsl_asrc.c| 6 +-
 sound/soc/fsl/fsl_esai.c| 5 +
 sound/soc/fsl/fsl_sai.c | 6 +-
 sound/soc/fsl/fsl_sai.h | 1 -
 sound/soc/fsl/fsl_spdif.c   | 5 +
 6 files changed, 7 insertions(+), 23 deletions(-)

-- 
1.8.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5] mm: softdirty: enable write notifications on VMAs after VM_SOFTDIRTY cleared

2014-08-24 Thread Peter Feiner

For VMAs that don't want write notifications, PTEs created for read
faults have their write bit set. If the read fault happens after
VM_SOFTDIRTY is cleared, then the PTE's softdirty bit will remain
clear after subsequent writes.

Here's a simple code snippet to demonstrate the bug:

  char* m = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
 MAP_ANONYMOUS | MAP_SHARED, -1, 0);
  system("echo 4 > /proc/$PPID/clear_refs"); /* clear VM_SOFTDIRTY */
  assert(*m == '\0'); /* new PTE allows write access */
  assert(!soft_dirty(x));
  *m = 'x';   /* should dirty the page */
  assert(soft_dirty(x));  /* fails */

With this patch, write notifications are enabled when VM_SOFTDIRTY is
cleared. Furthermore, to avoid unnecessary faults, write
notifications are disabled when VM_SOFTDIRTY is reset.

As a side effect of enabling and disabling write notifications with
care, this patch fixes a bug in mprotect where vm_page_prot bits set
by drivers were zapped on mprotect. An analogous bug was fixed in mmap
by c9d0bf241451a3ab7d02e1652c22b80cd7d93e8f.

Reported-by: Peter Feiner 
Suggested-by: Kirill A. Shutemov 
Reviewed-by: Kirill A. Shutemov 
Reviewed-by: Cyrill Gorcunov 
Signed-off-by: Peter Feiner 

---

v1 -> v2: Instead of checking VM_SOFTDIRTY in the fault handler,
  enable write notifications on vm_page_prot when we clear
  VM_SOFTDIRTY.

v2 -> v3: * Grab the mmap_sem in write mode if any VMAs have
VM_SOFTDIRTY set. This involved refactoring clear_refs_write
to make it less unwieldy.

  * In mprotect, don't inadvertently disable write notifications on VMAs
that have had VM_SOFTDIRTY cleared

  * The mprotect fix and mmap cleanup that comprised the
second and third patches in v2 were swallowed by the main
patch because of vm_page_prot corner case handling.

v3 -> v4: Handle !defined(CONFIG_MEM_SOFT_DIRTY): old patch would have
  enabled write notifications for all VMAs in this case.

v4 -> v5: IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) instead of #ifdef ...
---
 fs/proc/task_mmu.c | 113 +
 include/linux/mm.h |  14 +++
 mm/mmap.c  |  24 +---
 mm/mprotect.c  |   6 +--
 4 files changed, 97 insertions(+), 60 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index dfc791c..f5e75c6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -785,13 +785,80 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long 
addr,
return 0;
 }
 
+static int clear_refs(struct mm_struct *mm, enum clear_refs_types type,
+  int write)
+{
+   int r = 0;
+   struct vm_area_struct *vma;
+   struct clear_refs_private cp = {
+   .type = type,
+   };
+   struct mm_walk clear_refs_walk = {
+   .pmd_entry = clear_refs_pte_range,
+   .mm = mm,
+   .private = ,
+   };
+
+   if (write)
+   down_write(>mmap_sem);
+   else
+   down_read(>mmap_sem);
+
+   if (type == CLEAR_REFS_SOFT_DIRTY)
+   mmu_notifier_invalidate_range_start(mm, 0, -1);
+
+   for (vma = mm->mmap; vma; vma = vma->vm_next) {
+   cp.vma = vma;
+   if (is_vm_hugetlb_page(vma))
+   continue;
+   /*
+* Writing 1 to /proc/pid/clear_refs affects all pages.
+*
+* Writing 2 to /proc/pid/clear_refs only affects
+* Anonymous pages.
+*
+* Writing 3 to /proc/pid/clear_refs only affects file
+* mapped pages.
+*
+* Writing 4 to /proc/pid/clear_refs affects all pages.
+*/
+   if (type == CLEAR_REFS_ANON && vma->vm_file)
+   continue;
+   if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
+   continue;
+   if (type == CLEAR_REFS_SOFT_DIRTY &&
+   (vma->vm_flags & VM_SOFTDIRTY)) {
+   if (!write) {
+   r = -EAGAIN;
+   break;
+   }
+   vma->vm_flags &= ~VM_SOFTDIRTY;
+   vma_enable_writenotify(vma);
+   }
+   walk_page_range(vma->vm_start, vma->vm_end,
+   _refs_walk);
+   }
+
+   if (type == CLEAR_REFS_SOFT_DIRTY)
+   mmu_notifier_invalidate_range_end(mm, 0, -1);
+
+   if (!r)
+   flush_tlb_mm(mm);
+
+   if (write)
+   up_write(>mmap_sem);
+   else
+   up_read(>mmap_sem);
+
+   return r;
+}
+
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
 {
struct task_struct *task;
char

RE: [PATCH net-next 4/4] r8152: support firmware files

2014-08-24 Thread Hayes Wang

Francois Romieu [mailto:rom...@fr.zoreil.com] 
[...]
> > +static void rtl_request_firmware(struct r8152 *tp)
> > +{
> > +   char *fw_name = NULL;
> > +
> > +   if (tp->rtl_fw.fw)
> > +   goto out_request;
> > +
> > +   switch (tp->version) {
> > +   case RTL_VER_01:
> > +   fw_name = "rtl_nic/rtl8152-1.fw";
> > +   break;
> > +   case RTL_VER_02:
> > +   fw_name = "rtl_nic/rtl8152-2.fw";
> > +   break;
> > +   case RTL_VER_03:
> > +   fw_name = "rtl_nic/rtl8153-1.fw";
> > +   break;
> > +   case RTL_VER_04:
> > +   fw_name = "rtl_nic/rtl8153-2.fw";
> > +   break;
> > +   case RTL_VER_05:
> > +   fw_name = "rtl_nic/rtl8153-3.fw";
> > +   break;
> 
> The driver should use MODULE_FIRMWARE() for these files.

Oops. I would fix this. Thanks.
 
Best Regards,
Hayes
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-24 Thread Dennis Chen

On Mon, Aug 25, 2014 at 7:14 AM, Wanpeng Li  wrote:
> Please Cc kvm ml.

You've done that for me, thanks. The page-walk length sanity check has
been done in the hardware_setup() function, so it's not necessary in
this patch, but I still think it does make sense for the memory type
check, any comments, guys?

> On Sun, Aug 24, 2014 at 11:54:32AM +0800, Dennis Chen wrote:
>>This patch is used to construct the eptp in vmx mode with values
>>readed from MSR according to the intel x86 software developer's
>>manual.
>>
>>Signed-off-by: Dennis Chen 
>>---
>> arch/x86/include/asm/vmx.h |1 +
>> arch/x86/kvm/vmx.c |   21 +
>> 2 files changed, 18 insertions(+), 4 deletions(-)
>>
>>diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>>index bcbfade..bf82a77 100644
>>--- a/arch/x86/include/asm/vmx.h
>>+++ b/arch/x86/include/asm/vmx.h
>>@@ -417,6 +417,7 @@ enum vmcs_field {
>> #define VMX_EPT_GAW_EPTP_SHIFT3
>> #define VMX_EPT_AD_ENABLE_BIT(1ull << 6)
>> #define VMX_EPT_DEFAULT_MT0x6ull
>>+#define VMX_EPT_UC_MT0x0ull
>> #define VMX_EPT_READABLE_MASK0x1ull
>> #define VMX_EPT_WRITABLE_MASK0x2ull
>> #define VMX_EPT_EXECUTABLE_MASK0x4ull
>>diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>index bfe11cf..7add5ce 100644
>>--- a/arch/x86/kvm/vmx.c
>>+++ b/arch/x86/kvm/vmx.c
>>@@ -3477,11 +3477,24 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu,
>>unsigned long cr0)
>>
>> static u64 construct_eptp(unsigned long root_hpa)
>> {
>>-u64 eptp;
>>+u64 eptp, pwl;
>>+
>>+if (cpu_has_vmx_ept_4levels())
>>+pwl = VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
>>+else {
>>+WARN(1, "Unsupported page-walk length of 4.\n");
>>+BUG();
>>+}
>>+
>>+if (cpu_has_vmx_eptp_writeback())
>>+eptp = VMX_EPT_DEFAULT_MT | pwl;
>>+else if (cpu_has_vmx_eptp_uncacheable())
>>+eptp = VMX_EPT_UC_MT | pwl;
>>+else {
>>+WARN(1, "Unsupported memory type config in vmx eptp.\n");
>>+BUG();
>>+}
>>
>>-/* TODO write the value reading from MSR */
>>-eptp = VMX_EPT_DEFAULT_MT |
>>-VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
>> if (enable_ept_ad_bits)
>> eptp |= VMX_EPT_AD_ENABLE_BIT;
>> eptp |= (root_hpa & PAGE_MASK);
>>--
>>1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-24 Thread Dennis Chen

On Sun, Aug 24, 2014 at 5:38 PM, Gleb Natapov  wrote:
> On Sun, Aug 24, 2014 at 11:54:32AM +0800, Dennis Chen wrote:
>> This patch is used to construct the eptp in vmx mode with values
>> readed from MSR according to the intel x86 software developer's
>> manual.
>>
>>  static u64 construct_eptp(unsigned long root_hpa)
>>  {
>> -u64 eptp;
>> +u64 eptp, pwl;
>> +
>> +if (cpu_has_vmx_ept_4levels())
>> +pwl = VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
>> +else {
>> +WARN(1, "Unsupported page-walk length of 4.\n");
> Page-walk length of 4 is the only one that is supported.
>
Since there is a bit 6 in IA32_VMX_EPT_VPID_CAP MSR indicating the
support for the page-walk length, I think sanity check is necessary.
But I just checked the code, it's already done in the hardware_setup()
function which will disable ept feature if the page-walk length is not
4. Gleb, any comments for the memory type check part?

>> +BUG();
>> +}
>> +
>> +if (cpu_has_vmx_eptp_writeback())
>> +eptp = VMX_EPT_DEFAULT_MT | pwl;
>> +else if (cpu_has_vmx_eptp_uncacheable())
>> +eptp = VMX_EPT_UC_MT | pwl;
>> +else {
>> +WARN(1, "Unsupported memory type config in vmx eptp.\n");
>> +BUG();
>> +}
>>
>> -/* TODO write the value reading from MSR */
>> -eptp = VMX_EPT_DEFAULT_MT |
>> -VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
>>  if (enable_ept_ad_bits)
>>  eptp |= VMX_EPT_AD_ENABLE_BIT;
>>  eptp |= (root_hpa & PAGE_MASK);
>> --
>> 1.7.9.5
>
> --
> Gleb.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [Bug 3.14.17] inconsistent lock state

2014-08-24 Thread Linus Torvalds

On Sun, Aug 24, 2014 at 7:53 PM, Lan Tianyu  wrote:
>
> Sorry about this. We are resolving the issue in the other bug
> report(https://lkml.org/lkml/2014/8/21/606) and I have proposed a fix
> patch(http://marc.info/?l=linux-acpi&m=140869309231199&w=2).

Ahh. Good. That patch looks fine to me, and while it makes me worry a
bit that some codepath expects the power/sleep button to be handled
immediately in interrupt context, I guess the actual callbacks have
never actually done anything but schedule other things to happen (ie
add events to some queue), and making the context be the same as the
other notify callbacks would seem to be a good thing regardless of
this particular bug.

Knut - can you please test the patch Lan pointed at? I realize it
doesn't seem to be entirely consistent for you (which is a bit
surprising, I wonder why lockdep doesn't trigger it consistently), but
it would be good to have more testing. Even if that patch looks
"obviously good" (tm) at a quick glance.

  Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC 2/4] tuntap: Publish tuntap maximum number of queues as module_param

2014-08-24 Thread Jason Wang

On 08/24/2014 07:14 PM, Michael S. Tsirkin wrote:
> On Fri, Aug 22, 2014 at 07:52:22AM -0400, Pankaj Gupta wrote:
>>> On 08/20/2014 07:17 PM, Michael S. Tsirkin wrote:
 On Wed, Aug 20, 2014 at 12:58:17PM +0200, Jiri Pirko wrote:
>> Mon, Aug 18, 2014 at 03:37:18PM CEST, pagu...@redhat.com wrote:
 This patch publishes maximum number of tun/tap queues allocated as a
 read_only module parameter which a user space application like
 libvirt
 can make use of to limit maximum number of queues. Value of read_only
 module parameter can be writable only at module load time. If no
 value is set
 at module load time a default value 256 is used which is equal to
 maximum number
 of vCPUS allowed by KVM.

 Administrator can specify maximum number of queues only at the driver
 module load time.

 Signed-off-by: Pankaj Gupta 
 ---
 drivers/net/tun.c |   13 +++--
 1 files changed, 11 insertions(+), 2 deletions(-)

 diff --git a/drivers/net/tun.c b/drivers/net/tun.c
 index acaaf67..1f518e2 100644
 --- a/drivers/net/tun.c
 +++ b/drivers/net/tun.c
 @@ -119,6 +119,9 @@ struct tap_filter {

 #define TUN_FLOW_EXPIRE (3 * HZ)

 +static int max_tap_queues = MAX_TAP_QUEUES;
 +module_param(max_tap_queues, int, S_IRUGO);
>> Please do not introduce new module paramaters. Please other ways to
>> interchange values with userspace.
 I suggested this initially, but thinking more about it, I agree.

 It's a global limit (necessary to limit memory utilization by
 userspace), but it should be possible to change it
 after module load.
>>> How about pass this limit through ifr during TUNSETIFF, then
>>> alloc_netdev_mq() can use this limit.
>> Any other ideas/comments from the experts. Or shall I re-repost other 
>> patches 
>> in the series except this patch until we agree on one.
>>
> It's kind of useless without a way for userspace to discover
> how many queues it can create, no?
>

We can implement ethtool_get_channels for tuntap. But I'm still not
clear why this is necessary.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [Bug 3.14.17] inconsistent lock state

2014-08-24 Thread Lan Tianyu

On 2014年08月25日 01:50, Linus Torvalds wrote:
> Rafael? Lan Tianyu? This is not some minor locking bug. This is a
> *major* mistake unless I misread something.
> 

Hi Linus:

Sorry about this. We are resolving the issue in the other bug
report(https://lkml.org/lkml/2014/8/21/606) and I have proposed a fix
patch(http://marc.info/?l=linux-acpi&m=140869309231199&w=2).

It's my fault. ACPI button notify callback will be called in the
interrupt context when the button device is enumerated from ACPI FADT
table(So called fixed button device). The ACPI button device also can be
enumerated from ACPI namespace and its callback will be run in the
process context just like other ACPI devices' notify callbacks. These
two kinds of button devices use the same callback. Originally, I assumed
all ACPI notify callbacks were run in the process context and didn't
check whether the netlink routine can be used in the interrupt context or not.
Sorry again.

> Linus
> 

-- 
Best regards
Tianyu Lan
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH net-next 2/2] net: exit busy loop when another process is runnable

2014-08-24 Thread Jason Wang

On 08/22/2014 10:16 PM, Eric Dumazet wrote:
> On Fri, 2014-08-22 at 17:08 +0800, Jason Wang wrote:
>
>> > But this is just for current process. We want to determine whether or
>> > not it was worth to loop busily in current process by checking if
>> > there's any another runnable processes or callbacks. And what we need
>> > here is just a simple and lockless hint which can't be wrong but may be
>> > inaccurate to exit the busy loop. The net code does not depends on this
>> > hint to do scheduling or yielding.
>> > 
>> > How about just introducing a boolean helper like current_can_busy_loop()
>> > and return true in one of the following conditions:
>> > 
>> > - Current task is SCHED_FIFO
>> > - Current task is neither SCHED_FIFO nor SCHED_IDLE and no other
>> > runnable processes or pending RCU callbacks in current cpu
>> > 
>> > And add warns to make sure it can only be called in process context.
> 1) Any reasons Eliezer Tamir is not included in the CC list ?
>
>He is the busypoll author after all, and did nothing wrong to be
> banned from these patches ;)
CC Eliezer Tamir


Not intentional, I just generated the CC list through get_maintainer.pl :(
>
> 2) It looks like sk_busy_loop() should not be inlined, it is already too
> big.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: [PATCH v4 6/14] input: cyapa: add gen3 trackpad device basic functions support

2014-08-24 Thread Dudley Du

Hi Dmitry,

> -Original Message-
> From: Dmitry Torokhov [mailto:dmitry.torok...@gmail.com]
> Sent: Saturday, August 23, 2014 7:55 AM
> To: Dudley Du
> Cc: Rafael J. Wysocki; Benson Leung; Patrik Fimml; 
> linux-in...@vger.kernel.org;
> linux-kernel@vger.kernel.org; Dudley Du
> Subject: Re: [PATCH v4 6/14] input: cyapa: add gen3 trackpad device basic
> functions support
> 
> On Thu, Jul 17, 2014 at 02:53:48PM +0800, Dudley Du wrote:
> > Based on the cyapa core, add the gen3 trackpad device's basic functions
> > supported, so gen3 trackpad device can work with kernel input system.
> > The basic function is absolutely same as previous cyapa driver only
> > support gen3 trackpad device.
> > TEST=test on Chromebooks.
> >
> > Signed-off-by: Dudley Du 
> > ---
> >  drivers/input/mouse/Makefile |2 +-
> >  drivers/input/mouse/cyapa.c  |   96 -
> >  drivers/input/mouse/cyapa.h  |1 +
> >  drivers/input/mouse/cyapa_gen3.c |  784
> ++
> >  4 files changed, 881 insertions(+), 2 deletions(-)
> >  create mode 100644 drivers/input/mouse/cyapa_gen3.c
> >
> > diff --git a/drivers/input/mouse/Makefile b/drivers/input/mouse/Makefile
> > index 8608eb7..63b42e0 100644
> > --- a/drivers/input/mouse/Makefile
> > +++ b/drivers/input/mouse/Makefile
> > @@ -35,4 +35,4 @@ psmouse-$(CONFIG_MOUSE_PS2_TRACKPOINT)+= trackpoint.o
> >  psmouse-$(CONFIG_MOUSE_PS2_TOUCHKIT)   += touchkit_ps2.o
> >  psmouse-$(CONFIG_MOUSE_PS2_CYPRESS)+= cypress_ps2.o
> >
> > -cyapatp-y := cyapa.o
> > +cyapatp-y := cyapa.o cyapa_gen3.o
> > diff --git a/drivers/input/mouse/cyapa.c b/drivers/input/mouse/cyapa.c
> > index ae24b02..5c62503 100644
> > --- a/drivers/input/mouse/cyapa.c
> > +++ b/drivers/input/mouse/cyapa.c
> > @@ -113,6 +113,15 @@ ssize_t cyapa_i2c_write(struct cyapa *cyapa, u8 reg,
> >
> >  void cyapa_default_irq_handler(struct cyapa *cyapa)
> >  {
> > +   bool cont;
> > +
> > +   /* Interrupt triggerred by command response in detecting. */
> > +   cont = true;
> > +   if (cyapa_gen3_ops.irq_cmd_handler)
> > +   cont = cyapa_gen3_ops.irq_cmd_handler(cyapa);
> 
> Why not simply
> 
>   cont = cyapa->ops->irq_cmd_handler(cyapa)?


When the default irq handler is called, the device hasn't been initialized
yet: cyapa->ops hasn't been set up correctly, and
cyapa->ops->irq_cmd_handler is NULL.
At that point the driver may still be detecting, so which device module should
be assigned to cyapa->ops is not yet known. But a device such as gen5 executes
commands that are based on interrupt signals, so the irq_cmd_handler() of each
module must be executed to complete a possible command response when an
interrupt comes.
So here, each device module's irq_cmd_handler() should be called directly.


> 
> 
> > +   if (!cont)
> > +   return;
> > +
> > /*
> >  * Do redetecting when device states is still unknown and
> >  * interrupt envent is received from device.
> > @@ -252,6 +261,9 @@ static int cyapa_check_is_operational(struct cyapa
> *cyapa)
> > return ret;
> >
> > switch (cyapa->gen) {
> > +   case CYAPA_GEN3:
> > +   cyapa->ops = _gen3_ops;
> > +   break;
> > default:
> > cyapa->ops = _default_ops;
> > cyapa->gen = CYAPA_GEN_UNKNOWN;
> > @@ -314,9 +326,85 @@ out:
> >   */
> >  static int cyapa_get_state(struct cyapa *cyapa)
> >  {
> > +   int ret;
> > +   u8 status[BL_STATUS_SIZE];
> > +   u8 cmd[32];
> > +   /* The i2c address of gen4 and gen5 trackpad device must be even. */
> > +   bool even_addr = ((cyapa->client->addr & 0x0001) == 0);
> > +   bool smbus = false;
> > +   int retries = 2;
> > +
> > cyapa->state = CYAPA_STATE_NO_DEVICE;
> >
> > -   return -ENODEV;
> > +   /*
> > +* Get trackpad status by reading 3 registers starting from 0.
> > +* If the device is in the bootloader, this will be BL_HEAD.
> > +* If the device is in operation mode, this will be the DATA regs.
> > +*
> > +*/
> > +   ret = cyapa_i2c_reg_read_block(cyapa, BL_HEAD_OFFSET, BL_STATUS_SIZE,
> > +  status);
> > +
> > +   /*
> > +* On smbus systems in OP mode, the i2c_reg_read will fail with
> > +* -ETIMEDOUT.  In this case, try again using the smbus equivalent
> > +* command.  This should return a BL_HEAD indicating CYAPA_STATE_OP.
> > +*/
> > +   if (cyapa->smbus && (ret == -ETIMEDOUT || ret == -ENXIO)) {
> > +   if (!even_addr)
> > +   ret = cyapa_read_block(cyapa,
> > +   CYAPA_CMD_BL_STATUS, status);
> > +   smbus = true;
> > +   }
> > +   if (ret != BL_STATUS_SIZE)
> > +   goto error;
> > +
> > +   /*
> > +* Detect trackpad protocol based on characristic registers and bits.
> > +*/
> > +   do {
> > +   cyapa->status[REG_OP_STATUS] = status[REG_OP_STATUS];
> > +   cyapa->status[REG_BL_STATUS] =

RE: [PATCH v4 5/14] input: cyapa: add read firmware image and raw data interfaces in debugfs system

2014-08-24 Thread Dudley Du

Hi Dmitry,

> -Original Message-
> From: Dmitry Torokhov [mailto:dmitry.torok...@gmail.com]
> Sent: Saturday, August 23, 2014 7:52 AM
> To: Dudley Du
> Cc: Rafael J. Wysocki; Benson Leung; Patrik Fimml; 
> linux-in...@vger.kernel.org;
> linux-kernel@vger.kernel.org; Dudley Du
> Subject: Re: [PATCH v4 5/14] input: cyapa: add read firmware image and raw
> data interfaces in debugfs system
> 
> Hi Dudley,
> 
> On Thu, Jul 17, 2014 at 02:52:36PM +0800, Dudley Du wrote:
> > Add read_fw and raw_data debugfs interfaces for easier issues location
> > and collection when report by user.
> > TEST=test on Chromebooks.
> >
> > Signed-off-by: Dudley Du 
> > ---
> >  drivers/input/mouse/cyapa.c |  219
> +++
> >  1 file changed, 219 insertions(+)
> >
> > diff --git a/drivers/input/mouse/cyapa.c b/drivers/input/mouse/cyapa.c
> > index 53c9d59..ae24b02 100644
> > --- a/drivers/input/mouse/cyapa.c
> > +++ b/drivers/input/mouse/cyapa.c
> > @@ -39,6 +39,8 @@
> >
> >  const char unique_str[] = "CYTRA";
> >
> > +/* Global root node of the cyapa debugfs directory. */
> > +static struct dentry *cyapa_debugfs_root;
> >
> >
> >  ssize_t cyapa_i2c_reg_read_block(struct cyapa *cyapa, u8 reg, size_t len,
> > @@ -461,6 +463,205 @@ done:
> >  }
> >
> >  /*
> > + **
> > + * debugfs interface
> > + **
> > +*/
> > +static int cyapa_debugfs_open(struct inode *inode, struct file *file)
> > +{
> > +   struct cyapa *cyapa = inode->i_private;
> > +   int ret;
> > +
> > +   if (!cyapa)
> > +   return -ENODEV;
> > +
> > +   ret = mutex_lock_interruptible(>debugfs_mutex);
> > +   if (ret)
> > +   return ret;
> > +
> > +   if (!kobject_get(>client->dev.kobj)) {
> 
> Why not get_device() here and elsewhere?

Thanks. I will use get_device()/put_device() instead.

> 
> > +   ret = -ENODEV;
> > +   goto out;
> > +   }
> > +
> > +   file->private_data = cyapa;
> > +
> > +   if (cyapa->fw_image) {
> > +   ret = 0;
> > +   goto out;
> > +   }
> > +
> > +   mutex_lock(>state_sync_lock);
> > +   /*
> > +* If firmware hasn't been read yet, read it all in one pass.
> > +* Subsequent opens will reuse the data in this same buffer.
> > +*/
> > +   if (cyapa->ops->read_fw)
> > +   ret = cyapa->ops->read_fw(cyapa);
> > +   else
> > +   ret = -EPERM;
> > +   mutex_unlock(>state_sync_lock);
> > +
> > +   /* Redetect trackpad device states. */
> > +   cyapa_detect_async(cyapa, 0);
> > +
> > +out:
> > +   mutex_unlock(>debugfs_mutex);
> > +   return ret;
> > +}
> > +
> > +static int cyapa_debugfs_release(struct inode *inode, struct file *file)
> > +{
> > +   struct cyapa *cyapa = file->private_data;
> > +   int ret;
> > +
> > +   if (!cyapa)
> > +   return 0;
> > +
> > +   ret = mutex_lock_interruptible(>debugfs_mutex);
> > +   if (ret)
> > +   return ret;
> > +   file->private_data = NULL;
> > +   kobject_put(>client->dev.kobj);
> > +   mutex_unlock(>debugfs_mutex);
> > +
> > +   return 0;
> > +}
> > +
> > +/* Return some bytes from the buffered firmware image, starting from *ppos
> */
> > +static ssize_t cyapa_debugfs_read_fw(struct file *file, char __user 
> > *buffer,
> > +size_t count, loff_t *ppos)
> > +{
> > +   struct cyapa *cyapa = file->private_data;
> > +
> > +   if (!cyapa->fw_image)
> > +   return -EINVAL;
> > +
> > +   if (*ppos >= cyapa->fw_image_size)
> > +   return 0;
> > +
> > +   if (count + *ppos > cyapa->fw_image_size)
> > +   count = cyapa->fw_image_size - *ppos;
> > +
> > +   if (copy_to_user(buffer, >fw_image[*ppos], count))
> > +   return -EFAULT;
> > +
> > +   *ppos += count;
> > +   return count;
> > +}
> > +
> > +static const struct file_operations cyapa_read_fw_fops = {
> > +   .open = cyapa_debugfs_open,
> > +   .release = cyapa_debugfs_release,
> > +   .read = cyapa_debugfs_read_fw
> > +};
> > +
> > +static int cyapa_debugfs_raw_data_open(struct inode *inode, struct file
> *file)
> > +{
> > +   struct cyapa *cyapa = inode->i_private;
> > +   int ret;
> > +
> > +   if (!cyapa)
> > +   return -ENODEV;
> > +
> > +   /* Start to be supported after Gen5 trackpad devices. */
> > +   if (cyapa->gen < CYAPA_GEN5)
> > +   return -ENOTSUPP;
> > +
> > +   ret = mutex_lock_interruptible(>debugfs_mutex);
> > +   if (ret)
> > +   return ret;
> > +
> > +   if (!kobject_get(>client->dev.kobj)) {
> > +   ret = -ENODEV;
> > +   goto out;
> > +   }
> > +
> > +   file->private_data = cyapa;
> > +
> > +   mutex_lock(>state_sync_lock);
> > +   if (cyapa->ops->read_raw_data)
> > +   ret = cyapa->ops->read_raw_data(cyapa);
> > +   else
> > +   ret = -EPERM;
> > +   mutex_unlock(>state_sync_lock);
> > +out:
> > +   mutex_unlock(>debugfs_mutex);
> > +   return ret;
> > +}
> > +
> > +static

Re: [PATCH v2 2/5] MFD: RK808: Add new mfd driver for RK808

2014-08-24 Thread Chris Zhong



On 08/20/2014 05:21 PM, Lee Jones wrote:

On Wed, 20 Aug 2014, Chris Zhong wrote:


The RK808 chip is a power management IC for multimedia and handheld
devices. It contains the following components:

- Regulators
- RTC

The rk808 core driver is registered as a platform driver and provides
communication through I2C with the host device for the different
components.

Signed-off-by: Chris Zhong 

---

Changes in v2:
Advised by Mark Brown:
- change of_find_node_by_name to find_child_by_name
- use RK808_NUM_REGULATORS as the name of the constant
- create a pdata when missing platform data
- use the rk808_reg name to supply_regulator name
- replace regulator_register with devm_regulator_register
- some other problem with coding style

  drivers/mfd/Kconfig   |   13 ++
  drivers/mfd/Makefile  |1 +
  drivers/mfd/rk808.c   |  297 +
  include/linux/mfd/rk808.h |  219 +
  4 files changed, 530 insertions(+)
  create mode 100644 drivers/mfd/rk808.c
  create mode 100644 include/linux/mfd/rk808.h

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index de5abf2..1df133e 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -582,6 +582,19 @@ config MFD_RC5T583
  Additional drivers must be enabled in order to use the
  different functionality of the device.
  
+config MFD_RK808

+   tristate "Rockchip RK808 Power Management chip"
+   depends on I2C
+   select MFD_CORE
+   select REGMAP_I2C
+   select REGMAP_IRQ
+   help

<-- Use more of the allotted space 
--->


+ Select this option to get support for the RK808 Power
+ Management system device.

What's a 'system device', and how does that differ to a controller?


+ This driver provides common support for accessing the device
+ through i2c interface. The device supports multiple sub-devices

s/i2c/I2C/


+ like interrupts, RTC, LDO and DCDC regulators, onkey.

s/like/including/

I would s/and/&/, then put an "and" before "onkey".

this, this, this 'and' that.


  config MFD_SEC_CORE
bool "SAMSUNG Electronics PMIC Series Support"
depends on I2C=y
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index f001487..dbc28e7 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -160,6 +160,7 @@ obj-$(CONFIG_MFD_INTEL_MSIC)+= intel_msic.o
  obj-$(CONFIG_MFD_PALMAS)  += palmas.o
  obj-$(CONFIG_MFD_VIPERBOARD)+= viperboard.o
  obj-$(CONFIG_MFD_RC5T583) += rc5t583.o rc5t583-irq.o
+obj-$(CONFIG_MFD_RK808)+= rk808.o
  obj-$(CONFIG_MFD_SEC_CORE)+= sec-core.o sec-irq.o
  obj-$(CONFIG_MFD_SYSCON)  += syscon.o
  obj-$(CONFIG_MFD_LM3533)  += lm3533-core.o lm3533-ctrlbank.o
diff --git a/drivers/mfd/rk808.c b/drivers/mfd/rk808.c
new file mode 100644
index 000..667cfdf
--- /dev/null
+++ b/drivers/mfd/rk808.c
@@ -0,0 +1,297 @@
+/*
+ * Mfd core driver for Rockchip RK808

s/Mfd/MFD


+ * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd
+ *
+ * Author: Chris Zhong 
+ * Author: Zhang Qing 
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *

Remove this line.


+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 

I'm pretty sure you don't need all of these includes.

Remove the ones you're not using.


+struct rk808_reg_data {
+   int addr;
+   int mask;
+   int value;
+};
+
+static struct rk808 *g_rk808;

Grim.


+static struct resource rtc_resources[] = {
+   {
+   .start  = RK808_IRQ_RTC_ALARM,
+   .end= RK808_IRQ_RTC_ALARM,
+   .flags  = IORESOURCE_IRQ,
+   }
+};
+
+static const struct mfd_cell rk808s[] = {
+   {
+   .name = "rk808-regulator",
+   },
+   {
+   .name = "rk808-rtc",
+   .num_resources = ARRAY_SIZE(rtc_resources),
+   .resources = _resources[0],
+   },
+   {
+   .name = "rk808-clkout",
+   },

Can you reorder these, with the single liners at the start and
actually make them one line, so:

   { .name = "rk808-clkout" },
   { .name = "rk808-regulator" },
   {
.name = "rk808-rtc",
.num_resources = ARRAY_SIZE(rtc_resources),
.resources = _resources[0],
   },

This also happens to be alphabetical.


+};
+
+static const struct

Re: [PATCH v2] memory-hotplug: add sysfs zones_online_to attribute

2014-08-24 Thread Zhang Zhen

On 2014/8/23 6:16, Andrew Morton wrote:
> On Mon, 18 Aug 2014 11:25:36 +0800 Zhang Zhen  
> wrote:
> 
>> On 2014/8/16 5:37, Toshi Kani wrote:
>>> On Wed, 2014-08-13 at 12:10 +0800, Zhang Zhen wrote:
 Currently memory-hotplug has two limits:
 1. If the memory block is in ZONE_NORMAL, you can change it to
 ZONE_MOVABLE, but this memory block must be adjacent to ZONE_MOVABLE.
 2. If the memory block is in ZONE_MOVABLE, you can change it to
 ZONE_NORMAL, but this memory block must be adjacent to ZONE_NORMAL.

 With this patch, we can easy to know a memory block can be onlined to
 which zone, and don't need to know the above two limits.

 Updated the related Documentation.

 Change v1 -> v2:
 - optimize the implementation following Dave Hansen's suggestion

 Signed-off-by: Zhang Zhen 
 ---
  Documentation/ABI/testing/sysfs-devices-memory |  8 
  Documentation/memory-hotplug.txt   |  4 +-
  drivers/base/memory.c  | 62 
 ++
  include/linux/memory_hotplug.h |  1 +
  mm/memory_hotplug.c|  2 +-
  5 files changed, 75 insertions(+), 2 deletions(-)

 diff --git a/Documentation/ABI/testing/sysfs-devices-memory 
 b/Documentation/ABI/testing/sysfs-devices-memory
 index 7405de2..2b2a1d7 100644
 --- a/Documentation/ABI/testing/sysfs-devices-memory
 +++ b/Documentation/ABI/testing/sysfs-devices-memory
 @@ -61,6 +61,14 @@ Users:  hotplug memory remove tools

 http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils


 +What:   /sys/devices/system/memory/memoryX/zones_online_to
>>>
>>> I think this name is a bit confusing.  How about "valid_online_types"?
>>>
>> Thanks for your suggestion.
>>
>> This patch has been added to -mm tree.
>> If most people think so, i would like to modify the interface name.
>> If not, let's leave it as it is.
> 
> Yes, the name could be better.  Do we actually need "online" in there? 
> How about "valid_zones"?

Ok, i will change it to valid_zones.
> 
> Also, it's not really clear to me why we need this sysfs file at all. 
> Do people really read sysfs files, make onlining decisions and manually
> type in commands?  Or is this stuff all automated?  If the latter then
> the script can take care of all this?  For example, attempt to online
> the memory into the desired zone and report failure if that didn't
> succeed?

Just like Dave Hansen says, the scripts should be changed when we add a new
zone type. And ZONE_MOVABLE may be missed by the scripts writer.
> 
> IOW, please update the changelog to show
> 
> a) example output from
>/sys/devices/system/memory/memoryX/whatever-we-call-it and
> 
> b) example use-cases which help reviewers understand why this
>feature will be valuable to users.

Sorry, this patch has been added to -next tree. I can't modify the changelog.
> 
> Also, please do address the error which Yasuaki Ishimatsu identified.
> 
Yeah, i have been waiting for 
http://ozlabs.org/~akpm/mmots/broken-out/memory-hotplug-add-sysfs-zones_online_to-attribute-fix-2.patch
to be added to the -mm tree.
Then i can send a patch based on the -mm tree to address the error which Yasuaki 
Ishimatsu identified.
Otherwise, conflicts may occur.

Thanks!
> 
> .
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] Regulator: RK808: modify for struct rk808 change in v4

2014-08-24 Thread Chris Zhong

The "dev" has been deleted from "struct rk808" in rk808 mfd driver v4,
so rk808->dev should be replaced with &client->dev here.

Signed-off-by: Chris Zhong 
---

 drivers/regulator/rk808-regulator.c |   34 ++
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/drivers/regulator/rk808-regulator.c 
b/drivers/regulator/rk808-regulator.c
index 4d5041c..f00d6d8 100644
--- a/drivers/regulator/rk808-regulator.c
+++ b/drivers/regulator/rk808-regulator.c
@@ -296,13 +296,14 @@ static struct of_regulator_match rk808_reg_matches[] = {
 
 static int rk808_regulator_dts(struct rk808 *rk808)
 {
-   struct rk808_board *pdata = rk808->pdata;
struct device_node *np, *reg_np;
+   struct i2c_client *client = rk808->i2c;
+   struct rk808_board *pdata = rk808->pdata;
int i, ret;
 
-   np = rk808->dev->of_node;
+   np = client->dev.of_node;
if (!np) {
-   dev_err(rk808->dev, "could not find pmic sub-node\n");
+   dev_err(>dev, "could not find pmic sub-node\n");
return -ENXIO;
}
 
@@ -310,10 +311,10 @@ static int rk808_regulator_dts(struct rk808 *rk808)
if (!reg_np)
return -ENXIO;
 
-   ret = of_regulator_match(rk808->dev, reg_np, rk808_reg_matches,
+   ret = of_regulator_match(>dev, reg_np, rk808_reg_matches,
 RK808_NUM_REGULATORS);
-   if (ret  < 0) {
-   dev_err(rk808->dev,
+   if (ret < 0) {
+   dev_err(>dev,
"failed to parse regulator data: %d\n", ret);
return ret;
}
@@ -333,24 +334,17 @@ static int rk808_regulator_dts(struct rk808 *rk808)
 static int rk808_regulator_probe(struct platform_device *pdev)
 {
struct rk808 *rk808 = dev_get_drvdata(pdev->dev.parent);
-   struct rk808_board *pdata;
+   struct i2c_client *client = rk808->i2c;
+   struct rk808_board *pdata = rk808->pdata;
struct regulator_config config = {};
struct regulator_dev *rk808_rdev;
struct regulator_init_data *reg_data;
int i = 0;
int ret = 0;
 
-   dev_dbg(rk808->dev, "%s\n", __func__);
-
-   if (!rk808) {
-   dev_err(rk808->dev, "%s no rk808\n", __func__);
-   return -ENODEV;
-   }
-
-   pdata = rk808->pdata;
if (!pdata) {
-   dev_warn(rk808->dev, "%s no pdata, create it\n", __func__);
-   pdata = devm_kzalloc(rk808->dev, sizeof(*pdata), GFP_KERNEL);
+   dev_warn(>dev, "%s no pdata, create it\n", __func__);
+   pdata = devm_kzalloc(>dev, sizeof(*pdata), GFP_KERNEL);
if (!pdata)
return -ENOMEM;
}
@@ -371,11 +365,11 @@ static int rk808_regulator_probe(struct platform_device 
*pdev)
if (!reg_data)
continue;
 
-   config.dev = rk808->dev;
+   config.dev = >dev;
config.driver_data = rk808;
config.regmap = rk808->regmap;
 
-   if (rk808->dev->of_node)
+   if (client->dev.of_node)
config.of_node = pdata->of_node[i];
 
reg_data->supply_regulator = rk808_reg[i].name;
@@ -384,7 +378,7 @@ static int rk808_regulator_probe(struct platform_device 
*pdev)
rk808_rdev = devm_regulator_register(>dev,
 _reg[i], );
if (IS_ERR(rk808_rdev)) {
-   dev_err(rk808->dev,
+   dev_err(>dev,
"failed to register %d regulator\n", i);
return PTR_ERR(rk808_rdev);
}
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/1] ipc/shm: fix the historical/wrong mm->start_stack check

2014-08-24 Thread Hugh Dickins

On Sat, 23 Aug 2014, Oleg Nesterov wrote:
> On 08/23, Cyrill Gorcunov wrote:
> >
> > On Sat, Aug 23, 2014 at 04:43:27PM +0200, Oleg Nesterov wrote:
> > > The ->start_stack check in do_shmat() looks ugly and simply wrong.
> > >
> > > 1. ->start_stack is only valid right after exec(), the application
> > >can switch to another stack and even unmap this area.
> > >
> > > 2. The reason for this check is not clear at all. The application
> > >should know what it does. And why 4 pages? And why in fact it
> > >requires 5 pages?
> > >
> > > 3. This wrongly assumes that the stack can only grown down.
> > >
> > > Personally I think we should simply kill this check, but I did not
> > > dare to do this. So the patch only fixes the 1st problem (mostly to
> > > avoid the usage of mm->start_stack) and ignores VM_GROWSUP.
> > >
> > > Signed-off-by: Oleg Nesterov 
> > Reviewed-by: Cyrill Gorcunov 

Yes, much better to use find_vma than have this strange stray use
of unreliable start_stack.

Acked-by: Hugh Dickins 

though like Manfred I didn't quite see how overflow was impossible
on unfamiliar architectures.

> 
> Thanks!
> 
> > I don't understand this check either, the comment above it says nothing
> > but only commits what code is doing not explaining why.
> 
> Yes, and this check predates the git history. I even looked into
> git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git but this
> change was added by the huge "v2.5.0.7 -> v2.5.0.8" update in 2002,
> and obviously without any explanation (apart from "fix up proper shmat
> semantics", but this connects SHM_REMAP itself).

I'd say it comes earlier, from Christoph Rohland's 2.4.17-pre7's
"Add missing checks on shmat()", though I didn't find more than that.

We can all understand wanting to leave a gap below the growsdown stack,
but of course could argue about growsup and 1 or 4 or 5 or whatever:
okay that we're all more interested in just removing that start_stack.

Hugh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2] net: ec_bhf: remove excessive debug messages

2014-08-24 Thread David Miller

From: Dariusz Marcinkiewicz 
Date: Sun, 24 Aug 2014 20:40:16 +0200

> This cuts down the number of debug information spit out by
> the driver.
> 
> Signed-off-by: Dariusz Marcinkiewicz 

Applied, thank you.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC 4/4] tuntap: Increase the number of queues in tun

2014-08-24 Thread David Gibson

On Mon, 18 Aug 2014 19:07:20 +0530
Pankaj Gupta  wrote:

> Networking under kvm works best if we allocate a per-vCPU RX and TX
> queue in a virtual NIC. This requires a per-vCPU queue on the host side.
> 
> It is now safe to increase the maximum number of queues.
> Preceding patches:
>   net: allow large number of rx queues
>   tuntap: Reduce the size of tun_struct by using flex array
>   tuntap: Publish tuntap max queue length as module_param
> 
>   made sure this won't cause failures due to high order memory
> allocations. Increase it to 256: this is the max number of vCPUs
> KVM supports.
> 
> Signed-off-by: Pankaj Gupta 

Reviewed-by: David Gibson 

-- 
David Gibson 


pgpmDvZxtxaFE.pgp
Description: PGP signature

Re: [RFC 3/4] tuntap: reduce the size of tun_struct by using flex array

2014-08-24 Thread David Gibson

On Mon, 18 Aug 2014 19:07:19 +0530
Pankaj Gupta  wrote:

> This patch switches to flex array to implement the flow caches, it brings
> several advantages:
> 
> - Reduce the size of the tun_struct structure, which allows us to increase the
>   upper limit of queues in future.
> - Avoid higher order memory allocation. It will be useful when switching to
>   pure hashing in flow cache which may demand a larger size array in future.
> 
> After this patch, the size of tun_struct on x86_64 reduced from 8512 to
> 328
> 
> Signed-off-by: Jason Wang 
> Signed-off-by: Pankaj Gupta 

Reviewed-by: David Gibson 

-- 
David Gibson 


pgpTa7rXkQckb.pgp
Description: PGP signature

[GIT PULL] SH Drivers Updates For v3.17

2014-08-24 Thread Simon Horman

Hi Linus,

Please consider these SH drivers updates for v3.17.

I realise these are slightly late in the rc cycle so
please don't hesitate to ask me to defer them to v3.18.


The following changes since commit 7d1311b93e58ed55f3a31cc8f94c4b8fe988a2b9:

  Linux 3.17-rc1 (2014-08-16 10:40:26 -0600)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git 
tags/renesas-sh-drivers-for-v3.17

for you to fetch changes up to 049d28048be595e0a10a58fe1c104b153c386633:

  sh: intc: Confine SH_INTC to platforms that need it (2014-08-22 12:28:16 
+0900)


SH Drivers Updates For v3.17

* Confine SH_INTC to platforms that need it


Geert Uytterhoeven (1):
  sh: intc: Confine SH_INTC to platforms that need it

 arch/arm/mach-shmobile/Kconfig | 2 ++
 arch/sh/Kconfig| 3 +++
 drivers/sh/Makefile| 3 +--
 drivers/sh/intc/Kconfig| 6 +-
 4 files changed, 11 insertions(+), 3 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] sh: intc: Confine SH_INTC to platforms that need it

2014-08-24 Thread Simon Horman

From: Geert Uytterhoeven 

Currently the sh-intc driver is compiled on all SuperH and
non-multiplatform SH-Mobile platforms, while it's only used on a limited
number of platforms:
  - SuperH: SH2(A), SH3(A), SH4(A)(L) (all but SH5)
  - ARM: sh7372, sh73a0

Drop the "default y" on SH_INTC, make all CPU platforms that use it
select it, and protect all sub-options by "if SH_INTC" to fix this.

Signed-off-by: Geert Uytterhoeven 
Acked-by: Magnus Damm 
Signed-off-by: Simon Horman 
---
 arch/arm/mach-shmobile/Kconfig | 2 ++
 arch/sh/Kconfig| 3 +++
 drivers/sh/Makefile| 3 +--
 drivers/sh/intc/Kconfig| 6 +-
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig
index e15dff7..1e6c51c 100644
--- a/arch/arm/mach-shmobile/Kconfig
+++ b/arch/arm/mach-shmobile/Kconfig
@@ -75,6 +75,7 @@ config ARCH_SH7372
select ARM_CPU_SUSPEND if PM || CPU_IDLE
select CPU_V7
select SH_CLK_CPG
+   select SH_INTC
select SYS_SUPPORTS_SH_CMT
select SYS_SUPPORTS_SH_TMU
 
@@ -85,6 +86,7 @@ config ARCH_SH73A0
select CPU_V7
select I2C
select SH_CLK_CPG
+   select SH_INTC
select RENESAS_INTC_IRQPIN
select SYS_SUPPORTS_SH_CMT
select SYS_SUPPORTS_SH_TMU
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 453fa5c..b319846 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -172,6 +172,7 @@ menu "System type"
 #
 config CPU_SH2
bool
+   select SH_INTC
 
 config CPU_SH2A
bool
@@ -182,6 +183,7 @@ config CPU_SH3
bool
select CPU_HAS_INTEVT
select CPU_HAS_SR_RB
+   select SH_INTC
select SYS_SUPPORTS_SH_TMU
 
 config CPU_SH4
@@ -189,6 +191,7 @@ config CPU_SH4
select CPU_HAS_INTEVT
select CPU_HAS_SR_RB
select CPU_HAS_FPU if !CPU_SH4AL_DSP
+   select SH_INTC
select SYS_SUPPORTS_SH_TMU
select SYS_SUPPORTS_HUGETLBFS if MMU
 
diff --git a/drivers/sh/Makefile b/drivers/sh/Makefile
index 788ed9b..114203f 100644
--- a/drivers/sh/Makefile
+++ b/drivers/sh/Makefile
@@ -1,8 +1,7 @@
 #
 # Makefile for the SuperH specific drivers.
 #
-obj-$(CONFIG_SUPERH)   += intc/
-obj-$(CONFIG_ARCH_SHMOBILE_LEGACY) += intc/
+obj-$(CONFIG_SH_INTC)  += intc/
 ifneq ($(CONFIG_COMMON_CLK),y)
 obj-$(CONFIG_HAVE_CLK) += clk/
 endif
diff --git a/drivers/sh/intc/Kconfig b/drivers/sh/intc/Kconfig
index 60228fa..6a1b05d 100644
--- a/drivers/sh/intc/Kconfig
+++ b/drivers/sh/intc/Kconfig
@@ -1,7 +1,9 @@
 config SH_INTC
-   def_bool y
+   bool
select IRQ_DOMAIN
 
+if SH_INTC
+
 comment "Interrupt controller options"
 
 config INTC_USERIMASK
@@ -37,3 +39,5 @@ config INTC_MAPPING_DEBUG
  between system IRQs and the per-controller id tables.
 
  If in doubt, say N.
+
+endif
-- 
2.0.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCHv2 1/5] rtc: s3c: Define s3c_rtc structure to remove global variables.

2014-08-24 Thread Chanwoo Choi

Dear Andrew, 

On 08/23/2014 05:42 AM, Andrew Morton wrote:
> On Tue, 12 Aug 2014 11:01:07 +0900 y...@samsung.com wrote:
> 
>> This patch define s3c_rtc structure including necessary variables for S3C RTC
>> device instead of global variables. This patch improves the readability by
>> removing global variables.
> 
> Below is the v1->v2 delta.
> 
> Why were all those tests of info->base added?  Can it really be zero? 
> I don't see how.

If some functions (e.g., s3c_rtc_settime) accesses the rtc register
by using info->base before the initialization of info->base in s3c_rtc_probe,
I thought that null pointer error would happen.

But, I missed one point which info->base might have the garbage data instead of 
NULL.
I'll add the initialization code for info->base.
info->base = NULL;

If you don't agree with it, I'll drop this code checking the state of info->base on 
next patchset(v3).

Best Regards,
Chanwoo Choi

> 
> --- 
> a/drivers/rtc/rtc-s3c.c~rtc-s3c-define-s3c_rtc-structure-to-remove-global-variables-v2
> +++ a/drivers/rtc/rtc-s3c.c
> @@ -121,6 +121,9 @@ static int s3c_rtc_setaie(struct device
>   struct s3c_rtc *info = dev_get_drvdata(dev);
>   unsigned int tmp;
>  
> + if (!info->base)
> + return -EINVAL;
> +
>   dev_dbg(info->dev, "%s: aie=%d\n", __func__, enabled);
>  
>   clk_enable(info->rtc_clk);
> @@ -180,6 +183,9 @@ static int s3c_rtc_gettime(struct device
>   struct s3c_rtc *info = dev_get_drvdata(dev);
>   unsigned int have_retried = 0;
>  
> + if (!info->base)
> + return -EINVAL;
> +
>   clk_enable(info->rtc_clk);
>   retry_get_time:
>   rtc_tm->tm_min  = readb(info->base + S3C2410_RTCMIN);
> @@ -224,6 +230,9 @@ static int s3c_rtc_settime(struct device
>   struct s3c_rtc *info = dev_get_drvdata(dev);
>   int year = tm->tm_year - 100;
>  
> + if (!info->base)
> + return -EINVAL;
> +
>   dev_dbg(dev, "set time %04d.%02d.%02d %02d:%02d:%02d\n",
>1900 + tm->tm_year, tm->tm_mon, tm->tm_mday,
>tm->tm_hour, tm->tm_min, tm->tm_sec);
> @@ -255,6 +264,9 @@ static int s3c_rtc_getalarm(struct devic
>   struct rtc_time *alm_tm = >time;
>   unsigned int alm_en;
>  
> + if (!info->base)
> + return -EINVAL;
> +
>   clk_enable(info->rtc_clk);
>   alm_tm->tm_sec  = readb(info->base + S3C2410_ALMSEC);
>   alm_tm->tm_min  = readb(info->base + S3C2410_ALMMIN);
> @@ -317,6 +329,9 @@ static int s3c_rtc_setalarm(struct devic
>   struct rtc_time *tm = >time;
>   unsigned int alrm_en;
>  
> + if (!info->base)
> + return -EINVAL;
> +
>   clk_enable(info->rtc_clk);
>   dev_dbg(dev, "s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n",
>alrm->enabled,
> @@ -357,6 +372,9 @@ static int s3c_rtc_proc(struct device *d
>   struct s3c_rtc *info = dev_get_drvdata(dev);
>   unsigned int ticnt;
>  
> + if (!info->base)
> + return -EINVAL;
> +
>   clk_enable(info->rtc_clk);
>   if (info->cpu_type == TYPE_S3C64XX) {
>   ticnt = readw(info->base + S3C2410_RTCCON);
> @@ -548,7 +566,7 @@ static int s3c_rtc_probe(struct platform
>   rtc_tm.tm_min   = 0;
>   rtc_tm.tm_sec   = 0;
>  
> - s3c_rtc_settime(NULL, _tm);
> + s3c_rtc_settime(>dev, _tm);
>  
>   dev_warn(>dev, "warning: invalid RTC value so 
> initializing it\n");
>   }
> _
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/2] PM / sleep: Asynchronous threads for dpm_complete

2014-08-24 Thread xiaoming wang

In analogy with commits 5af84b82701a and 97df8c12995,
using asynchronous threads can improve the overall
resume time significantly.

This patch is for dpm_complete phase.

Signed-off-by: Chuansheng Liu 
Signed-off-by: xiaoming wang 
---
 drivers/base/power/main.c |   38 ++
 1 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index f9fe1b3..00c4bf1 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -889,14 +889,15 @@ void dpm_resume(pm_message_t state)
  * @dev: Device to handle.
  * @state: PM transition of the system being carried out.
  */
-static void device_complete(struct device *dev, pm_message_t state)
+static void device_complete(struct device *dev, pm_message_t state, bool async)
 {
void (*callback)(struct device *) = NULL;
char *info = NULL;
 
if (dev->power.syscore)
-   return;
+   goto Complete;
 
+   dpm_wait(dev->parent, async);
device_lock(dev);
 
if (dev->pm_domain) {
@@ -928,6 +929,17 @@ static void device_complete(struct device *dev, 
pm_message_t state)
device_unlock(dev);
 
pm_runtime_put(dev);
+
+Complete:
+   complete_all(>power.completion);
+}
+
+static void async_complete(void *data, async_cookie_t cookie)
+{
+   struct device *dev = (struct device *)data;
+
+   device_complete(dev, pm_transition, true);
+   put_device(dev);
 }
 
 /**
@@ -940,27 +952,45 @@ static void device_complete(struct device *dev, 
pm_message_t state)
 void dpm_complete(pm_message_t state)
 {
struct list_head list;
+   struct device *dev;
 
trace_suspend_resume(TPS("dpm_complete"), state.event, true);
might_sleep();
 
INIT_LIST_HEAD();
mutex_lock(_list_mtx);
+   pm_transition = state;
+
+   /*
+ * Advanced the async threads upfront,
+ * in case the starting of async threads is
+ * delayed by non-async resuming devices.
+ */
+   list_for_each_entry(dev, _prepared_list, power.entry) {
+   reinit_completion(>power.completion);
+   if (is_async(dev)) {
+   get_device(dev);
+   async_schedule(async_complete, dev);
+   }
+   }
+
while (!list_empty(_prepared_list)) {
-   struct device *dev = to_device(dpm_prepared_list.prev);
+   dev = to_device(dpm_prepared_list.prev);
 
get_device(dev);
dev->power.is_prepared = false;
list_move(>power.entry, );
mutex_unlock(_list_mtx);
 
-   device_complete(dev, state);
+   if (!is_async(dev))
+   device_complete(dev, state, false);
 
mutex_lock(_list_mtx);
put_device(dev);
}
list_splice(, _list);
mutex_unlock(_list_mtx);
+   async_synchronize_full();
trace_suspend_resume(TPS("dpm_complete"), state.event, false);
 }
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/2] PM / sleep: Asynchronous threads for dpm_prepare

2014-08-24 Thread xiaoming wang

In analogy with commits 5af84b82701a and 97df8c12995,
using asynchronous threads can improve the overall
suspend time significantly.

This patch is for dpm_prepare phase.

Signed-off-by: Chuansheng Liu 
Signed-off-by: xiaoming wang 
---
 drivers/base/power/main.c |   57 +
 1 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index b67d9ae..f9fe1b3 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1531,15 +1531,24 @@ int dpm_suspend(pm_message_t state)
  * Execute the ->prepare() callback(s) for given device.  No new children of 
the
  * device may be registered after this function has returned.
  */
-static int device_prepare(struct device *dev, pm_message_t state)
+static int __device_prepare(struct device *dev, pm_message_t state, bool async)
 {
int (*callback)(struct device *) = NULL;
char *info = NULL;
int ret = 0;
 
+   if (async_error)
+   goto Complete;
+
+   if (pm_wakeup_pending()) {
+   async_error = -EBUSY;
+   goto Complete;
+   }
+
if (dev->power.syscore)
-   return 0;
+   goto Complete;
 
+   dpm_wait_for_children(dev, async);
/*
 * If a device's parent goes into runtime suspend at the wrong time,
 * it won't be possible to resume the device.  To prevent this we
@@ -1582,7 +1591,7 @@ static int device_prepare(struct device *dev, 
pm_message_t state)
if (ret < 0) {
suspend_report_result(callback, ret);
pm_runtime_put(dev);
-   return ret;
+   goto Complete;
}
/*
 * A positive return value from ->prepare() means "this device appears
@@ -1594,9 +1603,40 @@ static int device_prepare(struct device *dev, 
pm_message_t state)
spin_lock_irq(>power.lock);
dev->power.direct_complete = ret > 0 && state.event == PM_EVENT_SUSPEND;
spin_unlock_irq(>power.lock);
-   return 0;
+
+Complete:
+   complete_all(>power.completion);
+   if (ret)
+   async_error = ret;
+
+   return ret;
+}
+
+static void async_prepare(void *data, async_cookie_t cookie)
+{
+   struct device *dev = (struct device *)data;
+   int error;
+
+   error = __device_prepare(dev, pm_transition, true);
+   if (error) {
+   dpm_save_failed_dev(dev_name(dev));
+   pm_dev_err(dev, pm_transition, " async", error);
+   }
+   put_device(dev);
+}
+
+static int device_prepare(struct device *dev)
+{
+   reinit_completion(>power.completion);
+   if (pm_async_enabled && dev->power.async_suspend) {
+   get_device(dev);
+   async_schedule(async_prepare, dev);
+   return 0;
+   }
+   return __device_prepare(dev, pm_transition, false);
 }
 
+
 /**
  * dpm_prepare - Prepare all non-sysdev devices for a system PM transition.
  * @state: PM transition of the system being carried out.
@@ -1611,13 +1651,15 @@ int dpm_prepare(pm_message_t state)
might_sleep();
 
mutex_lock(_list_mtx);
+   pm_transition = state;
+   async_error = 0;
while (!list_empty(_list)) {
struct device *dev = to_device(dpm_list.next);
 
get_device(dev);
mutex_unlock(_list_mtx);
 
-   error = device_prepare(dev, state);
+   error = device_prepare(dev);
 
mutex_lock(_list_mtx);
if (error) {
@@ -1636,8 +1678,13 @@ int dpm_prepare(pm_message_t state)
if (!list_empty(>power.entry))
list_move_tail(>power.entry, _prepared_list);
put_device(dev);
+   if (async_error)
+   break;
}
mutex_unlock(_list_mtx);
+   async_synchronize_full();
+   if (!error)
+   error = async_error;
trace_suspend_resume(TPS("dpm_prepare"), state.event, false);
return error;
 }
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v5 net-next 00/29] BPF syscall, maps, verifier, samples, llvm

2014-08-24 Thread David Miller

From: Alexei Starovoitov 
Date: Sun, 24 Aug 2014 13:21:01 -0700

> enough RFCs, let's finalize it...

Please break this down into smaller, easier to review, sets
of changes.

Asking people to review nearly 30 patches at once isn't reasonable.

Shoot for something like about 10 at a time, at most.

Thank you.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3] zram: add num_discards for discarded pages stat

2014-08-24 Thread Minchan Kim

Hello Chao,

On Fri, Aug 22, 2014 at 04:21:01PM +0800, Chao Yu wrote:
> Since we have supported handling discard request in this commit
> f4659d8e620d08bd1a84a8aec5d2f5294a242764 (zram: support REQ_DISCARD), zram got
> one more chance to free unused memory whenever received discard request. But
> without stating for discard request, there is no method for user to know 
> whether
> discard request has been handled by zram or how many blocks were discarded by
> zram when user wants to know the effect of discard.

My concern is that how much we are able to know the effect of discard
exactly with your patch.

The issue I can think of is zram-swap discard.
Now, zram handles notification from VM to free duplicated copy between
VM-owned memory and zRAM-owned's one so discarding for zram-swap might
be pointless overhead but your stat indicates lots of free page discarded
without real freeing so that user might think "We should keep enable
swap discard for zRAM because the stat indicates it's really good".

In summary, wouldn't it better to have two?

num_discards,
num_failed_discards?

For that, we should modify zram_free_page to have a return value.
What do other guys think?

> 
> In this patch, we add num_discards to stat discarded pages, and export it to
> sysfs for users.
> 
> * From v1
>  * Update zram document to show num_discards in statistics list.
> 
> * From v2
>  * Update description of this patch with clear goal.
> 
> Signed-off-by: Chao Yu 
> ---
>  Documentation/ABI/testing/sysfs-block-zram | 10 ++
>  Documentation/blockdev/zram.txt|  1 +
>  drivers/block/zram/zram_drv.c  |  3 +++
>  drivers/block/zram/zram_drv.h  |  1 +
>  4 files changed, 15 insertions(+)
> 
> diff --git a/Documentation/ABI/testing/sysfs-block-zram 
> b/Documentation/ABI/testing/sysfs-block-zram
> index 70ec992..fa8936e 100644
> --- a/Documentation/ABI/testing/sysfs-block-zram
> +++ b/Documentation/ABI/testing/sysfs-block-zram
> @@ -57,6 +57,16 @@ Description:
>   The failed_writes file is read-only and specifies the number of
>   failed writes happened on this device.
>  
> +
> +What:/sys/block/zram/num_discards
> +Date:August 2014
> +Contact: Chao Yu 
> +Description:
> + The num_discards file is read-only and specifies the number of
> + physical blocks which are discarded by this device. These blocks
> + are included in discard request which is sended by filesystem as
> + the blocks are no longer used.
> +
>  What:/sys/block/zram/max_comp_streams
>  Date:February 2014
>  Contact: Sergey Senozhatsky 
> diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
> index 0595c3f..e50e18b 100644
> --- a/Documentation/blockdev/zram.txt
> +++ b/Documentation/blockdev/zram.txt
> @@ -89,6 +89,7 @@ size of the disk when not in use so a huge zram is wasteful.
>   num_writes
>   failed_reads
>   failed_writes
> + num_discards
>   invalid_io
>   notify_free
>   zero_pages
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index d00831c..904e7a5 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -606,6 +606,7 @@ static void zram_bio_discard(struct zram *zram, u32 index,
>   bit_spin_lock(ZRAM_ACCESS, >table[index].value);
>   zram_free_page(zram, index);
>   bit_spin_unlock(ZRAM_ACCESS, >table[index].value);
> + atomic64_inc(>stats.num_discards);
>   index++;
>   n -= PAGE_SIZE;
>   }
> @@ -866,6 +867,7 @@ ZRAM_ATTR_RO(num_reads);
>  ZRAM_ATTR_RO(num_writes);
>  ZRAM_ATTR_RO(failed_reads);
>  ZRAM_ATTR_RO(failed_writes);
> +ZRAM_ATTR_RO(num_discards);
>  ZRAM_ATTR_RO(invalid_io);
>  ZRAM_ATTR_RO(notify_free);
>  ZRAM_ATTR_RO(zero_pages);
> @@ -879,6 +881,7 @@ static struct attribute *zram_disk_attrs[] = {
>   _attr_num_writes.attr,
>   _attr_failed_reads.attr,
>   _attr_failed_writes.attr,
> + _attr_num_discards.attr,
>   _attr_invalid_io.attr,
>   _attr_notify_free.attr,
>   _attr_zero_pages.attr,
> diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
> index e0f725c..2994aaf 100644
> --- a/drivers/block/zram/zram_drv.h
> +++ b/drivers/block/zram/zram_drv.h
> @@ -86,6 +86,7 @@ struct zram_stats {
>   atomic64_t num_writes;  /* --do-- */
>   atomic64_t failed_reads;/* can happen when memory is too low */
>   atomic64_t failed_writes;   /* can happen when memory is too low */
> + atomic64_t num_discards;/* no. of discarded pages */
>   atomic64_t invalid_io;  /* non-page-aligned I/O requests */
>   atomic64_t notify_free; /* no. of swap slot free notifications */
>   atomic64_t zero_pages;  /* no. of zero filled pages */
>

HI

2014-08-24 Thread rosecarronrc

مرحبا عزيزتي
اسمي روز كرون، سيدة المحبة والرعاية، حصلت البريد الالكتروني الخاص بك اليوم 
عندما كنت
تصفح تبحث عن شريك صادقين، وأنا أشعر لإسقاط هذا قليل خط لكم،
يرجى الاتصال لي مع هذا عنوان البريد الإلكتروني (rosecar...@outlook.com)
حتى أستطيع أن اقول لكم المزيد عن نفسي ويرسل لك صوري

مع الحب وقبلة
ارتفع.
الاتصال بي مع عنوان بريدي الإلكتروني (rosecar...@outlook.com)

..

Hello dear
My name is Rose Carron, a loving and caring lady,i got your email today when i 
was
browsing looking for honest partner,i feel to drop this few line to you,
please contact me with this email address ( rosecar...@outlook.com )
so that i can tell you more about myself and send you my photos

with love and kiss
Rose.
contact me with my email address( rosecar...@outlook.com )

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Kära vän,

2014-08-24 Thread DR.JOHN MARTINS



Barclays Bank PLC
28 High Street
Nottinghamshire
Storbritannien
NG1 2bd
martinsjohn...@gmail.com

Kära vän,


Mitt namn är John k. Martins och jag chefstjänsteman Barclays bank  
nottingham, Storbritannien. Jag är gift med 3 barn.


Jag vill informera er om att jag behöver din brådskande hjälp. Summan  
av 16,5 miljoner brittiska pund var kvar i min bank av den sena Hugo  
Chavez, tidigare president i venezuela och jag var hans konton officer  
tills han dog.


Jag träffade Mr chavez i venezuela 2008 och han skapade kontot i min  
bank och informerade mig om att han ville investera i fastigheter i  
Storbritannien.


Jag behöver att överföra pengarna från min bank till din bank så att  
vi båda skall dela pengarna och jag behöver överföringen göras snarast.


Kontakta mig nu om du kan hjälpa mig och sända jag den nedan Detaljer


1} fullständiga namn och adress

2} adress

3} telefonnummer

Jag väntar att höra från dig nu.


John k.Martins

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 4/4] zram: report maximum used memory

2014-08-24 Thread Minchan Kim

Normally, zram user could get maximum memory usage zram consumed
via polling mem_used_total with sysfs in userspace.

But it has a critical problem because user can miss peak memory
usage during update interval of polling. For avoiding that,
user should poll it with shorter interval(ie, 0.01s)
with mlocking to avoid page fault delay when memory pressure
is heavy. It would be troublesome.

This patch adds new knob "mem_used_max" so user could see
the maximum memory usage easily via reading the knob and reset
it via "echo 0 > /sys/block/zram0/mem_used_max".

Reviewed-by: Dan Streetman 
Signed-off-by: Minchan Kim 
---
 Documentation/ABI/testing/sysfs-block-zram | 10 +
 Documentation/blockdev/zram.txt|  1 +
 drivers/block/zram/zram_drv.c  | 60 +-
 drivers/block/zram/zram_drv.h  |  1 +
 4 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-block-zram 
b/Documentation/ABI/testing/sysfs-block-zram
index dbe643775ec1..01a38eaf1552 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -120,6 +120,16 @@ Description:
statistic.
Unit: bytes
 
+What:  /sys/block/zram/mem_used_max
+Date:  August 2014
+Contact:   Minchan Kim 
+Description:
+   The mem_used_max file is read/write and specifies the amount
+   of maximum memory zram have consumed to store compressed data.
+   For resetting the value, you should write "0". Otherwise,
+   you could see -EINVAL.
+   Unit: bytes
+
 What:  /sys/block/zram/mem_limit
 Date:  August 2014
 Contact:   Minchan Kim 
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index 82c6a41116db..7fcf9c6592ec 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -111,6 +111,7 @@ size of the disk when not in use so a huge zram is wasteful.
orig_data_size
compr_data_size
mem_used_total
+   mem_used_max
 
 8) Deactivate:
swapoff /dev/zram0
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 370c355eb127..1a2b3e320ea5 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -149,6 +149,41 @@ static ssize_t mem_limit_store(struct device *dev,
return len;
 }
 
+static ssize_t mem_used_max_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   u64 val = 0;
+   struct zram *zram = dev_to_zram(dev);
+
+   down_read(>init_lock);
+   if (init_done(zram))
+   val = atomic_long_read(>stats.max_used_pages);
+   up_read(>init_lock);
+
+   return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
+}
+
+static ssize_t mem_used_max_store(struct device *dev,
+   struct device_attribute *attr, const char *buf, size_t len)
+{
+   int err;
+   unsigned long val;
+   struct zram *zram = dev_to_zram(dev);
+   struct zram_meta *meta = zram->meta;
+
+   err = kstrtoul(buf, 10, );
+   if (err || val != 0)
+   return -EINVAL;
+
+   down_read(>init_lock);
+   if (init_done(zram))
+   atomic_long_set(>stats.max_used_pages,
+   zs_get_total_pages(meta->mem_pool));
+   up_read(>init_lock);
+
+   return len;
+}
+
 static ssize_t max_comp_streams_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
 {
@@ -461,6 +496,21 @@ out_cleanup:
return ret;
 }
 
+static inline void update_used_max(struct zram *zram,
+   const unsigned long pages)
+{
+   int old_max, cur_max;
+
+   old_max = atomic_long_read(>stats.max_used_pages);
+
+   do {
+   cur_max = old_max;
+   if (pages > cur_max)
+   old_max = atomic_long_cmpxchg(
+   >stats.max_used_pages, cur_max, pages);
+   } while (old_max != cur_max);
+}
+
 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
   int offset)
 {
@@ -472,6 +522,7 @@ static int zram_bvec_write(struct zram *zram, struct 
bio_vec *bvec, u32 index,
struct zram_meta *meta = zram->meta;
struct zcomp_strm *zstrm;
bool locked = false;
+   unsigned long alloced_pages;
 
page = bvec->bv_page;
if (is_partial_io(bvec)) {
@@ -541,13 +592,15 @@ static int zram_bvec_write(struct zram *zram, struct 
bio_vec *bvec, u32 index,
goto out;
}
 
-   if (zram->limit_pages &&
-   zs_get_total_pages(meta->mem_pool) > zram->limit_pages) {
+   alloced_pages = zs_get_total_pages(meta->mem_pool);
+   if (zram->limit_pages && alloced_pages > zram->limit_pages) {

[PATCH v5 2/4] zsmalloc: change return value unit of zs_get_total_size_bytes

2014-08-24 Thread Minchan Kim

zs_get_total_size_bytes returns the amount of memory zsmalloc
has consumed in *bytes*, but zsmalloc operates in *pages*
rather than bytes. So let's change the API to return pages; the
benefit is that we avoid unnecessary overhead
(i.e., converting pages to bytes) in zsmalloc.

Since return type is pages, "zs_get_total_pages" is better than
"zs_get_total_size_bytes".

Reviewed-by: Dan Streetman 
Signed-off-by: Minchan Kim 
---
 drivers/block/zram/zram_drv.c | 4 ++--
 include/linux/zsmalloc.h  | 2 +-
 mm/zsmalloc.c | 9 -
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d00831c3d731..f0b8b30a7128 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -103,10 +103,10 @@ static ssize_t mem_used_total_show(struct device *dev,
 
down_read(>init_lock);
if (init_done(zram))
-   val = zs_get_total_size_bytes(meta->mem_pool);
+   val = zs_get_total_pages(meta->mem_pool);
up_read(>init_lock);
 
-   return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+   return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
 }
 
 static ssize_t max_comp_streams_show(struct device *dev,
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index e44d634e7fb7..05c214760977 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -46,6 +46,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long 
handle,
enum zs_mapmode mm);
 void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
 
-u64 zs_get_total_size_bytes(struct zs_pool *pool);
+unsigned long zs_get_total_pages(struct zs_pool *pool);
 
 #endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 2a4acf400846..c4a91578dc96 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -297,7 +297,7 @@ static void zs_zpool_unmap(void *pool, unsigned long handle)
 
 static u64 zs_zpool_total_size(void *pool)
 {
-   return zs_get_total_size_bytes(pool);
+   return zs_get_total_pages(pool) << PAGE_SHIFT;
 }
 
 static struct zpool_driver zs_zpool_driver = {
@@ -1181,12 +1181,11 @@ void zs_unmap_object(struct zs_pool *pool, unsigned 
long handle)
 }
 EXPORT_SYMBOL_GPL(zs_unmap_object);
 
-u64 zs_get_total_size_bytes(struct zs_pool *pool)
+unsigned long zs_get_total_pages(struct zs_pool *pool)
 {
-   u64 npages = atomic_long_read(>pages_allocated);
-   return npages << PAGE_SHIFT;
+   return atomic_long_read(>pages_allocated);
 }
-EXPORT_SYMBOL_GPL(zs_get_total_size_bytes);
+EXPORT_SYMBOL_GPL(zs_get_total_pages);
 
 module_init(zs_init);
 module_exit(zs_exit);
-- 
2.0.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 1/4] zsmalloc: move pages_allocated to zs_pool

2014-08-24 Thread Minchan Kim

pages_allocated has been counted in the size_class structure, so when
a user of zsmalloc wants to see total_size_bytes, zsmalloc has to gather
the count from each size_class to report the sum.

That's not bad if the user doesn't read the value often, but if the user
starts to read the value frequently, it is not a good deal from a
performance point of view.

This patch moves the count from size_class to zs_pool so it could
reduce memory footprint (from [255 * 8byte] to
[sizeof(atomic_long_t)]).

Reviewed-by: Dan Streetman 
Signed-off-by: Minchan Kim 
---
 mm/zsmalloc.c | 23 ---
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 94f38fac5e81..2a4acf400846 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -199,9 +199,6 @@ struct size_class {
 
spinlock_t lock;
 
-   /* stats */
-   u64 pages_allocated;
-
struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];
 };
 
@@ -220,6 +217,7 @@ struct zs_pool {
struct size_class size_class[ZS_SIZE_CLASSES];
 
gfp_t flags;/* allocation flags used when growing pool */
+   atomic_long_t pages_allocated;
 };
 
 /*
@@ -1028,8 +1026,9 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
return 0;
 
set_zspage_mapping(first_page, class->index, ZS_EMPTY);
+   atomic_long_add(class->pages_per_zspage,
+   >pages_allocated);
spin_lock(>lock);
-   class->pages_allocated += class->pages_per_zspage;
}
 
obj = (unsigned long)first_page->freelist;
@@ -1082,14 +1081,13 @@ void zs_free(struct zs_pool *pool, unsigned long obj)
 
first_page->inuse--;
fullness = fix_fullness_group(pool, first_page);
-
-   if (fullness == ZS_EMPTY)
-   class->pages_allocated -= class->pages_per_zspage;
-
spin_unlock(>lock);
 
-   if (fullness == ZS_EMPTY)
+   if (fullness == ZS_EMPTY) {
+   atomic_long_sub(class->pages_per_zspage,
+   >pages_allocated);
free_zspage(first_page);
+   }
 }
 EXPORT_SYMBOL_GPL(zs_free);
 
@@ -1185,12 +1183,7 @@ EXPORT_SYMBOL_GPL(zs_unmap_object);
 
 u64 zs_get_total_size_bytes(struct zs_pool *pool)
 {
-   int i;
-   u64 npages = 0;
-
-   for (i = 0; i < ZS_SIZE_CLASSES; i++)
-   npages += pool->size_class[i].pages_allocated;
-
+   u64 npages = atomic_long_read(>pages_allocated);
return npages << PAGE_SHIFT;
 }
 EXPORT_SYMBOL_GPL(zs_get_total_size_bytes);
-- 
2.0.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 3/4] zram: zram memory size limitation

2014-08-24 Thread Minchan Kim

Since zram has no control feature to limit memory usage,
it is hard to manage system memory.

This patch adds a new knob, "mem_limit", via sysfs to set up
a limit so that zram fails allocation once it reaches
the limit.

In addition, the user can change the limit at runtime to
manage the memory more dynamically.

Initial state is no limit so it doesn't break old behavior.

Signed-off-by: Minchan Kim 
---
 Documentation/ABI/testing/sysfs-block-zram | 10 
 Documentation/blockdev/zram.txt| 24 ++---
 drivers/block/zram/zram_drv.c  | 41 ++
 drivers/block/zram/zram_drv.h  |  5 
 4 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-block-zram 
b/Documentation/ABI/testing/sysfs-block-zram
index 70ec992514d0..dbe643775ec1 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -119,3 +119,13 @@ Description:
efficiency can be calculated using compr_data_size and this
statistic.
Unit: bytes
+
+What:  /sys/block/zram/mem_limit
+Date:  August 2014
+Contact:   Minchan Kim 
+Description:
+   The mem_limit file is read/write and specifies the maximum
+   amount of memory zram can consume to store
+   compressed data. The limit can be changed at run time,
+   and "0" disables the limit. No limit is the initial state.
+   Unit: bytes
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
index 0595c3f56ccf..82c6a41116db 100644
--- a/Documentation/blockdev/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -74,14 +74,30 @@ There is little point creating a zram of greater than twice 
the size of memory
 since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
 size of the disk when not in use so a huge zram is wasteful.
 
-5) Activate:
+5) Set memory limit: Optional
+   Set memory limit by writing the value to sysfs node 'mem_limit'.
+   The value can be either in bytes or you can use mem suffixes.
+   In addition, you could change the value in runtime.
+   Examples:
+   # limit /dev/zram0 with 50MB memory
+   echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
+
+   # Using mem suffixes
+   echo 256K > /sys/block/zram0/mem_limit
+   echo 512M > /sys/block/zram0/mem_limit
+   echo 1G > /sys/block/zram0/mem_limit
+
+   # To disable memory limit
+   echo 0 > /sys/block/zram0/mem_limit
+
+6) Activate:
mkswap /dev/zram0
swapon /dev/zram0
 
mkfs.ext4 /dev/zram1
mount /dev/zram1 /tmp
 
-6) Stats:
+7) Stats:
Per-device statistics are exported as various nodes under
/sys/block/zram/
disksize
@@ -96,11 +112,11 @@ size of the disk when not in use so a huge zram is 
wasteful.
compr_data_size
mem_used_total
 
-7) Deactivate:
+8) Deactivate:
swapoff /dev/zram0
umount /dev/zram1
 
-8) Reset:
+9) Reset:
Write any positive value to 'reset' sysfs node
echo 1 > /sys/block/zram0/reset
echo 1 > /sys/block/zram1/reset
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index f0b8b30a7128..370c355eb127 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -122,6 +122,33 @@ static ssize_t max_comp_streams_show(struct device *dev,
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
 }
 
+static ssize_t mem_limit_show(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   u64 val;
+   struct zram *zram = dev_to_zram(dev);
+
+   down_read(>init_lock);
+   val = zram->limit_pages;
+   up_read(>init_lock);
+
+   return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
+}
+
+static ssize_t mem_limit_store(struct device *dev,
+   struct device_attribute *attr, const char *buf, size_t len)
+{
+   u64 limit;
+   struct zram *zram = dev_to_zram(dev);
+
+   limit = memparse(buf, NULL);
+   down_write(>init_lock);
+   zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
+   up_write(>init_lock);
+
+   return len;
+}
+
 static ssize_t max_comp_streams_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
 {
@@ -513,6 +540,14 @@ static int zram_bvec_write(struct zram *zram, struct 
bio_vec *bvec, u32 index,
ret = -ENOMEM;
goto out;
}
+
+   if (zram->limit_pages &&
+   zs_get_total_pages(meta->mem_pool) > zram->limit_pages) {
+   zs_free(meta->mem_pool, handle);
+   ret = -ENOMEM;
+   goto out;
+   }
+
cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);

Re: [PATCH 1/4] usbip: move usbip userspace code out of staging

2014-08-24 Thread Valentina Manea

On Tue, Aug 19, 2014 at 9:30 PM, Valentina Manea
 wrote:
> At this point, USB/IP userspace code is fully functional
> and can be moved out of staging.
>
> Signed-off-by: Valentina Manea 

Bumping this in case Greg missed the patch series.

Valentina
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 0/4] zram memory control enhance

2014-08-24 Thread Minchan Kim

Currently, zram has no feature to limit memory so theoretically
zram can deplete system memory.
Users have asked for a limit several times as even without exhaustion
zram makes it hard to control memory usage of the platform.
This patchset adds the feature.

Patch 1 makes zs_get_total_size_bytes faster because it would be
used frequently in later patches for the new feature.

Patch 2 changes zs_get_total_size_bytes's return unit from bytes
to page so that zsmalloc doesn't need unnecessary operation(ie,
<< PAGE_SHIFT).

Patch 3 adds the new feature. I added the feature to the zram layer,
not zsmalloc, because the limitation is zram's requirement, not zsmalloc's,
so any other user of zsmalloc (i.e., zpool) shouldn't be affected
by an unnecessary branch in zsmalloc. In the future, if every user
of zsmalloc wants the feature, we could easily move the feature
from the client side to zsmalloc, but the reverse would be
painful.

Patch 4 adds a new facility to report the maximum memory usage of zram,
so that users need not poll /sys/block/zram0/mem_used_total
frequently and transient maxima are not missed.

* From v4
 * Add Reviewed-by - Dan
 * Clean up document of mem_limit - David

* From v3
 * get_zs_total_size_byte function name change - Dan
 * clarification of the document - Dan
 * atomic account instead of introducing new lock in zsmalloc - David
 * remove unnecessary atomic instruction in updating max - David
 
* From v2
 * introduce helper function to update max_used_pages
   for readability - David
 * avoid unnecessary zs_get_total_size call in updating loop
   for max_used_pages - David

* From v1
 * rebased on next-20140815
 * fix up race problem - David, Dan
 * reset mem_used_max as current total_bytes, rather than 0 - David
 * resetting works with only "0" write for extensibility - David, Dan

Minchan Kim (4):
  zsmalloc: move pages_allocated to zs_pool
  zsmalloc: change return value unit of  zs_get_total_size_bytes
  zram: zram memory size limitation
  zram: report maximum used memory

 Documentation/ABI/testing/sysfs-block-zram |  20 ++
 Documentation/blockdev/zram.txt|  25 +--
 drivers/block/zram/zram_drv.c  | 101 -
 drivers/block/zram/zram_drv.h  |   6 ++
 include/linux/zsmalloc.h   |   2 +-
 mm/zsmalloc.c  |  30 -
 6 files changed, 158 insertions(+), 26 deletions(-)

-- 
2.0.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT PULL] ARM: SoC fixes for 3.17-rc

2014-08-24 Thread Olof Johansson

On Sun, Aug 24, 2014 at 3:58 PM, Linus Torvalds
 wrote:
> On Sun, Aug 24, 2014 at 11:40 AM, Olof Johansson  wrote:
>>
>> are available in the git repository at:
>>
>>   git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc.git fixes
>
> Grr. Yes and no.
>
> You didn't really mean for me to pull that branch, you meant for me to
> pull your tag "fixes-for-linus".

Yes, of course -- and I missed that.

> Where did this fail? Do you still run an old broken git version that
> guesses at what the pull target is, and makes sh*t up? Please update
> if so.
>
> And if not, how did the tag contents get added to the pull request
> despite the pull request not mentioning the tag?

TL;DR: My fault, I'll double-check this in the future.

Long version:

100% operator error due to the tools changing. I'm still used to
looking for the warning that it doesn't find/use the remote tag as a
safety for these mistakes.

I'm still used to the older version that figured out tag name on its
own, so I did my usual:

 * run request-pull to double-check what's in the branch
 * create the tag
 * push the tag
 * rerun request-pull with the tag, redirect to file
...and then finally send the email with the file contents.

What I forgot to do was change the command line between the first and
the second run -- the first one referenced the branch, the second
should have referenced the tag but I just reused the same command from
history.

The old version of git that auto-guessed branch/tag name used to warn
if it used a tag to create the pull request, but didn't find the tag
in the remote repo. I suppose it'd be useful if the current version
warned if the third argument wasn't referring to the same tag as well,
it would definitely have saved me here.

-Olof
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Problem with commit: x86, iosf: Make IOSF driver modular and usable by more drivers

2014-08-24 Thread David E. Box

On Sat, Aug 23, 2014 at 10:40:14AM -0700, Randy Dunlap wrote:
> On 08/23/14 02:31, Fejes József wrote:
> > Hi,
> > 
> > I think there's a problem with commit
> > 6b8f0c8780c71d78624f736d7849645b64cc88b7: config IOSF_MBI is
> > automatically a module and I cannot change that.
> > 
> > I've been using 3.15.* stable kernels. I have module support enabled,
> > but I build everything into the kernel, so I don't actually have any
> > modules built. I just upgraded to 3.16.1, and found out that I now
> > have this one module. I cannot find it in the menu, so I edited the
> > .config file by hand, but it changes back from =y to =m. Could you
> > please look into fixing it, and push it to 3.16.* stable branch?
> 
> [adding David E. Box to email]
> 
> This is a mainline issue, not just a -stable issue.
> Once fixed in mainline (if ever), then that fix can be added to -stable.
> 
> Fejes, you could just disable module support and then iosf_mbi would be built
> into the kernel.  But as a loadable module, it won't waste memory if it's
> not needed.
> 
> David, any other suggestions?
> Why can't the users of IOSF_MBI just select it? That's what many other
> drivers do when they need to be sure that some functionality is present.

They could, but it's only required on SoC's. Some registers, while available
through an MSR on x86 core systems, are only available through the sideband on
x86 SoC's. So these drivers would waste space on core platforms. There is no
Kconfig option that builds exclusively for x86 SoC's.

> I'm surprised that someone else (e.g. Linus) has not complained about the
> 'default m' for this driver.

I'll just move to prompt for selection. I was advised against this early on
since the driver doesn't have a hook to userspace. Unfortunately I didn't
question this and locked myself into pursuing the 'default m' option as a way to
make sure the IOSF_MBI was available for the drivers that will use it. That was
obviously a mistake.

Dave
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v4 0/4] zram memory control enhance

2014-08-24 Thread Minchan Kim

Hello Dan,

On Fri, Aug 22, 2014 at 03:15:36PM -0400, Dan Streetman wrote:
> On Thu, Aug 21, 2014 at 8:42 PM, Minchan Kim  wrote:
> > Currently, zram has no feature to limit memory so theoretically
> > zram can deplete system memory.
> > Users have asked for a limit several times as even without exhaustion
> > zram makes it hard to control memory usage of the platform.
> > This patchset adds the feature.
> >
> > Patch 1 makes zs_get_total_size_bytes faster because it would be
> > used frequently in later patches for the new feature.
> >
> > Patch 2 changes zs_get_total_size_bytes's return unit from bytes
> > to page so that zsmalloc doesn't need unnecessary operation(ie,
> > << PAGE_SHIFT).
> >
> > Patch 3 adds new feature. I added the feature into zram layer,
> > not zsmalloc because limiation is zram's requirement, not zsmalloc
> > so any other user using zsmalloc(ie, zpool) shouldn't affected
> > by unnecessary branch of zsmalloc. In future, if every users
> > of zsmalloc want the feature, then, we could move the feature
> > from client side to zsmalloc easily but vice versa would be
> > painful.
> >
> > Patch 4 adds news facility to report maximum memory usage of zram
> > so that this avoids user polling frequently via /sys/block/zram0/
> > mem_used_total and ensures transient max are not missed.
> 
> FWIW, with the minor update to checking the memparse in patch 3 David
> mentioned, feel free to add to all the patches:

I replied to David's mail: it's not critical for the goal
of this patchset. And if we should fix it, the fix belongs in memparse and
should handle all of the cases, not just the null case.
So I will take your Reviewed-by for all but patch 3. :)

> 
> Reviewed-by: Dan Streetman 

Thanks!

> 
> >
> > * From v3
> >  * get_zs_total_size_byte function name change - Dan
> >  * clarifiction of the document - Dan
> >  * atomic account instead of introducing new lock in zsmalloc - David
> >  * remove unnecessary atomic instruction in updating max - David
> >
> > * From v2
> >  * introduce helper funcntion to update max_used_pages
> >for readability - David
> >  * avoid unncessary zs_get_total_size call in updating loop
> >for max_used_pages - David
> >
> > * From v1
> >  * rebased on next-20140815
> >  * fix up race problem - David, Dan
> >  * reset mem_used_max as current total_bytes, rather than 0 - David
> >  * resetting works with only "0" write for extensiblilty - David, Dan
> >
> > Minchan Kim (4):
> >   zsmalloc: move pages_allocated to zs_pool
> >   zsmalloc: change return value unit of  zs_get_total_size_bytes
> >   zram: zram memory size limitation
> >   zram: report maximum used memory
> >
> >  Documentation/ABI/testing/sysfs-block-zram |  20 ++
> >  Documentation/blockdev/zram.txt|  25 +--
> >  drivers/block/zram/zram_drv.c  | 101 
> > -
> >  drivers/block/zram/zram_drv.h  |   6 ++
> >  include/linux/zsmalloc.h   |   2 +-
> >  mm/zsmalloc.c  |  30 -
> >  6 files changed, 158 insertions(+), 26 deletions(-)
> >
> > --
> > 2.0.0
> >
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majord...@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: mailto:"d...@kvack.org;> em...@kvack.org 

-- 
Kind regards,
Minchan Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v4 3/4] zram: zram memory size limitation

2014-08-24 Thread Minchan Kim

Hello David,

On Fri, Aug 22, 2014 at 06:55:38AM -0400, David Horner wrote:
> On Thu, Aug 21, 2014 at 8:42 PM, Minchan Kim  wrote:
> > Since zram has no control feature to limit memory usage,
> > it makes hard to manage system memrory.
> >
> > This patch adds new knob "mem_limit" via sysfs to set up the
> > a limit so that zram could fail allocation once it reaches
> > the limit.
> >
> > In addition, user could change the limit in runtime so that
> > he could manage the memory more dynamically.
> >
> - Default is no limit so it doesn't break old behavior.
> + Initial state is no limit so it doesn't break old behavior.
> 
> I understand your previous post now.
> 
> I was saying that setting to either a null value or garbage
>  (which is interpreted as zero by memparse(buf, NULL);)
> removes the limit.
> 
> I think this is "surprise" behaviour and rather the null case should
> return  -EINVAL
> The test below should be "good enough" though not catching all garbage.

Thanks for the suggestion, but as I said, it should be fixed in memparse itself,
not in the caller, if it is really a problem, so I don't want to touch it in this
patchset. It's not critical for adding the feature.

> 
> >
> > Signed-off-by: Minchan Kim 
> > ---
> >  Documentation/ABI/testing/sysfs-block-zram | 10 
> >  Documentation/blockdev/zram.txt| 24 ++---
> >  drivers/block/zram/zram_drv.c  | 41 
> > ++
> >  drivers/block/zram/zram_drv.h  |  5 
> >  4 files changed, 76 insertions(+), 4 deletions(-)
> >
> > diff --git a/Documentation/ABI/testing/sysfs-block-zram 
> > b/Documentation/ABI/testing/sysfs-block-zram
> > index 70ec992514d0..b8c779d64968 100644
> > --- a/Documentation/ABI/testing/sysfs-block-zram
> > +++ b/Documentation/ABI/testing/sysfs-block-zram
> > @@ -119,3 +119,13 @@ Description:
> > efficiency can be calculated using compr_data_size and this
> > statistic.
> > Unit: bytes
> > +
> > +What:  /sys/block/zram/mem_limit
> > +Date:  August 2014
> > +Contact:   Minchan Kim 
> > +Description:
> > +   The mem_limit file is read/write and specifies the amount
> > +   of memory to be able to consume memory to store store
> > +   compressed data. The limit could be changed in run time
> > -   and "0" is default which means disable the limit.
> > +   and "0" means disable the limit. No limit is the initial 
> > state.
> 
> there should be no default in the API.

Thanks.

> 
> > +   Unit: bytes
> > diff --git a/Documentation/blockdev/zram.txt 
> > b/Documentation/blockdev/zram.txt
> > index 0595c3f56ccf..82c6a41116db 100644
> > --- a/Documentation/blockdev/zram.txt
> > +++ b/Documentation/blockdev/zram.txt
> > @@ -74,14 +74,30 @@ There is little point creating a zram of greater than 
> > twice the size of memory
> >  since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of 
> > the
> >  size of the disk when not in use so a huge zram is wasteful.
> >
> > -5) Activate:
> > +5) Set memory limit: Optional
> > +   Set memory limit by writing the value to sysfs node 'mem_limit'.
> > +   The value can be either in bytes or you can use mem suffixes.
> > +   In addition, you could change the value in runtime.
> > +   Examples:
> > +   # limit /dev/zram0 with 50MB memory
> > +   echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
> > +
> > +   # Using mem suffixes
> > +   echo 256K > /sys/block/zram0/mem_limit
> > +   echo 512M > /sys/block/zram0/mem_limit
> > +   echo 1G > /sys/block/zram0/mem_limit
> > +
> > +   # To disable memory limit
> > +   echo 0 > /sys/block/zram0/mem_limit
> > +
> > +6) Activate:
> > mkswap /dev/zram0
> > swapon /dev/zram0
> >
> > mkfs.ext4 /dev/zram1
> > mount /dev/zram1 /tmp
> >
> > -6) Stats:
> > +7) Stats:
> > Per-device statistics are exported as various nodes under
> > /sys/block/zram/
> > disksize
> > @@ -96,11 +112,11 @@ size of the disk when not in use so a huge zram is 
> > wasteful.
> > compr_data_size
> > mem_used_total
> >
> > -7) Deactivate:
> > +8) Deactivate:
> > swapoff /dev/zram0
> > umount /dev/zram1
> >
> > -8) Reset:
> > +9) Reset:
> > Write any positive value to 'reset' sysfs node
> > echo 1 > /sys/block/zram0/reset
> > echo 1 > /sys/block/zram1/reset
> > diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> > index f0b8b30a7128..370c355eb127 100644
> > --- a/drivers/block/zram/zram_drv.c
> > +++ b/drivers/block/zram/zram_drv.c
> > @@ -122,6 +122,33 @@ static ssize_t max_comp_streams_show(struct device 
> > *dev,
> > return scnprintf(buf, PAGE_SIZE, "%d\n", val);
> >  }
> >
> > +static ssize_t mem_limit_show(struct device *dev,
>

Re: [PATCH 0/7] MIPS: Move device-tree files to a common location

2014-08-24 Thread Rob Herring

On Sat, Aug 23, 2014 at 11:14 AM, Olof Johansson  wrote:
> On Sat, Aug 23, 2014 at 03:56:42PM +0200, Arnd Bergmann wrote:
>> On Saturday 23 August 2014, Olof Johansson wrote:
>> > On Fri, Aug 22, 2014 at 02:10:23PM -0700, Andrew Bresticker wrote:
>> > > On Fri, Aug 22, 2014 at 1:42 PM, Florian Fainelli  
>> > > wrote:
>> > > >
>> > > > On Aug 21, 2014 3:05 PM, "Andrew Bresticker"  
>> > > > wrote:
>> > > > >
>> > > > > To be consistent with other architectures and to avoid unnecessary
>> > > > > makefile duplication, move all MIPS device-trees to 
>> > > > > arch/mips/boot/dts
>> > > > > and build them with a common makefile.
>> > > >
>> > > > I recall reading that the ARM organization for DTS files was a bit 
>> > > > unfortunate
>> > > > and should have been something like:
>> > > >
>> > > > arch/arm/boot/dts//
>> > > >
>> > > > Is this something we should do for the MIPS and update the other 
>> > > > architectures
>> > > > to follow that scheme?
>> > >
>> > > I recall reading that as well and that it would be adopted for ARM64,
>> > > but that hasn't seemed to have happened.  Perhaps Olof (CC'ed) will no
>> > > more.
>> >
>> > Yeah, I highly recommend having a directory per vendor. We didn't on ARM,
>> > and the amount of files in that directory is becoming pretty
>> > insane. Moving to a subdirectory structure later gets messy which is
>> > why we've been holding off on it.
>>
>> Another argument is that we plan to actually move all the dts files out of
>> the kernel into a separate project in the future. We really don't want to
>> have the churn of moving all the files now when they get deleted in one
>> of the next merge windows.
>
> To be honest, I don't see that happening within the forseeable
> future. Some of us maintainers like talking about this, but everyone who
> actually develops have nightmares about this scenario. Nobody knows how
> it'll be done without causing some real serious impact on productivity.
>
>> I don't know if we talked about whether that move should be done for
>> all architectures at the same time. If that is the plan, I think it
>> would be best to not move the MIPS files at all but also wait until
>> they can get removed from the kernel tree.
>
> If MIPS can restructure now before things start growing, then I'd really
> recommend that they do so and not hold off waiting on some event that
> might never happen. :)

Yes, I agree on both points.

Rob
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] drivers: staging: rtl8821ae: Fix spaces required around that '<' errors

2014-08-24 Thread Greg Donald

Fix checkpatch.pl spaces required around that '<' errors

Signed-off-by: Greg Donald 
---
 drivers/staging/rtl8821ae/btcoexist/halbtc8723b2ant.c |  2 +-
 drivers/staging/rtl8821ae/rtl8821ae/phy.c | 18 +-
 drivers/staging/rtl8821ae/rtl8821ae/rf.c  |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/rtl8821ae/btcoexist/halbtc8723b2ant.c 
b/drivers/staging/rtl8821ae/btcoexist/halbtc8723b2ant.c
index da3f62d..edcf8dd 100644
--- a/drivers/staging/rtl8821ae/btcoexist/halbtc8723b2ant.c
+++ b/drivers/staging/rtl8821ae/btcoexist/halbtc8723b2ant.c
@@ -1323,7 +1323,7 @@ static void halbtc8723b2ant_set_ant_path(struct 
btc_coexist *btcoexist,
btcoexist->btc_get(btcoexist, BTC_GET_BL_EXT_SWITCH, _ext_switch);
btcoexist->btc_get(btcoexist, BTC_GET_U4_WIFI_FW_VER, _ver);
 
-   if ((fw_ver<0xc) || pg_ext_switch)
+   if ((fw_ver < 0xc) || pg_ext_switch)
use_ext_switch = true;
 
if (init_hwcfg) {
diff --git a/drivers/staging/rtl8821ae/rtl8821ae/phy.c 
b/drivers/staging/rtl8821ae/rtl8821ae/phy.c
index 1dd3301..c56936d 100644
--- a/drivers/staging/rtl8821ae/rtl8821ae/phy.c
+++ b/drivers/staging/rtl8821ae/rtl8821ae/phy.c
@@ -1252,7 +1252,7 @@ static bool 
_rtl8812ae_phy_config_mac_with_headerfile(struct ieee80211_hw *hw)
for (i = 0; i < arraylength; i += 2) {
v1 = ptrarray[i];
v2 = (u8) ptrarray[i + 1];
-   if (v1<0xCDCDCDCD) {
+   if (v1 < 0xCDCDCDCD) {
rtl_write_byte(rtlpriv, v1, (u8) v2);
} else {
if (!_rtl8821ae_check_condition(hw,v1)) {
@@ -1296,7 +1296,7 @@ static bool 
_rtl8821ae_phy_config_mac_with_headerfile(struct ieee80211_hw *hw)
for (i = 0; i < arraylength; i += 2) {
v1 = ptrarray[i];
v2 = (u8) ptrarray[i + 1];
-   if (v1<0xCDCDCDCD) {
+   if (v1 < 0xCDCDCDCD) {
rtl_write_byte(rtlpriv, v1, (u8) v2);
continue;
} else {
@@ -1342,7 +1342,7 @@ static bool 
_rtl8812ae_phy_config_bb_with_headerfile(struct ieee80211_hw *hw,
for (i = 0; i < arraylen; i += 2) {
v1 = array_table[i];
v2 = array_table[i+1];
-   if (v1<0xCDCDCDCD) {
+   if (v1 < 0xCDCDCDCD) {
_rtl8812ae_config_bb_reg(hw, v1, v2);
continue;
} else {/*This line is the start line of branch.*/
@@ -1431,7 +1431,7 @@ static bool 
_rtl8821ae_phy_config_bb_with_headerfile(struct ieee80211_hw *hw,
for (i = 0; i < arraylen; i += 2) {
v1 = array_table[i];
v2 = array_table[i+1];
-   if (v1<0xCDCDCDCD) {
+   if (v1 < 0xCDCDCDCD) {
_rtl8821ae_config_bb_reg(hw, v1, v2);
continue;
} else {/*This line is the start line of branch.*/
@@ -1566,7 +1566,7 @@ static bool 
_rtl8812ae_phy_config_bb_with_pgheaderfile(struct ieee80211_hw *hw,
v5 = phy_regarray_table_pg[i+4];
v6 = phy_regarray_table_pg[i+5];
 
-   if (v1<0xCDCDCDCD) {
+   if (v1 < 0xCDCDCDCD) {
if ( (v4 == 0xfe) || (v4 == 0xffe))
mdelay(50);
else
@@ -1617,7 +1617,7 @@ static bool 
_rtl8821ae_phy_config_bb_with_pgheaderfile(struct ieee80211_hw *hw,
v5 = phy_regarray_table_pg[i+4];
v6 = phy_regarray_table_pg[i+5];
 
-   if (v1<0xCDCDCDCD) {
+   if (v1 < 0xCDCDCDCD) {
if (v4 == 0xfe)
mdelay(50);
else if (v4 == 0xfd)
@@ -1682,7 +1682,7 @@ bool rtl8812ae_phy_config_rf_with_headerfile(struct 
ieee80211_hw * hw,
for (i = 0; i < radioa_arraylen_a; i = i + 2) {
v1 = radioa_array_table_a[i];
v2 = radioa_array_table_a[i+1];
-   if (v1<0xcdcdcdcd) {
+   if (v1 < 0xcdcdcdcd) {
_rtl8821ae_config_rf_radio_a(hw,v1,v2);
continue;
}else{/*This line is the start line of branch.*/
@@ -1714,7 +1714,7 @@ bool rtl8812ae_phy_config_rf_with_headerfile(struct 
ieee80211_hw * hw,
for (i = 0; i < radioa_arraylen_b; i = i + 2) {
v1 = radioa_array_table_b[i];
v2 = radioa_array_table_b[i+1];
-   if (v1<0xcdcdcdcd) {
+   if

Re: [PATCH v9 04/12] PCI: OF: Fix the conversion of IO ranges into IO resources.

2014-08-24 Thread Rob Herring

On Fri, Aug 22, 2014 at 8:06 AM, Liviu Dudau  wrote:
> On Thu, Aug 21, 2014 at 11:08:48PM -0500, Rob Herring wrote:
>> On Tue, Aug 12, 2014 at 11:25 AM, Liviu Dudau  wrote:
>> > The ranges property for a host bridge controller in DT describes
>> > the mapping between the PCI bus address and the CPU physical address.
>> > The resources framework however expects that the IO resources start
>> > at a pseudo "port" address 0 (zero) and have a maximum size of 
>> > IO_SPACE_LIMIT.
>> > The conversion from pci ranges to resources failed to take that into 
>> > account.
>> >
>> > In the process move the function into drivers/of/address.c as it now
>> > depends on pci_address_to_pio() code and make it return an error code.
>> >
>> > Cc: Grant Likely 
>> > Cc: Rob Herring 
>>
>> Humm, this says I'm cc'ed, but I'm not which defeats the point of
>> recording the Cc's in the commit.
>
> Apologies, I've screwed up my git send-email arguments.
>
>>
>> I still have the same concerns that this will break existing users.
>> Are you sure integrator is the only platform affected?
>
> microblaze and powerpc have their similar handcoded routine for parsing ranges
> where they pre-compute the io_base and adjust the values again when 
> registering
> resources. I'm not absolutely sure they are not broken as I lack the 
> appropriate
> platforms to test (I've been asking for an FPGA engineer to build me a 
> microblaze
> image with all the bits included but haven't received anything yet and it is
> possible Xilinx has now shifted their interests towards ARM + PCI as the ML605
> board that I have seems to have been discontinued).

I will settle for "I've read through the $arch code and believe they
are not broken". It is unrealistic for you to test on everything.

> mips is doing the same thing and I believe is not affected, pci-host-generic.c
> was adjusting the returned values afterwards so that will not be needed and 
> Lorenzo
> has a patch for the driver to adapt it to this series anyway.
>
> pcie-designware.c also recalculates the io.start and io.end values, so that's 
> fine
> for now. The only ones that I believe are still affected are pci-tegra.c and
> pcie-rcar.c for which I will need to provide a patch similar to integrator 
> unless
> the code gets converted to the new range parsing.

Well, the latter would be nice, but they certainly have to be fixed.
Now that I think about it, this needs to be handled in a bisectable
way. So I think you need to fix all affected platforms in this patch
rather than a separate patch as you have done.

Rob
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] KVM-Use value reading from MSR when construct the eptp in VMX mode

2014-08-24 Thread Wanpeng Li

Please Cc kvm ml.
On Sun, Aug 24, 2014 at 11:54:32AM +0800, Dennis Chen wrote:
>This patch is used to construct the eptp in vmx mode with values
>read from MSR according to the Intel x86 software developer's
>manual.
>
>Signed-off-by: Dennis Chen 
>---
> arch/x86/include/asm/vmx.h |1 +
> arch/x86/kvm/vmx.c |   21 +
> 2 files changed, 18 insertions(+), 4 deletions(-)
>
>diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
>index bcbfade..bf82a77 100644
>--- a/arch/x86/include/asm/vmx.h
>+++ b/arch/x86/include/asm/vmx.h
>@@ -417,6 +417,7 @@ enum vmcs_field {
> #define VMX_EPT_GAW_EPTP_SHIFT3
> #define VMX_EPT_AD_ENABLE_BIT(1ull << 6)
> #define VMX_EPT_DEFAULT_MT0x6ull
>+#define VMX_EPT_UC_MT0x0ull
> #define VMX_EPT_READABLE_MASK0x1ull
> #define VMX_EPT_WRITABLE_MASK0x2ull
> #define VMX_EPT_EXECUTABLE_MASK0x4ull
>diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>index bfe11cf..7add5ce 100644
>--- a/arch/x86/kvm/vmx.c
>+++ b/arch/x86/kvm/vmx.c
>@@ -3477,11 +3477,24 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu,
>unsigned long cr0)
>
> static u64 construct_eptp(unsigned long root_hpa)
> {
>-u64 eptp;
>+u64 eptp, pwl;
>+
>+if (cpu_has_vmx_ept_4levels())
>+pwl = VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
>+else {
>+WARN(1, "Unsupported page-walk length of 4.\n");
>+BUG();
>+}
>+
>+if (cpu_has_vmx_eptp_writeback())
>+eptp = VMX_EPT_DEFAULT_MT | pwl;
>+else if (cpu_has_vmx_eptp_uncacheable())
>+eptp = VMX_EPT_UC_MT | pwl;
>+else {
>+WARN(1, "Unsupported memory type config in vmx eptp.\n");
>+BUG();
>+}
>
>-/* TODO write the value reading from MSR */
>-eptp = VMX_EPT_DEFAULT_MT |
>-VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
> if (enable_ept_ad_bits)
> eptp |= VMX_EPT_AD_ENABLE_BIT;
> eptp |= (root_hpa & PAGE_MASK);
>-- 
>1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] UML: UBD: Fix for processes stuck in D state forever in UserModeLinux

2014-08-24 Thread Thorsten Knabe

On 08/24/2014 02:11 PM, Richard Weinberger wrote:
> Am 23.08.2014 19:43, schrieb Thorsten Knabe:
>> Hi Richard.
>>
>> On 08/23/2014 05:34 PM, Richard Weinberger wrote:
>>> Hi!
>>>
>>> Am 23.08.2014 15:47, schrieb Thorsten Knabe:
 From: Thorsten Knabe 

 UML: UBD: Fix for processes stuck in D state forever in UserModeLinux.

 Starting with Linux 3.12 processes get stuck in D state forever in
 UserModeLinux under sync heavy workloads. This bug was introduced by
 commit 805f11a0d5 (um: ubd: Add REQ_FLUSH suppport).
 Fix bug by adding a check if FLUSH request was successfully submitted to
 the I/O thread and keeping the FLUSH request on the request queue on
 submission failures.

 Fixes: 805f11a0d5 (um: ubd: Add REQ_FLUSH suppport)
 Signed-off-by: Thorsten Knabe 
>>>
>>> Thanks a lot for hunting this issue down.
>>>
 ---
 Patch applies to 3.16.1.

 diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
 index 3716e69..b7d2840 100644
 --- a/arch/um/drivers/ubd_kern.c
 +++ b/arch/um/drivers/ubd_kern.c
 @@ -1277,7 +1277,7 @@ static void do_ubd_request(struct request_queue *q)

while(1){
struct ubd *dev = q->queuedata;
 -  if(dev->end_sg == 0){
 +  if(dev->request == NULL){
>>>
>>> Why do we need this specific change?
>>
>> This change is required, because for FLUSH requests dev->end_sg is
>> initialized to 0 by blk_rq_map_sg() a few lines above, as FLUSH requests
>> have no data blocks attached to themselves.
> 
> You meant "below"? Looks like I really miss something here.
> At the bottom of the while(1) loop we have
> dev->end_sg = 0;
> dev->request = NULL;

No. The problematic line is:
dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
and blk_rq_map_sg() returning 0 for REQ_FLUSH requests, because they
have no associated data blocks.

Hence on the next iteration of the while(1) loop:
if(dev->end_sg == 0){
will be true, even if the request has not been successfully submitted to
the I/O thread in the previous iteration of the while(1) loop and a new
request will be fetched:
struct request *req = blk_fetch_request(q);
if(req == NULL)
return;

dev->request = req;
dev->rq_pos = blk_rq_pos(req);
dev->start_sg = 0;
dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
}

Thus the REQ_FLUSH request got lost and will never get submitted to the
I/O thread, there will be no matching answer from the I/O thread and the
lost REQ_FLUSH request will never complete...

Regards
Thorsten

> 
> Thanks,
> //richard
> 


-- 
___
 || / E-Mail: li...@thorsten-knabe.de
 |horsten |/\nabeWWW: http://linux.thorsten-knabe.de
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT PULL] ARM: SoC fixes for 3.17-rc

2014-08-24 Thread Linus Torvalds

On Sun, Aug 24, 2014 at 11:40 AM, Olof Johansson  wrote:
>
> are available in the git repository at:
>
>   git://git.kernel.org/pub/scm/linux/kernel/git/arm/arm-soc.git fixes

Grr. Yes and no.

You didn't really mean for me to pull that branch, you meant for me to
pull your tag "fixes-for-linus".

Where did this fail? Do you still run an old broken git version that
guesses at what the pull target is, and makes sh*t up? Please update
if so.

And if not, how did the tag contents get added to the pull request
despite the pull request not mentioning the tag?

   Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] drivers: staging: rtl8192u: Fix switch and case should be at the same indent errors

2014-08-24 Thread Greg Donald

Fix checkpatch.pl switch and case should be at the same indent errors

Signed-off-by: Greg Donald 
---
 drivers/staging/rtl8192u/r8192U_core.c | 510 -
 1 file changed, 252 insertions(+), 258 deletions(-)

diff --git a/drivers/staging/rtl8192u/r8192U_core.c 
b/drivers/staging/rtl8192u/r8192U_core.c
index eb96bed..3707d03 100644
--- a/drivers/staging/rtl8192u/r8192U_core.c
+++ b/drivers/staging/rtl8192u/r8192U_core.c
@@ -1071,83 +1071,83 @@ static void rtl8192_config_rate(struct net_device *dev, 
u16 *rate_config)
for (i = 0; i < net->rates_len; i++) {
basic_rate = net->rates[i]&0x7f;
switch (basic_rate) {
-   case MGN_1M:
-   *rate_config |= RRSR_1M;
-   break;
-   case MGN_2M:
-   *rate_config |= RRSR_2M;
-   break;
-   case MGN_5_5M:
-   *rate_config |= RRSR_5_5M;
-   break;
-   case MGN_11M:
-   *rate_config |= RRSR_11M;
-   break;
-   case MGN_6M:
-   *rate_config |= RRSR_6M;
-   break;
-   case MGN_9M:
-   *rate_config |= RRSR_9M;
-   break;
-   case MGN_12M:
-   *rate_config |= RRSR_12M;
-   break;
-   case MGN_18M:
-   *rate_config |= RRSR_18M;
-   break;
-   case MGN_24M:
-   *rate_config |= RRSR_24M;
-   break;
-   case MGN_36M:
-   *rate_config |= RRSR_36M;
-   break;
-   case MGN_48M:
-   *rate_config |= RRSR_48M;
-   break;
-   case MGN_54M:
-   *rate_config |= RRSR_54M;
-   break;
+   case MGN_1M:
+   *rate_config |= RRSR_1M;
+   break;
+   case MGN_2M:
+   *rate_config |= RRSR_2M;
+   break;
+   case MGN_5_5M:
+   *rate_config |= RRSR_5_5M;
+   break;
+   case MGN_11M:
+   *rate_config |= RRSR_11M;
+   break;
+   case MGN_6M:
+   *rate_config |= RRSR_6M;
+   break;
+   case MGN_9M:
+   *rate_config |= RRSR_9M;
+   break;
+   case MGN_12M:
+   *rate_config |= RRSR_12M;
+   break;
+   case MGN_18M:
+   *rate_config |= RRSR_18M;
+   break;
+   case MGN_24M:
+   *rate_config |= RRSR_24M;
+   break;
+   case MGN_36M:
+   *rate_config |= RRSR_36M;
+   break;
+   case MGN_48M:
+   *rate_config |= RRSR_48M;
+   break;
+   case MGN_54M:
+   *rate_config |= RRSR_54M;
+   break;
}
}
for (i = 0; i < net->rates_ex_len; i++) {
basic_rate = net->rates_ex[i]&0x7f;
switch (basic_rate) {
-   case MGN_1M:
-   *rate_config |= RRSR_1M;
-   break;
-   case MGN_2M:
-   *rate_config |= RRSR_2M;
-   break;
-   case MGN_5_5M:
-   *rate_config |= RRSR_5_5M;
-   break;
-   case MGN_11M:
-   *rate_config |= RRSR_11M;
-   break;
-   case MGN_6M:
-   *rate_config |= RRSR_6M;
-   break;
-   case MGN_9M:
-   *rate_config |= RRSR_9M;
-   break;
-   case MGN_12M:
-   *rate_config |= RRSR_12M;
-   break;
-   case MGN_18M:
-   *rate_config |= RRSR_18M;
-   break;
-   case MGN_24M:
-   *rate_config |= RRSR_24M;
-

[PATCH 1/6] autofs4: allow RCU-walk to walk through autofs4.

2014-08-24 Thread NeilBrown

Any attempt to look up a pathname that passes through an
autofs4 mount is currently forced out of RCU-walk into
REF-walk.

This can significantly hurt performance of many-thread work
loads on many-core systems, especially if the automounted
filesystem supports RCU-walk but doesn't get to benefit from
it.

So if autofs4_d_manage is called with rcu_walk set, only
fail with -ECHILD if it is necessary to wait longer than
a spinlock.

Reviewed-by: Ian Kent 
Tested-by: Ian Kent 
Signed-off-by: NeilBrown 
---
 fs/autofs4/autofs_i.h  |2 +-
 fs/autofs4/dev-ioctl.c |2 +-
 fs/autofs4/expire.c|4 +++-
 fs/autofs4/root.c  |   44 +---
 4 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 9e359fb20c0a..2f1032f12d91 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -148,7 +148,7 @@ void autofs4_free_ino(struct autofs_info *);
 
 /* Expiration */
 int is_autofs4_dentry(struct dentry *);
-int autofs4_expire_wait(struct dentry *dentry);
+int autofs4_expire_wait(struct dentry *dentry, int rcu_walk);
 int autofs4_expire_run(struct super_block *, struct vfsmount *,
struct autofs_sb_info *,
struct autofs_packet_expire __user *);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 5b570b6efa28..aaf96cb25452 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -450,7 +450,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
ino = autofs4_dentry_ino(path.dentry);
if (ino) {
err = 0;
-   autofs4_expire_wait(path.dentry);
+   autofs4_expire_wait(path.dentry, 0);
spin_lock(>fs_lock);
param->requester.uid = from_kuid_munged(current_user_ns(), 
ino->uid);
param->requester.gid = from_kgid_munged(current_user_ns(), 
ino->gid);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index a7be57e39be7..7e2f22ce6954 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -467,7 +467,7 @@ found:
return expired;
 }
 
-int autofs4_expire_wait(struct dentry *dentry)
+int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
 {
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
@@ -477,6 +477,8 @@ int autofs4_expire_wait(struct dentry *dentry)
spin_lock(>fs_lock);
if (ino->flags & AUTOFS_INF_EXPIRING) {
spin_unlock(>fs_lock);
+   if (rcu_walk)
+   return -ECHILD;
 
DPRINTK("waiting for expire %p name=%.*s",
 dentry, dentry->d_name.len, dentry->d_name.name);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index cdb25ebccc4c..2296c8301b66 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -210,7 +210,8 @@ next:
return NULL;
 }
 
-static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
+static struct dentry *autofs4_lookup_expiring(struct dentry *dentry,
+ bool rcu_walk)
 {
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct dentry *parent = dentry->d_parent;
@@ -229,6 +230,11 @@ static struct dentry *autofs4_lookup_expiring(struct 
dentry *dentry)
struct dentry *expiring;
struct qstr *qstr;
 
+   if (rcu_walk) {
+   spin_unlock(>lookup_lock);
+   return ERR_PTR(-ECHILD);
+   }
+
ino = list_entry(p, struct autofs_info, expiring);
expiring = ino->dentry;
 
@@ -264,13 +270,15 @@ next:
return NULL;
 }
 
-static int autofs4_mount_wait(struct dentry *dentry)
+static int autofs4_mount_wait(struct dentry *dentry, bool rcu_walk)
 {
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
int status = 0;
 
if (ino->flags & AUTOFS_INF_PENDING) {
+   if (rcu_walk)
+   return -ECHILD;
DPRINTK("waiting for mount name=%.*s",
dentry->d_name.len, dentry->d_name.name);
status = autofs4_wait(sbi, dentry, NFY_MOUNT);
@@ -280,20 +288,22 @@ static int autofs4_mount_wait(struct dentry *dentry)
return status;
 }
 
-static int do_expire_wait(struct dentry *dentry)
+static int do_expire_wait(struct dentry *dentry, bool rcu_walk)
 {
struct dentry *expiring;
 
-   expiring = autofs4_lookup_expiring(dentry);
+   expiring = autofs4_lookup_expiring(dentry, rcu_walk);
+   if (IS_ERR(expiring))
+   return PTR_ERR(expiring);
if (!expiring)
-   return autofs4_expire_wait(dentry);
+   return autofs4_expire_wait(dentry, rcu_walk);
else {
/*
 * If we are

[PATCH 3/6] autofs4: make "autofs4_can_expire" idempotent.

2014-08-24 Thread NeilBrown

Having a "test" function change the value it is testing can
be confusing, particularly as a future patch will be calling
this function twice.

So move the update for 'last_used' to avoid repeat expiry
to the place where the final determination on what to expire is known.

Reviewed-by: Ian Kent 
Tested-by: Ian Kent 
Signed-off-by: NeilBrown 
---
 fs/autofs4/expire.c |   10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index bee939efca2b..af09dada91bc 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -30,12 +30,6 @@ static inline int autofs4_can_expire(struct dentry *dentry,
/* Too young to die */
if (!timeout || time_after(ino->last_used + timeout, now))
return 0;
-
-   /* update last_used here :-
-  - obviously makes sense if it is in use now
-  - less obviously, prevents rapid-fire expire
-attempts if expire fails the first time */
-   ino->last_used = now;
}
return 1;
 }
@@ -541,6 +535,8 @@ int autofs4_expire_run(struct super_block *sb,
 
spin_lock(>fs_lock);
ino = autofs4_dentry_ino(dentry);
+   /* avoid rapid-fire expire attempts if expiry fails */
+   ino->last_used = now;
ino->flags &= ~AUTOFS_INF_EXPIRING;
complete_all(>expire_complete);
spin_unlock(>fs_lock);
@@ -567,6 +563,8 @@ int autofs4_do_expire_multi(struct super_block *sb, struct 
vfsmount *mnt,
ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
 
spin_lock(>fs_lock);
+   /* avoid rapid-fire expire attempts if expiry fails */
+   ino->last_used = now;
ino->flags &= ~AUTOFS_INF_EXPIRING;
complete_all(>expire_complete);
spin_unlock(>fs_lock);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/6] autofs4: factor should_expire() out of autofs4_expire_indirect.

2014-08-24 Thread NeilBrown

A future patch will potentially call this twice, so make it
separate.

Reviewed-by: Ian Kent 
Tested-by: Ian Kent 
Signed-off-by: NeilBrown 
---
 fs/autofs4/expire.c |  162 ---
 1 file changed, 88 insertions(+), 74 deletions(-)

diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 7e2f22ce6954..bee939efca2b 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -345,6 +345,89 @@ out:
return NULL;
 }
 
+/* Check if 'dentry' should expire, or return a nearby
+ * dentry that is suitable.
+ * If returned dentry is different from arg dentry,
+ * then a dget() reference was taken, else not.
+ */
+static struct dentry *should_expire(struct dentry *dentry,
+   struct vfsmount *mnt,
+   unsigned long timeout,
+   int how)
+{
+   int do_now = how & AUTOFS_EXP_IMMEDIATE;
+   int exp_leaves = how & AUTOFS_EXP_LEAVES;
+   struct autofs_info *ino = autofs4_dentry_ino(dentry);
+   unsigned int ino_count;
+
+   /* No point expiring a pending mount */
+   if (ino->flags & AUTOFS_INF_PENDING)
+   return NULL;
+
+   /*
+* Case 1: (i) indirect mount or top level pseudo direct mount
+* (autofs-4.1).
+* (ii) indirect mount with offset mount, check the "/"
+* offset (autofs-5.0+).
+*/
+   if (d_mountpoint(dentry)) {
+   DPRINTK("checking mountpoint %p %.*s",
+   dentry, (int)dentry->d_name.len, dentry->d_name.name);
+
+   /* Can we umount this guy */
+   if (autofs4_mount_busy(mnt, dentry))
+   return NULL;
+
+   /* Can we expire this guy */
+   if (autofs4_can_expire(dentry, timeout, do_now))
+   return dentry;
+   return NULL;
+   }
+
+   if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) {
+   DPRINTK("checking symlink %p %.*s",
+   dentry, (int)dentry->d_name.len, dentry->d_name.name);
+   /*
+* A symlink can't be "busy" in the usual sense so
+* just check last used for expire timeout.
+*/
+   if (autofs4_can_expire(dentry, timeout, do_now))
+   return dentry;
+   return NULL;
+   }
+
+   if (simple_empty(dentry))
+   return NULL;
+
+   /* Case 2: tree mount, expire iff entire tree is not busy */
+   if (!exp_leaves) {
+   /* Path walk currently on this dentry? */
+   ino_count = atomic_read(>count) + 1;
+   if (d_count(dentry) > ino_count)
+   return NULL;
+
+   if (!autofs4_tree_busy(mnt, dentry, timeout, do_now))
+   return dentry;
+   /*
+* Case 3: pseudo direct mount, expire individual leaves
+* (autofs-4.1).
+*/
+   } else {
+   /* Path walk currently on this dentry? */
+   struct dentry *expired;
+   ino_count = atomic_read(>count) + 1;
+   if (d_count(dentry) > ino_count)
+   return NULL;
+
+   expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
+   if (expired) {
+   if (expired == dentry)
+   dput(dentry);
+   return expired;
+   }
+   }
+   return NULL;
+}
 /*
  * Find an eligible tree to time-out
  * A tree is eligible if :-
@@ -359,11 +442,8 @@ struct dentry *autofs4_expire_indirect(struct super_block 
*sb,
unsigned long timeout;
struct dentry *root = sb->s_root;
struct dentry *dentry;
-   struct dentry *expired = NULL;
-   int do_now = how & AUTOFS_EXP_IMMEDIATE;
-   int exp_leaves = how & AUTOFS_EXP_LEAVES;
+   struct dentry *expired;
struct autofs_info *ino;
-   unsigned int ino_count;
 
if (!root)
return NULL;
@@ -374,78 +454,12 @@ struct dentry *autofs4_expire_indirect(struct super_block 
*sb,
dentry = NULL;
while ((dentry = get_next_positive_subdir(dentry, root))) {
spin_lock(>fs_lock);
-   ino = autofs4_dentry_ino(dentry);
-   /* No point expiring a pending mount */
-   if (ino->flags & AUTOFS_INF_PENDING)
-   goto next;
-
-   /*
-* Case 1: (i) indirect mount or top level pseudo direct mount
-* (autofs-4.1).
-* (ii) indirect mount with offset mount, check the "/"
-* offset (autofs-5.0+).
-*/
-   if (d_mountpoint(dentry)) {
-   DPRINTK("checking mountpoint %p %.*s",
-   dentry,

[PATCH 4/6] autofs4: avoid taking fs_lock during rcu-walk

2014-08-24 Thread NeilBrown

->fs_lock protects AUTOFS_INF_EXPIRING.  We need to be sure
that once the flag is set, no new references beneath the dentry
are taken.  So rcu-walk currently needs to take fs_lock before
checking the flag.  This hurts performance.

Change the expiry to a two-stage process.
First set AUTOFS_INF_NO_RCU which forces any path walk into
ref-walk mode, then drop the lock and call synchronize_rcu().
Once that returns we can be sure no rcu-walk is active beneath
the dentry and we can check reference counts again.

Now during an RCU-walk we can test AUTOFS_INF_EXPIRING without
taking the lock as long as we test AUTOFS_INF_NO_RCU too.
If either is set, we must abort the RCU-walk.
If neither is set, we know that refcounts will be tested again
after we finish the RCU-walk so we are safe to continue.

->fs_lock is still taken in d_manage() to check for a non-trap
directory.  That will be resolved in the next patch.

Reviewed-by: Ian Kent 
Tested-by: Ian Kent 
Signed-off-by: NeilBrown 
---
 fs/autofs4/autofs_i.h |4 
 fs/autofs4/expire.c   |   46 ++
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 2f1032f12d91..8e98cf954bab 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -79,6 +79,10 @@ struct autofs_info {
 };
 
 #define AUTOFS_INF_EXPIRING(1<<0) /* dentry is in the process of expiring 
*/
+#define AUTOFS_INF_NO_RCU  (1<<1) /* the dentry is being considered
+   * for expiry, so RCU_walk is
+   * not permitted
+   */
 #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
 
 struct autofs_wait_queue {
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index af09dada91bc..4d52272952ec 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -327,10 +327,19 @@ struct dentry *autofs4_expire_direct(struct super_block 
*sb,
if (ino->flags & AUTOFS_INF_PENDING)
goto out;
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
-   ino->flags |= AUTOFS_INF_EXPIRING;
-   init_completion(>expire_complete);
+   ino->flags |= AUTOFS_INF_NO_RCU;
spin_unlock(>fs_lock);
-   return root;
+   synchronize_rcu();
+   spin_lock(>fs_lock);
+   if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
+   ino->flags |= AUTOFS_INF_EXPIRING;
+   smp_mb();
+   ino->flags &= ~AUTOFS_INF_NO_RCU;
+   init_completion(>expire_complete);
+   spin_unlock(>fs_lock);
+   return root;
+   }
+   ino->flags &= ~AUTOFS_INF_NO_RCU;
}
 out:
spin_unlock(>fs_lock);
@@ -448,12 +457,29 @@ struct dentry *autofs4_expire_indirect(struct super_block 
*sb,
dentry = NULL;
while ((dentry = get_next_positive_subdir(dentry, root))) {
spin_lock(>fs_lock);
-   expired = should_expire(dentry, mnt, timeout, how);
-   if (expired) {
+   ino = autofs4_dentry_ino(dentry);
+   if (ino->flags & AUTOFS_INF_NO_RCU)
+   expired = NULL;
+   else
+   expired = should_expire(dentry, mnt, timeout, how);
+   if (!expired) {
+   spin_unlock(>fs_lock);
+   continue;
+   }
+   ino = autofs4_dentry_ino(expired);
+   ino->flags |= AUTOFS_INF_NO_RCU;
+   spin_unlock(>fs_lock);
+   synchronize_rcu();
+   spin_lock(>fs_lock);
+   if (should_expire(expired, mnt, timeout, how)) {
if (expired != dentry)
dput(dentry);
goto found;
}
+
+   ino->flags &= ~AUTOFS_INF_NO_RCU;
+   if (expired != dentry)
+   dput(expired);
spin_unlock(>fs_lock);
}
return NULL;
@@ -461,8 +487,9 @@ struct dentry *autofs4_expire_indirect(struct super_block 
*sb,
 found:
DPRINTK("returning %p %.*s",
expired, (int)expired->d_name.len, expired->d_name.name);
-   ino = autofs4_dentry_ino(expired);
ino->flags |= AUTOFS_INF_EXPIRING;
+   smp_mb();
+   ino->flags &= ~AUTOFS_INF_NO_RCU;
init_completion(>expire_complete);
spin_unlock(>fs_lock);
spin_lock(>lookup_lock);
@@ -482,11 +509,14 @@ int autofs4_expire_wait(struct dentry *dentry, int 
rcu_walk)
int status;
 
/* Block on any pending expire */
+   if (!(ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)))
+   return 0;
+   if (rcu_walk)
+   return -ECHILD;
+

[PATCH 6/6] autofs: the documentation I wanted to read

2014-08-24 Thread NeilBrown

This documents autofs from the perspective of what the module actually
supports rather than how automount is expected to use it.

It is formatted using "markdown" and works best with Markdown.pl
(markdown_py doesn't like some constructs).

Copy-edited-by: Randy Dunlap 
Signed-off-by: NeilBrown 
Acked-by: Ian Kent 
---
 Documentation/filesystems/autofs4.txt |  520 +
 1 file changed, 520 insertions(+)
 create mode 100644 Documentation/filesystems/autofs4.txt

diff --git a/Documentation/filesystems/autofs4.txt 
b/Documentation/filesystems/autofs4.txt
new file mode 100644
index ..39d02e19fb62
--- /dev/null
+++ b/Documentation/filesystems/autofs4.txt
@@ -0,0 +1,520 @@
+
+ p { max-width:50em} ol, ul {max-width: 40em}
+
+
+autofs - how it works
+=
+
+Purpose
+---
+
+The goal of autofs is to provide on-demand mounting and race free
+automatic unmounting of various other filesystems.  This provides two
+key advantages:
+
+1. There is no need to delay boot until all filesystems that
+   might be needed are mounted.  Processes that try to access those
+   slow filesystems might be delayed but other processes can
+   continue freely.  This is particularly important for
+   network filesystems (e.g. NFS) or filesystems stored on
+   media with a media-changing robot.
+
+2. The names and locations of filesystems can be stored in
+   a remote database and can change at any time.  The content
+   in that data base at the time of access will be used to provide
+   a target for the access.  The interpretation of names in the
+   filesystem can even be programmatic rather than database-backed,
+   allowing wildcards for example, and can vary based on the user who
+   first accessed a name.
+
+Context
+---
+
+The "autofs4" filesystem module is only one part of an autofs system.
+There also needs to be a user-space program which looks up names
+and mounts filesystems.  This will often be the "automount" program,
+though other tools including "systemd" can make use of "autofs4".
+This document describes only the kernel module and the interactions
+required with any user-space program.  Subsequent text refers to this
+as the "automount daemon" or simply "the daemon".
+
+"autofs4" is a Linux kernel module which provides the "autofs"
+filesystem type.  Several "autofs" filesystems can be mounted and they
+can each be managed separately, or all managed by the same daemon.
+
+Content
+---
+
+An autofs filesystem can contain 3 sorts of objects: directories,
+symbolic links and mount traps.  Mount traps are directories with
+extra properties as described in the next section.
+
+Objects can only be created by the automount daemon: symlinks are
+created with a regular `symlink` system call, while directories and
+mount traps are created with `mkdir`.  The determination of whether a
+directory should be a mount trap or not is quite _ad hoc_, largely for
+historical reasons, and is determined in part by the
+*direct*/*indirect*/*offset* mount options, and the *maxproto* mount option.
+
+If neither the *direct* nor *offset* mount options are given (so the
+mount is considered to be *indirect*), then the root directory is
+always a regular directory, otherwise it is a mount trap when it is
+empty and a regular directory when not empty.  Note that *direct* and
+*offset* are treated identically so a concise summary is that the root
+directory is a mount trap only if the filesystem is mounted *direct*
+and the root is empty.
+
+Directories created in the root directory are mount traps only if the
+filesystem is mounted  *indirect* and they are empty.
+
+Directories further down the tree depend on the *maxproto* mount
+option and particularly whether it is less than five or not.
+When *maxproto* is five, no directories further down the
+tree are ever mount traps, they are always regular directories.  When
+the *maxproto* is four (or three), these directories are mount traps
+precisely when they are empty.
+
+So: non-empty (i.e. non-leaf) directories are never mount traps. Empty
+directories are sometimes mount traps, and sometimes not depending on
+where in the tree they are (root, top level, or lower), the *maxproto*,
+and whether the mount was *indirect* or not.
+
+Mount Traps
+---
+
+A core element of the implementation of autofs is the Mount Traps
+which are provided by the Linux VFS.  Any directory provided by a
+filesystem can be designated as a trap.  This involves two separate
+features that work together to allow autofs to do its job.
+
+**DCACHE_NEED_AUTOMOUNT**
+
+If a dentry has the DCACHE_NEED_AUTOMOUNT flag set (which gets set if
+the inode has S_AUTOMOUNT set, or can be set directly) then it is
+(potentially) a mount trap.  Any access to this directory beyond a
+"`stat`" will (normally) cause the `d_op->d_automount()` dentry operation
+to be called. The task of this method is to find the filesystem that
+should be mounted on the directory and

[PATCH 5/6] autofs4: d_manage() should return -EISDIR when appropriate in rcu-walk mode.

2014-08-24 Thread NeilBrown

In rcu-walk mode we don't *have* to return -EISDIR for non-mount-traps
as we will simply drop into REF-walk and handle DCACHE_NEED_AUTOMOUNT
dentries the slow way.  But it is better if we do when possible.

In 'oz_mode', use the same condition as ref-walk: if not a mountpoint,
then it must be -EISDIR.

In regular mode there are more tests needed.  Most of them can be
performed without taking any spinlocks.
If we find a directory that isn't obviously empty, and isn't mounted
on, we need to call 'simple_empty()' which does take a spinlock.
If this turned out to hurt performance, some other approach could
be found to signal when a directory is known to be empty.

Reviewed-by: Ian Kent 
Tested-by: Ian Kent 
Signed-off-by: NeilBrown 
---
 fs/autofs4/root.c |   26 --
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 2296c8301b66..71e4413d65c8 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -433,8 +433,6 @@ static int autofs4_d_manage(struct dentry *dentry, bool 
rcu_walk)
 
/* The daemon never waits. */
if (autofs4_oz_mode(sbi)) {
-   if (rcu_walk)
-   return 0;
if (!d_mountpoint(dentry))
return -EISDIR;
return 0;
@@ -452,12 +450,28 @@ static int autofs4_d_manage(struct dentry *dentry, bool 
rcu_walk)
if (status)
return status;
 
-   if (rcu_walk)
-   /* it is always safe to return 0 as the worst that
-* will happen is we retry in REF-walk mode.
-* Better than always taking a lock.
+   if (rcu_walk) {
+   /* We don't need fs_lock in rcu_walk mode,
+* just testing 'AUTOFS_INF_NO_RCU' is enough.
+* simple_empty() takes a spinlock, so leave it
+* to last.
+* We only return -EISDIR when certain this isn't
+* a mount-trap.
 */
+   struct inode *inode;
+   if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU))
+   return 0;
+   if (d_mountpoint(dentry))
+   return 0;
+   inode = rcu_dereference(dentry->d_inode);
+   if (inode && S_ISLNK(inode->i_mode))
+   return -EISDIR;
+   if (list_empty(>d_subdirs))
+   return 0;
+   if (!simple_empty(dentry))
+   return -EISDIR;
return 0;
+   }
 
spin_lock(>fs_lock);
/*


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/6] Teach autofs about RCU-walk

2014-08-24 Thread NeilBrown

Hi Andrew,
 this series teaches autofs about RCU-walk so that we don't drop
 straight into REF-walk when we hit an autofs directory, and so that
 we avoid spinlocks as much as possible when performing an RCU-walk.

 This is needed so that the benefits of the recent NFS support for
 RCU-walk are fully available when NFS filesystems are automounted.

 Patches have been carefully reviewed and tested both with test suites
 and in production - thanks a lot to Ian Kent for his support there.

 If they could be queued for 3.18, I would appreciate it.

Thanks,
NeilBrown

---

NeilBrown (6):
  autofs4: allow RCU-walk to walk through autofs4.
  autofs4: factor should_expire() out of autofs4_expire_indirect.
  autofs4: make "autofs4_can_expire" idempotent.
  autofs4: avoid taking fs_lock during rcu-walk
  autofs4: d_manage() should return -EISDIR when appropriate in rcu-walk 
mode.
  autofs: the documentation I wanted to read


 Documentation/filesystems/autofs4.txt |  520 +
 fs/autofs4/autofs_i.h |6 
 fs/autofs4/dev-ioctl.c|2 
 fs/autofs4/expire.c   |  210 -
 fs/autofs4/root.c |   62 +++-
 5 files changed, 698 insertions(+), 102 deletions(-)
 create mode 100644 Documentation/filesystems/autofs4.txt

-- 
Signature

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH V4 2/4] audit: clean simple fsnotify implementation

2014-08-24 Thread Richard Guy Briggs

From: Eric Paris 

This is to be used to audit by executable rules, but audit watches
should be able to share this code eventually.

At the moment the audit watch code is a lot more complex; that code only
creates one fsnotify watch per parent directory.  That 'audit_parent' in
turn has a list of 'audit_watches' which contain the name, ino, dev of
the specific object we care about.  This just creates one fsnotify watch
per object we care about.  So if you watch 100 inodes in /etc this code
will create 100 fsnotify watches on /etc.  The audit_watch code will
instead create 1 fsnotify watch on /etc (the audit_parent) and then 100
individual watches chained from that fsnotify mark.

We should be able to convert the audit_watch code to do one fsnotify
mark per watch and simplify things/remove a whole lot of code.  After
that conversion we should be able to convert the audit_fsnotify code to
support that hierarchy if the optimization is necessary.

RGB: Move the access to the entry for audit_match_signal() to the beginning of
the function in case the entry found is the same one passed in.  This will
enable it to be used by audit_remove_mark_rule().
RGB: Rename several "watch" references to "mark".
RGB: Rename audit_remove_rule() to audit_remove_mark_rule().
RGB: Let audit_free_rule() take care of calling audit_remove_mark().

Signed-off-by: Eric Paris 
Signed-off-by: Richard Guy Briggs 
---
 kernel/Makefile |2 +-
 kernel/audit.h  |   29 ++
 kernel/audit_fsnotify.c |  245 +++
 kernel/auditfilter.c|   10 +-
 4 files changed, 280 insertions(+), 6 deletions(-)
 create mode 100644 kernel/audit_fsnotify.c

diff --git a/kernel/Makefile b/kernel/Makefile
index a1d5715..32617ef 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -61,7 +61,7 @@ obj-$(CONFIG_SMP) += stop_machine.o
 obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
-obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o audit_exe.o
+obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o audit_exe.o audit_fsnotify.o
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_KPROBES) += kprobes.o
diff --git a/kernel/audit.h b/kernel/audit.h
index c975569..1eed1ed 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -56,6 +56,7 @@ enum audit_state {
 
 /* Rule lists */
 struct audit_watch;
+struct audit_fsnotify_mark;
 struct audit_exe;
 struct audit_tree;
 struct audit_chunk;
@@ -267,6 +268,7 @@ struct audit_net {
 extern int selinux_audit_rule_update(void);
 
 extern struct mutex audit_filter_mutex;
+extern int audit_del_rule(struct audit_entry *);
 extern void audit_free_rule_rcu(struct rcu_head *);
 extern struct list_head audit_filter_list[];
 
@@ -282,6 +284,11 @@ extern void audit_remove_watch_rule(struct audit_krule 
*krule);
 extern char *audit_watch_path(struct audit_watch *watch);
 extern int audit_watch_compare(struct audit_watch *watch, unsigned long ino, 
dev_t dev);
 
+struct audit_fsnotify_mark *audit_alloc_mark(char *pathname, int len, struct 
audit_krule *krule);
+char *audit_mark_path(struct audit_fsnotify_mark *mark);
+void audit_remove_mark(struct audit_fsnotify_mark *audit_mark);
+int audit_mark_compare(struct audit_fsnotify_mark *mark, unsigned long ino, 
dev_t dev);
+
 int audit_make_exe_rule(struct audit_krule *krule, char *pathname, int len, 
u32 op);
 void audit_remove_exe_rule(struct audit_krule *krule);
 char *audit_exe_path(struct audit_exe *exe);
@@ -297,6 +304,28 @@ int audit_exe_compare(struct task_struct *tsk, struct 
audit_exe *exe);
 #define audit_watch_path(w) ""
 #define audit_watch_compare(w, i, d) 0
 
+static inline struct audit_fsnotify_mark *audit_alloc_mark(char *pathname, int 
len, struct audit_krule *krule)
+{
+   return ERR_PTR(-EINVAL);
+}
+
+static inline char *audit_mark_path(struct audit_fsnotify_mark *mark)
+{
+   BUG();
+   return "";
+}
+
+static inline void audit_remove_mark(struct audit_fsnotify_mark *audit_mark)
+{
+   BUG();
+}
+
+static inline int audit_mark_compare(struct audit_fsnotify_mark *mark, 
unsigned long ino, dev_t dev)
+{
+   BUG();
+   return 0;
+}
+
 static inline int audit_make_exe_rule(struct audit_krule *krule, char 
*pathname, int len, u32 op)
 {
return -EINVAL;
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
new file mode 100644
index 000..3492b9c
--- /dev/null
+++ b/kernel/audit_fsnotify.c
@@ -0,0 +1,245 @@
+/* audit_fsnotify.c -- tracking inodes
+ *
+ * Copyright 2003-2009 Red Hat, Inc.
+ * Copyright 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright 2005 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is

[PATCH V4 4/4] audit: avoid double copying the audit_exe path string

2014-08-24 Thread Richard Guy Briggs

Make this interface consistent with watch and filter key, avoiding the extra
string copy and simply consume the new string pointer.

Signed-off-by: Richard Guy Briggs 
---
 kernel/audit_exe.c  |5 -
 kernel/audit_fsnotify.c |   12 ++--
 kernel/auditfilter.c|2 +-
 3 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/kernel/audit_exe.c b/kernel/audit_exe.c
index 0c7ee8d..ff6e3d6 100644
--- a/kernel/audit_exe.c
+++ b/kernel/audit_exe.c
@@ -27,10 +27,13 @@ int audit_dup_exe(struct audit_krule *new, struct 
audit_krule *old)
struct audit_fsnotify_mark *audit_mark;
char *pathname;
 
-   pathname = audit_mark_path(old->exe);
+   pathname = kstrdup(audit_mark_path(old->exe), GFP_KERNEL);
+   if (!pathname)
+   return -ENOMEM;
 
audit_mark = audit_alloc_mark(new, pathname, strlen(pathname));
if (IS_ERR(audit_mark))
+   kfree(pathname);
return PTR_ERR(audit_mark);
new->exe = audit_mark;
 
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 704ce46..07b33f7 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -94,7 +94,6 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct 
audit_krule *krule, char *pa
struct dentry *dentry;
struct inode *inode;
unsigned long ino;
-   char *local_pathname;
dev_t dev;
int ret;
 
@@ -115,20 +114,13 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct 
audit_krule *krule, char *pa
ino = dentry->d_inode->i_ino;
}
 
-   audit_mark = ERR_PTR(-ENOMEM);
-   local_pathname = kstrdup(pathname, GFP_KERNEL);
-   if (!local_pathname)
-   goto out;
-
audit_mark = kzalloc(sizeof(*audit_mark), GFP_KERNEL);
-   if (unlikely(!audit_mark)) {
-   kfree(local_pathname);
+   if (unlikely(!audit_mark))
goto out;
-   }
 
fsnotify_init_mark(_mark->mark, audit_free_fsnotify_mark);
audit_mark->mark.mask = AUDIT_FS_EVENTS;
-   audit_mark->path = local_pathname;
+   audit_mark->path = pathname;
audit_mark->ino = ino;
audit_mark->dev = dev;
audit_mark->rule = krule;
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 149588d..ff99749 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -563,8 +563,8 @@ static struct audit_entry *audit_data_to_entry(struct 
audit_rule_data *data,
entry->rule.buflen += f->val;
 
audit_mark = audit_alloc_mark(>rule, str, 
f->val);
-   kfree(str);
if (IS_ERR(audit_mark)) {
+   kfree(str);
err = PTR_ERR(audit_mark);
goto exit_free;
}
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH V4 3/4] audit: convert audit_exe to audit_fsnotify

2014-08-24 Thread Richard Guy Briggs

From: Eric Paris 

Instead of just hard coding the ino and dev of the executable we care
about at the moment the rule is inserted into the kernel, use the new
audit_fsnotify infrastructure.  This means that if the inode in question
is unlinked and creat'd (aka updated) the rule will just continue to
work.

RGB: Put audit_alloc_mark() arguments in same order as watch, tree and inode.

Signed-off-by: Eric Paris 
Signed-off-by: Richard Guy Briggs 
---
 include/linux/audit.h   |2 +-
 kernel/audit.h  |   34 +++---
 kernel/audit_exe.c  |   87 ++
 kernel/audit_fsnotify.c |2 +-
 kernel/auditfilter.c|   15 +---
 5 files changed, 29 insertions(+), 111 deletions(-)

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 227171c..f2a8044 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -59,7 +59,7 @@ struct audit_krule {
struct audit_field  *inode_f; /* quick access to an inode field */
struct audit_watch  *watch; /* associated watch */
struct audit_tree   *tree;  /* associated watched tree */
-   struct audit_exe*exe;
+   struct audit_fsnotify_mark  *exe;
struct list_headrlist;  /* entry in audit_{watch,tree}.rules 
list */
struct list_headlist;   /* for AUDIT_LIST* purposes only */
u64 prio;
diff --git a/kernel/audit.h b/kernel/audit.h
index 1eed1ed..7bf3138 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -57,7 +57,6 @@ enum audit_state {
 /* Rule lists */
 struct audit_watch;
 struct audit_fsnotify_mark;
-struct audit_exe;
 struct audit_tree;
 struct audit_chunk;
 
@@ -284,16 +283,13 @@ extern void audit_remove_watch_rule(struct audit_krule 
*krule);
 extern char *audit_watch_path(struct audit_watch *watch);
 extern int audit_watch_compare(struct audit_watch *watch, unsigned long ino, 
dev_t dev);
 
-struct audit_fsnotify_mark *audit_alloc_mark(char *pathname, int len, struct 
audit_krule *krule);
+struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char 
*pathname, int len);
 char *audit_mark_path(struct audit_fsnotify_mark *mark);
 void audit_remove_mark(struct audit_fsnotify_mark *audit_mark);
 int audit_mark_compare(struct audit_fsnotify_mark *mark, unsigned long ino, 
dev_t dev);
 
-int audit_make_exe_rule(struct audit_krule *krule, char *pathname, int len, 
u32 op);
-void audit_remove_exe_rule(struct audit_krule *krule);
-char *audit_exe_path(struct audit_exe *exe);
 int audit_dup_exe(struct audit_krule *new, struct audit_krule *old);
-int audit_exe_compare(struct task_struct *tsk, struct audit_exe *exe);
+int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark 
*mark);
 
 #else
 #define audit_put_watch(w) {}
@@ -320,36 +316,18 @@ static inline void audit_remove_mark(struct 
audit_fsnotify_mark *audit_mark)
BUG();
 }
 
-static inline int audit_mark_compare(struct audit_fsnotify_mark *mark, 
unsigned long ino, dev_t dev)
+static inline int audit_exe_compare(struct task_struct *tsk, struct 
audit_fsnotify_mark *mark)
 {
BUG();
-   return 0;
-}
-
-static inline int audit_make_exe_rule(struct audit_krule *krule, char 
*pathname, int len, u32 op)
-{
return -EINVAL;
 }
-static inline void audit_remove_exe_rule(struct audit_krule *krule)
-{
-   BUG();
-   return 0;
-}
-static inline char *audit_exe_path(struct audit_exe *exe)
-{
-   BUG();
-   return "";
-}
+
 static inline int audit_dup_exe(struct audit_krule *new, struct audit_krule 
*old)
 {
BUG();
-   return -EINVAL
-}
-static inline int audit_exe_compare(struct task_struct *tsk, struct audit_exe 
*exe)
-{
-   BUG();
-   return 0;
+   return -EINVAL;
 }
+
 #endif /* CONFIG_AUDIT_WATCH */
 
 #ifdef CONFIG_AUDIT_TREE
diff --git a/kernel/audit_exe.c b/kernel/audit_exe.c
index ec3231b..0c7ee8d 100644
--- a/kernel/audit_exe.c
+++ b/kernel/audit_exe.c
@@ -17,93 +17,30 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
 #include "audit.h"
 
-struct audit_exe {
-   char *pathname;
-   unsigned long ino;
-   dev_t dev;
-};
-
-/* Translate a watch string to kernel respresentation. */
-int audit_make_exe_rule(struct audit_krule *krule, char *pathname, int len, 
u32 op)
-{
-   struct audit_exe *exe;
-   struct path path;
-   struct dentry *dentry;
-   unsigned long ino;
-   dev_t dev;
-
-   if (pathname[0] != '/' || pathname[len-1] == '/')
-   return -EINVAL;
-
-   dentry = kern_path_locked(pathname, );
-   if (IS_ERR(dentry))
-   return PTR_ERR(dentry);
-   mutex_unlock(>d_inode->i_mutex);
-
-   if (!dentry->d_inode)
-   return -ENOENT;
-   dev = dentry->d_inode->i_sb->s_dev;
-   ino = dentry->d_inode->i_ino;
-   dput(dentry);
-
-   exe = kmalloc(sizeof(*exe), GFP_KERNEL);
-   if (!exe)
-   return

[PATCH V4 0/4] audit by executable name

2014-08-24 Thread Richard Guy Briggs

This is part of the work by Peter Moody, Eric Paris and me to implement
audit by executable name.

Please see the accompanying userspace patch:
https://www.redhat.com/archives/linux-audit/2014-May/msg00019.html
The userspace interface is not expected to change appreciably unless something
important has been overlooked.  Setting and deleting rules works as expected.

If the path does not exist at rule creation time, it will be re-evaluated every
time there is a change to the parent directory at which point the change in
device and inode will be noted.


Here's a test run:

# /usr/local/sbin/auditctl -a always,exit -F dir=/tmp -F exe=/bin/touch -F 
key=touch_tmp
# /usr/local/sbin/ausearch --start recent -k touch_tmp
time->Mon Jun 30 14:15:06 2014
type=CONFIG_CHANGE msg=audit(1404152106.683:149): auid=0 ses=1 
subj=unconfined_u :unconfined_r:auditctl_t:s0-s0:c0.c1023 op="add rule" 
key="touch_tmp" list=4 res =1

# /usr/local/sbin/auditctl -l
-a always,exit -S all -F dir=/tmp -F exe=/bin/touch -F key=touch_tmp

# touch /tmp/test

# /usr/local/sbin/ausearch --start recent -k touch_tmp
time->Wed Jul  2 12:18:47 2014
type=UNKNOWN[1327] msg=audit(1404317927.319:132): 
proctitle=746F756368002F746D702F74657374
type=PATH msg=audit(1404317927.319:132): item=1 name="/tmp/test" inode=25997 
dev=00:20 mode=0100644 ouid=0 ogid=0 rdev=00:00 
obj=unconfined_u:object_r:user_tmp_t:s0 nametype=CREATE
type=PATH msg=audit(1404317927.319:132): item=0 name="/tmp/" inode=11144 
dev=00:20 mode=041777 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:tmp_t:s0 
nametype=PARENT
type=CWD msg=audit(1404317927.319:132):  cwd="/root"
type=SYSCALL msg=audit(1404317927.319:132): arch=c03e syscall=2 success=yes 
exit=3 a0=7a403dd5 a1=941 a2=1b6 a3=34b65b2c6c items=2 ppid=4321 pid=6436 
auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=ttyS0 ses=1 
comm="touch" exe="/usr/bin/touch" 
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key="touch_tmp"


Revision history:
v4: Re-order and squash down fixups
Fix audit_dup_exe() to copy pathname string before calling 
audit_alloc_mark().

v3: Rationalize and rename some function names and clean up get/put and free 
code.
Rename several "watch" references to "mark".
Rename audit_remove_rule() to audit_remove_mark_rule().
Let audit_free_rule() take care of calling audit_remove_mark().
Put audit_alloc_mark() arguments in same order as watch, tree and inode.
Move the access to the entry for audit_match_signal() to the beginning
 of the function in case the entry found is the same one passed in.
 This will enable it to be used by audit_remove_mark_rule().
https://www.redhat.com/archives/linux-audit/2014-July/msg0.html

v2: Misguided attempt to add in audit_exe similar to watches
https://www.redhat.com/archives/linux-audit/2014-June/msg00066.html

v1.5: eparis' switch to fsnotify
https://www.redhat.com/archives/linux-audit/2014-May/msg00046.html
https://www.redhat.com/archives/linux-audit/2014-May/msg00066.html

v1: Change to path interface instead of inode
https://www.redhat.com/archives/linux-audit/2014-May/msg00017.html

v0: Peter Moody's original patches
https://www.redhat.com/archives/linux-audit/2012-August/msg00033.html


Next step:
Get full-path notify working.


Eric Paris (3):
  audit: implement audit by executable
  audit: clean simple fsnotify implementation
  audit: convert audit_exe to audit_fsnotify

Richard Guy Briggs (1):
  audit: avoid double copying the audit_exe path string

 include/linux/audit.h  |1 +
 include/uapi/linux/audit.h |2 +
 kernel/Makefile|2 +-
 kernel/audit.h |   39 +++
 kernel/audit_exe.c |   49 +
 kernel/audit_fsnotify.c|  237 
 kernel/auditfilter.c   |   51 +-
 kernel/auditsc.c   |   16 +++
 8 files changed, 394 insertions(+), 3 deletions(-)
 create mode 100644 kernel/audit_exe.c
 create mode 100644 kernel/audit_fsnotify.c

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH V4 1/4] audit: implement audit by executable

2014-08-24 Thread Richard Guy Briggs

From: Eric Paris 

This patch implements the ability to filter on the executable.  It is
clearly incomplete!  This patch adds the inode/dev of the executable at
the moment the rule is loaded.  It does not update if the executable is
updated/moved/whatever.  That should be added.  But at this moment, this
patch works.

Based-on-user-interface-by: Richard Guy Briggs 
Cc: r...@redhat.com
Based-on-idea-by: Peter Moody 
Cc: pmo...@google.com
Signed-off-by: Eric Paris 
Signed-off-by: Richard Guy Briggs 
---
 include/linux/audit.h  |1 +
 include/uapi/linux/audit.h |2 +
 kernel/Makefile|2 +-
 kernel/audit.h |   32 +
 kernel/audit_exe.c |  109 
 kernel/auditfilter.c   |   44 ++
 kernel/auditsc.c   |   16 ++
 7 files changed, 205 insertions(+), 1 deletions(-)
 create mode 100644 kernel/audit_exe.c

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 22cfddb..227171c 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -59,6 +59,7 @@ struct audit_krule {
struct audit_field  *inode_f; /* quick access to an inode field */
struct audit_watch  *watch; /* associated watch */
struct audit_tree   *tree;  /* associated watched tree */
+   struct audit_exe*exe;
struct list_headrlist;  /* entry in audit_{watch,tree}.rules 
list */
struct list_headlist;   /* for AUDIT_LIST* purposes only */
u64 prio;
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 573dc36..f4a72b9 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -266,6 +266,8 @@
 #define AUDIT_OBJ_UID  109
 #define AUDIT_OBJ_GID  110
 #define AUDIT_FIELD_COMPARE111
+#define AUDIT_EXE  112
+#define AUDIT_EXE_CHILDREN 113
 
 #define AUDIT_ARG0  200
 #define AUDIT_ARG1  (AUDIT_ARG0+1)
diff --git a/kernel/Makefile b/kernel/Makefile
index bc010ee..a1d5715 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -61,7 +61,7 @@ obj-$(CONFIG_SMP) += stop_machine.o
 obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
-obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o
+obj-$(CONFIG_AUDIT_WATCH) += audit_watch.o audit_exe.o
 obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_GCOV_KERNEL) += gcov/
 obj-$(CONFIG_KPROBES) += kprobes.o
diff --git a/kernel/audit.h b/kernel/audit.h
index 7bb6573..c975569 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -56,6 +56,7 @@ enum audit_state {
 
 /* Rule lists */
 struct audit_watch;
+struct audit_exe;
 struct audit_tree;
 struct audit_chunk;
 
@@ -280,6 +281,13 @@ extern int audit_add_watch(struct audit_krule *krule, 
struct list_head **list);
 extern void audit_remove_watch_rule(struct audit_krule *krule);
 extern char *audit_watch_path(struct audit_watch *watch);
 extern int audit_watch_compare(struct audit_watch *watch, unsigned long ino, 
dev_t dev);
+
+int audit_make_exe_rule(struct audit_krule *krule, char *pathname, int len, 
u32 op);
+void audit_remove_exe_rule(struct audit_krule *krule);
+char *audit_exe_path(struct audit_exe *exe);
+int audit_dup_exe(struct audit_krule *new, struct audit_krule *old);
+int audit_exe_compare(struct task_struct *tsk, struct audit_exe *exe);
+
 #else
 #define audit_put_watch(w) {}
 #define audit_get_watch(w) {}
@@ -289,6 +297,30 @@ extern int audit_watch_compare(struct audit_watch *watch, 
unsigned long ino, dev
 #define audit_watch_path(w) ""
 #define audit_watch_compare(w, i, d) 0
 
+static inline int audit_make_exe_rule(struct audit_krule *krule, char 
*pathname, int len, u32 op)
+{
+   return -EINVAL;
+}
+static inline void audit_remove_exe_rule(struct audit_krule *krule)
+{
+   BUG();
+   return 0;
+}
+static inline char *audit_exe_path(struct audit_exe *exe)
+{
+   BUG();
+   return "";
+}
+static inline int audit_dup_exe(struct audit_krule *new, struct audit_krule 
*old)
+{
+   BUG();
+   return -EINVAL
+}
+static inline int audit_exe_compare(struct task_struct *tsk, struct audit_exe 
*exe)
+{
+   BUG();
+   return 0;
+}
 #endif /* CONFIG_AUDIT_WATCH */
 
 #ifdef CONFIG_AUDIT_TREE
diff --git a/kernel/audit_exe.c b/kernel/audit_exe.c
new file mode 100644
index 000..ec3231b
--- /dev/null
+++ b/kernel/audit_exe.c
@@ -0,0 +1,109 @@
+/* audit_exe.c -- filtering of audit events
+ *
+ * Copyright 2014 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A

Re: [PATCH v4 6/14] input: cyapa: add gen3 trackpad device basic functions support

2014-08-24 Thread Dmitry Torokhov

On Thu, Jul 17, 2014 at 02:53:48PM +0800, Dudley Du wrote:
> Based on the cyapa core, add the gen3 trackpad device's basic functions
> supported, so gen3 trackpad device can work with kernel input system.
> The basic function is absolutely same as previous cyapa driver only
> support gen3 trackpad device.
> TEST=test on Chromebooks.
> 
> Signed-off-by: Dudley Du 
> ---
>  drivers/input/mouse/Makefile |2 +-
>  drivers/input/mouse/cyapa.c  |   96 -
>  drivers/input/mouse/cyapa.h  |1 +
>  drivers/input/mouse/cyapa_gen3.c |  784 
> ++
>  4 files changed, 881 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/input/mouse/cyapa_gen3.c
> 
> diff --git a/drivers/input/mouse/Makefile b/drivers/input/mouse/Makefile
> index 8608eb7..63b42e0 100644
> --- a/drivers/input/mouse/Makefile
> +++ b/drivers/input/mouse/Makefile
> @@ -35,4 +35,4 @@ psmouse-$(CONFIG_MOUSE_PS2_TRACKPOINT)  += trackpoint.o
>  psmouse-$(CONFIG_MOUSE_PS2_TOUCHKIT) += touchkit_ps2.o
>  psmouse-$(CONFIG_MOUSE_PS2_CYPRESS)  += cypress_ps2.o
>  
> -cyapatp-y := cyapa.o
> +cyapatp-y := cyapa.o cyapa_gen3.o
> diff --git a/drivers/input/mouse/cyapa.c b/drivers/input/mouse/cyapa.c
> index ae24b02..5c62503 100644
> --- a/drivers/input/mouse/cyapa.c
> +++ b/drivers/input/mouse/cyapa.c
> @@ -113,6 +113,15 @@ ssize_t cyapa_i2c_write(struct cyapa *cyapa, u8 reg,
>  
>  void cyapa_default_irq_handler(struct cyapa *cyapa)
>  {
> + bool cont;
> +
> + /* Interrupt triggerred by command response in detecting. */
> + cont = true;
> + if (cyapa_gen3_ops.irq_cmd_handler)
> + cont = cyapa_gen3_ops.irq_cmd_handler(cyapa);

Why not simply

cont = cyapa->ops->irq_cmd_handler(cyapa)?


> + if (!cont)
> + return;
> +
>   /*
>* Do redetecting when device states is still unknown and
>* interrupt envent is received from device.
> @@ -252,6 +261,9 @@ static int cyapa_check_is_operational(struct cyapa *cyapa)
>   return ret;
>  
>   switch (cyapa->gen) {
> + case CYAPA_GEN3:
> + cyapa->ops = _gen3_ops;
> + break;
>   default:
>   cyapa->ops = _default_ops;
>   cyapa->gen = CYAPA_GEN_UNKNOWN;
> @@ -314,9 +326,85 @@ out:
>   */
>  static int cyapa_get_state(struct cyapa *cyapa)
>  {
> + int ret;
> + u8 status[BL_STATUS_SIZE];
> + u8 cmd[32];
> + /* The i2c address of gen4 and gen5 trackpad device must be even. */
> + bool even_addr = ((cyapa->client->addr & 0x0001) == 0);
> + bool smbus = false;
> + int retries = 2;
> +
>   cyapa->state = CYAPA_STATE_NO_DEVICE;
>  
> - return -ENODEV;
> + /*
> +  * Get trackpad status by reading 3 registers starting from 0.
> +  * If the device is in the bootloader, this will be BL_HEAD.
> +  * If the device is in operation mode, this will be the DATA regs.
> +  *
> +  */
> + ret = cyapa_i2c_reg_read_block(cyapa, BL_HEAD_OFFSET, BL_STATUS_SIZE,
> +status);
> +
> + /*
> +  * On smbus systems in OP mode, the i2c_reg_read will fail with
> +  * -ETIMEDOUT.  In this case, try again using the smbus equivalent
> +  * command.  This should return a BL_HEAD indicating CYAPA_STATE_OP.
> +  */
> + if (cyapa->smbus && (ret == -ETIMEDOUT || ret == -ENXIO)) {
> + if (!even_addr)
> + ret = cyapa_read_block(cyapa,
> + CYAPA_CMD_BL_STATUS, status);
> + smbus = true;
> + }
> + if (ret != BL_STATUS_SIZE)
> + goto error;
> +
> + /*
> +  * Detect trackpad protocol based on characristic registers and bits.
> +  */
> + do {
> + cyapa->status[REG_OP_STATUS] = status[REG_OP_STATUS];
> + cyapa->status[REG_BL_STATUS] = status[REG_BL_STATUS];
> + cyapa->status[REG_BL_ERROR] = status[REG_BL_ERROR];
> +
> + if (cyapa->gen == CYAPA_GEN_UNKNOWN ||
> + cyapa->gen == CYAPA_GEN3) {
> + ret = cyapa_gen3_ops.state_parse(cyapa,
> + status, BL_STATUS_SIZE);
> + if (ret == 0)
> + goto out_detected;
> + }
> +
> + /*
> +  * Cannot detect communication protocol based on current
> +  * charateristic registers and bits.
> +  * So write error command to do further detection.
> +  * this method only valid on I2C bus.
> +  * for smbus interface, it won't have overwrite issue.
> +  */
> + if (!smbus) {
> + cmd[0] = 0x00;
> + cmd[1] = 0x00;
> + ret = cyapa_i2c_write(cyapa, 0, 2, cmd);
> + if (ret)
> + goto error;
> +
> + msleep(50);
> +
>

Re: [PATCH v4 5/14] input: cyapa: add read firmware image and raw data interfaces in debugfs system

2014-08-24 Thread Dmitry Torokhov

Hi Dudley,

On Thu, Jul 17, 2014 at 02:52:36PM +0800, Dudley Du wrote:
> Add read_fw and raw_data debugfs interfaces for easier issues location
> and collection when report by user.
> TEST=test on Chromebooks.
> 
> Signed-off-by: Dudley Du 
> ---
>  drivers/input/mouse/cyapa.c |  219 
> +++
>  1 file changed, 219 insertions(+)
> 
> diff --git a/drivers/input/mouse/cyapa.c b/drivers/input/mouse/cyapa.c
> index 53c9d59..ae24b02 100644
> --- a/drivers/input/mouse/cyapa.c
> +++ b/drivers/input/mouse/cyapa.c
> @@ -39,6 +39,8 @@
>  
>  const char unique_str[] = "CYTRA";
>  
> +/* Global root node of the cyapa debugfs directory. */
> +static struct dentry *cyapa_debugfs_root;
>  
>  
>  ssize_t cyapa_i2c_reg_read_block(struct cyapa *cyapa, u8 reg, size_t len,
> @@ -461,6 +463,205 @@ done:
>  }
>  
>  /*
> + **
> + * debugfs interface
> + **
> +*/
> +static int cyapa_debugfs_open(struct inode *inode, struct file *file)
> +{
> + struct cyapa *cyapa = inode->i_private;
> + int ret;
> +
> + if (!cyapa)
> + return -ENODEV;
> +
> + ret = mutex_lock_interruptible(>debugfs_mutex);
> + if (ret)
> + return ret;
> +
> + if (!kobject_get(>client->dev.kobj)) {

Why not get_device() here and elsewhere?

> + ret = -ENODEV;
> + goto out;
> + }
> +
> + file->private_data = cyapa;
> +
> + if (cyapa->fw_image) {
> + ret = 0;
> + goto out;
> + }
> +
> + mutex_lock(>state_sync_lock);
> + /*
> +  * If firmware hasn't been read yet, read it all in one pass.
> +  * Subsequent opens will reuse the data in this same buffer.
> +  */
> + if (cyapa->ops->read_fw)
> + ret = cyapa->ops->read_fw(cyapa);
> + else
> + ret = -EPERM;
> + mutex_unlock(>state_sync_lock);
> +
> + /* Redetect trackpad device states. */
> + cyapa_detect_async(cyapa, 0);
> +
> +out:
> + mutex_unlock(>debugfs_mutex);
> + return ret;
> +}
> +
> +static int cyapa_debugfs_release(struct inode *inode, struct file *file)
> +{
> + struct cyapa *cyapa = file->private_data;
> + int ret;
> +
> + if (!cyapa)
> + return 0;
> +
> + ret = mutex_lock_interruptible(>debugfs_mutex);
> + if (ret)
> + return ret;
> + file->private_data = NULL;
> + kobject_put(>client->dev.kobj);
> + mutex_unlock(>debugfs_mutex);
> +
> + return 0;
> +}
> +
> +/* Return some bytes from the buffered firmware image, starting from *ppos */
> +static ssize_t cyapa_debugfs_read_fw(struct file *file, char __user *buffer,
> +  size_t count, loff_t *ppos)
> +{
> + struct cyapa *cyapa = file->private_data;
> +
> + if (!cyapa->fw_image)
> + return -EINVAL;
> +
> + if (*ppos >= cyapa->fw_image_size)
> + return 0;
> +
> + if (count + *ppos > cyapa->fw_image_size)
> + count = cyapa->fw_image_size - *ppos;
> +
> + if (copy_to_user(buffer, >fw_image[*ppos], count))
> + return -EFAULT;
> +
> + *ppos += count;
> + return count;
> +}
> +
> +static const struct file_operations cyapa_read_fw_fops = {
> + .open = cyapa_debugfs_open,
> + .release = cyapa_debugfs_release,
> + .read = cyapa_debugfs_read_fw
> +};
> +
> +static int cyapa_debugfs_raw_data_open(struct inode *inode, struct file 
> *file)
> +{
> + struct cyapa *cyapa = inode->i_private;
> + int ret;
> +
> + if (!cyapa)
> + return -ENODEV;
> +
> + /* Start to be supported after Gen5 trackpad devices. */
> + if (cyapa->gen < CYAPA_GEN5)
> + return -ENOTSUPP;
> +
> + ret = mutex_lock_interruptible(>debugfs_mutex);
> + if (ret)
> + return ret;
> +
> + if (!kobject_get(>client->dev.kobj)) {
> + ret = -ENODEV;
> + goto out;
> + }
> +
> + file->private_data = cyapa;
> +
> + mutex_lock(>state_sync_lock);
> + if (cyapa->ops->read_raw_data)
> + ret = cyapa->ops->read_raw_data(cyapa);
> + else
> + ret = -EPERM;
> + mutex_unlock(>state_sync_lock);
> +out:
> + mutex_unlock(>debugfs_mutex);
> + return ret;
> +}
> +
> +static int cyapa_debugfs_raw_data_release(struct inode *inode,
> + struct file *file)
> +{
> + struct cyapa *cyapa = file->private_data;
> + int ret;
> +
> + if (!cyapa)
> + return 0;
> +
> + ret = mutex_lock_interruptible(>debugfs_mutex);
> + if (ret)
> + return ret;
> + file->private_data = NULL;
> + kobject_put(>client->dev.kobj);
> + mutex_unlock(>debugfs_mutex);
> +
> + return 0;
> +}
> +
> +/* Always return the sensors' latest raw data from trackpad device. */
> +static ssize_t cyapa_debugfs_read_raw_data(struct file

Re: [PATCH 09/11 v4] coresight: adding support for beagle and beagleXM

2014-08-24 Thread Marcin Jabrzyk

Hi,

W dniu 20.08.2014 o 19:03, mathieu.poir...@linaro.org pisze:
> From: Mathieu Poirier 
>
> Currently supporting ETM and ETB.  Support for TPIU
> and SDTI are yet to be added.
Did you try running the drivers on a board, or is there any special
preparation needed?
I have a BeagleBoard-xM Rev. C, applied your patches and enabled the
functions in menuconfig.
But on dmesg I see that:

[0.685028] of_amba_device_create(): amba_device_add() failed (-19)
for /etb@5401b000
[0.685119] of_amba_device_create(): amba_device_add() failed (-19)
for /etm@5401

There are no coresight-related nodes in /sys/kernel/debug/.

Best regards,
Marcin

>
> Signed-off-by: Mathieu Poirier 
> ---
>  arch/arm/boot/dts/omap3-beagle-xm.dts | 28 
>  arch/arm/boot/dts/omap3-beagle.dts| 28 
>  2 files changed, 56 insertions(+)
>
> diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts 
> b/arch/arm/boot/dts/omap3-beagle-xm.dts
> index 1becefc..eec73d8 100644
> --- a/arch/arm/boot/dts/omap3-beagle-xm.dts
> +++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
> @@ -145,6 +145,34 @@
>   };
>   };
>   };
> +
> + etb@5401b000 {
> + compatible = "arm,coresight-etb10", "arm,primecell";
> + reg = <0x5401b000 0x1000>;
> +
> + coresight-default-sink;
> + clocks = <_src_ck>;
> + clock-names = "apb_pclk";
> + port {
> + etb_in: endpoint {
> + slave-mode;
> + remote-endpoint = <_out>;
> + };
> + };
> + };
> +
> + etm@5401 {
> + compatible = "arm,coresight-etm3x", "arm,primecell";
> + reg = <0x5401 0x1000>;
> +
> + clocks = <_src_ck>;
> + clock-names = "apb_pclk";
> + port {
> + etm_out: endpoint {
> + remote-endpoint = <_in>;
> + };
> + };
> + };
>  };
>  
>  _pmx_wkup {
> diff --git a/arch/arm/boot/dts/omap3-beagle.dts 
> b/arch/arm/boot/dts/omap3-beagle.dts
> index 3c3e6da..a151daf 100644
> --- a/arch/arm/boot/dts/omap3-beagle.dts
> +++ b/arch/arm/boot/dts/omap3-beagle.dts
> @@ -140,6 +140,34 @@
>   };
>   };
>   };
> +
> + etb@54000 {
> + compatible = "arm,coresight-etb10", "arm,primecell";
> + reg = <0x5401b000 0x1000>;
> +
> + coresight-default-sink;
> + clocks = <_src_ck>;
> + clock-names = "apb_pclk";
> + port {
> + etb_in: endpoint {
> + slave-mode;
> + remote-endpoint = <_out>;
> + };
> + };
> + };
> +
> + etm@5401 {
> + compatible = "arm,coresight-etm3x", "arm,primecell";
> + reg = <0x5401 0x1000>;
> +
> + clocks = <_src_ck>;
> + clock-names = "apb_pclk";
> + port {
> + etm_out: endpoint {
> + remote-endpoint = <_in>;
> + };
> + };
> + };
>  };
>  
>  _pmx_wkup {

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[patch] mm: remove noisy remainder of the scan_unevictable interface

2014-08-24 Thread Johannes Weiner

The deprecation warnings for the scan_unevictable interface are triggered
by scripts doing `sysctl -a | grep something else'.  This is annoying
and not helpful.

The interface has been defunct since 264e56d8247e ("mm: disable user
interface to manually rescue unevictable pages"), which was in 2011,
and there haven't been any reports of usecases for it, only reports
that the deprecation warnings are annoying.  It's unlikely that anybody
is using this interface specifically at this point, so remove it.

Signed-off-by: Johannes Weiner 
---
 Documentation/ABI/stable/sysfs-devices-node |  8 
 drivers/base/node.c |  3 --
 include/linux/swap.h| 16 
 kernel/sysctl.c |  7 
 mm/vmscan.c | 63 -
 5 files changed, 97 deletions(-)

diff --git a/Documentation/ABI/stable/sysfs-devices-node 
b/Documentation/ABI/stable/sysfs-devices-node
index ce259c13c36a..5b2d0f08867c 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -85,14 +85,6 @@ Description:
will be compacted. When it completes, memory will be freed
into blocks which have as many contiguous pages as possible
 
-What:  /sys/devices/system/node/nodeX/scan_unevictable_pages
-Date:  October 2008
-Contact:   Lee Schermerhorn 
-Description:
-   When set, it triggers scanning the node's unevictable lists
-   and move any pages that have become evictable onto the 
respective
-   zone's inactive list. See mm/vmscan.c
-
 What:  /sys/devices/system/node/nodeX/hugepages/hugepages-/
 Date:  December 2009
 Contact:   Lee Schermerhorn 
diff --git a/drivers/base/node.c b/drivers/base/node.c
index c6d3ae05f1ca..52ed9f64bf9c 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -289,8 +289,6 @@ static int register_node(struct node *node, int num, struct 
node *parent)
device_create_file(>dev, _attr_distance);
device_create_file(>dev, _attr_vmstat);
 
-   scan_unevictable_register_node(node);
-
hugetlb_register_node(node);
 
compaction_register_node(node);
@@ -314,7 +312,6 @@ void unregister_node(struct node *node)
device_remove_file(>dev, _attr_distance);
device_remove_file(>dev, _attr_vmstat);
 
-   scan_unevictable_unregister_node(node);
hugetlb_unregister_node(node);  /* no-op, if memoryless node */
 
device_unregister(>dev);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index f94614a2668a..37a585beef5c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -356,22 +356,6 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, 
unsigned int order)
 extern int page_evictable(struct page *page);
 extern void check_move_unevictable_pages(struct page **, int nr_pages);
 
-extern unsigned long scan_unevictable_pages;
-extern int scan_unevictable_handler(struct ctl_table *, int,
-   void __user *, size_t *, loff_t *);
-#ifdef CONFIG_NUMA
-extern int scan_unevictable_register_node(struct node *node);
-extern void scan_unevictable_unregister_node(struct node *node);
-#else
-static inline int scan_unevictable_register_node(struct node *node)
-{
-   return 0;
-}
-static inline void scan_unevictable_unregister_node(struct node *node)
-{
-}
-#endif
-
 extern int kswapd_run(int nid);
 extern void kswapd_stop(int nid);
 #ifdef CONFIG_MEMCG
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 75875a741b5e..91180987e40e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1460,13 +1460,6 @@ static struct ctl_table vm_table[] = {
.extra2 = ,
},
 #endif
-   {
-   .procname   = "scan_unevictable_pages",
-   .data   = _unevictable_pages,
-   .maxlen = sizeof(scan_unevictable_pages),
-   .mode   = 0644,
-   .proc_handler   = scan_unevictable_handler,
-   },
 #ifdef CONFIG_MEMORY_FAILURE
{
.procname   = "memory_failure_early_kill",
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f1609423821b..d40b8ce3fb0b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3792,66 +3792,3 @@ void check_move_unevictable_pages(struct page **pages, 
int nr_pages)
}
 }
 #endif /* CONFIG_SHMEM */
-
-static void warn_scan_unevictable_pages(void)
-{
-   printk_once(KERN_WARNING
-   "%s: The scan_unevictable_pages sysctl/node-interface has 
been "
-   "disabled for lack of a legitimate use case.  If you have "
-   "one, please send an email to linux...@kvack.org.\n",
-   current->comm);
-}
-
-/*
- * scan_unevictable_pages [vm] sysctl handler.  On demand re-scan of
- * all nodes' unevictable lists for

Re: [PATCH net-next 4/4] r8152: support firmware files

2014-08-24 Thread Francois Romieu

Hayes Wang  :
> diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
> index 937d132..63542cc 100644
> --- a/drivers/net/usb/r8152.c
> +++ b/drivers/net/usb/r8152.c
[...]
> +static void rtl_request_firmware(struct r8152 *tp)
> +{
> + char *fw_name = NULL;
> +
> + if (tp->rtl_fw.fw)
> + goto out_request;
> +
> + switch (tp->version) {
> + case RTL_VER_01:
> + fw_name = "rtl_nic/rtl8152-1.fw";
> + break;
> + case RTL_VER_02:
> + fw_name = "rtl_nic/rtl8152-2.fw";
> + break;
> + case RTL_VER_03:
> + fw_name = "rtl_nic/rtl8153-1.fw";
> + break;
> + case RTL_VER_04:
> + fw_name = "rtl_nic/rtl8153-2.fw";
> + break;
> + case RTL_VER_05:
> + fw_name = "rtl_nic/rtl8153-3.fw";
> + break;

The driver should use MODULE_FIRMWARE() for these files.

-- 
Ueimor
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] drm/exynos/fbdev: set smem_len for fbdev

2014-08-24 Thread Günther Noack


This patch works on my hardware.  Xorg starts up fine with fbdev now.
Thanks for the fix!

--Günther

Daniel Kurtz  writes:

> Commit [0] stopped setting fix.smem_start and fix.smem_len when creating
> the fbdev.
>
> [0] 2f1eab8d8ab59e799f7d51d62410b398607a7bc3
>   drm/exynos/fbdev: don't set fix.smem/mmio_{start,len}
>
> However, smem_len is used by some userland applications to calculate the
> size for mmap.  In particular, it is used by xf86-video-fbdev:
>
> http://cgit.freedesktop.org/xorg/xserver/tree/hw/xfree86/fbdevhw/fbdevhw.c?id=xorg-server-1.15.99.903#n571
>
> So, let's restore setting the smem_len to unbreak things for these users.
>
> Note: we are still leaving smem_start set to 0.
>
> Reported-by: Siarhei Siamashka 
> Reported-by: Günther Noack 
> Signed-off-by: Daniel Kurtz 
> ---
> I am able to compile test this change, but would appreciate help testing it
> on a system that uses xf86-video-fbdev.
>
> Thanks!
>
>  drivers/gpu/drm/exynos/exynos_drm_fbdev.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c 
> b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
> index 5a7cd8b..f865736 100644
> --- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
> +++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
> @@ -125,6 +125,7 @@ static int exynos_drm_fbdev_update(struct drm_fb_helper 
> *helper,
>  
>   fbi->screen_base = buffer->kvaddr;
>   fbi->screen_size = size;
> + fbi->fix.smem_len = size;
>  
>   return 0;
>  }
> -- 
> 2.1.0.rc2.206.gedb03e5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

scsi logging future directions, was Re: [RFC PATCH -logging 00/10] scsi/constants: Output continuous error messages on trace

2014-08-24 Thread Christoph Hellwig

On Fri, Aug 22, 2014 at 12:39:59AM +, Elliott, Robert (Server Storage) 
wrote:
> If you trigger hundreds of errors (e.g., hot remove a device
> during heavy IO), then all the prints to the linux serial console
> bog down the system, causing timeouts in commands to other
> devices and soft lockups for applications.
> 
> Some changes that would help are:
> 1. Put them under SCSI logging level control
> 2. Use printk_ratelimited so an excessive number are trimmed
> 
> Would you like to include something like this in your
> patch set?

I think we should come to an agreement where we want to go with scsi
logging first before doing various smaller adjustments.  (Although your
example is one that's urgent enough that I'd like to put it in ASAP,
I had issues with it a few times).

I had a chat with Martin at Linuxcon about these issues, and we were
both in favor of getting rid of the old scsi logging mechanisms and
instead replacing them with an extended version of the scsi tracepoints that
cover all places, and dump all data from the old logging mechanism
that people find useful.

In a few places we'd still want to log normal dev_printk style errors,
and the I/O completion is one of them, even if they really need to be
ratelimited and condensed.

If someone has arguments in favour of keeping the old logging code
I'd love to hear them, but in practice the traceevent code has huge
benefits:

 - almost zero overhead if disabled
 - can easily be used without any tools through configs, but can be used
   even better with tools like trace-cmd or perf
 - allows both fine and coarse grained selections of events to trace
 - allows to capture statistics on each trace point without even enabling the
   output
 - doesn't have any of the console lockup problems.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] [v3] warn on performance-impacting configs aka. TAINT_PERFORMANCE

2014-08-24 Thread Dave Hansen

On 08/24/2014 07:49 AM, Ingo Molnar wrote:
 > >> + buf_left = buf_len;
 > >> + for (i = 0; i < ARRAY_SIZE(perfomance_killing_configs); i++) {
 > >> + buf_written += snprintf(buf + buf_written, buf_left,
 > >> + "%s%s\n", config_prefix,
 > >> + perfomance_killing_configs[i]);
 > >> + buf_left = buf_len - buf_written;
...
>>> > > Also, do you want to check buf_left and break out early from 
>>> > > the loop if it goes non-positive?
>> > 
>> > You're slowly inflating my patch for no practical gain. :)
> AFAICS it's a potential memory corruption and security bug, 
> should the array ever grow large enough to overflow the passed
> in buffer size.

Let's say there is 1 "buf_left" and I attempt a 100-byte snprintf().
Won't snprintf() return 1, and buf_written will then equal buf_len?
buf_left=0 at that point, and will get passed in to the next snprintf()
as the buffer length.  I'm expecting snprintf() to just return 0 when it
gets a 0 for its 'size'.

Exhausting the buffer will, at worst, mean a bunch of useless calls to
snprintf() that do nothing, but I don't think it will run over the end
of the buffer.

Or am I missing something?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] arm64/crypto: remove redundant update of data

2014-08-24 Thread Colin King

From: Colin Ian King 

Originally found by cppcheck:

[arch/arm64/crypto/sha2-ce-glue.c:153]: (warning) Assignment of
  function parameter has no effect outside the function. Did you
  forget dereferencing it?

Updating data by blocks * SHA256_BLOCK_SIZE at the end of
sha2_finup is redundant code and can be removed.

Signed-off-by: Colin Ian King 
---
 arch/arm64/crypto/sha2-ce-glue.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index c294e67..ae67e88 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -150,7 +150,6 @@ static void sha2_finup(struct shash_desc *desc, const u8 
*data,
kernel_neon_begin_partial(28);
sha2_ce_transform(blocks, data, sctx->state, NULL, len);
kernel_neon_end();
-   data += blocks * SHA256_BLOCK_SIZE;
 }
 
 static int sha224_finup(struct shash_desc *desc, const u8 *data,
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH V4 1/8] namespaces: assign each namespace instance a serial number

2014-08-24 Thread Richard Guy Briggs

On 14/08/23, Eric W. Biederman wrote:
> Richard Guy Briggs  writes:
> 
> > Generate and assign a serial number per namespace instance since boot.
> >
> > Use a serial number per namespace (unique across one boot of one kernel)
> > instead of the inode number (which is claimed to have had the right to 
> > change
> > reserved and is not necessarily unique if there is more than one proc fs) to
> > uniquely identify it per kernel boot.
> 
> This approach is just broken.
> 
> For this to work with migration (aka criu) you need to implement a
> namespace of namespaces.  You haven't done this, and therefore
> such an interface will break existing userspace.
> 
> Inside of audit I can understand not caring about these issues,
> but you go forward and expose these serial numbers in proc,
> and generally make this infrastructure available to others.
> 
> The deep issue with migration is that we move tasks from one machine
> to another and on the destination machine we need to have all of the
> same global identifiers for software to function properly.
> 
> My weasel words around the proc inode numbers are to preserve room for us
> to be able to restore those ids if it ever becomes relevant for
> migration.

What do you do if the inode number is already in use on the target host?

> That is the proc inode numbers (technically) live in a pid namespace,
> (aka a mount of proc).  So depending on the pid namespace you are in
> or the mount of proc you look in the numbers could change.
> 
> Qualifications like that must exist to have a prayer of ever supporting
> process migration in the crazy corner cases where people start caring
> about inode numbers.
> 
> We currently don't and inode numbers for a namespace will never change
> after a namespace is created.  So I think you really are ok using the
> proc inode numbers.  I am happy declaring by fiat that the inode numbers
> that audit uses are the numbers connected to the initial pid namespace.

But once a namespace/container is migrated, it is a different audit that
is looking at it (unless we create an audit manager or entity that
functions at the level of a container manager), so audit should not care.

> At a fairly basic level anything that is used to identify namespaces for
> any general purpose use needs to have most if not all of the same
> properties of the proc inode numbers.  The most important of which is
> being tied to some context/namespace so there is a ability if we ever
> need it to migrate those numbers from one machine to another.

Sooo...  does it make any sense to have those inode or serial numbers be
blank inside the namespace/container itself, but only visible to its
manager outside the container (unless it is the initial namespace)?

> Eric
> 
> > diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> > index 8e78110..93cb380 100644
> > --- a/kernel/nsproxy.c
> > +++ b/kernel/nsproxy.c
> > @@ -41,6 +41,23 @@ struct nsproxy init_nsproxy = {
> >  #endif
> >  };
> >  
> > +/**
> > + * ns_serial - compute a serial number for the namespace
> > + *
> > + * Compute a serial number for the namespace to uniquely identify it in
> > + * audit records.
> > + */
> > +long long ns_serial(void)
> > +{
> > +   static atomic64_t serial = ATOMIC_INIT(4); /* reserved for IPC, UTS, 
> > user, PID */
> > +   long long ret;
> > +
> > +   ret = atomic64_add_return(1, );
> > +   BUG_ON(!ret);
> > +
> > +   return ret;
> > +}
> > +
> >  static inline struct nsproxy *create_nsproxy(void)
> >  {
> > struct nsproxy *nsproxy;
> 
> --
> Linux-audit mailing list
> linux-au...@redhat.com
> https://www.redhat.com/mailman/listinfo/linux-audit

- RGB

--
Richard Guy Briggs 
Senior Software Engineer, Kernel Security, AMER ENG Base Operating Systems, Red 
Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635, Alt: +1.613.693.0684x3545
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 net-next 02/29] net: filter: add "load 64-bit immediate" eBPF instruction

2014-08-24 Thread Alexei Starovoitov

add BPF_LD_IMM64 instruction to load 64-bit immediate value into a register.
All previous instructions were 8-byte. This is first 16-byte instruction.
Two consecutive 'struct bpf_insn' blocks are interpreted as single instruction:
insn[0].code = BPF_LD | BPF_DW | BPF_IMM
insn[0].dst_reg = destination register
insn[0].imm = lower 32-bit
insn[1].code = 0
insn[1].imm = upper 32-bit
All unused fields must be zero.

Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM
which loads 32-bit immediate value into a register.

x64 JITs it as single 'movabsq %rax, imm64'
arm64 may JIT as sequence of four 'movk x0, #imm16, lsl #shift' insn

Note that old eBPF programs are binary compatible with new interpreter.

In the following patches this instruction is used to store eBPF map pointers:
 BPF_LD_IMM64(R1, const_imm_map_ptr)
 BPF_CALL(BPF_FUNC_map_lookup_elem)
and verifier is introduced to check validity of the programs.

Later LLVM compiler is using this insn as generic load of 64-bit immediate
constant and as a load of map pointer with relocation.

Signed-off-by: Alexei Starovoitov 
---
 Documentation/networking/filter.txt |8 +++-
 arch/x86/net/bpf_jit_comp.c |   17 +
 include/linux/filter.h  |   18 ++
 kernel/bpf/core.c   |5 +
 lib/test_bpf.c  |   21 +
 5 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/filter.txt 
b/Documentation/networking/filter.txt
index c48a9704bda8..81916ab5d96f 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -951,7 +951,7 @@ Size modifier is one of ...
 
 Mode modifier is one of:
 
-  BPF_IMM  0x00  /* classic BPF only, reserved in eBPF */
+  BPF_IMM  0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
   BPF_ABS  0x20
   BPF_IND  0x40
   BPF_MEM  0x60
@@ -995,6 +995,12 @@ BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + 
off16) += src_reg
 Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and
 2 byte atomic increments are not supported.
 
+eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists
+of two consecutive 'struct bpf_insn' 8-byte blocks and interpreted as single
+instruction that loads 64-bit immediate value into a dst_reg.
+Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads
+32-bit immediate value into a register.
+
 Testing
 ---
 
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b08a98c59530..98837147ee57 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -393,6 +393,23 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
break;
 
+   case BPF_LD | BPF_IMM | BPF_DW:
+   if (insn[1].code != 0 || insn[1].src_reg != 0 ||
+   insn[1].dst_reg != 0 || insn[1].off != 0) {
+   /* verifier must catch invalid insns */
+   pr_err("invalid BPF_LD_IMM64 insn\n");
+   return -EINVAL;
+   }
+
+   /* movabsq %rax, imm64 */
+   EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
+   EMIT(insn[0].imm, 4);
+   EMIT(insn[1].imm, 4);
+
+   insn++;
+   i++;
+   break;
+
/* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
case BPF_ALU | BPF_MOD | BPF_X:
case BPF_ALU | BPF_DIV | BPF_X:
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a5227ab8ccb1..f3262b598262 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -161,6 +161,24 @@ enum {
.off   = 0, \
.imm   = IMM })
 
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+#define BPF_LD_IMM64(DST, IMM) \
+   BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM)\
+   ((struct bpf_insn) {\
+   .code  = BPF_LD | BPF_DW | BPF_IMM, \
+   .dst_reg = DST, \
+   .src_reg = SRC, \
+   .off   = 0, \
+   .imm   = (__u32) (IMM) }),  \
+   ((struct bpf_insn) {\
+   .code  = 0, /* zero is reserved opcode */   \
+   .dst_reg = 0,   \
+   .src_reg = 0,   \
+   .off   =

[PATCH v5 net-next 01/29] bpf: x86: add missing 'shift by register' instructions to x64 eBPF JIT

2014-08-24 Thread Alexei Starovoitov

'shift by register' operations are supported by eBPF interpreter, but were
accidentally left out of x64 JIT compiler. Fix it and add a testcase.

Signed-off-by: Alexei Starovoitov 
---
 arch/x86/net/bpf_jit_comp.c |   42 ++
 lib/test_bpf.c  |   38 ++
 2 files changed, 80 insertions(+)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 5c8cb8043c5a..b08a98c59530 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -515,6 +515,48 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, 
u8 *image,
EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
break;
 
+   case BPF_ALU | BPF_LSH | BPF_X:
+   case BPF_ALU | BPF_RSH | BPF_X:
+   case BPF_ALU | BPF_ARSH | BPF_X:
+   case BPF_ALU64 | BPF_LSH | BPF_X:
+   case BPF_ALU64 | BPF_RSH | BPF_X:
+   case BPF_ALU64 | BPF_ARSH | BPF_X:
+
+   /* check for bad case when dst_reg == rcx */
+   if (dst_reg == BPF_REG_4) {
+   /* mov r11, dst_reg */
+   EMIT_mov(AUX_REG, dst_reg);
+   dst_reg = AUX_REG;
+   }
+
+   if (src_reg != BPF_REG_4) { /* common case */
+   EMIT1(0x51); /* push rcx */
+
+   /* mov rcx, src_reg */
+   EMIT_mov(BPF_REG_4, src_reg);
+   }
+
+   /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */
+   if (BPF_CLASS(insn->code) == BPF_ALU64)
+   EMIT1(add_1mod(0x48, dst_reg));
+   else if (is_ereg(dst_reg))
+   EMIT1(add_1mod(0x40, dst_reg));
+
+   switch (BPF_OP(insn->code)) {
+   case BPF_LSH: b3 = 0xE0; break;
+   case BPF_RSH: b3 = 0xE8; break;
+   case BPF_ARSH: b3 = 0xF8; break;
+   }
+   EMIT2(0xD3, add_1reg(b3, dst_reg));
+
+   if (src_reg != BPF_REG_4)
+   EMIT1(0x59); /* pop rcx */
+
+   if (insn->dst_reg == BPF_REG_4)
+   /* mov dst_reg, r11 */
+   EMIT_mov(insn->dst_reg, AUX_REG);
+   break;
+
case BPF_ALU | BPF_END | BPF_FROM_BE:
switch (imm32) {
case 16:
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 89e0345733bd..8c66c6aace04 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1342,6 +1342,44 @@ static struct bpf_test tests[] = {
{ { 0, -1 } }
},
{
+   "INT: shifts by register",
+   .u.insns_int = {
+   BPF_MOV64_IMM(R0, -1234),
+   BPF_MOV64_IMM(R1, 1),
+   BPF_ALU32_REG(BPF_RSH, R0, R1),
+   BPF_JMP_IMM(BPF_JEQ, R0, 0x7d97, 1),
+   BPF_EXIT_INSN(),
+   BPF_MOV64_IMM(R2, 1),
+   BPF_ALU64_REG(BPF_LSH, R0, R2),
+   BPF_MOV32_IMM(R4, -1234),
+   BPF_JMP_REG(BPF_JEQ, R0, R4, 1),
+   BPF_EXIT_INSN(),
+   BPF_ALU64_IMM(BPF_AND, R4, 63),
+   BPF_ALU64_REG(BPF_LSH, R0, R4), /* R0 <= 46 */
+   BPF_MOV64_IMM(R3, 47),
+   BPF_ALU64_REG(BPF_ARSH, R0, R3),
+   BPF_JMP_IMM(BPF_JEQ, R0, -617, 1),
+   BPF_EXIT_INSN(),
+   BPF_MOV64_IMM(R2, 1),
+   BPF_ALU64_REG(BPF_LSH, R4, R2), /* R4 = 46 << 1 */
+   BPF_JMP_IMM(BPF_JEQ, R4, 92, 1),
+   BPF_EXIT_INSN(),
+   BPF_MOV64_IMM(R4, 4),
+   BPF_ALU64_REG(BPF_LSH, R4, R4), /* R4 = 4 << 4 */
+   BPF_JMP_IMM(BPF_JEQ, R4, 64, 1),
+   BPF_EXIT_INSN(),
+   BPF_MOV64_IMM(R4, 5),
+   BPF_ALU32_REG(BPF_LSH, R4, R4), /* R4 = 5 << 5 */
+   BPF_JMP_IMM(BPF_JEQ, R4, 160, 1),
+   BPF_EXIT_INSN(),
+   BPF_MOV64_IMM(R0, -1),
+   BPF_EXIT_INSN(),
+   },
+   INTERNAL,
+   { },
+   { { 0, -1 } }
+   },
+   {
"INT: DIV + ABS",
.u.insns_int = {
BPF_ALU64_REG(BPF_MOV, R6, R1),
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More

[PATCH v5 net-next 06/29] bpf: add lookup/update/delete/iterate methods to BPF maps

2014-08-24 Thread Alexei Starovoitov

'maps' is a generic storage of different types for sharing data between kernel
and userspace.

The maps are accessed from user space via BPF syscall, which has commands:

- create a map with given type and attributes
  fd = bpf_map_create(map_type, struct nlattr *attr, int len)
  returns fd or negative error

- lookup key in a given map referenced by fd
  err = bpf_map_lookup_elem(int fd, void *key, void *value)
  returns zero and stores found elem into value or negative error

- create or update key/value pair in a given map
  err = bpf_map_update_elem(int fd, void *key, void *value)
  returns zero or negative error

- find and delete element by key in a given map
  err = bpf_map_delete_elem(int fd, void *key)

- iterate map elements (based on input key return next_key)
  err = bpf_map_get_next_key(int fd, void *key, void *next_key)

- close(fd) deletes the map

Signed-off-by: Alexei Starovoitov 
---
 include/linux/bpf.h  |8 ++
 include/uapi/linux/bpf.h |   25 ++
 kernel/bpf/syscall.c |  198 ++
 3 files changed, 231 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 607ca53fe2af..fd1ac4b5ba8b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -9,6 +9,7 @@
 
 #include 
 #include 
+#include 
 
 struct bpf_map;
 struct nlattr;
@@ -18,6 +19,12 @@ struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
struct bpf_map *(*map_alloc)(struct nlattr *attrs[BPF_MAP_ATTR_MAX + 
1]);
void (*map_free)(struct bpf_map *);
+   int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
+
+   /* funcs callable from userspace and from eBPF programs */
+   void *(*map_lookup_elem)(struct bpf_map *map, void *key);
+   int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
+   int (*map_delete_elem)(struct bpf_map *map, void *key);
 };
 
 struct bpf_map {
@@ -38,5 +45,6 @@ struct bpf_map_type_list {
 
 void bpf_register_map_type(struct bpf_map_type_list *tl);
 void bpf_map_put(struct bpf_map *map);
+struct bpf_map *bpf_map_get(struct fd f);
 
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index de21c8ecf0bb..f68edb2681f8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -326,6 +326,31 @@ enum bpf_cmd {
 * map is deleted when fd is closed
 */
BPF_MAP_CREATE,
+
+   /* lookup key in a given map
+* err = bpf_map_lookup_elem(int fd, void *key, void *value)
+* returns zero and stores found elem into value
+* or negative error
+*/
+   BPF_MAP_LOOKUP_ELEM,
+
+   /* create or update key/value pair in a given map
+* err = bpf_map_update_elem(int fd, void *key, void *value)
+* returns zero or negative error
+*/
+   BPF_MAP_UPDATE_ELEM,
+
+   /* find and delete elem by key in a given map
+* err = bpf_map_delete_elem(int fd, void *key)
+* returns zero or negative error
+*/
+   BPF_MAP_DELETE_ELEM,
+
+   /* lookup key in a given map and return next key
+* err = bpf_map_get_elem(int fd, void *key, void *next_key)
+* returns zero and stores next key or negative error
+*/
+   BPF_MAP_GET_NEXT_KEY,
 };
 
 enum bpf_map_attributes {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 04cdf7948f8f..45e100ece1b7 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static LIST_HEAD(bpf_map_types);
 
@@ -131,6 +132,189 @@ free_attr:
return err;
 }
 
+/* if error is returned, fd is released.
+ * On success caller should complete fd access with matching fdput()
+ */
+struct bpf_map *bpf_map_get(struct fd f)
+{
+   struct bpf_map *map;
+
+   if (!f.file)
+   return ERR_PTR(-EBADF);
+
+   if (f.file->f_op != &bpf_map_fops) {
+   fdput(f);
+   return ERR_PTR(-EINVAL);
+   }
+
+   map = f.file->private_data;
+
+   return map;
+}
+
+static int map_lookup_elem(int ufd, void __user *ukey, void __user *uvalue)
+{
+   struct fd f = fdget(ufd);
+   struct bpf_map *map;
+   void *key, *value;
+   int err;
+
+   map = bpf_map_get(f);
+   if (IS_ERR(map))
+   return PTR_ERR(map);
+
+   err = -ENOMEM;
+   key = kmalloc(map->key_size, GFP_USER);
+   if (!key)
+   goto err_put;
+
+   err = -EFAULT;
+   if (copy_from_user(key, ukey, map->key_size) != 0)
+   goto free_key;
+
+   err = -ESRCH;
+   rcu_read_lock();
+   value = map->ops->map_lookup_elem(map, key);
+   if (!value)
+   goto err_unlock;
+
+   err = -EFAULT;
+   if (copy_to_user(uvalue, value, map->value_size) != 0)
+   goto err_unlock;
+
+   err = 0;
+
+err_unlock:
+   rcu_read_unlock();
+free_key:
+   kfree(key);
+err_put:

[PATCH v5 net-next 05/29] bpf: enable bpf syscall on x64 and i386

2014-08-24 Thread Alexei Starovoitov

done as separate commit to ease conflict resolution

Signed-off-by: Alexei Starovoitov 
---
 arch/x86/syscalls/syscall_32.tbl  |1 +
 arch/x86/syscalls/syscall_64.tbl  |1 +
 include/linux/syscalls.h  |3 ++-
 include/uapi/asm-generic/unistd.h |4 +++-
 kernel/sys_ni.c   |3 +++
 5 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 028b78168d85..9fe1b5d002f0 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -363,3 +363,4 @@
 354i386seccomp sys_seccomp
 355i386getrandom   sys_getrandom
 356i386memfd_createsys_memfd_create
+357i386bpf sys_bpf
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 35dd922727b9..281150b539a2 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -327,6 +327,7 @@
 318common  getrandom   sys_getrandom
 319common  memfd_createsys_memfd_create
 320common  kexec_file_load sys_kexec_file_load
+321common  bpf sys_bpf
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 0f86d85a9ce4..61bc112b9fa5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -875,5 +875,6 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int 
flags,
const char __user *uargs);
 asmlinkage long sys_getrandom(char __user *buf, size_t count,
  unsigned int flags);
-
+asmlinkage long sys_bpf(int cmd, unsigned long arg2, unsigned long arg3,
+   unsigned long arg4, unsigned long arg5);
 #endif
diff --git a/include/uapi/asm-generic/unistd.h 
b/include/uapi/asm-generic/unistd.h
index 11d11bc5c78f..22749c134117 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -705,9 +705,11 @@ __SYSCALL(__NR_seccomp, sys_seccomp)
 __SYSCALL(__NR_getrandom, sys_getrandom)
 #define __NR_memfd_create 279
 __SYSCALL(__NR_memfd_create, sys_memfd_create)
+#define __NR_bpf 280
+__SYSCALL(__NR_bpf, sys_bpf)
 
 #undef __NR_syscalls
-#define __NR_syscalls 280
+#define __NR_syscalls 281
 
 /*
  * All syscalls below here should go away really,
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 391d4ddb6f4b..b4b5083f5f5e 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -218,3 +218,6 @@ cond_syscall(sys_kcmp);
 
 /* operate on Secure Computing state */
 cond_syscall(sys_seccomp);
+
+/* access BPF programs and maps */
+cond_syscall(sys_bpf);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v5 net-next 04/29] bpf: introduce syscall(BPF, ...) and BPF maps

2014-08-24 Thread Alexei Starovoitov

BPF syscall is a demux for different BPF-related commands.

'maps' is a generic storage of different types for sharing data between kernel
and userspace.

The maps can be created from user space via BPF syscall:
- create a map with given type and attributes
  fd = bpf_map_create(map_type, struct nlattr *attr, int len)
  returns fd or negative error

- close(fd) deletes the map

Next patch allows userspace programs to populate/read maps that eBPF programs
are concurrently updating.

maps can have different types: hash, bloom filter, radix-tree, etc.

The map is defined by:
  . type
  . max number of elements
  . key size in bytes
  . value size in bytes

This patch establishes core infrastructure for BPF maps.
Next patches implement lookup/update and hashtable type.
More map types can be added in the future.

syscall is using type-length-value style of passing arguments to be backwards
compatible with future extensions to map attributes. Different map types may
use different attributes as well.
The concept of type-length-value is borrowed from netlink, but netlink itself
is not applicable here, since BPF programs and maps can be used in NET-less
configurations.

Signed-off-by: Alexei Starovoitov 
---
 Documentation/networking/filter.txt |   71 
 include/linux/bpf.h |   42 ++
 include/uapi/linux/bpf.h|   24 ++
 kernel/bpf/Makefile |2 +-
 kernel/bpf/syscall.c|  156 +++
 5 files changed, 294 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/bpf.h
 create mode 100644 kernel/bpf/syscall.c

diff --git a/Documentation/networking/filter.txt 
b/Documentation/networking/filter.txt
index 81916ab5d96f..27a0a6c6acb4 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -1001,6 +1001,77 @@ instruction that loads 64-bit immediate value into a 
dst_reg.
 Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads
 32-bit immediate value into a register.
 
+eBPF maps
+-
+'maps' is a generic storage of different types for sharing data between kernel
+and userspace.
+
+The maps are accessed from user space via BPF syscall, which has commands:
+- create a map with given type and attributes
+  map_fd = bpf_map_create(map_type, struct nlattr *attr, int len)
+  returns process-local file descriptor or negative error
+
+- lookup key in a given map
+  err = bpf_map_lookup_elem(int fd, void *key, void *value)
+  returns zero and stores found elem into value or negative error
+
+- create or update key/value pair in a given map
+  err = bpf_map_update_elem(int fd, void *key, void *value)
+  returns zero or negative error
+
+- find and delete element by key in a given map
+  err = bpf_map_delete_elem(int fd, void *key)
+
+- to delete map: close(fd)
+  Exiting process will delete maps automatically
+
+userspace programs use this API to create/populate/read maps that eBPF programs
+are concurrently updating.
+
+maps can have different types: hash, array, bloom filter, radix-tree, etc.
+
+The map is defined by:
+  . type
+  . max number of elements
+  . key size in bytes
+  . value size in bytes
+
+The maps are accessible from eBPF program with API:
+  void * bpf_map_lookup_elem(u32 map_fd, void *key);
+  int bpf_map_update_elem(u32 map_fd, void *key, void *value);
+  int bpf_map_delete_elem(u32 map_fd, void *key);
+
+The kernel replaces process-local map_fd with kernel internal map pointer,
+while loading eBPF program.
+
+If eBPF verifier is configured to recognize extra calls in the program
+bpf_map_lookup_elem() and bpf_map_update_elem() then access to maps looks like:
+  ...
+  ptr_to_value = bpf_map_lookup_elem(map_fd, key)
+  access memory range [ptr_to_value, ptr_to_value + value_size_in_bytes)
+  ...
+  prepare key2 and value2 on stack of key_size and value_size
+  err = bpf_map_update_elem(map_fd, key2, value2)
+  ...
+
+eBPF program cannot create or delete maps
+(such calls will be unknown to verifier)
+
+During program loading the refcnt of used maps is incremented, so they don't get
+deleted while program is running
+
+bpf_map_update_elem() can fail if maximum number of elements reached.
+if key2 already exists, bpf_map_update_elem() replaces it with value2 atomically
+
+bpf_map_lookup_elem() returns NULL or ptr_to_value, so program must do
+if (ptr_to_value != NULL) check before accessing it.
+NULL means that element with given 'key' was not found.
+
+The verifier will check that the program accesses map elements within specified
+size. It will not let programs pass junk values to bpf_map_*_elem() functions,
+so these functions (implemented in C inside kernel) can safely access
+the pointers in all cases.
+
 Testing
 ---
 
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
new file mode 100644
index ..607ca53fe2af
--- /dev/null
+++ b/include/linux/bpf.h
@@ -0,0 +1,42 @@
+/* Copyright (c) 2011-2014

[PATCH v5 net-next 07/29] bpf: add hashtable type of BPF maps

2014-08-24 Thread Alexei Starovoitov

add new map type: BPF_MAP_TYPE_HASH
and its simple (not auto resizeable) hash table implementation

Signed-off-by: Alexei Starovoitov 
---
 include/uapi/linux/bpf.h |1 +
 kernel/bpf/Makefile  |2 +-
 kernel/bpf/hashtab.c |  372 ++
 3 files changed, 374 insertions(+), 1 deletion(-)
 create mode 100644 kernel/bpf/hashtab.c

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f68edb2681f8..8069ab7b64cf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -365,6 +365,7 @@ enum bpf_map_attributes {
 
 enum bpf_map_type {
BPF_MAP_TYPE_UNSPEC,
+   BPF_MAP_TYPE_HASH,
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index e9f7334ed07a..558e12712ebc 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1 +1 @@
-obj-y := core.o syscall.o
+obj-y := core.o syscall.o hashtab.o
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
new file mode 100644
index ..bc8d32f0f720
--- /dev/null
+++ b/kernel/bpf/hashtab.c
@@ -0,0 +1,372 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include 
+#include 
+#include 
+
+struct bpf_htab {
+   struct bpf_map map;
+   struct hlist_head *buckets;
+   struct kmem_cache *elem_cache;
+   spinlock_t lock;
+   u32 count; /* number of elements in this hashtable */
+   u32 n_buckets; /* number of hash buckets */
+   u32 elem_size; /* size of each element in bytes */
+};
+
+/* each htab element is struct htab_elem + key + value */
+struct htab_elem {
+   struct hlist_node hash_node;
+   struct rcu_head rcu;
+   struct bpf_htab *htab;
+   u32 hash;
+   u32 pad;
+   char key[0];
+};
+
+#define HASH_MAX_BUCKETS 1024
+#define BPF_MAP_MAX_KEY_SIZE 256
+static struct bpf_map *htab_map_alloc(struct nlattr *attr[BPF_MAP_ATTR_MAX + 
1])
+{
+   struct bpf_htab *htab;
+   int err, i;
+
+   htab = kzalloc(sizeof(*htab), GFP_USER);
+   if (!htab)
+   return ERR_PTR(-ENOMEM);
+
+   /* look for mandatory map attributes */
+   err = -EINVAL;
+   if (!attr[BPF_MAP_KEY_SIZE])
+   goto free_htab;
+   htab->map.key_size = nla_get_u32(attr[BPF_MAP_KEY_SIZE]);
+
+   if (!attr[BPF_MAP_VALUE_SIZE])
+   goto free_htab;
+   htab->map.value_size = nla_get_u32(attr[BPF_MAP_VALUE_SIZE]);
+
+   if (!attr[BPF_MAP_MAX_ENTRIES])
+   goto free_htab;
+   htab->map.max_entries = nla_get_u32(attr[BPF_MAP_MAX_ENTRIES]);
+
+   htab->n_buckets = (htab->map.max_entries <= HASH_MAX_BUCKETS) ?
+ htab->map.max_entries : HASH_MAX_BUCKETS;
+
+   /* hash table size must be power of 2 */
+   if ((htab->n_buckets & (htab->n_buckets - 1)) != 0)
+   goto free_htab;
+
+   err = -E2BIG;
+   if (htab->map.key_size > BPF_MAP_MAX_KEY_SIZE)
+   goto free_htab;
+
+   err = -ENOMEM;
+   htab->buckets = kmalloc_array(htab->n_buckets,
+ sizeof(struct hlist_head), GFP_USER);
+
+   if (!htab->buckets)
+   goto free_htab;
+
+   for (i = 0; i < htab->n_buckets; i++)
+   INIT_HLIST_HEAD(&htab->buckets[i]);
+
+   spin_lock_init(&htab->lock);
+   htab->count = 0;
+
+   htab->elem_size = sizeof(struct htab_elem) +
+ round_up(htab->map.key_size, 8) +
+ htab->map.value_size;
+
+   htab->elem_cache = kmem_cache_create("bpf_htab", htab->elem_size, 0, 0,
+NULL);
+   if (!htab->elem_cache)
+   goto free_buckets;
+
+   return &htab->map;
+
+free_buckets:
+   kfree(htab->buckets);
+free_htab:
+   kfree(htab);
+   return ERR_PTR(err);
+}
+
+static inline u32 htab_map_hash(const void *key, u32 key_len)
+{
+   return jhash(key, key_len, 0);
+}
+
+static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
+{
+   return &htab->buckets[hash & (htab->n_buckets - 1)];
+}
+
+static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
+void *key, u32 key_size)
+{
+   struct htab_elem *l;
+
+   hlist_for_each_entry_rcu(l, head, hash_node) {
+   if (l->hash == hash && !memcmp(&l->key, key, key_size))
+   return l;
+   }
+   return NULL;
+}
+
+/* Must be called with rcu_read_lock. */
+static void

[PATCH v5 net-next 08/29] bpf: expand BPF syscall with program load/unload

2014-08-24 Thread Alexei Starovoitov

eBPF programs are safe run-to-completion functions with load/unload
methods from userspace similar to kernel modules.

User space API:

- load eBPF program
  fd = bpf_prog_load(bpf_prog_type, struct nlattr *prog, int len)

  where 'prog' is a sequence of sections (TEXT, LICENSE)
  TEXT - array of eBPF instructions
  LICENSE - must be GPL compatible to call helper functions marked gpl_only

- unload eBPF program
  close(fd)

User space example of syscall(__NR_bpf, BPF_PROG_LOAD, prog_type, ...)
follows in later patches

Signed-off-by: Alexei Starovoitov 
---
 include/linux/bpf.h  |   36 +
 include/linux/filter.h   |9 ++-
 include/uapi/linux/bpf.h |   28 +++
 kernel/bpf/syscall.c |  196 ++
 net/core/filter.c|2 +
 5 files changed, 269 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index fd1ac4b5ba8b..ac6320f44812 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -47,4 +47,40 @@ void bpf_register_map_type(struct bpf_map_type_list *tl);
 void bpf_map_put(struct bpf_map *map);
 struct bpf_map *bpf_map_get(struct fd f);
 
+/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF 
programs
+ * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
+ * instructions after verifying
+ */
+struct bpf_func_proto {
+   u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+   bool gpl_only;
+};
+
+struct bpf_verifier_ops {
+   /* return eBPF function prototype for verification */
+   const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id 
func_id);
+};
+
+struct bpf_prog_type_list {
+   struct list_head list_node;
+   struct bpf_verifier_ops *ops;
+   enum bpf_prog_type type;
+};
+
+void bpf_register_prog_type(struct bpf_prog_type_list *tl);
+
+struct bpf_prog_info {
+   atomic_t refcnt;
+   bool is_gpl_compatible;
+   enum bpf_prog_type prog_type;
+   struct bpf_verifier_ops *ops;
+   struct bpf_map **used_maps;
+   u32 used_map_cnt;
+};
+
+struct bpf_prog;
+
+void bpf_prog_put(struct bpf_prog *prog);
+struct bpf_prog *bpf_prog_get(u32 ufd);
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f04793474d16..f06913b29861 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -31,11 +31,16 @@ struct sock_fprog_kern {
 struct sk_buff;
 struct sock;
 struct seccomp_data;
+struct bpf_prog_info;
 
 struct bpf_prog {
u32 jited:1,/* Is our filter JIT'ed? */
-   len:31; /* Number of filter blocks */
-   struct sock_fprog_kern  *orig_prog; /* Original BPF program */
+   has_info:1, /* whether 'info' is valid */
+   len:30; /* Number of filter blocks */
+   union {
+   struct sock_fprog_kern  *orig_prog; /* Original BPF program 
*/
+   struct bpf_prog_info*info;
+   };
unsigned int(*bpf_func)(const struct sk_buff *skb,
const struct bpf_insn *filter);
union {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8069ab7b64cf..7468fe55db7b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -351,6 +351,13 @@ enum bpf_cmd {
 * returns zero and stores next key or negative error
 */
BPF_MAP_GET_NEXT_KEY,
+
+   /* verify and load eBPF program
+* prog_id = bpf_prog_load(bpf_prog_type, struct nlattr *prog, int len)
+* prog is a sequence of sections
+* returns fd or negative error
+*/
+   BPF_PROG_LOAD,
 };
 
 enum bpf_map_attributes {
@@ -368,4 +375,25 @@ enum bpf_map_type {
BPF_MAP_TYPE_HASH,
 };
 
+enum bpf_prog_attributes {
+   BPF_PROG_UNSPEC,
+   BPF_PROG_TEXT,  /* array of eBPF instructions */
+   BPF_PROG_LICENSE,   /* license string */
+   __BPF_PROG_ATTR_MAX,
+};
+#define BPF_PROG_ATTR_MAX (__BPF_PROG_ATTR_MAX - 1)
+#define BPF_PROG_MAX_ATTR_SIZE 65535
+
+enum bpf_prog_type {
+   BPF_PROG_TYPE_UNSPEC,
+};
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+enum bpf_func_id {
+   BPF_FUNC_unspec,
+   __BPF_FUNC_MAX_ID,
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 45e100ece1b7..4c5f5169f6fc 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -14,6 +14,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 static LIST_HEAD(bpf_map_types);
 
@@ -315,6 +317,197 @@ err_put:
return err;
 }
 
+static LIST_HEAD(bpf_prog_types);
+
+static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
+{
+   struct bpf_prog_type_list *tl;
+
+   list_for_each_entry(tl, &bpf_prog_types, list_node)

1 2 3 4 5 >

1 - 100 of 456 matches

Mail list logo