[PATCH RFT] regulator: s5m8767: Convert to use regulator_[enable|disable|is_enabled]_regmap

2014-03-20 Thread Axel Lin
Since commit ca5d1b3524b4d
"regulator: helpers: Modify helpers enabling multi-bit control",
we can set enable_val setting for device that use multiple bits for control.

Signed-off-by: Axel Lin 
---
 drivers/regulator/s5m8767.c | 78 +++--
 1 file changed, 19 insertions(+), 59 deletions(-)

diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c
index 342a66ab..f05bada 100644
--- a/drivers/regulator/s5m8767.c
+++ b/drivers/regulator/s5m8767.c
@@ -167,12 +167,11 @@ static unsigned int s5m8767_opmode_reg[][4] = {
{0x0, 0x3, 0x1, 0x1}, /* BUCK9 */
 };
 
-static int s5m8767_get_register(struct regulator_dev *rdev, int *reg,
-   int *enable_ctrl)
+static int s5m8767_get_register(struct s5m8767_info *s5m8767, int reg_id,
+   int *reg, int *enable_ctrl)
 {
-   int i, reg_id = rdev_get_id(rdev);
+   int i;
unsigned int mode;
-   struct s5m8767_info *s5m8767 = rdev_get_drvdata(rdev);
 
switch (reg_id) {
case S5M8767_LDO1 ... S5M8767_LDO2:
@@ -211,53 +210,6 @@ static int s5m8767_get_register(struct regulator_dev 
*rdev, int *reg,
return 0;
 }
 
-static int s5m8767_reg_is_enabled(struct regulator_dev *rdev)
-{
-   struct s5m8767_info *s5m8767 = rdev_get_drvdata(rdev);
-   int ret, reg;
-   int enable_ctrl;
-   unsigned int val;
-
-   ret = s5m8767_get_register(rdev, &reg, &enable_ctrl);
-   if (ret == -EINVAL)
-   return 1;
-   else if (ret)
-   return ret;
-
-   ret = regmap_read(s5m8767->iodev->regmap_pmic, reg, &val);
-   if (ret)
-   return ret;
-
-   return (val & S5M8767_ENCTRL_MASK) == enable_ctrl;
-}
-
-static int s5m8767_reg_enable(struct regulator_dev *rdev)
-{
-   struct s5m8767_info *s5m8767 = rdev_get_drvdata(rdev);
-   int ret, reg;
-   int enable_ctrl;
-
-   ret = s5m8767_get_register(rdev, &reg, &enable_ctrl);
-   if (ret)
-   return ret;
-
-   return regmap_update_bits(s5m8767->iodev->regmap_pmic, reg,
-   S5M8767_ENCTRL_MASK, enable_ctrl);
-}
-
-static int s5m8767_reg_disable(struct regulator_dev *rdev)
-{
-   struct s5m8767_info *s5m8767 = rdev_get_drvdata(rdev);
-   int ret, reg, enable_ctrl;
-
-   ret = s5m8767_get_register(rdev, &reg, &enable_ctrl);
-   if (ret)
-   return ret;
-
-   return regmap_update_bits(s5m8767->iodev->regmap_pmic, reg,
-   S5M8767_ENCTRL_MASK, ~S5M8767_ENCTRL_MASK);
-}
-
 static int s5m8767_get_vsel_reg(int reg_id, struct s5m8767_info *s5m8767)
 {
int reg;
@@ -407,9 +359,9 @@ static int s5m8767_set_voltage_time_sel(struct 
regulator_dev *rdev,
 
 static struct regulator_ops s5m8767_ops = {
.list_voltage   = regulator_list_voltage_linear,
-   .is_enabled = s5m8767_reg_is_enabled,
-   .enable = s5m8767_reg_enable,
-   .disable= s5m8767_reg_disable,
+   .is_enabled = regulator_is_enabled_regmap,
+   .enable = regulator_enable_regmap,
+   .disable= regulator_disable_regmap,
.get_voltage_sel= regulator_get_voltage_sel_regmap,
.set_voltage_sel= s5m8767_set_voltage_sel,
.set_voltage_time_sel   = s5m8767_set_voltage_time_sel,
@@ -417,9 +369,9 @@ static struct regulator_ops s5m8767_ops = {
 
 static struct regulator_ops s5m8767_buck78_ops = {
.list_voltage   = regulator_list_voltage_linear,
-   .is_enabled = s5m8767_reg_is_enabled,
-   .enable = s5m8767_reg_enable,
-   .disable= s5m8767_reg_disable,
+   .is_enabled = regulator_is_enabled_regmap,
+   .enable = regulator_enable_regmap,
+   .disable= regulator_disable_regmap,
.get_voltage_sel= regulator_get_voltage_sel_regmap,
.set_voltage_sel= regulator_set_voltage_sel_regmap,
 };
@@ -524,12 +476,13 @@ static void s5m8767_regulator_config_ext_control(struct 
s5m8767_info *s5m8767,
 static int s5m8767_enable_ext_control(struct s5m8767_info *s5m8767,
struct regulator_dev *rdev)
 {
+   int id = rdev_get_id(rdev);
int ret, reg, enable_ctrl;
 
-   if (rdev_get_id(rdev) != S5M8767_BUCK9)
+   if (id != S5M8767_BUCK9)
return -EINVAL;
 
-   ret = s5m8767_get_register(rdev, &reg, &enable_ctrl);
+   ret = s5m8767_get_register(s5m8767, id, &reg, &enable_ctrl);
if (ret)
return ret;
 
@@ -984,6 +937,7 @@ static int s5m8767_pmic_probe(struct platform_device *pdev)
for (i = 0; i < pdata->num_regulators; i++) {
const struct sec_voltage_desc *desc;
int id = pdata->regulators[i].id;
+   int enable_reg, enable_val;
 
desc = reg_voltage_map[id];

Re: [PATCH] mmc: sdhci: don't read cd-gpio while holding spinlock

2014-03-20 Thread Adrian Hunter
On 20.03.2014 20:47, Andrew Bresticker wrote:
> mmc_request() reads the cd-gpio via mmc_gpio_get_cd(), which can sleep,
> while holding host->lock.  This may result in the following BUG:
> 
>   BUG: spinlock wrong CPU on CPU#2, kworker/u8:16/4296
>   lock: 0xea6b9c80, .magic: dead4ead, .owner: kworker/u8:16/4296, .owner_cpu: > 0
>   CPU: 2 PID: 4296 Comm: kworker/u8:16 Tainted: G C   3.10.18 #137
>   Workqueue: kmmcd mmc_rescan
>   [<8020cf8c>] (unwind_backtrace+0x0/0x118) from [<8020a0c8>] 
> (show_stack+0x20/0x24)
>   [<8020a0c8>] (show_stack+0x20/0x24) from [<8075e5b8>] (dump_stack+0x20/0x28)
>   [<8075e5b8>] (dump_stack+0x20/0x28) from [<804184a8>] (spin_dump+0x80/0x94)
>   [<804184a8>] (spin_dump+0x80/0x94) from [<804184e8>] (spin_bug+0x2c/0x30)
>   [<804184e8>] (spin_bug+0x2c/0x30) from [<80418790>] 
> (do_raw_spin_unlock+0x94/0xd4)
>   [<80418790>] (do_raw_spin_unlock+0x94/0xd4) from [<80761a44>] 
> (_raw_spin_unlock_irqrestore+0x1c/0x24)
>   [<80761a44>] (_raw_spin_unlock_irqrestore+0x1c/0x24) from [<805ff66c>] 
> (sdhci_request+0x1c8/0x1d0)
>   [<805ff66c>] (sdhci_request+0x1c8/0x1d0) from [<805ebb5c>] 
> (mmc_start_request+0xec/0xf4)
>   [<805ebb5c>] (mmc_start_request+0xec/0xf4) from [<805ebcbc>] 
> (mmc_wait_for_req+0x80/0xf4)
>   ...
> 
> Read the cd-gpio before acquiring the spinlock instead.

The same problem appears to be in sdhci_card_event() which calls
sdhci_do_get_cd() under spinlock which then calls mmc_gpio_get_cd()

Will you fix that too?


> 
> Signed-off-by: Andrew Bresticker 
> ---
>  drivers/mmc/host/sdhci.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
> index 04a5e25..f2ef978 100644
> --- a/drivers/mmc/host/sdhci.c
> +++ b/drivers/mmc/host/sdhci.c
> @@ -1340,6 +1340,7 @@ static void sdhci_request(struct mmc_host *mmc, struct 
> mmc_request *mrq)
>   u32 tuning_opcode;
>  
>   host = mmc_priv(mmc);
> + present = mmc_gpio_get_cd(host->mmc);
>  
>   sdhci_runtime_pm_get(host);
>  
> @@ -1371,7 +1372,6 @@ static void sdhci_request(struct mmc_host *mmc, struct 
> mmc_request *mrq)
>* zero: cd-gpio is used, and card is removed
>* one: cd-gpio is used, and card is present
>*/
> - present = mmc_gpio_get_cd(host->mmc);
>   if (present < 0) {
>   /* If polling, assume that the card is always present. */
>   if (host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION)
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: manual merge of the akpm tree with the tip tree

2014-03-20 Thread Stephen Rothwell
Hi Andrew,

Today's linux-next merge of the akpm tree got a conflict in
arch/x86/include/asm/Kbuild between commit 073d8224d299 ("arch: Remove
stub cputime.h headers") from the tip tree and commit "x86: use generic
early_ioremap" from the akpm tree.

I fixed it up (see below) and can carry the fix as necessary (no action
is required).

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au

diff --cc arch/x86/include/asm/Kbuild
index 4acddc43ee0c,c98cd05d1cdf..
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@@ -5,5 -5,5 +5,6 @@@ genhdr-y += unistd_64.
  genhdr-y += unistd_x32.h
  
  generic-y += clkdev.h
 +generic-y += cputime.h
+ generic-y += early_ioremap.h
  generic-y += mcs_spinlock.h


pgpMSQQldSPkq.pgp
Description: PGP signature


RE: [PATCH 2/2] ASoC: fsl: Make Freescale SAI/ESAI/SPDIF to be visible in Kconfig

2014-03-20 Thread li.xi...@freescale.com
Hi Mark,

Has this patch been missed? I couldn't find it anywhere in the next branch.
:)

Thanks very much,

--
Best Regards,
Xiubo



> -Original Message-
> From: Nicolin Chen [mailto:guangyu.c...@freescale.com]
> Sent: Thursday, February 20, 2014 11:08 AM
> To: Mark Brown
> Cc: Xiubo Li-B47053; lgirdw...@gmail.com; shawn@linaro.org; 
> pe...@perex.cz;
> ti...@suse.de; Estevam Fabio-R49496; alsa-de...@alsa-project.org; linux-
> ker...@vger.kernel.org
> Subject: Re: [PATCH 2/2] ASoC: fsl: Make Freescale SAI/ESAI/SPDIF to be
> visible in Kconfig
> 
> On Thu, Feb 20, 2014 at 11:44:32AM +0900, Mark Brown wrote:
> > On Thu, Feb 20, 2014 at 02:06:20AM +, li.xi...@freescale.com wrote:
> >
> > > I'm not very sure of this patch, maybe should we add one menu
> > > in Kconfig for all visible CPU DAIs firstly like for code drivers?
> >
> > Allowing them to be individually selected is definitely useful if people
> > are trying to minimise their kernel size and/or build time.  However
> > none of the other Freescale people have commented on this patch (which
> > I'd have expected) so I was giving them time and IIRC it needs a rebase
> > against current code.
> 
> Last month I was revising a new CODEC driver for ESAI and meanwhile abort
> to plan to try simple card for this combination but being suspended due to
> some tough internal issues. So I think it should be a good idea for us to
> move towards simple card starting from this patch.
> 
> Surely, Acked.
> Nicolin Chen
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v4 3/3] memstick: Add realtek USB memstick host driver

2014-03-20 Thread Roger

On 03/21/2014 07:18 AM, Andrew Morton wrote:

On Wed, 12 Feb 2014 18:00:38 +0800  wrote:


From: Roger Tseng 

Realtek USB memstick host driver provides memstick host support based on the
Realtek USB card reader MFD driver.


...

+static int rtsx_usb_ms_drv_probe(struct platform_device *pdev)
+{
+   struct memstick_host *msh;
+   struct rtsx_usb_ms *host;
+   struct rtsx_ucr *ucr;
+   int err;
+
+   ucr = usb_get_intfdata(to_usb_interface(pdev->dev.parent));
+   if (!ucr)
+   return -ENXIO;
+
+   dev_dbg(&(pdev->dev),
+   "Realtek USB Memstick controller found\n");
+
+   msh = memstick_alloc_host(sizeof(*host), &pdev->dev);
+   if (!msh)
+   return -ENOMEM;
+
+   host = memstick_priv(msh);
+   host->ucr = ucr;
+   host->msh = msh;
+   host->pdev = pdev;
+   host->power_mode = MEMSTICK_POWER_OFF;
+   platform_set_drvdata(pdev, host);
+
+   mutex_init(&host->host_mutex);
+   INIT_WORK(&host->handle_req, rtsx_usb_ms_handle_req);
+
+   init_completion(&host->detect_ms_exit);
+   host->detect_ms = kthread_create(rtsx_usb_detect_ms_card, host,
+   "rtsx_usb_ms_%d", pdev->id);
+   if (IS_ERR(host->detect_ms)) {
+   dev_dbg(&(pdev->dev),
+   "Unable to create polling thread.\n");
+   err = PTR_ERR(host->detect_ms);
+   goto err_out;
+   }
+
+   msh->request = rtsx_usb_ms_request;
+   msh->set_param = rtsx_usb_ms_set_param;
+   msh->caps = MEMSTICK_CAP_PAR4;
+
+   pm_runtime_enable(&pdev->dev);
+   err = memstick_add_host(msh);
+   if (err)
+   goto err_out;


Isn't that kernel thread still running?

The kernel thread is created(kthread_create) but should not start before 
the "goto".


Did I miss anything or will it be better to move all kthread things 
after here?

+   wake_up_process(host->detect_ms);
+   return 0;
+err_out:
+   memstick_free_host(msh);
+   return err;
+}


Best regards,
Roger Tseng
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4] mac80211: LLVMLinux: Remove VLAIS usage from mac80211

2014-03-20 Thread behanw
From: Jan-Simon Möller 

Replaced the use of a Variable Length Array In Struct (VLAIS) with a C99
compliant equivalent. This is the original VLAIS struct.

struct {
struct aead_request req;
u8  priv[crypto_aead_reqsize(tfm)];
} aead_req;

This patch instead allocates the appropriate amount of memory using a char
array.

The new code can be compiled with both gcc and clang.

Signed-off-by: Jan-Simon Möller 
Signed-off-by: Behan Webster 
Signed-off-by: Vinícius Tinti 
Signed-off-by: Mark Charlebois 
---
 net/mac80211/aes_ccm.c | 37 ++---
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index 7c7df47..71e2abd 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -23,12 +23,12 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 
*b_0, u8 *aad,
   u8 *data, size_t data_len, u8 *mic)
 {
struct scatterlist assoc, pt, ct[2];
-   struct {
-   struct aead_request req;
-   u8  priv[crypto_aead_reqsize(tfm)];
-   } aead_req;
 
-   memset(&aead_req, 0, sizeof(aead_req));
+   char aead_req_data[sizeof(struct aead_request) +
+   crypto_aead_reqsize(tfm)]
+   __aligned(__alignof__(struct aead_request));
+   struct aead_request *aead_req = (void *) aead_req_data;
+   memset(aead_req, 0, sizeof(aead_req_data));
 
sg_init_one(&pt, data, data_len);
sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
@@ -36,23 +36,22 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 
*b_0, u8 *aad,
sg_set_buf(&ct[0], data, data_len);
sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN);
 
-   aead_request_set_tfm(&aead_req.req, tfm);
-   aead_request_set_assoc(&aead_req.req, &assoc, assoc.length);
-   aead_request_set_crypt(&aead_req.req, &pt, ct, data_len, b_0);
+   aead_request_set_tfm(aead_req, tfm);
+   aead_request_set_assoc(aead_req, &assoc, assoc.length);
+   aead_request_set_crypt(aead_req, &pt, ct, data_len, b_0);
 
-   crypto_aead_encrypt(&aead_req.req);
+   crypto_aead_encrypt(aead_req);
 }
 
 int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
  u8 *data, size_t data_len, u8 *mic)
 {
struct scatterlist assoc, pt, ct[2];
-   struct {
-   struct aead_request req;
-   u8  priv[crypto_aead_reqsize(tfm)];
-   } aead_req;
-
-   memset(&aead_req, 0, sizeof(aead_req));
+   char aead_req_data[sizeof(struct aead_request) +
+   crypto_aead_reqsize(tfm)]
+   __aligned(__alignof__(struct aead_request));
+   struct aead_request *aead_req = (void *) aead_req_data;
+   memset(aead_req, 0, sizeof(aead_req_data));
 
sg_init_one(&pt, data, data_len);
sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad));
@@ -60,12 +59,12 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 
*b_0, u8 *aad,
sg_set_buf(&ct[0], data, data_len);
sg_set_buf(&ct[1], mic, IEEE80211_CCMP_MIC_LEN);
 
-   aead_request_set_tfm(&aead_req.req, tfm);
-   aead_request_set_assoc(&aead_req.req, &assoc, assoc.length);
-   aead_request_set_crypt(&aead_req.req, ct, &pt,
+   aead_request_set_tfm(aead_req, tfm);
+   aead_request_set_assoc(aead_req, &assoc, assoc.length);
+   aead_request_set_crypt(aead_req, ct, &pt,
   data_len + IEEE80211_CCMP_MIC_LEN, b_0);
 
-   return crypto_aead_decrypt(&aead_req.req);
+   return crypto_aead_decrypt(aead_req);
 }
 
 struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[])
-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RFC v1 00/01] dm-lightnvm introduction

2014-03-20 Thread Matias Bjørling
Hi,

Users that have custom firmware SSDs, may choose to expose their flash directly.
This allows the host to control logical to physical address mappings, garbage
collection strategy, wear-leveling, and so on.

This is beneficial when you either want to strip the cost of a costly controller
or want detailed control over an SSD, for example when implementing key-value
stores, object stores, atomic I/Os, etc.

LightNVM implements the host-side core. It supports two modes: users who don't
have hardware available and want to emulate an SSD, and users with a physical
SSD that has LightNVM-compatible firmware.

The behavior of the core can be exposed through the various components that make
up an FTL. Thus, it is not a single implementation; instead it is possible to
mix the best algorithms for the user-space application workloads.

Currently, LightNVM implements a page-based FTL, cost-based GC and simple
wear-leveling. Additionally, it allows simulation of flash timings when hardware
isn't available. There's work on getting it to the OpenSSD platform with a
custom firmware and getting it stable for broad use.

There is still much work to do. I'm looking for feedback on the approach, dm
integration, and more. Any feedback is greatly appreciated.

A presentation was given at the recent Non-Volatile Memory Workshop (NVMW).
Slides are available at: http://bjorling.me/NVMW2014-LightNVM.pdf

Major todo's:
 * Patch has TODO and FIXME in places that need to be cleaned up.
 * LightNVM compatible firmware for the OpenSSD platform and integration.
 * Performance regressions during GC.
 * Durability during power failure.

Thanks,

Matias

Matias Bjørling (1):
  dm-lightnvm: An open FTL for open firmware SSDs

 drivers/md/Kconfig |   1 +
 drivers/md/Makefile|   1 +
 drivers/md/lightnvm/Kconfig|  14 +
 drivers/md/lightnvm/Makefile   |   1 +
 drivers/md/lightnvm/core.c | 705 +
 drivers/md/lightnvm/gc.c   | 208 
 drivers/md/lightnvm/lightnvm.c | 589 ++
 drivers/md/lightnvm/lightnvm.h | 592 ++
 drivers/md/lightnvm/reg.c  |  41 +++
 9 files changed, 2152 insertions(+)
 create mode 100644 drivers/md/lightnvm/Kconfig
 create mode 100644 drivers/md/lightnvm/Makefile
 create mode 100644 drivers/md/lightnvm/core.c
 create mode 100644 drivers/md/lightnvm/gc.c
 create mode 100644 drivers/md/lightnvm/lightnvm.c
 create mode 100644 drivers/md/lightnvm/lightnvm.h
 create mode 100644 drivers/md/lightnvm/reg.c

-- 
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v2] MAINTAINERS: microblaze: Use LKML as mailing list

2014-03-20 Thread Michal Simek
microblaze-uclinux mailing list is almost dead and
it is just causing troubles for non subscribers which are
getting email about waiting for moderator.
Approval never happens. Move it to LKML.

Reported-by: Richard Guy Briggs 
Signed-off-by: Michal Simek 
---

Changes in v2:
 - LKML is default mailing list, remove mailing list line
   from Microblaze fragment

 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index b3fdb0f..f6fe453 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5702,7 +5702,6 @@ F:fs/imgdafs/

 MICROBLAZE ARCHITECTURE
 M: Michal Simek 
-L: microblaze-ucli...@itee.uq.edu.au (moderated for non-subscribers)
 W: http://www.monstr.eu/fdt/
 T: git git://git.monstr.eu/linux-2.6-microblaze.git
 S: Supported
--
1.8.2.3



pgpUCHxPyNnPo.pgp
Description: PGP signature


Re: [PATCH] MAINTAINERS: microblaze: Use LKML as mailing list

2014-03-20 Thread Michal Simek
On 03/20/2014 09:18 PM, Joe Perches wrote:
> On Thu, 2014-03-20 at 21:15 +0100, Paul Bolle wrote:
>> On Thu, 2014-03-20 at 16:09 +0100, Michal Simek wrote:
>>> microblaze-uclinux mailing list is almost dead and
>>> it is just causing troubles for non subscribers which are
>>> getting email about waiting for moderator.
>>> Approval never happens. Move it to LKML.
> []
>>> diff --git a/MAINTAINERS b/MAINTAINERS
> []
>>> @@ -5702,7 +5702,7 @@ F:fs/imgdafs/
>>>
>>>  MICROBLAZE ARCHITECTURE
>>>  M: Michal Simek 
>>> -L: microblaze-ucli...@itee.uq.edu.au (moderated for non-subscribers)
>>> +L: linux-kernel@vger.kernel.org
>>
>> Isn't lkml already the catch all address that get_maintainer.pl always
>> spits out?
> 
> 'tis.
> 
> My preference is not adding L: lkml lines to
> individual sections as it is the default and it is
> specified as such in the beginning of MAINTAINERS
> and SubmittingPatches.

Ok. No problem will send v2.

Maybe good to remove others too.
[linux]$ grep linux-kernel@vger.kernel.org MAINTAINERS  | wc -l
24

Thanks,
Michal

-- 
Michal Simek, Ing. (M.Eng), OpenPGP -> KeyID: FE3D1F91
w: www.monstr.eu p: +42-0-721842854
Maintainer of Linux kernel - Microblaze cpu - http://www.monstr.eu/fdt/
Maintainer of Linux kernel - Xilinx Zynq ARM architecture
Microblaze U-BOOT custodian and responsible for u-boot arm zynq platform




signature.asc
Description: OpenPGP digital signature


From Mrs Monat Adama.

2014-03-20 Thread inforproject100

Dear Friend,

Greetings to you and your family.



My name is Mrs Monate Adama, the current Chief Auditor of a formidable bank 
here in Ouagadougou, Burkina Faso, West Africa. I have a transaction worth of 
12.5 Million U.S dollars for transferring into your care for our mutual 
benefits, so  i need your assistance with 100% cooperation to realise this task.

Further details about the fund, its source and the administrative procedure to 
transfer this fund into your receiving bank account shall be made known to you 
immediately as I have your positive response.



Thanks and Regards,



Mrs Monate Adama.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [BUG] Paravirtual time accounting / IRQ time accounting

2014-03-20 Thread Mike Galbraith
On Thu, 2014-03-20 at 12:01 -0300, Glauber Costa wrote: 
> On Wed, Mar 19, 2014 at 6:42 AM,   wrote:
> > In consolidated environments, when there are multiple virtual machines (VMs)
> > running on one CPU core, timekeeping will be a problem to the guest OS.
> > Here, I report my findings about Linux process scheduler.
> >
> >
> > Description
> > 
> > Linux CFS relies on rq->clock_task to charge each task, determine vruntime,
> > etc.
> >
> > When CONFIG_IRQ_TIME_ACCOUNTING is enabled, the time spent on serving IRQ
> > will be excluded from updating rq->clock_task.
> > When CONFIG_PARAVIRT_TIME_ACCOUNTING is enabled, the time stolen by the
> > hypervisor
> > will also be excluded from updating rq->clock_task.
> >
> > With "both" CONFIG_IRQ_TIME_ACCOUNTING and CONFIG_PARAVIRT_TIME_ACCOUNTING
> > enabled,
> > I put three KVM guests on one core and run hackbench in each guest. I find
> > that
> > in the guests, rq->clock_task stays *unchanged*. The malfunction embarrasses
> > CFS.
> > 
> >
> >
> > Analysis
> > 
> > [src/kernel/sched/core.c]
> > static void update_rq_clock_task(struct rq *rq, s64 delta)
> > {
> > ... ...
> > #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> > irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
> > ... ...
> > rq->prev_irq_time += irq_delta;
> > delta -= irq_delta;
> > #endif
> >
> > #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
> > if (static_key_false((&paravirt_steal_rq_enabled))) {
> > steal = paravirt_steal_clock(cpu_of(rq));
> > steal -= rq->prev_steal_time_rq;
> > ... ...
> > rq->prev_steal_time_rq += steal;
> > delta -= steal;
> > }
> > #endif
> >
> > rq->clock_task += delta;
> > ... ...
> > }
> > --
> > "delta" -> the intended increment to rq->clock_task
> > "irq_delta" -> the time spent on serving IRQ (hard + soft)
> > "steal" -> the time stolen by the underlying hypervisor
> > --
> > "irq_delta" is calculated based on sched_clock_cpu(), which is vulnerable
> > to VM scheduling delays.
> 
> This looks like a real problem indeed. The main problem in searching
> for a solution, is that of course not all of the irq time is steal
> time and vice versa. In this case, we could subtract irq_time from
> steal, and add only the steal part time that is in excess. I don't
> think this is 100 % guaranteed, but maybe it is a good approximation.
> 
> Rik, do you have an opinion on this ?

Hrm, on my little Q6600 box, I'm running 3 VMS all pinned to CPU3, all
running hackbench -l zillion, one of them also running crash, staring at
its sole rq->clock_task as I write this, with kernels (3.11.10) on both
host and guest configured as reported.

  clock_task = 631322187004, 
  clock_task = 631387807452, 
  clock_task = 631474214294, 
  clock_task = 631523864893, 
  clock_task = 631604646268, 
  clock_task = 631643276025, 

Maybe 3 VMs isn't enough overload for such a beastly CPU.  Top reports
some very funky utilization numbers, but other than that, the things
seem to work fine here.  perf thinks scheduling works too.

-Mike

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/3] FTM PWM adds regmap and endianness support.

2014-03-20 Thread Xiubo Li

Xiubo Li (3):
  pwm: ftm-pwm: Clean up the code.
  pwm: ftm-pwm: Convert to direct regmap API usage.
  pwm: ftm-pwm: Add big-endian support

 drivers/pwm/pwm-fsl-ftm.c | 96 ++-
 1 file changed, 53 insertions(+), 43 deletions(-)

-- 
1.8.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/3] pwm: ftm-pwm: Convert to direct regmap API usage.

2014-03-20 Thread Xiubo Li
Signed-off-by: Xiubo Li 
---
 drivers/pwm/pwm-fsl-ftm.c | 83 +--
 1 file changed, 44 insertions(+), 39 deletions(-)

diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c
index 4a4ad58..5d999c1 100644
--- a/drivers/pwm/pwm-fsl-ftm.c
+++ b/drivers/pwm/pwm-fsl-ftm.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #define FTM_SC 0x00
@@ -82,7 +83,7 @@ struct fsl_pwm_chip {
unsigned int cnt_select;
unsigned int clk_ps;
 
-   void __iomem *base;
+   struct regmap *regmap;
 
int period_ns;
 
@@ -218,10 +219,11 @@ static unsigned long fsl_pwm_calculate_duty(struct 
fsl_pwm_chip *fpc,
unsigned long period_ns,
unsigned long duty_ns)
 {
-   unsigned long long val, duty;
+   unsigned long long duty;
+   u32 val;
 
-   val = readl(fpc->base + FTM_MOD);
-   duty = duty_ns * (val + 1);
+   regmap_read(fpc->regmap, FTM_MOD, &val);
+   duty = (unsigned long long)duty_ns * (val + 1);
do_div(duty, period_ns);
 
return (unsigned long)duty;
@@ -231,7 +233,7 @@ static int fsl_pwm_config(struct pwm_chip *chip, struct 
pwm_device *pwm,
  int duty_ns, int period_ns)
 {
struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
-   u32 val, period, duty;
+   u32 period, duty;
 
mutex_lock(&fpc->lock);
 
@@ -256,11 +258,9 @@ static int fsl_pwm_config(struct pwm_chip *chip, struct 
pwm_device *pwm,
return -EINVAL;
}
 
-   val = readl(fpc->base + FTM_SC);
-   val &= ~FTM_SC_PS_MASK;
-   val |= fpc->clk_ps;
-   writel(val, fpc->base + FTM_SC);
-   writel(period - 1, fpc->base + FTM_MOD);
+   regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_PS_MASK,
+  fpc->clk_ps);
+   regmap_write(fpc->regmap, FTM_MOD, period - 1);
 
fpc->period_ns = period_ns;
}
@@ -269,8 +269,9 @@ static int fsl_pwm_config(struct pwm_chip *chip, struct 
pwm_device *pwm,
 
duty = fsl_pwm_calculate_duty(fpc, period_ns, duty_ns);
 
-   writel(FTM_CSC_MSB | FTM_CSC_ELSB, fpc->base + FTM_CSC(pwm->hwpwm));
-   writel(duty, fpc->base + FTM_CV(pwm->hwpwm));
+   regmap_write(fpc->regmap, FTM_CSC(pwm->hwpwm),
+FTM_CSC_MSB | FTM_CSC_ELSB);
+   regmap_write(fpc->regmap, FTM_CV(pwm->hwpwm), duty);
 
return 0;
 }
@@ -282,31 +283,28 @@ static int fsl_pwm_set_polarity(struct pwm_chip *chip,
struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
u32 val;
 
-   val = readl(fpc->base + FTM_POL);
+   regmap_read(fpc->regmap, FTM_POL, &val);
 
if (polarity == PWM_POLARITY_INVERSED)
val |= BIT(pwm->hwpwm);
else
val &= ~BIT(pwm->hwpwm);
 
-   writel(val, fpc->base + FTM_POL);
+   regmap_write(fpc->regmap, FTM_POL, val);
 
return 0;
 }
 
 static int fsl_counter_clock_enable(struct fsl_pwm_chip *fpc)
 {
-   u32 val;
int ret;
 
if (fpc->use_count != 0)
return 0;
 
/* select counter clock source */
-   val = readl(fpc->base + FTM_SC);
-   val &= ~FTM_SC_CLK_MASK;
-   val |= FTM_SC_CLK(fpc->cnt_select);
-   writel(val, fpc->base + FTM_SC);
+   regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_CLK_MASK,
+  FTM_SC_CLK(fpc->cnt_select));
 
ret = clk_prepare_enable(fpc->clk[fpc->cnt_select]);
if (ret)
@@ -326,13 +324,10 @@ static int fsl_counter_clock_enable(struct fsl_pwm_chip 
*fpc)
 static int fsl_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
 {
struct fsl_pwm_chip *fpc = to_fsl_chip(chip);
-   u32 val;
int ret;
 
mutex_lock(&fpc->lock);
-   val = readl(fpc->base + FTM_OUTMASK);
-   val &= ~BIT(pwm->hwpwm);
-   writel(val, fpc->base + FTM_OUTMASK);
+   regmap_update_bits(fpc->regmap, FTM_OUTMASK, BIT(pwm->hwpwm), 0);
 
ret = fsl_counter_clock_enable(fpc);
mutex_unlock(&fpc->lock);
@@ -342,8 +337,6 @@ static int fsl_pwm_enable(struct pwm_chip *chip, struct 
pwm_device *pwm)
 
 static void fsl_counter_clock_disable(struct fsl_pwm_chip *fpc)
 {
-   u32 val;
-
/*
 * already disabled, do nothing
 */
@@ -355,9 +348,7 @@ static void fsl_counter_clock_disable(struct fsl_pwm_chip 
*fpc)
return;
 
/* no users left, disable PWM counter clock */
-   val = readl(fpc->base + FTM_SC);
-   val &= ~FTM_SC_CLK_MASK;
-   writel(val, fpc->base + FTM_SC);
+   regmap_update_bits(fpc->regmap, FTM_SC, FTM_SC_CLK_MASK, 0);
 
clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_CNTEN]);
clk_disable_unprepare(fpc->clk[fpc->cnt_select]);
@@ -369,14 +360,12 @@ static void fsl_pwm_disable(st

[PATCH 3/3] pwm: ftm-pwm: Add big-endian support

2014-03-20 Thread Xiubo Li
Now for the following scenarios:

 SoC |  CPU   | FTM-PWM | 'big-endian' property is needed?
-||-|-
Vybrid   |  LE|   LE| No
 LS1 |  LE|   BE| Yes
 LS2 |  LE|   LE| No

Signed-off-by: Xiubo Li 
---
 drivers/pwm/pwm-fsl-ftm.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c
index 5d999c1..9a82741 100644
--- a/drivers/pwm/pwm-fsl-ftm.c
+++ b/drivers/pwm/pwm-fsl-ftm.c
@@ -409,6 +409,7 @@ static struct regmap_config fsl_pwm_regmap_config = {
 
 static int fsl_pwm_probe(struct platform_device *pdev)
 {
+   struct device_node *np = pdev->dev.of_node;
struct fsl_pwm_chip *fpc;
struct resource *res;
void __iomem *base;
@@ -422,6 +423,11 @@ static int fsl_pwm_probe(struct platform_device *pdev)
 
fpc->chip.dev = &pdev->dev;
 
+   if (of_property_read_bool(np, "big-endian"))
+   fsl_pwm_regmap_config.val_format_endian = REGMAP_ENDIAN_BIG;
+   else
+   fsl_pwm_regmap_config.val_format_endian = REGMAP_ENDIAN_NATIVE;
+
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(base))
-- 
1.8.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3] pwm: ftm-pwm: Clean up the code.

2014-03-20 Thread Xiubo Li
Signed-off-by: Xiubo Li 
---
 drivers/pwm/pwm-fsl-ftm.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c
index 420169e..4a4ad58 100644
--- a/drivers/pwm/pwm-fsl-ftm.c
+++ b/drivers/pwm/pwm-fsl-ftm.c
@@ -21,11 +21,10 @@
 #include 
 
 #define FTM_SC 0x00
-#define FTM_SC_CLK_MASK0x3
-#define FTM_SC_CLK_SHIFT   3
-#define FTM_SC_CLK(c)  (((c) + 1) << FTM_SC_CLK_SHIFT)
+#define FTM_SC_CLK_MASK_SHIFT  3
+#define FTM_SC_CLK_MASK(3 << FTM_SC_CLK_MASK_SHIFT)
+#define FTM_SC_CLK(c)  (((c) + 1) << FTM_SC_CLK_MASK_SHIFT)
 #define FTM_SC_PS_MASK 0x7
-#define FTM_SC_PS_SHIFT0
 
 #define FTM_CNT0x04
 #define FTM_MOD0x08
@@ -258,7 +257,7 @@ static int fsl_pwm_config(struct pwm_chip *chip, struct 
pwm_device *pwm,
}
 
val = readl(fpc->base + FTM_SC);
-   val &= ~(FTM_SC_PS_MASK << FTM_SC_PS_SHIFT);
+   val &= ~FTM_SC_PS_MASK;
val |= fpc->clk_ps;
writel(val, fpc->base + FTM_SC);
writel(period - 1, fpc->base + FTM_MOD);
@@ -305,7 +304,7 @@ static int fsl_counter_clock_enable(struct fsl_pwm_chip 
*fpc)
 
/* select counter clock source */
val = readl(fpc->base + FTM_SC);
-   val &= ~(FTM_SC_CLK_MASK << FTM_SC_CLK_SHIFT);
+   val &= ~FTM_SC_CLK_MASK;
val |= FTM_SC_CLK(fpc->cnt_select);
writel(val, fpc->base + FTM_SC);
 
@@ -357,7 +356,7 @@ static void fsl_counter_clock_disable(struct fsl_pwm_chip 
*fpc)
 
/* no users left, disable PWM counter clock */
val = readl(fpc->base + FTM_SC);
-   val &= ~(FTM_SC_CLK_MASK << FTM_SC_CLK_SHIFT);
+   val &= ~FTM_SC_CLK_MASK;
writel(val, fpc->base + FTM_SC);
 
clk_disable_unprepare(fpc->clk[FSL_PWM_CLK_CNTEN]);
-- 
1.8.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v3] USB: Gadget: fsl driver pullup fix

2014-03-20 Thread Suresh Gupta
This fixes the fsl USB gadget driver so that the USB device is
"pulled up" on request only when vbus is powered.

Signed-off-by: Suresh Gupta 
---
Changes from previous version:
* fixed checkpatch error

 drivers/usb/gadget/fsl_udc_core.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/gadget/fsl_udc_core.c 
b/drivers/usb/gadget/fsl_udc_core.c
index 35cb972..5a0f89c 100644
--- a/drivers/usb/gadget/fsl_udc_core.c
+++ b/drivers/usb/gadget/fsl_udc_core.c
@@ -1219,6 +1219,10 @@ static int fsl_pullup(struct usb_gadget *gadget, int 
is_on)
struct fsl_udc *udc;
 
udc = container_of(gadget, struct fsl_udc, gadget);
+
+   if (!udc->vbus_active)
+   return -EOPNOTSUPP;
+
udc->softconnect = (is_on != 0);
if (can_pullup(udc))
fsl_writel((fsl_readl(&dr_regs->usbcmd) | USB_CMD_RUN_STOP),
-- 
1.8.4.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3.13 000/149] 3.13.7-stable review

2014-03-20 Thread Guenter Roeck

On 03/20/2014 05:02 PM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 3.13.7 release.
There are 149 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun Mar 23 00:03:54 UTC 2014.
Anything received after that time might be too late.



Build results:
total: 126 pass: 120 skipped: 4 fail: 2

qemu tests all passed.

There are two new build failures.

Building powerpc:mpc85xx_defconfig ... failed
Building powerpc:mpc85xx_smp_defconfig ... failed

The failure is the same in both cases.

drivers/i2c/busses/i2c-cpm.c: In function 'cpm_i2c_setup':
drivers/i2c/busses/i2c-cpm.c:450:2: error: implicit declaration of function 
'irq_of_parse_and_map' [-Werror=implicit-function-declaration]
drivers/i2c/busses/i2c-cpm.c:461:2: error: implicit declaration of function 
'of_iomap' [-Werror=implicit-function-declaration]

It appears you picked this up from the latest mainline, where the
same builds fail with the same error. The problem was introduced
in mainline between rc6 and rc7.

I have no immediate idea which patch causes the problem.
I can bisect tomorrow if needed.

Guenter

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V4 3/3] cpufreq: Make cpufreq_notify_transition & cpufreq_notify_post_transition static

2014-03-20 Thread Viresh Kumar
cpufreq_notify_transition() and cpufreq_notify_post_transition() shouldn't be
called directly by cpufreq drivers anymore and so these should be marked static.

Signed-off-by: Viresh Kumar 
---
 drivers/cpufreq/cpufreq.c | 6 ++
 include/linux/cpufreq.h   | 4 
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b63e7e4..7b1feff 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -331,16 +331,15 @@ static void __cpufreq_notify_transition(struct 
cpufreq_policy *policy,
  * function. It is called twice on all CPU frequency changes that have
  * external effects.
  */
-void cpufreq_notify_transition(struct cpufreq_policy *policy,
+static void cpufreq_notify_transition(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs, unsigned int state)
 {
for_each_cpu(freqs->cpu, policy->cpus)
__cpufreq_notify_transition(policy, freqs, state);
 }
-EXPORT_SYMBOL_GPL(cpufreq_notify_transition);
 
 /* Do post notifications when there are chances that transition has failed */
-void cpufreq_notify_post_transition(struct cpufreq_policy *policy,
+static void cpufreq_notify_post_transition(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs, int transition_failed)
 {
cpufreq_notify_transition(policy, freqs, CPUFREQ_POSTCHANGE);
@@ -351,7 +350,6 @@ void cpufreq_notify_post_transition(struct cpufreq_policy 
*policy,
cpufreq_notify_transition(policy, freqs, CPUFREQ_PRECHANGE);
cpufreq_notify_transition(policy, freqs, CPUFREQ_POSTCHANGE);
 }
-EXPORT_SYMBOL_GPL(cpufreq_notify_post_transition);
 
 void cpufreq_freq_transition_begin(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs)
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 263173d..826830b 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -338,10 +338,6 @@ static inline void cpufreq_resume(void) {}
 int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list);
 int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list);
 
-void cpufreq_notify_transition(struct cpufreq_policy *policy,
-   struct cpufreq_freqs *freqs, unsigned int state);
-void cpufreq_notify_post_transition(struct cpufreq_policy *policy,
-   struct cpufreq_freqs *freqs, int transition_failed);
 void cpufreq_freq_transition_begin(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs);
 void cpufreq_freq_transition_end(struct cpufreq_policy *policy,
-- 
1.7.12.rc2.18.g61b472e

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V4 1/3] cpufreq: Make sure frequency transitions are serialized

2014-03-20 Thread Viresh Kumar
From: "Srivatsa S. Bhat" 

Whenever we change the frequency of a CPU, we call the PRECHANGE and POSTCHANGE
notifiers. They must be serialized, i.e. PRECHANGE and POSTCHANGE notifiers
should strictly alternate, thereby preventing two different sets of PRECHANGE or
POSTCHANGE notifiers from interleaving arbitrarily.

The following examples illustrate why this is important:

Scenario 1:
---
A thread reading the value of cpuinfo_cur_freq, will call
__cpufreq_cpu_get()->cpufreq_out_of_sync()->cpufreq_notify_transition()

The ondemand governor can decide to change the frequency of the CPU at the same
time and hence it can end up sending the notifications via ->target().

If the notifiers are not serialized, the following sequence can occur:
- PRECHANGE Notification for freq A (from cpuinfo_cur_freq)
- PRECHANGE Notification for freq B (from target())
- Freq changed by target() to B
- POSTCHANGE Notification for freq B
- POSTCHANGE Notification for freq A

We can see from the above that the last POSTCHANGE Notification happens for freq
A but the hardware is set to run at freq B.

Where would we break then?: adjust_jiffies() in cpufreq.c & cpufreq_callback()
in arch/arm/kernel/smp.c (which also adjusts the jiffies). All the
loops_per_jiffy calculations will get messed up.

Scenario 2:
---
The governor calls __cpufreq_driver_target() to change the frequency. At the
same time, if we change scaling_{min|max}_freq from sysfs, it will end up
calling the governor's CPUFREQ_GOV_LIMITS notification, which will also call
__cpufreq_driver_target(). And hence we end up issuing concurrent calls to
->target().

Typically, platforms have the following logic in their ->target() routines:
(Eg: cpufreq-cpu0, omap, exynos, etc)

A. If new freq is more than old: Increase voltage
B. Change freq
C. If new freq is less than old: decrease voltage

Now, if the two concurrent calls to ->target() are X and Y, where X is trying to
increase the freq and Y is trying to decrease it, we get the following race
condition:

X.A: voltage gets increased for larger freq
Y.A: nothing happens
Y.B: freq gets decreased
Y.C: voltage gets decreased
X.B: freq gets increased
X.C: nothing happens

Thus we can end up setting a freq which is not supported by the voltage we have
set. That will probably make the clock to the CPU unstable and the system might
not work properly anymore.

This patch introduces a set of synchronization primitives to serialize frequency
transitions, which are to be used as shown below:

cpufreq_freq_transition_begin();

//Perform the frequency change

cpufreq_freq_transition_end();

The _begin() call sends the PRECHANGE notification whereas the _end() call sends
the POSTCHANGE notification. Also, all the necessary synchronization is handled
within these calls. In particular, even drivers which set the ASYNC_NOTIFICATION
flag can also use these APIs for performing frequency transitions (ie., you can
call _begin() from one task, and call the corresponding _end() from a different
task).

The actual synchronization underneath is not that complicated:

The key challenge is to allow drivers to begin the transition from one thread
and end it in a completely different thread (this is to enable drivers that do
asynchronous POSTCHANGE notification from bottom-halves, to also use the same
interface).

To achieve this, a 'transition_ongoing' flag, a 'transition_lock' spinlock and a
wait-queue are added per-policy. The flag and the wait-queue are used in
conjunction to create an "uninterrupted flow" from _begin() to _end(). The
spinlock is used to ensure that only one such "flow" is in flight at any given
time. Put together, this provides us all the necessary synchronization.

Based-on-patch-by: Viresh Kumar 
Signed-off-by: Srivatsa S. Bhat 
Signed-off-by: Viresh Kumar 
---

I have kept your Authorship for this patch as is and did few modifications:
-  removed 'state' parameter from begin/end routines.
- added 'transition_failed' parameter to end routine.
- changed mutex with spinlock as discussed earlier.
- Added WARN_ON() as discussed.
- Exported these new routines.
- Removed locks from end.

 drivers/cpufreq/cpufreq.c | 37 +
 include/linux/cpufreq.h   | 10 ++
 2 files changed, 47 insertions(+)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b349406..4279cc9 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -353,6 +353,41 @@ void cpufreq_notify_post_transition(struct cpufreq_policy 
*policy,
 }
 EXPORT_SYMBOL_GPL(cpufreq_notify_post_transition);
 
+void cpufreq_freq_transition_begin(struct cpufreq_policy *policy,
+   struct cpufreq_freqs *freqs)
+{
+wait:
+   wait_event(policy->transition_wait, !policy->transition_ongoing);
+
+   spin_lock(&policy->transition_lock);
+
+   if (unlikely(policy->transition_ongoing)) {
+   spin_unlock(&policy->transition_lock);
+   goto wait;
+   }
+
+  

[PATCH V4 2/3] cpufreq: Convert existing drivers to use cpufreq_freq_transition_{begin|end}

2014-03-20 Thread Viresh Kumar
CPUFreq core has new infrastructure that would guarantee serialized calls to
target() or target_index() callbacks. These are called
cpufreq_freq_transition_begin() and cpufreq_freq_transition_end().

This patch converts existing drivers to use these new set of routines.

Signed-off-by: Viresh Kumar 
---
 drivers/cpufreq/cpufreq-nforce2.c| 4 ++--
 drivers/cpufreq/cpufreq.c| 9 -
 drivers/cpufreq/exynos5440-cpufreq.c | 4 ++--
 drivers/cpufreq/gx-suspmod.c | 4 ++--
 drivers/cpufreq/integrator-cpufreq.c | 4 ++--
 drivers/cpufreq/longhaul.c   | 4 ++--
 drivers/cpufreq/pcc-cpufreq.c| 4 ++--
 drivers/cpufreq/powernow-k6.c| 4 ++--
 drivers/cpufreq/powernow-k7.c| 4 ++--
 drivers/cpufreq/powernow-k8.c| 4 ++--
 drivers/cpufreq/s3c24xx-cpufreq.c| 4 ++--
 drivers/cpufreq/sh-cpufreq.c | 4 ++--
 drivers/cpufreq/unicore2-cpufreq.c   | 4 ++--
 13 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-nforce2.c 
b/drivers/cpufreq/cpufreq-nforce2.c
index a05b876..379cc2c 100644
--- a/drivers/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -270,7 +270,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
pr_debug("Old CPU frequency %d kHz, new %d kHz\n",
   freqs.old, freqs.new);
 
-   cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
+   cpufreq_freq_transition_begin(policy, &freqs);
 
/* Disable IRQs */
/* local_irq_save(flags); */
@@ -285,7 +285,7 @@ static int nforce2_target(struct cpufreq_policy *policy,
/* Enable IRQs */
/* local_irq_restore(flags); */
 
-   cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
+   cpufreq_freq_transition_end(policy, &freqs, false);
 
return 0;
 }
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 4279cc9..b63e7e4 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1503,8 +1503,8 @@ static void cpufreq_out_of_sync(unsigned int cpu, 
unsigned int old_freq,
policy = per_cpu(cpufreq_cpu_data, cpu);
read_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
-   cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
-   cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
+   cpufreq_freq_transition_begin(policy, &freqs);
+   cpufreq_freq_transition_end(policy, &freqs, false);
 }
 
 /**
@@ -1864,8 +1864,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n",
 __func__, policy->cpu, freqs.old, freqs.new);
 
-   cpufreq_notify_transition(policy, &freqs,
-   CPUFREQ_PRECHANGE);
+   cpufreq_freq_transition_begin(policy, &freqs);
}
 
retval = cpufreq_driver->target_index(policy, index);
@@ -1874,7 +1873,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
   __func__, retval);
 
if (notify)
-   cpufreq_notify_post_transition(policy, &freqs, retval);
+   cpufreq_freq_transition_end(policy, &freqs, retval);
}
 
 out:
diff --git a/drivers/cpufreq/exynos5440-cpufreq.c 
b/drivers/cpufreq/exynos5440-cpufreq.c
index 7f776aa..3655e7d 100644
--- a/drivers/cpufreq/exynos5440-cpufreq.c
+++ b/drivers/cpufreq/exynos5440-cpufreq.c
@@ -219,7 +219,7 @@ static int exynos_target(struct cpufreq_policy *policy, 
unsigned int index)
freqs.old = policy->cur;
freqs.new = freq_table[index].frequency;
 
-   cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
+   cpufreq_freq_transition_begin(policy, &freqs);
 
/* Set the target frequency in all C0_3_PSTATE register */
for_each_cpu(i, policy->cpus) {
@@ -258,7 +258,7 @@ static void exynos_cpufreq_work(struct work_struct *work)
dev_crit(dvfs_info->dev, "New frequency out of range\n");
freqs.new = freqs.old;
}
-   cpufreq_notify_transition(policy, &freqs, CPUFREQ_POSTCHANGE);
+   cpufreq_freq_transition_end(policy, &freqs, false);
 
cpufreq_cpu_put(policy);
mutex_unlock(&cpufreq_lock);
diff --git a/drivers/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c
index d83e826..fe85673 100644
--- a/drivers/cpufreq/gx-suspmod.c
+++ b/drivers/cpufreq/gx-suspmod.c
@@ -265,7 +265,7 @@ static void gx_set_cpuspeed(struct cpufreq_policy *policy, 
unsigned int khz)
 
freqs.new = new_khz;
 
-   cpufreq_notify_transition(policy, &freqs, CPUFREQ_PRECHANGE);
+   cpufreq_freq_transition_begin(policy, &freqs);
local_irq_save(flags);
 
if (new_khz != stock_freq) {
@@ -314,7 +314,7 @@ static void gx_set_cpuspeed(struct cpufreq_policy *policy, 
unsigned int khz)
 
gx_params->pci_suscfg = suscfg;
 
-

[PATCH V4 0/3] cpufreq: Introduce cpufreq_freq_transition_{begin|end}()

2014-03-20 Thread Viresh Kumar
Whenever we change the frequency of a CPU, we call the PRECHANGE and POSTCHANGE
notifiers. They must be serialized, i.e. PRECHANGE and POSTCHANGE notifiers
should strictly alternate, thereby preventing two different sets of PRECHANGE or
POSTCHANGE notifiers from interleaving arbitrarily.

The following examples illustrate why this is important:

Scenario 1:
---
A thread reading the value of cpuinfo_cur_freq, will call
__cpufreq_cpu_get()->cpufreq_out_of_sync()->cpufreq_notify_transition()

The ondemand governor can decide to change the frequency of the CPU at the same
time and hence it can end up sending the notifications via ->target().

If the notifiers are not serialized, the following sequence can occur:
- PRECHANGE Notification for freq A (from cpuinfo_cur_freq)
- PRECHANGE Notification for freq B (from target())
- Freq changed by target() to B
- POSTCHANGE Notification for freq B
- POSTCHANGE Notification for freq A

We can see from the above that the last POSTCHANGE Notification happens for freq
A but the hardware is set to run at freq B.

Where would we break then?: adjust_jiffies() in cpufreq.c & cpufreq_callback()
in arch/arm/kernel/smp.c (which also adjusts the jiffies). All the
loops_per_jiffy calculations will get messed up.

Scenario 2:
---
The governor calls __cpufreq_driver_target() to change the frequency. At the
same time, if we change scaling_{min|max}_freq from sysfs, it will end up
calling the governor's CPUFREQ_GOV_LIMITS notification, which will also call
__cpufreq_driver_target(). And hence we end up issuing concurrent calls to
->target().

Typically, platforms have the following logic in their ->target() routines:
(Eg: cpufreq-cpu0, omap, exynos, etc)

A. If new freq is more than old: Increase voltage
B. Change freq
C. If new freq is less than old: decrease voltage

Now, if the two concurrent calls to ->target() are X and Y, where X is trying to
increase the freq and Y is trying to decrease it, we get the following race
condition:

X.A: voltage gets increased for larger freq
Y.A: nothing happens
Y.B: freq gets decreased
Y.C: voltage gets decreased
X.B: freq gets increased
X.C: nothing happens

Thus we can end up setting a freq which is not supported by the voltage we have
set. That will probably make the clock to the CPU unstable and the system might
not work properly anymore.


This patchset introduces a new set of routines cpufreq_freq_transition_begin()
and cpufreq_freq_transition_end(), which will guarantee that calls to frequency
transition routines are serialized. Later patches force other drivers to use
these new routines.

Srivatsa S. Bhat (1):
  cpufreq: Make sure frequency transitions are serialized

Viresh Kumar (2):
  cpufreq: Convert existing drivers to use
cpufreq_freq_transition_{begin|end}
  cpufreq: Make cpufreq_notify_transition &
cpufreq_notify_post_transition static

 drivers/cpufreq/cpufreq-nforce2.c|  4 +--
 drivers/cpufreq/cpufreq.c| 52 +---
 drivers/cpufreq/exynos5440-cpufreq.c |  4 +--
 drivers/cpufreq/gx-suspmod.c |  4 +--
 drivers/cpufreq/integrator-cpufreq.c |  4 +--
 drivers/cpufreq/longhaul.c   |  4 +--
 drivers/cpufreq/pcc-cpufreq.c|  4 +--
 drivers/cpufreq/powernow-k6.c|  4 +--
 drivers/cpufreq/powernow-k7.c|  4 +--
 drivers/cpufreq/powernow-k8.c|  4 +--
 drivers/cpufreq/s3c24xx-cpufreq.c|  4 +--
 drivers/cpufreq/sh-cpufreq.c |  4 +--
 drivers/cpufreq/unicore2-cpufreq.c   |  4 +--
 include/linux/cpufreq.h  | 12 ++---
 14 files changed, 76 insertions(+), 36 deletions(-)

-- 
1.7.12.rc2.18.g61b472e

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] mtd: gpmi: make blockmark swapping optional

2014-03-20 Thread Huang Shijie

于 2014年03月19日 21:23, y...@karo-electronics.de 写道:

+   /* Set up swap_block_mark, must be set before the gpmi_set_geometry() */
+   if (!of_property_read_bool(this->dev->of_node, "fsl,no-blockmark-swap"))
+   this->swap_block_mark = !GPMI_IS_MX23(this);



Our ROM guy had confirmed that the Rom will disable the swapping if we 
set the DISBBM bit.

But please do not change any logic for imx23/imx28.

I really do not know what's the benefit we can get from this patch.
Please send the new version about this patch if you want this feature.


thanks
Huang Shijie

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/3] Volatile Ranges (v11)

2014-03-20 Thread Minchan Kim
On Thu, Mar 20, 2014 at 09:13:59AM +0100, Jan Kara wrote:
> On Thu 20-03-14 10:09:54, Minchan Kim wrote:
> > Hello,
> > 
> > On Wed, Mar 19, 2014 at 11:12:02AM +0100, Jan Kara wrote:
> > > On Wed 19-03-14 09:49:18, Minchan Kim wrote:
> > > > On Tue, Mar 18, 2014 at 11:07:50AM -0700, John Stultz wrote:
> > > > > On Tue, Mar 18, 2014 at 8:11 AM, Minchan Kim  
> > > > > wrote:
> > > > > > 1) SIGBUS
> > > > > >
> > > > > > It's one of the arguable issue because some user want to get a
> > > > > > SIGBUS(ex, Firefox) while other want a just zero page(ex, Google
> > > > > > address sanitizer) without signal so it should be option.
> > > > > >
> > > > > > int vrange(start, len, VRANGE_VOLATILE|VRANGE_ZERO, 
> > > > > > &purged);
> > > > > > int vrange(start, len, VRANGE_VOLATILE|VRANGE_SIGNAL, 
> > > > > > &purged);
> > > > > 
> > > > > So, the zero-fill on volatile access feels like a *very* special case
> > > > > to me, since a null page could be valid data in many cases. Since
> > > > > support/interest for volatile ranges has been middling at best, I want
> > > > > to start culling the stranger use cases. I'm open in the future to
> > > > > adding a special flag or something if it really make sense, but at
> > > > > this point, lets just get the more general volatile range use cases
> > > > > supported.
> > > > 
> > > > I'm not sure it's special case. Because some user could reserve
> > > > a big volatile VMA and want to use the range by circle queue for
> > > > caching so overwriting could happen easily.
> > > > We should call vrange(NOVOLATILE) to prevent SIGBUS right before
> > > > overwriting. I feel it's unnecessary overhead and we could avoid
> > > > the cost with VRANGE_ZERO.
> > > > Do you think this usecase would be rare?
> > >   If I understand it correctly the buffer would be volatile all the time
> > > and userspace would like to opportunistically access it. Hum, but then 
> > > with
> > > your automatic zero-filling it could see half of the page with data and
> > > half of the page zeroed out (the page got evicted in the middle of
> > > userspace reading it). I don't think that's a very comfortable interface 
> > > to
> > > work with (you would have to very carefully verify the data you've read is
> > > really valid). And frankly in most cases I'm afraid the application would
> > > fail to do proper verification and crash randomly under memory pressure. 
> > > So
> > > I wouldn't provide VRANGE_ZERO unless I come across real people for which
> > > avoiding marking the range as NONVOLATILE is a big deal and they are OK 
> > > with
> > > handling all the odd situations that can happen.
> > 
> > Please consider the following use case.
> > 
> > Let's assume a big volatile cache.
> > If there is request for cache, it should find a object in a cache
> > and if it found, it should call vrange(NOVOLATILE) right before
> > passing it to the user and investigate it was purged or not.
> > If it wasn't purged, cache manager could pass the object to the user.
> > But it's circular cache so if there is no request from user, cache manager
> > always overwrites objects so it could encounter SIGBUS easily
> > so as current semantics, cache manager always should call vrange(NOVOLATILE)
> > right before the overwriting. Otherwise, it should register SIGBUS handler
> > to unmark volatile by page unit. SIGH.
> > 
> > If we support VRANGE_ZERO, cache manager could overwrite object without
> > SIGBUS handling or vrange(NOVOLATILE) call. Just need is vrange(NOVOLATILE)
> > call while cache manager pass it to the user.
>   OK, that makes some sense but I don't think we have to implement this
> functionality in the beginning...

Yeb, I am not strong against the idea which starts syscall as simple one
but make room for future but I believe scenario I mentioned is one of
typical usecase for volatile cache and it could avoid vrange(NOVOLATILE)
which is heavier than vrange(VOLATILE) because NOVOLATILE should enumerate
all of ptes in the range at current implementation so reducing NOVOLATILE
call looks important to me.


>   Honza
> -- 
> Jan Kara 
> SUSE Labs, CR
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majord...@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: mailto:"d...@kvack.org";> em...@kvack.org 

-- 
Kind regards,
Minchan Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 3.10 00/85] 3.10.34-stable review

2014-03-20 Thread Guenter Roeck

On 03/20/2014 05:09 PM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 3.10.34 release.
There are 85 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun Mar 23 00:05:33 UTC 2014.
Anything received after that time might be too late.


Build results:
total: 126 pass: 121 skipped: 4 fail: 1

qemu tests all passed.

Results are as expected. Details are available at 
http://server.roeck-us.net:8010/builders.

Guenter

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[git pull] drm fixes

2014-03-20 Thread Dave Airlie

Hi Linus,

Some final few intel fixes, all regressions, all stable cc, and one exynos 
oops fixer, the biggest is probably the intel display error irqs one, but 
it seems to fix a few crashes on startup, and one use after free in drm 
core.

Dave.

The following changes since commit dcb99fd9b08cfe1afe426af4d8d3cbc429190f15:

  Linux 3.14-rc7 (2014-03-16 18:51:24 -0700)

are available in the git repository at:

  git://people.freedesktop.org/~airlied/linux drm-fixes

for you to fetch changes up to 004e5cf743086990e5fc04a14437b3966d7fa9a2:

  Merge branch 'exynos-drm-fixes' of 
git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos into drm-fixes 
(2014-03-20 13:20:00 +1000)



Ben Widawsky (1):
  drm/i915: Fix PSR programming

Chris Wilson (1):
  drm/i915: Disable stolen memory when DMAR is active

Daniel Kurtz (1):
  drm/exynos: Fix (more) freeing issues in exynos_drm_drv.c

Daniel Vetter (2):
  drm/i915: Don't enable display error interrupts from the start
  drm: Fix use-after-free in the shadow-attache exit code

Dave Airlie (3):
  Merge tag 'drm-intel-fixes-2014-03-17' of 
git://anongit.freedesktop.org/drm-intel into drm-fixes
  Merge tag 'drm-intel-fixes-2014-03-19' of 
git://anongit.freedesktop.org/drm-intel into drm-fixes
  Merge branch 'exynos-drm-fixes' of 
git://git.kernel.org/.../daeinki/drm-exynos into drm-fixes

Jani Nikula (1):
  Revert "drm/i915: don't touch the VDD when disabling the panel"

Ville Syrjälä (2):
  drm/i915: Add a workaround for HSW scanline counter weirdness
  drm/i915: Fix scanline counter fixup on BDW

 drivers/gpu/drm/drm_pci.c   |  2 +-
 drivers/gpu/drm/exynos/exynos_drm_drv.c | 10 +++--
 drivers/gpu/drm/i915/i915_gem_stolen.c  |  7 
 drivers/gpu/drm/i915/i915_irq.c | 71 +++--
 drivers/gpu/drm/i915/intel_ddi.c|  1 +
 drivers/gpu/drm/i915/intel_dp.c | 12 +-
 6 files changed, 67 insertions(+), 36 deletions(-)

Re: [PATCH 3.4 00/35] 3.4.84-stable review

2014-03-20 Thread Guenter Roeck

On 03/20/2014 05:10 PM, Greg Kroah-Hartman wrote:

This is the start of the stable review cycle for the 3.4.84 release.
There are 35 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun Mar 23 00:10:42 UTC 2014.
Anything received after that time might be too late.



Build results:
total: 119 pass: 97 skipped: 18 fail: 4

qemu tests all passed.

Results are as expected. Details are available at 
http://server.roeck-us.net:8010/builders.

Guenter

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Tasks stuck in futex code (in 3.14-rc6)

2014-03-20 Thread Linus Torvalds
On Thu, Mar 20, 2014 at 9:55 PM, Srikar Dronamraju
 wrote:
>
> I reverted commits 99b60ce6 and b0c29f79. Then applied the patches in
> the above url. The last one had a reject but it was pretty
> straightforward to resolve it. After this, specjbb completes.
>
> So reverting and applying v3 3/4 and 4/4 patches works for me.

Ok, I verified that the above ends up resulting in the same tree as
the minimal patch I sent out, modulo (a) some comments and (b) an
#ifdef CONFIG_SMP in futex_get_mm() that doesn't really matter.

So I committed the minimal patch with your tested-by.

 Linus
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v11 20/27] iommu/exynos: allow having multiple System MMUs for a master H/W

2014-03-20 Thread Cho KyongHo
On Thu, 20 Mar 2014 11:54:58 +0100, Tomasz Figa wrote:
> On 20.03.2014 11:22, Cho KyongHo wrote:
> > On Wed, 19 Mar 2014 16:14:57 +0100, Tomasz Figa wrote:
> >> On 19.03.2014 14:20, Tomasz Figa wrote:
> >>> On 19.03.2014 01:39, Cho KyongHo wrote:
>  On Tue, 18 Mar 2014 15:26:48 +0100, Tomasz Figa wrote:
> >
> >
> > On 18.03.2014 14:01, Cho KyongHo wrote:
> >> On Fri, 14 Mar 2014 17:12:03 +0100, Tomasz Figa wrote:
> >>> Hi KyongHo,
> >>>
> >>> On 14.03.2014 06:10, Cho KyongHo wrote:
>  Some master device descriptor like fimc-is which is an abstraction
>  of very complex H/W may have multiple System MMUs. For those devices,
>  the design of the link between System MMU and its master H/W is
>  needed
>  to be reconsidered.
> 
>  A link structure, sysmmu_list_data is introduced that provides a link
>  to master H/W and that has a pointer to the device descriptor of a
>  System MMU. Given a device descriptor of a master H/W, it is possible
>  to traverse all System MMUs that must be controlled along with the
>  master H/W.
> >>>
> >>> NAK.
> >>>
> >>> A device driver should handle particular hardware instances
> >>> separately,
> >>> without abstracting a virtual hardware instance consisting of multiple
> >>> physical ones.
> >>>
> >>> If such abstraction is needed, it should be done above the
> >>> exynos-iommu
> >>> driver, e.g. by something like iommu-composite driver that would
> >>> aggregate several IOMMUs. Keep in mind that such IOMMUs in a group
> >>> could
> >>> be different, e.g. different Exynos SysMMU versions or even completely
> >>> different IPs handled by different drivers.
> >>>
> >>> Still, I don't think there is a real need for such abstraction.
> >>> Instead,
> >>> related drivers shall be fixed to properly handle multiple memory
> >>> masters and their IOMMUs.
> >>>
> >>
> >> G2D, Scalers and FIMD of Exynos5420 have 2 System MMUs while another
> >> SoC like
> >> Exynos5250 does not.
> >>
> >> I don't understand why you are negative to this approach.
> >> This is simpler than the others.
> >>
> >> Let me show you an example.
> >> FIMC-IS driver just controls MCU in FIMC-IS subsystem and the
> >> firmware of
> >> the MCU controls all other peripherals in the subsystem. Each
> >> peripherals
> >> have their own System MMU. Moreover, the configuration of the
> >> peripherals
> >> varies according to the SoCs.
> >>
> >> If System MMU driver accepts multiple masters, everything is done in
> >> DT.
> >> But I worry that it is not easy if System MMU driver does not support
> >> multiple masters.
> >
> > I believe I have stated enough reasons why this kind of implementation
> > is bad. I'm not going to waste time repeating myself.
> >
> > Your concerns presented above are valid, however they are not related to
> > what is wrong with this patch. I have given you two proper ways to
> > handle this, none should be forced upon particular IOMMU master drivers
> > - their authors should have the chance to select the method that works
> > best for them.
> >
> 
>  I still don't understand why you think this patch is wrong.
>  I think this is the best way not to think for all the driver developers
>  about other things than their business logic.
> >>>
> >>> I agree, but one of the ways I proposed (an iommu-composite layer above
> >>> the IOMMU low level drivers) doesn't add any extra responsibility of
> >>> driver developers.
> >>>
> >>> Moreover, it's this kind of business logic in low level drivers that is
> >>> adding more responsibility, because it introduces additional complexity
> >>> and makes the driver harder to read, maintain and extend in future.
> >>>
> 
>  This does not hurt anyone and I think this is good enough.
> 
> >>>
> >>> Well, it is barely good enough. It is a good practice to make a low
> >>> level driver handle a single device instance and this is how Linux
> >>> driver model is designed.
> >>>
> >>> Moreover, a single device tree node _must_ represent a single hardware
> >>> block, so you can't group multiple SysMMUs into a single device tree node.
> >>>
> >>
> >> OK, you add nodes for single SysMMUs devices which is fine, sorry. I was
> >> under impression that one kernel device (struct device) corresponds to
> >> multiple SysMMUs, but this was before your patches, sorry. So one issue
> >> less, but it's still not good.
> >>
> >
> > Ok. Understood why you have mentioned such.
> >
> >>> Furthermore, if you force grouping of SysMMUs into a single virtual one,
> >>> you enforce using the same address space for all masters of some
> >>> particular hardware blocks, while potentially driver developers would
> >>> like to separate them.
>

[PATCH v7 1/2] Tracepoint cleanup: remove unused API functions

2014-03-20 Thread Mathieu Desnoyers
After the following commit:

commit b75ef8b44b1cb95f5a26484b0e2fe37a63b12b44
Author: Mathieu Desnoyers 
Date:   Wed Aug 10 15:18:39 2011 -0400

Tracepoint: Dissociate from module mutex

The following functions became unnecessary:

- tracepoint_probe_register_noupdate,
- tracepoint_probe_unregister_noupdate,
- tracepoint_probe_update_all.

In fact, none of the in-kernel tracers, nor LTTng, nor SystemTAP use
them. Remove those.

Moreover, the functions:

- tracepoint_iter_start,
- tracepoint_iter_next,
- tracepoint_iter_stop,
- tracepoint_iter_reset.

are unused by in-kernel tracers, LTTng and SystemTAP. Remove those too.

Signed-off-by: Mathieu Desnoyers 
CC: Steven Rostedt 
CC: Ingo Molnar 
CC: Frederic Weisbecker 
CC: Andrew Morton 
CC: Frank Ch. Eigler 
CC: Johannes Berg 
---
 include/linux/tracepoint.h |   18 
 kernel/tracepoint.c|  221 +---
 2 files changed, 5 insertions(+), 234 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 7159a0a..812b255 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -48,12 +48,6 @@ extern int tracepoint_probe_register(const char *name, void 
*probe, void *data);
 extern int
 tracepoint_probe_unregister(const char *name, void *probe, void *data);
 
-extern int tracepoint_probe_register_noupdate(const char *name, void *probe,
- void *data);
-extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe,
-   void *data);
-extern void tracepoint_probe_update_all(void);
-
 #ifdef CONFIG_MODULES
 struct tp_module {
struct list_head list;
@@ -68,18 +62,6 @@ static inline bool trace_module_has_bad_taint(struct module 
*mod)
 }
 #endif /* CONFIG_MODULES */
 
-struct tracepoint_iter {
-#ifdef CONFIG_MODULES
-   struct tp_module *module;
-#endif /* CONFIG_MODULES */
-   struct tracepoint * const *tracepoint;
-};
-
-extern void tracepoint_iter_start(struct tracepoint_iter *iter);
-extern void tracepoint_iter_next(struct tracepoint_iter *iter);
-extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
-extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
-
 /*
  * tracepoint_synchronize_unregister must be called between the last tracepoint
  * probe unregistration and the end of module exit to make sure there is no
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index 031cc56..f27e5f0 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -66,10 +66,7 @@ struct tracepoint_entry {
 };
 
 struct tp_probes {
-   union {
-   struct rcu_head rcu;
-   struct list_head list;
-   } u;
+   struct rcu_head rcu;
struct tracepoint_func probes[0];
 };
 
@@ -82,7 +79,7 @@ static inline void *allocate_probes(int count)
 
 static void rcu_free_old_probes(struct rcu_head *head)
 {
-   kfree(container_of(head, struct tp_probes, u.rcu));
+   kfree(container_of(head, struct tp_probes, rcu));
 }
 
 static inline void release_probes(struct tracepoint_func *old)
@@ -90,7 +87,7 @@ static inline void release_probes(struct tracepoint_func *old)
if (old) {
struct tp_probes *tp_probes = container_of(old,
struct tp_probes, probes[0]);
-   call_rcu_sched(&tp_probes->u.rcu, rcu_free_old_probes);
+   call_rcu_sched(&tp_probes->rcu, rcu_free_old_probes);
}
 }
 
@@ -438,198 +435,6 @@ int tracepoint_probe_unregister(const char *name, void 
*probe, void *data)
 }
 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 
-static LIST_HEAD(old_probes);
-static int need_update;
-
-static void tracepoint_add_old_probes(void *old)
-{
-   need_update = 1;
-   if (old) {
-   struct tp_probes *tp_probes = container_of(old,
-   struct tp_probes, probes[0]);
-   list_add(&tp_probes->u.list, &old_probes);
-   }
-}
-
-/**
- * tracepoint_probe_register_noupdate -  register a probe but not connect
- * @name: tracepoint name
- * @probe: probe handler
- *
- * caller must call tracepoint_probe_update_all()
- */
-int tracepoint_probe_register_noupdate(const char *name, void *probe,
-  void *data)
-{
-   struct tracepoint_func *old;
-
-   mutex_lock(&tracepoints_mutex);
-   old = tracepoint_add_probe(name, probe, data);
-   if (IS_ERR(old)) {
-   mutex_unlock(&tracepoints_mutex);
-   return PTR_ERR(old);
-   }
-   tracepoint_add_old_probes(old);
-   mutex_unlock(&tracepoints_mutex);
-   return 0;
-}
-EXPORT_SYMBOL_GPL(tracepoint_probe_register_noupdate);
-
-/**
- * tracepoint_probe_unregister_noupdate -  remove a probe but not disconnect
- * @name: tracepoint name
- * @probe: probe function pointer
- *
- * caller must call tracepoint_probe_update_all()
- */
-int tracepoint_probe_unregister_noupdate(const 

[PATCH v7 2/2] Tracepoint: register/unregister struct tracepoint

2014-03-20 Thread Mathieu Desnoyers
Register/unregister tracepoint probes with struct tracepoint pointer
rather than tracepoint name.

This change, which vastly simplifies tracepoint.c, has been proposed by
Steven Rostedt.

From this point on, the tracers need to pass a struct tracepoint pointer
to probe register/unregister. A probe can now only be connected to a
tracepoint that exists. Moreover, tracers are responsible for
unregistering the probe before the module containing its associated
tracepoint is unloaded.

Signed-off-by: Mathieu Desnoyers 
CC: Steven Rostedt 
CC: Ingo Molnar 
CC: Frederic Weisbecker 
CC: Andrew Morton 
CC: Frank Ch. Eigler 
CC: Johannes Berg 
---
 include/linux/ftrace_event.h |1 +
 include/linux/tracepoint.h   |   42 ++--
 include/trace/ftrace.h   |2 +
 kernel/trace/trace_events.c  |8 +-
 kernel/tracepoint.c  |  524 --
 5 files changed, 275 insertions(+), 302 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4e4cc28..1592c1c 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -230,6 +230,7 @@ struct ftrace_event_call {
struct list_headlist;
struct ftrace_event_class *class;
char*name;
+   struct tracepoint   *tp;
struct trace_event  event;
const char  *print_fmt;
struct event_filter *filter;
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 812b255..3144d12 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -6,7 +6,7 @@
  *
  * See Documentation/trace/tracepoints.txt.
  *
- * (C) Copyright 2008 Mathieu Desnoyers 
+ * Copyright (C) 2008-2014 Mathieu Desnoyers 
  *
  * Heavily inspired from the Linux Kernel Markers.
  *
@@ -21,6 +21,7 @@
 
 struct module;
 struct tracepoint;
+struct notifier_block;
 
 struct tracepoint_func {
void *func;
@@ -35,18 +36,12 @@ struct tracepoint {
struct tracepoint_func __rcu *funcs;
 };
 
-/*
- * Connect a probe to a tracepoint.
- * Internal API, should not be used directly.
- */
-extern int tracepoint_probe_register(const char *name, void *probe, void 
*data);
-
-/*
- * Disconnect a probe from a tracepoint.
- * Internal API, should not be used directly.
- */
-extern int
-tracepoint_probe_unregister(const char *name, void *probe, void *data);
+extern
+int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data);
+extern
+int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void 
*data);
+void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
+   void *priv);
 
 #ifdef CONFIG_MODULES
 struct tp_module {
@@ -54,12 +49,25 @@ struct tp_module {
unsigned int num_tracepoints;
struct tracepoint * const *tracepoints_ptrs;
 };
+
 bool trace_module_has_bad_taint(struct module *mod);
+int register_tracepoint_module_notifier(struct notifier_block *nb);
+int unregister_tracepoint_module_notifier(struct notifier_block *nb);
 #else
 static inline bool trace_module_has_bad_taint(struct module *mod)
 {
return false;
 }
+static inline
+int register_tracepoint_module_notifier(struct notifier_block *nb)
+{
+   return 0;
+}
+static inline
+int unregister_tracepoint_module_notifier(struct notifier_block *nb)
+{
+   return 0;
+}
 #endif /* CONFIG_MODULES */
 
 /*
@@ -160,14 +168,14 @@ static inline void tracepoint_synchronize_unregister(void)
static inline int   \
register_trace_##name(void (*probe)(data_proto), void *data)\
{   \
-   return tracepoint_probe_register(#name, (void *)probe,  \
-data); \
+   return tracepoint_probe_register(&__tracepoint_##name,  \
+   (void *)probe, data);   \
}   \
static inline int   \
unregister_trace_##name(void (*probe)(data_proto), void *data)  \
{   \
-   return tracepoint_probe_unregister(#name, (void *)probe, \
-  data);   \
+   return tracepoint_probe_unregister(&__tracepoint_##name,\
+   (void *)probe, data);   \
}   \
static inline void  \
check_trace_callback_type_##name(void (*cb)(data_proto))\
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 1a8b28d..415b986 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -625,6 +625,

[PATCH v7 0/2] Tracepoint API simplification/cleanup

2014-03-20 Thread Mathieu Desnoyers
Here is v7 of the tracepoint register/unregister API simplification,
submitted for 3.15.

Changes since v1:
- Adapt ftrace/perf callers,
- Update tracepoint.h macro,
- Build tested.

Changes since v2:
- Introduce for_each_tracepoint() iterator to allow listing the
  currently loaded tracepoints, for modules using coming/going
  notifiers to track tracepoints.

Changes since v3:
- Introduce module coming and going notifiers each with their own
  priority to ensure that other coming notifiers are called after, and
  coming notifiers are called before the tracepoint notifiers.
- Fix: move for_each_tracepoint() outside of the CONFIG_MODULE ifdef.

Changes since v4:
- Split tracepoints_mutex into two locks: tp_modlist_mutex, which
  protects the tracepoint list of modules, and a tracepoint_mutex, which
  protects the tracepoints per se. tracepoint_mutex nests inside
  tp_modlist_mutex.
- Introduce a tracepoint module notifier, to ensure consistency between
  iteration on tracepoints with for_each_tracepoint() and module coming
  and going notifications. This consistency is ensured by holding
  tp_modlist_mutex when calling the notifier and when iterating on the
  tracepoint module list.

Changes since v5:
- Remove tracepoint_add_old_probes within "Tracepoint cleanup: remove
  unused API functions" rather than in later patch.
- Remove struct tp_probes "old probes" list within "Tracepoint cleanup:
  remove unused API functions", as it is not needed anymore.
- Within "Tracepoint cleanup: remove unused API functions", simply add
  the new module to tail of list, since there is no need to keep it
  ordered anymore.

Changes since v6:
- Remove unused struct list_head list from struct tp_probes in patch
  "Tracepoint: register/unregister struct tracepoint",
- Rename for_each_tracepoint() to for_each_kernel_tracepoint(). Only
  iterate on kernel tracepoints,
- Registered tracepoint module notifiers now receive struct tp_module
  pointer rather than struct module pointer,
- register_tracepoint_module_notifier and
  unregister_tracepoint_module_notifier now iterate on all modules and
  call coming/going notifiers. This ensures notifier registration and
  listing of already loaded modules are done atomically (protected by
  tp_modlist_mutex).

Thanks!

Mathieu

Mathieu Desnoyers (2):
  Tracepoint cleanup: remove unused API functions
  Tracepoint: register/unregister struct tracepoint

 include/linux/ftrace_event.h |1 +
 include/linux/tracepoint.h   |   60 ++--
 include/trace/ftrace.h   |2 +
 kernel/trace/trace_events.c  |8 +-
 kernel/tracepoint.c  |  705 ++
 5 files changed, 260 insertions(+), 516 deletions(-)

-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] madvise: fix locking in force_swapin_readahead() (Re: [PATCH 08/11] madvise: redefine callback functions for page table walker)

2014-03-20 Thread Hugh Dickins
On Thu, 20 Mar 2014, Naoya Horiguchi wrote:
> On Thu, Mar 20, 2014 at 09:47:04PM -0400, Sasha Levin wrote:
> > On 02/10/2014 04:44 PM, Naoya Horiguchi wrote:
> > >swapin_walk_pmd_entry() is defined as pmd_entry(), but it has no code
> > >about pmd handling (except pmd_none_or_trans_huge_or_clear_bad, but the
> > >same checks are now done in core page table walk code).
> > >So let's move this function on pte_entry() as swapin_walk_pte_entry().
> > >
> > >Signed-off-by: Naoya Horiguchi
> > 
> > This patch seems to generate:
> 
> Sasha, thank you for reporting.
> I forgot to unlock ptlock before entering read_swap_cache_async() which
> holds page lock in it, as a result lock ordering rule (written in mm/rmap.c)
> was violated (we should take in the order of mmap_sem -> page lock -> ptlock.)
> The following patch should fix this. Could you test with it?
> 
> ---
> From c0d56af5874dc40467c9b3a0f9e53b39b3c4f1c5 Mon Sep 17 00:00:00 2001
> From: Naoya Horiguchi 
> Date: Thu, 20 Mar 2014 22:30:51 -0400
> Subject: [PATCH] madvise: fix locking in force_swapin_readahead()
> 
> We take mmap_sem and ptlock in walking over ptes with swapin_walk_pte_entry(),
> but inside it we call read_swap_cache_async() which holds page lock.
> So we should unlock ptlock to call read_swap_cache_async() to meet lock order
> rule (mmap_sem -> page lock -> ptlock).
> 
> Reported-by: Sasha Levin 
> Signed-off-by: Naoya Horiguchi 

NAK.  You are now unlocking and relocking the spinlock, good; but on
arm frv or i386 CONFIG_HIGHPTE you are leaving the page table atomically
kmapped across read_swap_cache_async(), which (never mind lock ordering)
is quite likely to block waiting to allocate memory.

I do not see
madvise-redefine-callback-functions-for-page-table-walker.patch
as an improvement.  I can see what's going on in Shaohua's original
code, whereas this style makes bugs more likely.  Please drop it.

Hugh

> ---
>  mm/madvise.c | 11 +++
>  1 file changed, 11 insertions(+)
> 
> diff --git a/mm/madvise.c b/mm/madvise.c
> index 5e957b984c14..ed9c31e3b5ff 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -141,24 +141,35 @@ static int swapin_walk_pte_entry(pte_t *pte, unsigned 
> long start,
>   swp_entry_t entry;
>   struct page *page;
>   struct vm_area_struct *vma = walk->vma;
> + spinlock_t *ptl = (spinlock_t *)walk->private;
>  
>   if (pte_present(*pte) || pte_none(*pte) || pte_file(*pte))
>   return 0;
>   entry = pte_to_swp_entry(*pte);
>   if (unlikely(non_swap_entry(entry)))
>   return 0;
> + spin_unlock(ptl);
>   page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
>vma, start);
> + spin_lock(ptl);
>   if (page)
>   page_cache_release(page);
>   return 0;
>  }
>  
> +static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
> + unsigned long end, struct mm_walk *walk)
> +{
> + walk->private = pte_lockptr(walk->mm, pmd);
> + return 0;
> +}
> +
>  static void force_swapin_readahead(struct vm_area_struct *vma,
>   unsigned long start, unsigned long end)
>  {
>   struct mm_walk walk = {
>   .mm = vma->vm_mm,
> + .pmd_entry = swapin_walk_pmd_entry,
>   .pte_entry = swapin_walk_pte_entry,
>   };
>  
> -- 
> 1.8.5.3
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majord...@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: mailto:"d...@kvack.org";> em...@kvack.org 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH v2] USB: Gadget: fsl driver pullup fix

2014-03-20 Thread suresh.gu...@freescale.com


> -Original Message-
> From: Greg KH [mailto:gre...@linuxfoundation.org]
> Sent: Thursday, March 20, 2014 9:33 PM
> To: Gupta Suresh-B42813
> Cc: ba...@ti.com; linux-...@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v2] USB: Gadget: fsl driver pullup fix
> 
> On Fri, Mar 21, 2014 at 01:57:18AM +0530, Suresh Gupta wrote:
> > This fixes the fsl usb gadget driver in a way that the usb device will
> > be "pulled up" on requests only when vbus is powered
> >
> > Signed-off-by: Suresh Gupta 
> > ---
> > Changes from previous version:
> > * Removed re-factored code, Will send another patch for re-factoring
> >   duplicated code
> > * Changed Description
> >
> >  drivers/usb/gadget/fsl_udc_core.c | 4 
> >  1 file changed, 4 insertions(+)
> >
> > diff --git a/drivers/usb/gadget/fsl_udc_core.c
> > b/drivers/usb/gadget/fsl_udc_core.c
> > index 35cb972..49d66779 100644
> > --- a/drivers/usb/gadget/fsl_udc_core.c
> > +++ b/drivers/usb/gadget/fsl_udc_core.c
> > @@ -1219,6 +1219,10 @@ static int fsl_pullup(struct usb_gadget *gadget,
> int is_on)
> > struct fsl_udc *udc;
> >
> > udc = container_of(gadget, struct fsl_udc, gadget);
> > +
> > +   if(!udc->vbus_active)
> > +   return -EOPNOTSUPP;
> 
> Always run your patches through scripts/checkpatch.pl so they don't get
> rejected for silly things like the wrong coding style...
> 
Accepted, Sorry for such an inane mistake
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] xattr: guard against simultaneous glibc header inclusion (v2)

2014-03-20 Thread Serge Hallyn
If the glibc xattr.h header is included after the uapi header,
compilation fails due to an enum re-using a #define from the
uapi header.  Protect against this by guarding the define and
enum inclusions against each other.

(See https://lists.debian.org/debian-glibc/2014/03/msg00029.html
and https://sourceware.org/glibc/wiki/Synchronizing_Headers
for more information.)

v2: As Allan McRae points out, __UAPI_DEF_XATTR should only be
set to zero when _SYS_XATTR_H is defined.

Signed-off-by: Serge Hallyn 
Cc: linux-fsde...@vger.kernel.org
Cc: Andrew Morton 
Cc: Allan McRae 
Cc: libc-al...@sourceware.org
---
 include/uapi/linux/libc-compat.h | 9 +
 include/uapi/linux/xattr.h   | 7 +++
 2 files changed, 16 insertions(+)

diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index 335e8a7..0b73ee9 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -85,6 +85,12 @@
 
 #endif /* _NETINET_IN_H */
 
+/* Definitions for xattr.h */ 
+#if defined(_SYS_XATTR_H)
+#define __UAPI_DEF_XATTR   0
+#else
+#define __UAPI_DEF_XATTR   1
+#endif
 
 /* If we did not see any headers from any supported C libraries,
  * or we are being included in the kernel, then define everything
@@ -98,6 +104,9 @@
 #define __UAPI_DEF_IPV6_MREQ   1
 #define __UAPI_DEF_IPPROTO_V6  1
 
+/* Definitions for xattr.h */
+#define __UAPI_DEF_XATTR   1
+
 #endif /* __GLIBC__ */
 
 #endif /* _UAPI_LIBC_COMPAT_H */
diff --git a/include/uapi/linux/xattr.h b/include/uapi/linux/xattr.h
index e4629b9..a63c146 100644
--- a/include/uapi/linux/xattr.h
+++ b/include/uapi/linux/xattr.h
@@ -7,11 +7,18 @@
   Copyright (c) 2001-2002 Silicon Graphics, Inc.  All Rights Reserved.
   Copyright (c) 2004 Red Hat, Inc., James Morris 
 */
+
+#include <linux/libc-compat.h>
+
 #ifndef _UAPI_LINUX_XATTR_H
 #define _UAPI_LINUX_XATTR_H
 
+#ifdef __UAPI_DEF_XATTR
+#define __USE_KERNEL_XATTR_DEFS
+
 #define XATTR_CREATE   0x1 /* set value, fail if attr already exists */
 #define XATTR_REPLACE  0x2 /* set value, fail if attr does not exist */
+#endif
 
 /* Namespaces */
 #define XATTR_OS2_PREFIX "os2."
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Tasks stuck in futex code (in 3.14-rc6)

2014-03-20 Thread Srikar Dronamraju
> 
> Ok, so a big reason why this patch doesn't apply cleanly after reverting
> is because *most* of the changes were done at the top of the file with
> regards to documenting the ordering guarantees, the actual code changes
> are quite minimal.
> 
> I reverted commits 99b60ce6 (documentation) and b0c29f79 (the offending
> commit), and then I cleanly applied the equivalent ones from v3 of the
> series (which was already *tested* and ready for upstream until you
> suggested looking into the alternative spinlock approach):
> 
> https://lkml.org/lkml/2013/12/19/624
> https://lkml.org/lkml/2013/12/19/630

I reverted commits 99b60ce6 and b0c29f79. Then applied the patches in
the above url. The last one had a reject but it was pretty
straightforward to resolve it. After this, specjbb completes. 

So reverting and applying v3 3/4 and 4/4 patches works for me.

-- 
Thanks and Regards
Srikar Dronamraju

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] mm: fix swapops.h:131 bug if remap_file_pages raced migration

2014-03-20 Thread Hugh Dickins
Add remove_linear_migration_ptes_from_nonlinear(), to fix an interesting
little include/linux/swapops.h:131 BUG_ON(!PageLocked) found by trinity:
indicating that remove_migration_ptes() failed to find one of the
migration entries that was temporarily inserted.

The problem comes from remap_file_pages()'s switch from vma_interval_tree
(good for inserting the migration entry) to i_mmap_nonlinear list (no good
for locating it again); but can only be a problem if the remap_file_pages()
range does not cover the whole of the vma (zap_pte() clears the range).

remove_migration_ptes() needs a file_nonlinear method to go down the
i_mmap_nonlinear list, applying linear location to look for migration
entries in those vmas too, just in case there was this race.

The file_nonlinear method does need rmap_walk_control.arg to do this;
but it never needed vma passed in - vma comes from its own iteration.

Reported-and-tested-by: Dave Jones 
Reported-and-tested-by: Sasha Levin 
Signed-off-by: Hugh Dickins 
---

 include/linux/rmap.h |3 +--
 mm/migrate.c |   32 
 mm/rmap.c|5 +++--
 3 files changed, 36 insertions(+), 4 deletions(-)

--- 3.14-rc7/include/linux/rmap.h   2014-02-02 18:49:07.429302104 -0800
+++ linux/include/linux/rmap.h  2014-03-19 20:12:27.056451541 -0700
@@ -250,8 +250,7 @@ struct rmap_walk_control {
int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
unsigned long addr, void *arg);
int (*done)(struct page *page);
-   int (*file_nonlinear)(struct page *, struct address_space *,
-   struct vm_area_struct *vma);
+   int (*file_nonlinear)(struct page *, struct address_space *, void *arg);
struct anon_vma *(*anon_lock)(struct page *page);
bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
 };
--- 3.14-rc7/mm/migrate.c   2014-03-16 19:24:19.635512576 -0700
+++ linux/mm/migrate.c  2014-03-19 21:06:02.704527965 -0700
@@ -178,6 +178,37 @@ out:
 }
 
 /*
+ * Congratulations to trinity for discovering this bug.
+ * mm/fremap.c's remap_file_pages() accepts any range within a single vma to
+ * convert that vma to VM_NONLINEAR; and generic_file_remap_pages() will then
+ * replace the specified range by file ptes throughout (maybe populated after).
+ * If page migration finds a page within that range, while it's still located
+ * by vma_interval_tree rather than lost to i_mmap_nonlinear list, no problem:
+ * zap_pte() clears the temporary migration entry before mmap_sem is dropped.
+ * But if the migrating page is in a part of the vma outside the range to be
+ * remapped, then it will not be cleared, and remove_migration_ptes() needs to
+ * deal with it.  Fortunately, this part of the vma is of course still linear,
+ * so we just need to use linear location on the nonlinear list.
+ */
+static int remove_linear_migration_ptes_from_nonlinear(struct page *page,
+   struct address_space *mapping, void *arg)
+{
+   struct vm_area_struct *vma;
+   /* hugetlbfs does not support remap_pages, so no huge pgoff worries */
+   pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+   unsigned long addr;
+
+   list_for_each_entry(vma,
+   &mapping->i_mmap_nonlinear, shared.nonlinear) {
+
+   addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+   if (addr >= vma->vm_start && addr < vma->vm_end)
+   remove_migration_pte(page, vma, addr, arg);
+   }
+   return SWAP_AGAIN;
+}
+
+/*
  * Get rid of all migration entries and replace them by
  * references to the indicated page.
  */
@@ -186,6 +217,7 @@ static void remove_migration_ptes(struct
struct rmap_walk_control rwc = {
.rmap_one = remove_migration_pte,
.arg = old,
+   .file_nonlinear = remove_linear_migration_ptes_from_nonlinear,
};
 
rmap_walk(new, &rwc);
--- 3.14-rc7/mm/rmap.c  2014-02-02 18:49:07.929302115 -0800
+++ linux/mm/rmap.c 2014-03-19 20:16:03.552456686 -0700
@@ -1360,8 +1360,9 @@ static int try_to_unmap_cluster(unsigned
 }
 
 static int try_to_unmap_nonlinear(struct page *page,
-   struct address_space *mapping, struct vm_area_struct *vma)
+   struct address_space *mapping, void *arg)
 {
+   struct vm_area_struct *vma;
int ret = SWAP_AGAIN;
unsigned long cursor;
unsigned long max_nl_cursor = 0;
@@ -1663,7 +1664,7 @@ static int rmap_walk_file(struct page *p
if (list_empty(&mapping->i_mmap_nonlinear))
goto done;
 
-   ret = rwc->file_nonlinear(page, mapping, vma);
+   ret = rwc->file_nonlinear(page, mapping, rwc->arg);
 
 done:
mutex_unlock(&mapping->i_mmap_mutex);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majo

Re: [PATCH v11 10/27] iommu/exynos: use managed device helper functions

2014-03-20 Thread Cho KyongHo
On Thu, 20 Mar 2014 11:44:50 +0100, Tomasz Figa wrote:
> On 20.03.2014 11:03, Cho KyongHo wrote:
> > On Wed, 19 Mar 2014 13:08:42 +0100, Tomasz Figa wrote:
> >> On 19.03.2014 10:01, Sachin Kamat wrote:
> >>> On 19 March 2014 14:29, Cho KyongHo  wrote:
>  On Tue, 18 Mar 2014 16:14:53 +0100, Tomasz Figa wrote:
> > On 18.03.2014 12:09, Cho KyongHo wrote:
> >> On Fri, 14 Mar 2014 20:52:43 +0530, Sachin Kamat wrote:
> >>> Hi KyongHo,
> >>>
> >>> On 14 March 2014 10:35, Cho KyongHo  wrote:
>  This patch uses managed device helper functions in the probe().
> 
>  Signed-off-by: Cho KyongHo 
>  ---
> >>> [snip]
> >>>
>  +   data->clk = devm_clk_get(dev, "sysmmu");
>  +   if (IS_ERR(data->clk)) {
>  +   dev_info(dev, "No gate clock found!\n");
>  +   data->clk = NULL;
>  +   }
> >>>
> >>> Why aren't you returning from here upon error?
> >>
> >> It is for the case of a System MMU which does not need clock gating.
> >>
> >
> > Are there really such cases?
> >
> 
>  Yes.
>  Especially in the case of initial stage of new SoC development.
> 
>  I have experienced that some software workarounds for H/W restrictions
>  need prevention of clock gating for some devices.
> >>>
> >>> So aren't these basically some exceptions/hacks rather than the usual way
> >>> of functioning of the device?
> >>>
> >>
> >> This actually raises a good question, whether we really need to support
> >> such early development SoC versions in mainline.
> >>
> >> Another thing is that if you need to assure that a clock is ungated, you
> >> must acquire it and prepare_enable explicitly, so I don't think this
> >> kind of handling is correct.
> >>
> > On early development step of a new SoC, clock related stuffs and
> > some device drivers like display controller are usually developed in 
> > parallel.
> >
> > In that case, -ENOENT from clk_get() must not be treated as an error.
> > "[PATCH v11 20/27] iommu/exynos: allow having multiple System MMUs for a 
> > master H/W"
> > patch distinguishes -ENOENT from other error values returned by 
> > devm_clk_get().
> 
> I still don't think upstream is right place for such development hacks 
> and such assumption will mask potential errors caused by clocks 
> unspecified in DT.
> 
> If such thing is needed for development, an extra patch might be kept in 
> development tree, until clock driver is implemented or a dummy 
> fixed-rate clock might be specified in DT.
> 

Ok.

Now I understand.

Error from clk_get() will be failure of probe in the next patch series.

Thanks.

KyongHo

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: bad rss-counter message in 3.14rc5

2014-03-20 Thread Hugh Dickins
On Thu, 20 Mar 2014, Sasha Levin wrote:
> On 03/20/2014 09:51 AM, Dave Jones wrote:
> > On Wed, Mar 19, 2014 at 10:00:29PM -0700, Hugh Dickins wrote:
> > 
> >   > > This might be collateral damage from the swapops thing, I guess we
> > won't know until
> >   > > that gets fixed, but I thought I'd mention that we might still have a
> > problem here.
> >   >
> >   > Yes, those Bad rss-counters could well be collateral damage from the
> >   > swapops BUG.  To which I believe I now have the answer: again untested,
> >   > but please give this a try...
> > 
> > This survived an overnight run. No swapops bug, and no bad RSS. Good job:)
> 
> Same here, swapops bug is gone!

That was welcome news, thanks guys.  I notice it has not (yet) magically
appeared in Linus's public tree like the rss one did: so to be on the
safe side, I'll just repost it now, with your Reported-and-tested-bys,
otherwise unchanged.

Hugh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] fix some coding style in drivers/staging/iio

2014-03-20 Thread Jimmy Li
fix some coding style in drivers/staging/iio.

Signed-off-by: Jimmy Li
---
 drivers/staging/iio/accel/adis16220_core.c |3 ++-
 drivers/staging/iio/accel/sca3000_core.c   |3 ++-
 drivers/staging/iio/adc/ad7192.c   |3 ++-
 drivers/staging/iio/adc/ad7606_core.c  |2 +-
 drivers/staging/iio/adc/ad7816.c   |6 +++---
 drivers/staging/iio/adc/mxs-lradc.c|6 --
 drivers/staging/iio/addac/adt7316.c|3 +--
 drivers/staging/iio/frequency/ad5930.c |   23 +++
 drivers/staging/iio/frequency/ad9850.c |2 +-
 drivers/staging/iio/light/isl29018.c   |   10 ++
 drivers/staging/iio/light/tsl2583.c|2 +-
 drivers/staging/iio/light/tsl2x7x_core.c   |   12 ++--
 drivers/staging/iio/meter/ade7854-i2c.c|3 ++-
 drivers/staging/iio/resolver/ad2s1200.c|1 +
 14 files changed, 47 insertions(+), 32 deletions(-)

diff --git a/drivers/staging/iio/accel/adis16220_core.c 
b/drivers/staging/iio/accel/adis16220_core.c
index 6f38ca9..31c7a9d 100644
--- a/drivers/staging/iio/accel/adis16220_core.c
+++ b/drivers/staging/iio/accel/adis16220_core.c
@@ -392,7 +392,8 @@ static const struct iio_info adis16220_info = {
 };
 
 static const char * const adis16220_status_error_msgs[] = {
-   [ADIS16220_DIAG_STAT_VIOLATION_BIT] = "Capture period 
violation/interruption",
+   [ADIS16220_DIAG_STAT_VIOLATION_BIT] =
+   "Capture period violation/interruption",
[ADIS16220_DIAG_STAT_SPI_FAIL_BIT] = "SPI failure",
[ADIS16220_DIAG_STAT_FLASH_UPT_BIT] = "Flash update failed",
[ADIS16220_DIAG_STAT_POWER_HIGH_BIT] = "Power supply above 3.625V",
diff --git a/drivers/staging/iio/accel/sca3000_core.c 
b/drivers/staging/iio/accel/sca3000_core.c
index ed30e32..c099294 100644
--- a/drivers/staging/iio/accel/sca3000_core.c
+++ b/drivers/staging/iio/accel/sca3000_core.c
@@ -506,7 +506,8 @@ static int sca3000_read_raw(struct iio_dev *indio_dev,
mutex_unlock(&st->lock);
return ret;
}
-   *val = ((st->rx[0] & 0x3F) << 3) | ((st->rx[1] & 0xE0) 
>> 5);
+   *val = ((st->rx[0] & 0x3F) << 3) |
+   ((st->rx[1] & 0xE0) >> 5);
}
mutex_unlock(&st->lock);
return IIO_VAL_INT;
diff --git a/drivers/staging/iio/adc/ad7192.c b/drivers/staging/iio/adc/ad7192.c
index 83bb44b..d1f9790 100644
--- a/drivers/staging/iio/adc/ad7192.c
+++ b/drivers/staging/iio/adc/ad7192.c
@@ -223,7 +223,8 @@ static int ad7192_setup(struct ad7192_state *st,
id &= AD7192_ID_MASK;
 
if (id != st->devid)
-   dev_warn(&st->sd.spi->dev, "device ID query failed (0x%X)\n", 
id);
+   dev_warn(&st->sd.spi->dev,
+   "device ID query failed (0x%X)\n", id);
 
switch (pdata->clock_source_sel) {
case AD7192_CLK_EXT_MCLK1_2:
diff --git a/drivers/staging/iio/adc/ad7606_core.c 
b/drivers/staging/iio/adc/ad7606_core.c
index f0f05f1..bf2c801 100644
--- a/drivers/staging/iio/adc/ad7606_core.c
+++ b/drivers/staging/iio/adc/ad7606_core.c
@@ -140,7 +140,7 @@ static ssize_t ad7606_store_range(struct device *dev,
return count;
 }
 
-static IIO_DEVICE_ATTR(in_voltage_range, S_IRUGO | S_IWUSR, \
+static IIO_DEVICE_ATTR(in_voltage_range, S_IRUGO | S_IWUSR,
   ad7606_show_range, ad7606_store_range, 0);
 static IIO_CONST_ATTR(in_voltage_range_available, "5000 1");
 
diff --git a/drivers/staging/iio/adc/ad7816.c b/drivers/staging/iio/adc/ad7816.c
index 2369cf2..ec86c01 100644
--- a/drivers/staging/iio/adc/ad7816.c
+++ b/drivers/staging/iio/adc/ad7816.c
@@ -153,7 +153,8 @@ static ssize_t ad7816_show_available_modes(struct device 
*dev,
return sprintf(buf, "full\npower-save\n");
 }
 
-static IIO_DEVICE_ATTR(available_modes, S_IRUGO, ad7816_show_available_modes, 
NULL, 0);
+static IIO_DEVICE_ATTR(available_modes, S_IRUGO,
+   ad7816_show_available_modes, NULL, 0);
 
 static ssize_t ad7816_show_channel(struct device *dev,
struct device_attribute *attr,
@@ -442,6 +443,5 @@ static struct spi_driver ad7816_driver = {
 module_spi_driver(ad7816_driver);
 
 MODULE_AUTHOR("Sonic Zhang ");
-MODULE_DESCRIPTION("Analog Devices AD7816/7/8 digital"
-   " temperature sensor driver");
+MODULE_DESCRIPTION("Analog Devices AD7816/7/8 digital temperature sensor 
driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/staging/iio/adc/mxs-lradc.c 
b/drivers/staging/iio/adc/mxs-lradc.c
index 11fb952..84f7177 100644
--- a/drivers/staging/iio/adc/mxs-lradc.c
+++ b/drivers/staging/iio/adc/mxs-lradc.c
@@ -462,7 +462,8 @@ static void mxs_lradc_setup_ts_channel(struct mxs_lradc 
*lradc, unsigned ch)
 * SoC's delay unit and start the conversion later
 * and automatically.
 */
-   mxs_lradc_reg_wr

Re: [PATCH v2] staging: cxt1e1: replace OS_kmalloc/OS_kfree with kmalloc/kfree

2014-03-20 Thread DaeSeok Youn
2014-03-21 13:27 GMT+09:00 Greg KH :
> On Fri, Mar 21, 2014 at 01:15:23PM +0900, Daeseok Youn wrote:
>>
>> Replace OS_kmalloc/OS_kfree with kmalloc/kfree.
>
> You should replace it with kzalloc, not kmalloc, as OS_kmalloc() zeroed
> out the allocated data:

I think some cases do not need zeroed memory. :-)
I will change that and send it again.

Thanks for review.
Daeseok Youn.

>
>
>> -static inline void *
>> -OS_kmalloc (size_t size)
>> -{
>> -char   *ptr = kmalloc (size, GFP_KERNEL | GFP_DMA);
>> -
>> -if (ptr)
>> -memset (ptr, 0, size);
>> -return ptr;
>> -}
>
> greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] staging: cxt1e1: replace OS_kmalloc/OS_kfree with kmalloc/kfree

2014-03-20 Thread Greg KH
On Fri, Mar 21, 2014 at 01:15:23PM +0900, Daeseok Youn wrote:
> 
> Replace OS_kmalloc/OS_kfree with kmalloc/kfree.

You should replace it with kzalloc, not kmalloc, as OS_kmalloc() zeroed
out the allocated data:


> -static inline void *
> -OS_kmalloc (size_t size)
> -{
> -char   *ptr = kmalloc (size, GFP_KERNEL | GFP_DMA);
> -
> -if (ptr)
> -memset (ptr, 0, size);
> -return ptr;
> -}

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: manual merge of the tip tree with Linus' tree

2014-03-20 Thread Stephen Rothwell
Hi all,

Today's linux-next merge of the tip tree got a conflict in
arch/x86/kernel/cpu/perf_event_intel_uncore.c between commit 2c666adacc9e
("x86, intel, uncore: Fix CPU hotplug callback registration") from Linus'
tree and commit 411cf180fa00 ("perf/x86/uncore: fix initialization of
cpumask") from the tip tree.

I fixed it up (maybe incorrectly - see below) and can carry the fix as
necessary (no action is required).

-- 
Cheers,
Stephen Rothwell s...@canb.auug.org.au

diff --cc arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 28922f62eb29,bd2253d40cff..
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@@ -3868,6 -4234,41 +4234,41 @@@ static int __init uncore_pmus_register(
return 0;
  }
  
+ static void __init uncore_cpumask_init(void)
+ {
+   int cpu;
+ 
+   /*
+* only invoke once from msr or pci init code
+*/
+   if (!cpumask_empty(&uncore_cpu_mask))
+   return;
+ 
 -  get_online_cpus();
++  cpu_notifier_register_begin();
+ 
+   for_each_online_cpu(cpu) {
+   int i, phys_id = topology_physical_package_id(cpu);
+ 
+   for_each_cpu(i, &uncore_cpu_mask) {
+   if (phys_id == topology_physical_package_id(i)) {
+   phys_id = -1;
+   break;
+   }
+   }
+   if (phys_id < 0)
+   continue;
+ 
+   uncore_cpu_prepare(cpu, phys_id);
+   uncore_event_init_cpu(cpu);
+   }
+   on_each_cpu(uncore_cpu_setup, NULL, 1);
+ 
 -  register_cpu_notifier(&uncore_cpu_nb);
++  __register_cpu_notifier(&uncore_cpu_nb);
+ 
 -  put_online_cpus();
++  cpu_notifier_register_done();
+ }
+ 
+ 
  static int __init intel_uncore_init(void)
  {
int ret;


pgp4hMp5ePTQi.pgp
Description: PGP signature


[PATCH v2] staging: cxt1e1: replace OS_kmalloc/OS_kfree with kmalloc/kfree

2014-03-20 Thread Daeseok Youn

Replace OS_kmalloc/OS_kfree with kmalloc/kfree.
And also some allocation doesn't need to use GFP_DMA
so just use GFP_KERNEL.

c4_new() function is never called, remove it.

Signed-off-by: Daeseok Youn 
---
v2: fix subject and comment correctly.

I'm not sure whether GFP_DMA is used correctly for km{,z}alloc().
Please review this. 
And this patch has coding style issues. coding style issues will be
fixed with another one. 

 drivers/staging/cxt1e1/hwprobe.c |2 +-
 drivers/staging/cxt1e1/linux.c   |   21 +--
 drivers/staging/cxt1e1/musycc.c  |   12 ---
 drivers/staging/cxt1e1/pmcc4_drv.c   |   47 +-
 drivers/staging/cxt1e1/sbecom_inline_linux.h |   23 +
 drivers/staging/cxt1e1/sbecrc.c  |5 ++-
 drivers/staging/cxt1e1/sbeproc.c |2 +-
 7 files changed, 32 insertions(+), 80 deletions(-)

diff --git a/drivers/staging/cxt1e1/hwprobe.c b/drivers/staging/cxt1e1/hwprobe.c
index 9b4198b..6e207f5 100644
--- a/drivers/staging/cxt1e1/hwprobe.c
+++ b/drivers/staging/cxt1e1/hwprobe.c
@@ -205,7 +205,7 @@ cleanup_devs(void)
 #ifdef CONFIG_SBE_PMCC4_NCOMM
free_irq(hi->pdev[1]->irq, hi->ndev);
 #endif
-   OS_kfree(hi->ndev);
+   kfree(hi->ndev);
}
 }
 
diff --git a/drivers/staging/cxt1e1/linux.c b/drivers/staging/cxt1e1/linux.c
index b02f5ade..4b4609d 100644
--- a/drivers/staging/cxt1e1/linux.c
+++ b/drivers/staging/cxt1e1/linux.c
@@ -60,7 +60,6 @@ status_tc4_chan_work_init(mpi_t *, mch_t *);
 voidmusycc_wq_chan_restart(void *);
 status_t __init c4_init(ci_t *, u_char *, u_char *);
 status_t __init c4_init2(ci_t *);
-ci_t   *__init c4_new(void *);
 int __init  c4hw_attach_all(void);
 void __init hdw_sn_get(hdw_info_t *, int);
 
@@ -418,7 +417,7 @@ create_chan(struct net_device *ndev, ci_t *ci,
struct c4_priv *priv;
 
/* allocate then fill in private data structure */
-   priv = OS_kmalloc(sizeof(struct c4_priv));
+   priv = kzalloc(sizeof(struct c4_priv), GFP_KERNEL);
if (!priv) {
pr_warning("%s: no memory for net_device !\n",
   ci->devname);
@@ -428,7 +427,7 @@ create_chan(struct net_device *ndev, ci_t *ci,
if (!dev) {
pr_warning("%s: no memory for hdlc_device !\n",
   ci->devname);
-   OS_kfree(priv);
+   kfree(priv);
return NULL;
}
priv->ci = ci;
@@ -972,8 +971,8 @@ c4_add_dev(hdw_info_t *hi, int brdno, unsigned long f0, 
unsigned long f1,
 
if (register_netdev(ndev) ||
(c4_init(ci, (u_char *) f0, (u_char *) f1) != 
SBE_DRVR_SUCCESS)) {
-   OS_kfree(netdev_priv(ndev));
-   OS_kfree(ndev);
+   kfree(netdev_priv(ndev));
+   kfree(ndev);
error_flag = -ENODEV;
return NULL;
}
@@ -998,8 +997,8 @@ c4_add_dev(hdw_info_t *hi, int brdno, unsigned long f0, 
unsigned long f1,
pr_warning("%s: MUSYCC could not get irq: %d\n",
   ndev->name, irq0);
unregister_netdev(ndev);
-   OS_kfree(netdev_priv(ndev));
-   OS_kfree(ndev);
+   kfree(netdev_priv(ndev));
+   kfree(ndev);
error_flag = -EIO;
return NULL;
}
@@ -1008,8 +1007,8 @@ c4_add_dev(hdw_info_t *hi, int brdno, unsigned long f0, 
unsigned long f1,
pr_warning("%s: EBUS could not get irq: %d\n", hi->devname, 
irq1);
unregister_netdev(ndev);
free_irq(irq0, ndev);
-   OS_kfree(netdev_priv(ndev));
-   OS_kfree(ndev);
+   kfree(netdev_priv(ndev));
+   kfree(ndev);
error_flag = -EIO;
return NULL;
}
@@ -1068,8 +1067,8 @@ c4_add_dev(hdw_info_t *hi, int brdno, unsigned long f0, 
unsigned long f1,
unregister_netdev(ndev);
free_irq(irq1, ndev);
free_irq(irq0, ndev);
-   OS_kfree(netdev_priv(ndev));
-   OS_kfree(ndev);
+   kfree(netdev_priv(ndev));
+   kfree(ndev);
/* failure, error_flag is set */
return NULL;
}
diff --git a/drivers/staging/cxt1e1/musycc.c b/drivers/staging/cxt1e1/musycc.c
index 7b4f6f2..c174f6d 100644
--- a/drivers/staging/cxt1e1/musycc.c
+++ b/drivers/staging/cxt1e1/musycc.c
@@ -744,7 +744,8 @@ musycc_init(ci_t *ci)
 
 #define INT_QUEUE_BOUNDARY  4
 
-regaddr = OS_kmalloc((INT_QUEUE_SIZE + 1) * sizeof(u_int32_t));
+   regaddr = kmalloc((INT_QUEUE_SIZE + 1) * sizeof(u_int32_t),
+ GFP_KERNEL | GFP_DMA);
 if (!regaddr)
return -ENOMEM;
 ci->iqd_

Re: [PATCH] Fix northbridge quirk to assign correct NUMA node

2014-03-20 Thread Daniel J Blueman

On 21/03/2014 11:51, Suravee Suthikulpanit wrote:

Bjorn,

On a typical AMD system, there are two types of host bridges:
* PCI Root Complex Host bridge (e.g. RD890, SR56xx, etc.)
* CPU Host bridge

Here is an example from a 2 sockets system:

$ lspci

[]


The host bridge 00:00.0 is basically the PCI root complex which connects
to the actual PCI bus with
PCI devices hanging off of it.  However, the host bridge 00:[18,19].x
are the CPU host bridges,
each of which represents a CPU node within the system. In system with
single root complex,
the root complex is normally connected to node 0 (i.e. 00:18.0) via
non-coherent HT (I/O) link.



Even though the CPU host bridge 00:[18,19].x is on the same bus as the
PCI root complex, it should
not be using the NUMA information from the PCI root complex host bridge.


This is unavoidable unless we special-case it via another mechanism (ie 
not quirks), since the northbridges/CPU host bridges are logically under 
the _PXM method.



Therefore, I don't think we should be using the pcibus_to_node(dev->bus)
here.
Only the "val" from pci_read_config_dword(nb_ht, 0x60, &val), should be
used here.


Using only effectively the NUMA node ID (HT node ID here) would 
associate all the northbridges with the first fabric, which is false 
information. If there was no quirk, they'd all be associated with the 
first NUMA node in each fabric, as you'd expect.


This was the only safe and defensible one-liner approach I could 
prepare; if you find it introduces a regression or you can find a better 
approach, do tell. If not, we can decouple this fix from an overall new 
approach, since it's unlikely that'll get backported to stable kernels.


Thanks,
  Daniel


On 3/20/2014 5:07 PM, Bjorn Helgaas wrote:

[+cc linux-pci, Myron, Suravee, Kim, Aravind]

On Thu, Mar 13, 2014 at 5:43 AM, Daniel J Blueman
 wrote:

For systems with multiple servers and routed fabric, all northbridges
get
assigned to the first server. Fix this by also using the node
reported from
the PCI bus. For single-fabric systems, the northbridges are on PCI bus 0
by definition, which are on NUMA node 0 by definition, so this is
invariant
on most systems.

Tested on fam10h and fam15h single and multi-fabric systems and
candidate
for stable.


I wish this had been cc'd to linux-pci.  We're talking about a related
change by Suravee there.  In fact, we were hoping this quirk could be
removed altogether.

I don't understand what this quirk is doing.  Normally we discover the
NUMA node for a PCI host bridge via the ACPI _PXM method.  The way
_PXM works is that every PCI device in the hierarchy below the bridge
inherits the same node number as the host bridge.  I first thought
this might be a workaround for a system that lacks _PXM, but I don't
think that can be right, because you're only changing the node for a
few devices, not the whole hierarchy.

So I suspect the problem is more complicated, and maybe _PXM is
insufficient to describe the topology?  Are there subtrees that should
have nodes different from the host bridge?

I know this patch is already in v3.14-rc7, but I'd still like to
understand it so we can do the right thing with Suravee's patch.

Bjorn


Signed-off-by: Daniel J Blueman 
Acked-by: Steffen Persvold 
---
  arch/x86/kernel/quirks.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 04ee1e2..52dbf1e 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -529,7 +529,7 @@ static void quirk_amd_nb_node(struct pci_dev *dev)
 return;

 pci_read_config_dword(nb_ht, 0x60, &val);
-   node = val & 7;
+   node = pcibus_to_node(dev->bus) | (val & 7);
 /*
  * Some hardware may return an invalid node ID,
  * so check it first:
--
1.8.3.2

--
To unsubscribe from this list: send the line "unsubscribe
linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/








--
Daniel J Blueman
Principal Software Engineer, Numascale
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] Fix northbridge quirk to assign correct NUMA node

2014-03-20 Thread Suravee Suthikulpanit

Bjorn,

On a typical AMD system, there are two types of host bridges:
* PCI Root Complex Host bridge (e.g. RD890, SR56xx, etc.)
* CPU Host bridge

Here is an example from a 2 sockets system:

$ lspci
00:00.0 Host bridge: Advanced Micro Devices [AMD] nee ATI RD890 PCI to PCI 
bridge (external gfx0 port A) (rev 02)
00:00.2 IOMMU: Advanced Micro Devices [AMD] nee ATI RD990 I/O Memory Management 
Unit (IOMMU)
00:04.0 PCI bridge: Advanced Micro Devices [AMD] nee ATI RD890 PCI to PCI 
bridge (PCI express gpp port D)
00:11.0 SATA controller: Advanced Micro Devices [AMD] nee ATI SB7x0/SB8x0/SB9x0 
SATA Controller [AHCI mode]
00:12.0 USB controller: Advanced Micro Devices [AMD] nee ATI SB7x0/SB8x0/SB9x0 
USB OHCI0 Controller
00:12.1 USB controller: Advanced Micro Devices [AMD] nee ATI SB7x0 USB OHCI1 
Controller
00:12.2 USB controller: Advanced Micro Devices [AMD] nee ATI SB7x0/SB8x0/SB9x0 
USB EHCI Controller
00:13.0 USB controller: Advanced Micro Devices [AMD] nee ATI SB7x0/SB8x0/SB9x0 
USB OHCI0 Controller
00:13.1 USB controller: Advanced Micro Devices [AMD] nee ATI SB7x0 USB OHCI1 
Controller
00:13.2 USB controller: Advanced Micro Devices [AMD] nee ATI SB7x0/SB8x0/SB9x0 
USB EHCI Controller
00:14.0 SMBus: Advanced Micro Devices [AMD] nee ATI SBx00 SMBus Controller (rev 
3d)
00:14.1 IDE interface: Advanced Micro Devices [AMD] nee ATI SB7x0/SB8x0/SB9x0 
IDE Controller
00:14.3 ISA bridge: Advanced Micro Devices [AMD] nee ATI SB7x0/SB8x0/SB9x0 LPC 
host controller
00:14.4 PCI bridge: Advanced Micro Devices [AMD] nee ATI SBx00 PCI to PCI Bridge
00:14.5 USB controller: Advanced Micro Devices [AMD] nee ATI SB7x0/SB8x0/SB9x0 
USB OHCI2 Controller
00:18.0 Host bridge: Advanced Micro Devices [AMD] Family 15h Processor Function 0
00:18.1 Host bridge: Advanced Micro Devices [AMD] Family 15h Processor Function 
1
00:18.2 Host bridge: Advanced Micro Devices [AMD] Family 15h Processor Function 
2
00:18.3 Host bridge: Advanced Micro Devices [AMD] Family 15h Processor Function 
3
00:18.4 Host bridge: Advanced Micro Devices [AMD] Family 15h Processor Function 
4
00:18.5 Host bridge: Advanced Micro Devices [AMD] Family 15h Processor Function 
5
00:19.0 Host bridge: Advanced Micro Devices [AMD] Family 15h Processor Function 0
00:19.1 Host bridge: Advanced Micro Devices [AMD] Family 15h Processor Function 
1
00:19.2 Host bridge: Advanced Micro Devices [AMD] Family 15h Processor Function 
2
00:19.3 Host bridge: Advanced Micro Devices [AMD] Family 15h Processor Function 
3
00:19.4 Host bridge: Advanced Micro Devices [AMD] Family 15h Processor Function 
4
00:19.5 Host bridge: Advanced Micro Devices [AMD] Family 15h Processor Function 
5
01:00.0 Ethernet controller: Broadcom Corporation NetXtreme II BCM5709 Gigabit 
Ethernet (rev 20)
01:00.1 Ethernet controller: Broadcom Corporation NetXtreme II BCM5709 Gigabit 
Ethernet (rev 20)
02:06.0 VGA compatible controller: Advanced Micro Devices [AMD] nee ATI ES1000 
(rev 02)

The host bridge 00:00.0 is basically the PCI root complex which connects to the 
actual PCI bus with
PCI devices hanging off of it.  However, the host bridge 00:[18,19].x are the 
CPU host bridges,
each of which represents a CPU node within the system. In system with single 
root complex,
the root complex is normally connected to node 0 (i.e. 00:18.0) via 
non-coherent HT (I/O) link.

Even though the CPU host bridge 00:[18,19].x is on the same bus as the PCI root 
complex, it should
not be using the NUMA information from the PCI root complex host bridge.
Therefore, I don't think we should be using the pcibus_to_node(dev->bus) here.
Only the "val" from pci_read_config_dword(nb_ht, 0x60, &val), should be used 
here.

Please see section 2.2 of the BIOS and Kernel development guide here for more 
info.
(http://support.amd.com/TechDocs/42301_15h_Mod_00h-0Fh_BKDG.pdf)

Suravee

On 3/20/2014 5:07 PM, Bjorn Helgaas wrote:

[+cc linux-pci, Myron, Suravee, Kim, Aravind]

On Thu, Mar 13, 2014 at 5:43 AM, Daniel J Blueman  wrote:

For systems with multiple servers and routed fabric, all northbridges get
assigned to the first server. Fix this by also using the node reported from
the PCI bus. For single-fabric systems, the northbridges are on PCI bus 0
by definition, which are on NUMA node 0 by definition, so this is invariant
on most systems.

Tested on fam10h and fam15h single and multi-fabric systems and candidate
for stable.


I wish this had been cc'd to linux-pci.  We're talking about a related
change by Suravee there.  In fact, we were hoping this quirk could be
removed altogether.

I don't understand what this quirk is doing.  Normally we discover the
NUMA node for a PCI host bridge via the ACPI _PXM method.  The way
_PXM works is that every PCI device in the hierarchy below the bridge
inherits the same node number as the host bridge.  I first thought
this might be a workaround for a system that lacks _PXM, but I don't
think that can be right, because you're only changing the node for a
few devices, not the whole hierarchy.

So

Re: [PATCH RFC/RFT v3 6/9] powerpc: move cacheinfo sysfs to generic cacheinfo infrastructure

2014-03-20 Thread Anshuman Khandual
On 03/10/2014 04:42 PM, Sudeep Holla wrote:
> Hi Anshuman,
> 
> On 07/03/14 06:14, Anshuman Khandual wrote:
>> On 03/07/2014 09:36 AM, Anshuman Khandual wrote:
>>> On 02/19/2014 09:36 PM, Sudeep Holla wrote:
 From: Sudeep Holla 

 This patch removes the redundant sysfs cacheinfo code by making use of
 the newly introduced generic cacheinfo infrastructure.

 Signed-off-by: Sudeep Holla 
 Cc: Benjamin Herrenschmidt 
 Cc: Paul Mackerras 
 Cc: linuxppc-...@lists.ozlabs.org
 ---
   arch/powerpc/kernel/cacheinfo.c | 831
 ++--
   arch/powerpc/kernel/cacheinfo.h |   8 -
   arch/powerpc/kernel/sysfs.c |   4 -
   3 files changed, 109 insertions(+), 734 deletions(-)
   delete mode 100644 arch/powerpc/kernel/cacheinfo.h

 diff --git a/arch/powerpc/kernel/cacheinfo.c
 b/arch/powerpc/kernel/cacheinfo.c
 index 2912b87..05b7580 100644
 --- a/arch/powerpc/kernel/cacheinfo.c
 +++ b/arch/powerpc/kernel/cacheinfo.c
 @@ -10,38 +10,10 @@
* 2 as published by the Free Software Foundation.
*/

 +#include 
   #include 
 -#include 
   #include 
 -#include 
 -#include 
 -#include 
   #include 
 -#include 
 -#include 
 -#include 
 -
 -#include "cacheinfo.h"
 -
 -/* per-cpu object for tracking:
 - * - a "cache" kobject for the top-level directory
 - * - a list of "index" objects representing the cpu's local cache
 hierarchy
 - */
 -struct cache_dir {
 -struct kobject *kobj; /* bare (not embedded) kobject for cache
 -   * directory */
 -struct cache_index_dir *index; /* list of index objects */
 -};
 -
 -/* "index" object: each cpu's cache directory has an index
 - * subdirectory corresponding to a cache object associated with the
 - * cpu.  This object's lifetime is managed via the embedded kobject.
 - */
 -struct cache_index_dir {
 -struct kobject kobj;
 -struct cache_index_dir *next; /* next index in parent directory */
 -struct cache *cache;
 -};

   /* Template for determining which OF properties to query for a given
* cache type */
 @@ -60,11 +32,6 @@ struct cache_type_info {
   const char *nr_sets_prop;
   };

 -/* These are used to index the cache_type_info array. */
 -#define CACHE_TYPE_UNIFIED 0
 -#define CACHE_TYPE_INSTRUCTION 1
 -#define CACHE_TYPE_DATA2
 -
   static const struct cache_type_info cache_type_info[] = {
   {
   /* PowerPC Processor binding says the [di]-cache-*
 @@ -77,246 +44,115 @@ static const struct cache_type_info
 cache_type_info[] = {
   .nr_sets_prop= "d-cache-sets",
   },
   {
 -.name= "Instruction",
 -.size_prop   = "i-cache-size",
 -.line_size_props = { "i-cache-line-size",
 - "i-cache-block-size", },
 -.nr_sets_prop= "i-cache-sets",
 -},
 -{
   .name= "Data",
   .size_prop   = "d-cache-size",
   .line_size_props = { "d-cache-line-size",
"d-cache-block-size", },
   .nr_sets_prop= "d-cache-sets",
   },
 +{
 +.name= "Instruction",
 +.size_prop   = "i-cache-size",
 +.line_size_props = { "i-cache-line-size",
 + "i-cache-block-size", },
 +.nr_sets_prop= "i-cache-sets",
 +},
   };
>>>
>>>
>>> Hey Sudeep,
>>>
>>> After applying this patch, the cache_type_info array looks like this.
>>>
>>> static const struct cache_type_info cache_type_info[] = {
>>>  {
>>>  /*
>>>   * PowerPC Processor binding says the [di]-cache-*
>>>   * must be equal on unified caches, so just use
>>>   * d-cache properties.
>>>   */
>>>  .name= "Unified",
>>>  .size_prop   = "d-cache-size",
>>>  .line_size_props = { "d-cache-line-size",
>>>   "d-cache-block-size", },
>>>  .nr_sets_prop= "d-cache-sets",
>>>  },
>>>  {
>>>  .name= "Data",
>>>  .size_prop   = "d-cache-size",
>>>  .line_size_props = { "d-cache-line-size",
>>>   "d-cache-block-size", },
>>>  .nr_sets_prop= "d-cache-sets",
>>>  },
>>>  {
>>>  .name= "Instruction",
>>>  .size_prop   = "i-cache-size",
>>>  .line_size_props = { "i-cache-line-size",
>>>   "i-cach

Re: [PATCH] Fix northbridge quirk to assign correct NUMA node

2014-03-20 Thread Daniel J Blueman

On 21/03/2014 06:07, Bjorn Helgaas wrote:

[+cc linux-pci, Myron, Suravee, Kim, Aravind]

On Thu, Mar 13, 2014 at 5:43 AM, Daniel J Blueman  wrote:

For systems with multiple servers and routed fabric, all northbridges get
assigned to the first server. Fix this by also using the node reported from
the PCI bus. For single-fabric systems, the northbridges are on PCI bus 0
by definition, which are on NUMA node 0 by definition, so this is invariant
on most systems.

Tested on fam10h and fam15h single and multi-fabric systems and candidate
for stable.



I wish this had been cc'd to linux-pci.  We're talking about a related
change by Suravee there.  In fact, we were hoping this quirk could be
removed altogether.


Noted.


I don't understand what this quirk is doing.  Normally we discover the
NUMA node for a PCI host bridge via the ACPI _PXM method.  The way
_PXM works is that every PCI device in the hierarchy below the bridge
inherits the same node number as the host bridge.  I first thought
this might be a workaround for a system that lacks _PXM, but I don't
think that can be right, because you're only changing the node for a
few devices, not the whole hierarchy.

>

So I suspect the problem is more complicated, and maybe _PXM is
insufficient to describe the topology?  Are there subtrees that should
have nodes different from the host bridge?


Yes; see below.


I know this patch is already in v3.14-rc7, but I'd still like to
understand it so we can do the right thing with Suravee's patch.


The _PXM method associates each northbridge with the first NUMA node, 0 
in single-fabric systems, and eg 4 for the second server in a 
multi-fabric system with 2 dual-module Opterons (with 2 NUMA nodes 
internally) etc, since the northbridges appear in the PCI tree, under 
the host bridge, not above it [1].


With _PXM, the rest of the PCI bus hierarchy has the right NUMA node 
associated, but the northbridge PCI devices should be associated with 
their actual NUMA node, 0, 1, 2, 3 for the first server in this example. 
The quirk fixes this up; irqbalance at least uses this NUMA data exposed 
in /sys.


The alternative to the quirk may be to explicitly express the 
northbridge PCI devices in the AML with their own _PXM methods. If it's 
valid, it may be the honest approach, though the quirk may be needed for 
most BIOSs; I can check the AML on a few servers to confirm if helpful.


Thanks,
  Daniel

[1] http://quora.org/2014/lspci.txt
--
Daniel J Blueman
Principal Software Engineer, Numascale
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[tip:x86/vdso] x86, vdso: Move more vdso definitions into vdso.h

2014-03-20 Thread tip-bot for Andy Lutomirski
Commit-ID:  9e6f450f946d35d585798da268d45c679632fe05
Gitweb: http://git.kernel.org/tip/9e6f450f946d35d585798da268d45c679632fe05
Author: Andy Lutomirski 
AuthorDate: Thu, 20 Mar 2014 18:57:18 -0700
Committer:  H. Peter Anvin 
CommitDate: Thu, 20 Mar 2014 20:20:08 -0700

x86, vdso: Move more vdso definitions into vdso.h

This fixes the Xen build and gets rid of a silly header file.

Signed-off-by: Andy Lutomirski 
Cc: Stefani Seibold 
Link: 
http://lkml.kernel.org/r/1df77311795aff75f5742c787d277518314a38d3.1395366931.git.l...@amacapital.net
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/vdso.h  | 38 ++
 arch/x86/vdso/vdso.S |  2 +-
 arch/x86/vdso/vdso32-setup.c |  7 ---
 arch/x86/vdso/vdso32.S   |  2 +-
 arch/x86/vdso/vdso_image.h   | 30 --
 arch/x86/vdso/vdsox32.S  |  2 +-
 arch/x86/vdso/vma.c  |  1 -
 7 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 0301d78..7622a65 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -1,10 +1,46 @@
 #ifndef _ASM_X86_VDSO_H
 #define _ASM_X86_VDSO_H
 
+#include 
+#include 
+
+#ifdef __ASSEMBLER__
+
+#define DEFINE_VDSO_IMAGE(symname, filename)   \
+__PAGE_ALIGNED_DATA ;  \
+   .globl symname##_start, symname##_end ; \
+   .align PAGE_SIZE ;  \
+   symname##_start: ;  \
+   .incbin filename ;  \
+   symname##_end: ;\
+   .align PAGE_SIZE /* extra data here leaks to userspace. */ ;\
+   \
+.previous ;\
+   \
+   .globl symname##_pages ;\
+   .bss ;  \
+   .align 8 ;  \
+   .type symname##_pages, @object ;\
+   symname##_pages: ;  \
+   .zero (symname##_end - symname##_start + PAGE_SIZE - 1) / PAGE_SIZE * 
(BITS_PER_LONG / 8) ; \
+   .size symname##_pages, .-symname##_pages
+
+#else
+
+#define DECLARE_VDSO_IMAGE(symname)\
+   extern char symname##_start[], symname##_end[]; \
+   extern struct page *symname##_pages[]
+
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
 
 #include 
 
+DECLARE_VDSO_IMAGE(vdso32_int80);
+#ifdef CONFIG_COMPAT
+DECLARE_VDSO_IMAGE(vdso32_syscall);
+#endif
+DECLARE_VDSO_IMAGE(vdso32_sysenter);
+
 extern const char VDSO32_PRELINK[];
 
 /*
@@ -27,4 +63,6 @@ extern void __user __kernel_rt_sigreturn;
 
 void __init patch_vdso32(void *vdso, size_t len);
 
+#endif /* __ASSEMBLER__ */
+
 #endif /* _ASM_X86_VDSO_H */
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index c749d15..be3f23b 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -1,3 +1,3 @@
-#include "vdso_image.h"
+#include 
 
 DEFINE_VDSO_IMAGE(vdso, "arch/x86/vdso/vdso.so")
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index b45528e..791c1cb 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -29,7 +29,6 @@
 #include 
 #include 
 #include 
-#include "vdso_image.h"
 
 #ifdef CONFIG_COMPAT_VDSO
 #define VDSO_DEFAULT   0
@@ -42,12 +41,6 @@
 #define arch_setup_additional_pagessyscall32_setup_pages
 #endif
 
-DECLARE_VDSO_IMAGE(vdso32_int80);
-#ifdef CONFIG_COMPAT
-DECLARE_VDSO_IMAGE(vdso32_syscall);
-#endif
-DECLARE_VDSO_IMAGE(vdso32_sysenter);
-
 /*
  * Should the kernel map a VDSO page into processes and pass its
  * address down to glibc upon exec()?
diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S
index cfa6add..018bcd9 100644
--- a/arch/x86/vdso/vdso32.S
+++ b/arch/x86/vdso/vdso32.S
@@ -1,4 +1,4 @@
-#include "vdso_image.h"
+#include 
 
 DEFINE_VDSO_IMAGE(vdso32_int80, "arch/x86/vdso/vdso32-int80.so")
 
diff --git a/arch/x86/vdso/vdso_image.h b/arch/x86/vdso/vdso_image.h
deleted file mode 100644
index 1baa6bc..000
--- a/arch/x86/vdso/vdso_image.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef _VDSO_IMAGE_H
-#define _VDSO_IMAGE_H
-
-#include 
-#include 
-
-#define DEFINE_VDSO_IMAGE(symname, filename)   \
-__PAGE_ALIGNED_DATA ;  \
-   .globl symname##_start, symname##_end ; \
-   .align PAGE_SIZE ;  \
-   symname##_start: ;  

[tip:x86/vdso] x86, vdso: Finish removing VDSO32_PRELINK

2014-03-20 Thread tip-bot for Andy Lutomirski
Commit-ID:  3c1b63b9e4862fb16352a0646439c2dd6d9e0e5c
Gitweb: http://git.kernel.org/tip/3c1b63b9e4862fb16352a0646439c2dd6d9e0e5c
Author: Andy Lutomirski 
AuthorDate: Thu, 20 Mar 2014 18:57:19 -0700
Committer:  H. Peter Anvin 
CommitDate: Thu, 20 Mar 2014 20:20:18 -0700

x86, vdso: Finish removing VDSO32_PRELINK

It's a declaration of a nonexistent symbol.  We can get rid of the
64-bit versions, too, but that's more intrusive.

Signed-off-by: Andy Lutomirski 
Cc: Stefani Seibold 
Link: 
http://lkml.kernel.org/r/2ce2ce18447d8a0b78d44a278a066b6c0af06b32.1395366931.git.l...@amacapital.net
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/vdso.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 7622a65..d1dc554 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -41,8 +41,6 @@ DECLARE_VDSO_IMAGE(vdso32_syscall);
 #endif
 DECLARE_VDSO_IMAGE(vdso32_sysenter);
 
-extern const char VDSO32_PRELINK[];
-
 /*
  * Given a pointer to the vDSO image, find the pointer to VDSO32_name
  * as that symbol is defined in the vDSO sources or linker script.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] vmcore: continue vmcore initialization if PT_NOTE is found empty

2014-03-20 Thread WANG Chao
Currently when an empty PT_NOTE is detected, vmcore initialization
fails. This is too harsh, because PT_NOTE can legitimately be empty: for
example, if one offlined a cpu but never restarted the kdump service, then
after a crash the PT_NOTE program header is there but contains no data.
It's better to warn about the empty PT_NOTE and continue to initialise vmcore.

And ultimately the multiple PT_NOTE are merged into a single one, all
empty PT_NOTE are discarded naturally during the merge. So empty PT_NOTE
is not visible to user space and vmcore is as good as expected.

Signed-off-by: WANG Chao 
---
 fs/proc/vmcore.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 88d4585..dc71bf9 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -484,7 +484,6 @@ static int __init update_note_header_size_elf64(const 
Elf64_Ehdr *ehdr_ptr)
phdr_ptr->p_memsz = real_sz;
if (real_sz == 0) {
pr_warn("Warning: Zero PT_NOTE entries found\n");
-   return -EINVAL;
}
}
 
@@ -671,7 +670,6 @@ static int __init update_note_header_size_elf32(const 
Elf32_Ehdr *ehdr_ptr)
phdr_ptr->p_memsz = real_sz;
if (real_sz == 0) {
pr_warn("Warning: Zero PT_NOTE entries found\n");
-   return -EINVAL;
}
}
 
-- 
1.8.5.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/2] ARM: berlin: SMP support

2014-03-20 Thread Jisheng Zhang
On Thu, 20 Mar 2014 13:39:44 -0700
Sebastian Hesselbarth  wrote:

> This is a small patch set for SMP support on Marvell Berlin BG2
> and recently provided BG2Q. Nothing spectacular, as it basically
> copies SMP holding pen mechanism from mach-prima2 and plat-versatile
> with minor Berlin specific code for SCU and general purpose registers
> used by secondary CPUs to get their boot address.
> 
> There was some IRC discussion with Alexandre about using
> scu_get_base() instead of a DT node. Although BG2Q is true A9
> and provides SCU base; BG2 with PJ4b does not and I decided to
> depend on a DT node for SCU in both cases, which is fine I guess.
> 
> I tested this on BG2, and BG2CD (which is UP). I expect the
> Free-Electrons guys to test on BG2Q.

Although BG2Q and future SoCs will eventually go through the PSCI code path,
it's OK to use the pen mechanism for now.

So for both of these two patches, 

Acked-by: Jisheng Zhang 

Thanks,
Jisheng
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT PULL] tracing: Fix array size mismatch in format string

2014-03-20 Thread Steven Rostedt

Linus,

Vaibhav Nagarnaik discovered that since 3.10 a clean up patch made the
array index in the trace event format bogus. He supplied an elegant solution
that uses __stringify() and also removes the need for the event_storage
and event_storage_mutex that cuts off a few K of overhead from
the trace events.

I know this is very late in the -rcs. I'm fine if you don't pull this
and I'll just add it to my 3.15 push. The Cc to stable still stands as
this does fix bogus information passed to userspace.

This actually conflicts with my current 3.15 queue, as I had a not so
elegant code reduction of this same code. I'll have to revert my change
to use this one instead.

I'll let you decide to pull it or let it wait.

-- Steve


Please pull the latest trace-fixes-v3.14-rc7 tree, which can be found at:


  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
trace-fixes-v3.14-rc7

Tag SHA1: e7c4d872fd1fc8906debb95c6eab6f3b4d035a86
Head SHA1: 87291347c49dc40aa339f587b209618201c2e527


Vaibhav Nagarnaik (1):
  tracing: Fix array size mismatch in format string


 include/linux/ftrace_event.h | 4 
 include/trace/ftrace.h   | 7 ++-
 kernel/trace/trace_events.c  | 6 --
 kernel/trace/trace_export.c  | 7 ++-
 4 files changed, 4 insertions(+), 20 deletions(-)
---
commit 87291347c49dc40aa339f587b209618201c2e527
Author: Vaibhav Nagarnaik 
Date:   Thu Feb 13 19:51:48 2014 -0800

tracing: Fix array size mismatch in format string

In event format strings, the array size is reported in two locations.
One in array subscript and then via the "size:" attribute. The values
reported there have a mismatch.

For e.g., in sched:sched_switch the prev_comm and next_comm character
arrays have subscript values as [32] where as the actual field size is
16.

name: sched_switch
ID: 301
format:
field:unsigned short common_type;   offset:0;   size:2; 
signed:0;
field:unsigned char common_flags;   offset:2;   size:1; 
signed:0;
field:unsigned char common_preempt_count;   offset:3;   
size:1;signed:0;
field:int common_pid;   offset:4;   size:4; signed:1;

field:char prev_comm[32];   offset:8;   size:16;
signed:1;
field:pid_t prev_pid;   offset:24;  size:4; signed:1;
field:int prev_prio;offset:28;  size:4; signed:1;
field:long prev_state;  offset:32;  size:8; signed:1;
field:char next_comm[32];   offset:40;  size:16;
signed:1;
field:pid_t next_pid;   offset:56;  size:4; signed:1;
field:int next_prio;offset:60;  size:4; signed:1;

After bisection, the following commit was blamed:
92edca0 tracing: Use direct field, type and system names

This commit removes the duplication of strings for field->name and
field->type assuming that all the strings passed in
__trace_define_field() are immutable. This is not true for arrays, where
the type string is created in event_storage variable and field->type for
all array fields points to event_storage.

Use __stringify() to create a string constant for the type string.

Also, get rid of event_storage and event_storage_mutex that are not
needed anymore.

also, an added benefit is that this reduces the overhead of events a bit 
more:

   textdata bss dec hex filename
8424787 2036472 1302528 11763787 b3804b vmlinux
8420814 2036408 1302528 11759750 b37086 vmlinux.patched

Link: 
http://lkml.kernel.org/r/1392349908-29685-1-git-send-email-vnagarn...@google.com

Cc: Laurent Chavey 
Cc: sta...@vger.kernel.org # 3.10+
Signed-off-by: Vaibhav Nagarnaik 
Signed-off-by: Steven Rostedt 

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4e4cc28..4cdb3a1 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -495,10 +495,6 @@ enum {
FILTER_TRACE_FN,
 };
 
-#define EVENT_STORAGE_SIZE 128
-extern struct mutex event_storage_mutex;
-extern char event_storage[EVENT_STORAGE_SIZE];
-
 extern int trace_event_raw_init(struct ftrace_event_call *call);
 extern int trace_define_field(struct ftrace_event_call *call, const char *type,
  const char *name, int offset, int size,
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 1a8b28d..1ee19a2 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -310,15 +310,12 @@ static struct trace_event_functions 
ftrace_event_type_funcs_##call = {\
 #undef __array
 #define __array(type, item, len)   \
do {\
-   mutex_lock(&event_storage_mutex);   \
+   char *typ

Re: [PATCH 7/8] ASoC: atmel: document clock properties of the wm8904 driver

2014-03-20 Thread Bo Shen

Hi Mark Brown,

On 03/20/2014 09:47 PM, Mark Brown wrote:

On Thu, Mar 20, 2014 at 10:37:53AM +0800, Bo Shen wrote:


For this, I think we need to add the following parameters in DT.
1. sysclk_src_from_fll --> we need do nothing.


No, how would this work?  If nothing else the FLL needs configuration.


Only configure it in machine driver. Then no DT operation.


2. sysclk_src_from_mclk
2.1 use_external_xtal --> we need clock_frequency
2.2 !use_external_xtal --> we need retrieve clock and clock_frequency.


No, this is all handled in the clock binding.  If there's a fixed rate
clock the device tree should have a fixed rate clock provided.


Is this acceptable? Or is there a better suggestion for this?


Just have the device tree describe the hardware and provide a way of
specifying an optional MCLK.



I will implement an RFC patch, please help review it. Thanks.

Best Regards,
Bo Shen
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 0/2] wait: introduce WQ_FLAG_EXCLUSIVE_HEAD

2014-03-20 Thread Dilger, Andreas
On 2014/03/20, 11:51 AM, "Oleg Nesterov"  wrote:

>On 03/19, Oleg Nesterov wrote:
>>
>> OK, I'll try to test/cleanup/resend tomorrow.
>
>Cough. Still un-tested, sorry. I will test it somehow and report,
>but I'd like to send this for review right now.
>
>Because I simply can't decide what the new flag should actually
>do, so please ack/nack the semantics/naming at least.
>
>Changes:
>
>   1. I decided it would be better to change __wait_event()
>  to accept wait.flags right now. This looks better in
>  any case to me, and otherwise we need to introduce the
>  __wait_exclusive_enum.
>
>  The change looks trivial (both actually), please tell
>  me if you think it doesn't deserve a separate patch.
>
>   2. I won't insist, but WQ_FLAG_EXCLUSIVE_HEAD can be used
>  without WQ_FLAG_EXCLUSIVE.
>
>  Unlikely this can be useful, but it looks more natural
>  this way. Otherwise we need to add another check to
>  ensure that WQ_FLAG_EXCLUSIVE_HEAD can't come alone.
>
>  However, perhaps this means the new flag needs another
>  name. I agree in advance with any.

What about:

#define WQ_FLAG_HEAD0x02

#define WQ_FLAG_EXCLUSIVE_HEAD (WQ_FLAG_HEAD | WQ_FLAG_EXCLUSIVE)

That avoids having WQ_FLAG_EXCLUSIVE_HEAD not actually meaning "exclusive"?

Patches look reasonable at first glance.  The second patch would need
to be changed to handle that WQ_FLAG_EXCLUSIVE_HEAD has both bits set
(probably just replace uses of WQ_FLAG_EXCLUSIVE_HEAD with WQ_FLAG_HEAD).

Cheers, Andreas
-- 
Andreas Dilger

Lustre Software Architect
Intel High Performance Data Division


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] staging: cxt1e1: replace kmalloc/kfree with OS_kmalloc/OS_kfree

2014-03-20 Thread DaeSeok Youn
Yes. It was already noticed by Joe Perches.
I will send it again.

Thanks.
Daeseok Youn.

2014-03-21 11:35 GMT+09:00, Greg KH :
> On Fri, Mar 21, 2014 at 10:41:39AM +0900, Daeseok Youn wrote:
>>
>> Replace kmalloc/kfree with OS_kmalloc/OS_kfree.
>
> I think you mean this the other way around, right?
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PULL REQUEST for Rafael] PM / devfreq: pull request

2014-03-20 Thread MyungJoo Ham
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1


Dear Rafael,


Here goes bugfix devfreq patch.

Recent patchset of device-tree support / exynos driver updates is omitted in 
this pull request
as there could be further updates on the patchset.


Cheers,
MyungJoo


The following changes since commit dcb99fd9b08cfe1afe426af4d8d3cbc429190f15:

  Linux 3.14-rc7 (2014-03-16 18:51:24 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mzx/devfreq.git for-rafael

for you to fetch changes up to e35d35a1c0b3a7317d77e03e686a4a205cdd4eed:

  PM / devfreq: Rewrite devfreq_update_status() to fix multiple bugs 
(2014-03-21 11:16:30 +0900)

- 
Pull request for Rafael. Bugfix only.
(recence changes of DT/Exynos drivers need further work)

- 
Saravana Kannan (1):
  PM / devfreq: Rewrite devfreq_update_status() to fix multiple bugs

 drivers/devfreq/devfreq.c | 31 ---
 1 file changed, 20 insertions(+), 11 deletions(-)
-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.14 (GNU/Linux)

iQIcBAEBAgAGBQJTK6S5AAoJEBOhurvWBoCKuoQQAJjAVcoCzlrjUSSr8y++mXfU
UIJCrJrJR7jFIBc2ykACYXG9zrHg0/OL3VgJHd8Yj9NWtRCNv6shOZ7vk2XZ7M2r
19Lf3+wDR3zBKvCCKoSoh+crywIgXHhpI8Bgxi+rAZmxkDdv/vlFZAlPWt0pxjQG
M19zb0Y+VDgWinfwrR0kP5sPCGSoUQVU085h4Kf+OjeB2ZMWDeR/49fAlSVPFFi6
XDU68mhwytmUdLpqOv/q5c0sNEBgaj00oKyAkco5eccQ8g3+ZwFWdxzLwCoARW/N
beXz/Tr6fc/7x9l1xSdBcuxmH8Lv6OxXUMrewGx2nIyBxgDzHCQ4tKbi1URWuzyy
4LnTqNZbsgQi3EzeNdb5CqFqCIhR63uGfBtyLUTWi9QPgj4CBffgAigcXLZVkL6e
/dpiRHEQGjijRuJxVqdx8WqR0ucMOYHa890Du6l50LlI9uoiyr8I24/+qVPTZBmN
CgSloQe748k2+IXckjNa4TivEDqMCMtn1YPQu23OODg2b98n+gDnwYQFsvi52WH+
Ue5Uidv14o0M/rCYDCkBHdv+Xh+VIRdAhrX/OeAv5xxi996JG8gJW5uXBWX4BdWu
6BugMPW0dmIUnfTTUfeekfLYA+VdN44Rv2L0CdgeMr29qvkT991GBEs6w1wkyI6Z
CpjCdg3UclJ78WVIr+KJ
=CYyn
-END PGP SIGNATURE-
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] staging: cxt1e1: replace kmalloc/kfree with OS_kmalloc/OS_kfree

2014-03-20 Thread Greg KH
On Fri, Mar 21, 2014 at 10:41:39AM +0900, Daeseok Youn wrote:
> 
> Replace kmalloc/kfree with OS_kmalloc/OS_kfree.

I think you mean this the other way around, right?

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] regulator: core: Reduce busy-wait looping

2014-03-20 Thread Jonghwan Choi
Commit 5df529d440("regulator: core: Reduce busy-wait looping")
can also be used in regulator_do_set_voltage.

Signed-off-by: Jonghwan Choi 
---
 drivers/regulator/core.c |   39 +--
 1 file changed, 33 insertions(+), 6 deletions(-)

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index afca1bc..c375bfb 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -2355,12 +2355,39 @@ static int _regulator_do_set_voltage(struct
regulator_dev *rdev,
delay = 0;
}
 
-   /* Insert any necessary delays */
-   if (delay >= 1000) {
-   mdelay(delay / 1000);
-   udelay(delay % 1000);
-   } else if (delay) {
-   udelay(delay);
+   /*
+* Delay for the requested amount of time as per the
guidelines in:
+*
+* Documentation/timers/timers-howto.txt
+*
+* The assumption here is that regulators will never set the
voltage in
+* atomic context and therefore sleeping functions can be
used.
+*/
+   if (delay) {
+   unsigned int ms = delay / 1000;
+   unsigned int us = delay % 1000;
+
+   if (ms > 0) {
+   /*
+* For small enough values, handle
super-millisecond
+* delays in the usleep_range() call below.
+*/
+   if (ms < 20)
+   us += ms * 1000;
+   else
+   msleep(ms);
+   }
+
+   /*
+* Give the scheduler some room to coalesce with any
other
+* wakeup sources. For delays shorter than 10 us,
don't even
+* bother setting up high-resolution timers and just
busy-
+* loop.
+*/
+   if (us >= 10)
+   usleep_range(us, us + 100);
+   else
+   udelay(us);
}
}
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] bonding: Inactive slaves should keep inactive flag's value to 1.

2014-03-20 Thread zheng.li
于 2014年03月21日 01:02, Jay Vosburgh 写道:
> Zheng Li  wrote:
> 
>> Except bond mode 1, in other bond modes, inactive slaves should keep
>> inactive flag to 1 to refuse to receive broadcast packets. Now, active
>> slave send broadcast packets (for example ARP requests) which will
>> arrive inactive slaves on same host from switch, but inactive slave's
>> inactive flag is zero that cause bridge receive the broadcast packets
>> to produce a wrong entry in forward table. Typical situation is domu
>> send some ARP request which go out from dom0 bond's active slave, then
>> the ARP broadcast request packets go back to inactive slave from
>> switch, because the inactive slave's inactive flag is zero, kernel will
>> receive the packets and pass them to bridge, that cause dom0's bridge
>> map domu's MAC address to port of bond, bridge should map domu's MAC to
>> port of vif.
> 
>   I suspect this will break LACP (802.3ad) and Etherchannel
> (balance-xor, balance-rr) modes, as those modes can receive broadcast or
> multicast on any slave.  In those cases, the switch knows about the
> aggregation, and will only send the broadcast / multicast to one of the
> ports, but the port selected is not always the same one.
> 
>   In which mode are you having trouble?
> 
>   -J

Every bond mode except mode 1 has the bug (tested mainly in mode 6, and
also in all the other modes): the bridge makes a wrong entry which maps
the guest MAC to the port of the bond; it should map the guest MAC to the
port of the vif.

Env description: dom0's bridge contains the bond1 and vif ports, bond1 as
port 1 and vif as port 2; bond1 has two slaves which connect to a switch.
When the guest pings others, the ARP broadcast request goes out from
bond1's active slave and then comes back from the switch to bond1's own
inactive slave. Because that slave's inactive flag is zero,
bond_should_deliver_exact_match() returns false, so the bridge receives
the ARP request packets that originally came from the guest through the
vif. By analyzing these ARP broadcast packets the bridge concludes that
the guest's source MAC is behind bond1 and makes a wrong forward entry
"MAC of guest, from port 1 (bond1)"; the correct entry should be "MAC of
guest, from port 2 (vif)".


bond_should_deliver_exact_match(struct sk_buff *skb,
struct slave *slave,
struct bonding *bond)
{
if (bond_is_slave_inactive(slave)) {
if (bond->params.mode == BOND_MODE_ALB &&
skb->pkt_type != PACKET_BROADCAST &&
skb->pkt_type != PACKET_MULTICAST)
return false;
return true;
}
return false;
}

Thanks,
Zheng Li


> 
>>
>> Signed-off-by: Zheng Li 
>> ---
>> drivers/net/bonding/bond_main.c |2 +-
>> 1 files changed, 1 insertions(+), 1 deletions(-)
>>
>> diff --git a/drivers/net/bonding/bond_main.c 
>> b/drivers/net/bonding/bond_main.c
>> index e5628fc..2f73f18 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -3063,7 +3063,7 @@ static int bond_open(struct net_device *bond_dev)
>>  bond_set_slave_inactive_flags(slave,
>>
>> BOND_SLAVE_NOTIFY_NOW);
>>  } else {
>> -bond_set_slave_active_flags(slave,
>> +bond_set_slave_state(slave, BOND_STATE_ACTIVE,
>>  
>> BOND_SLAVE_NOTIFY_NOW);
>>  }
>>  }
>> -- 
>> 1.7.6.5
>>
> 
> ---
>   -Jay Vosburgh, IBM Linux Technology Center, fu...@us.ibm.com
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH] cifs: Fix possible deadlock with cifs and work queues

2014-03-20 Thread Steven Rostedt
On Thu, 20 Mar 2014 17:02:39 -0400
Jeff Layton  wrote:

> Eventually the server should just allow the read to complete even if
> the client doesn't respond to the oplock break. It has to since clients
> can suddenly drop off the net while holding an oplock. That should
> allow everything to unwedge eventually (though it may take a while).
> 
> If that's not happening then I'd be curious as to why...

The problem is that the data is being filled in the page and the reader
is waiting for the page lock to be released. The kworker for the reader
will issue the complete() and unlock the page to wake up the reader.

But because the other workqueue callback calls down_read(), and there
can be a down_write() waiting for the reader to finish, this
down_read() will block on the lock as well (rwsems are fair locks).
This blocks the other workqueue callback from issuing the complete and
page_unlock() that will wake up the reader that is holding the rwsem
with down_read().

DEADLOCK.


-- Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: manual merge of the omap_dss2 tree with the pm tree

2014-03-20 Thread Stephen Rothwell
Hi Tomi,

Today's linux-next merge of the omap_dss2 tree got a conflict in
drivers/video/Makefile between commit f167a64e9d67 ("video / output: Drop
display output class support") from the pm tree and commit 844901baede6
("video: move fbdev to drivers/video/fbdev") from the omap_dss2 tree.

I fixed it up (see below) and can carry the fix as necessary (no action
is required).

-- 
Cheers,
Stephen Rothwells...@canb.auug.org.au

diff --cc drivers/video/Makefile
index 08d6a4ab3ace,94fe9c486c60..
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@@ -16,162 -5,11 +5,8 @@@ obj-$(CONFIG_VT)   += console
  obj-$(CONFIG_LOGO)  += logo/
  obj-y   += backlight/
  
- obj-$(CONFIG_EXYNOS_VIDEO) += exynos/
- 
- obj-$(CONFIG_FB_CFB_FILLRECT)  += cfbfillrect.o
- obj-$(CONFIG_FB_CFB_COPYAREA)  += cfbcopyarea.o
- obj-$(CONFIG_FB_CFB_IMAGEBLIT) += cfbimgblt.o
- obj-$(CONFIG_FB_SYS_FILLRECT)  += sysfillrect.o
- obj-$(CONFIG_FB_SYS_COPYAREA)  += syscopyarea.o
- obj-$(CONFIG_FB_SYS_IMAGEBLIT) += sysimgblt.o
- obj-$(CONFIG_FB_SYS_FOPS)  += fb_sys_fops.o
- obj-$(CONFIG_FB_SVGALIB)   += svgalib.o
- obj-$(CONFIG_FB_MACMODES)  += macmodes.o
- obj-$(CONFIG_FB_DDC)   += fb_ddc.o
- obj-$(CONFIG_FB_DEFERRED_IO)   += fb_defio.o
- obj-$(CONFIG_FB_WMT_GE_ROPS)   += wmt_ge_rops.o
- 
- # Hardware specific drivers go first
- obj-$(CONFIG_FB_AMIGA)+= amifb.o c2p_planar.o
- obj-$(CONFIG_FB_ARC)  += arcfb.o
- obj-$(CONFIG_FB_CLPS711X) += clps711xfb.o
- obj-$(CONFIG_FB_CYBER2000)+= cyber2000fb.o
- obj-$(CONFIG_FB_GRVGA)+= grvga.o
- obj-$(CONFIG_FB_PM2)  += pm2fb.o
- obj-$(CONFIG_FB_PM3)+= pm3fb.o
- 
- obj-$(CONFIG_FB_I740)   += i740fb.o
- obj-$(CONFIG_FB_MATROX) += matrox/
- obj-$(CONFIG_FB_RIVA)   += riva/
- obj-$(CONFIG_FB_NVIDIA) += nvidia/
- obj-$(CONFIG_FB_ATY)+= aty/ macmodes.o
- obj-$(CONFIG_FB_ATY128) += aty/ macmodes.o
- obj-$(CONFIG_FB_RADEON) += aty/
- obj-$(CONFIG_FB_SIS)+= sis/
- obj-$(CONFIG_FB_VIA)+= via/
- obj-$(CONFIG_FB_KYRO) += kyro/
- obj-$(CONFIG_FB_SAVAGE) += savage/
- obj-$(CONFIG_FB_GEODE)  += geode/
- obj-$(CONFIG_FB_MBX)+= mbx/
- obj-$(CONFIG_FB_NEOMAGIC) += neofb.o
- obj-$(CONFIG_FB_3DFX) += tdfxfb.o
- obj-$(CONFIG_FB_CONTROL)  += controlfb.o
- obj-$(CONFIG_FB_PLATINUM) += platinumfb.o
- obj-$(CONFIG_FB_VALKYRIE) += valkyriefb.o
- obj-$(CONFIG_FB_CT65550)  += chipsfb.o
- obj-$(CONFIG_FB_IMSTT)+= imsttfb.o
- obj-$(CONFIG_FB_FM2)  += fm2fb.o
- obj-$(CONFIG_FB_VT8623)   += vt8623fb.o
- obj-$(CONFIG_FB_TRIDENT)  += tridentfb.o
- obj-$(CONFIG_FB_LE80578)  += vermilion/
- obj-$(CONFIG_FB_S3)   += s3fb.o
- obj-$(CONFIG_FB_ARK)  += arkfb.o
- obj-$(CONFIG_FB_STI)  += stifb.o
- obj-$(CONFIG_FB_FFB)  += ffb.o sbuslib.o
- obj-$(CONFIG_FB_CG6)  += cg6.o sbuslib.o
- obj-$(CONFIG_FB_CG3)  += cg3.o sbuslib.o
- obj-$(CONFIG_FB_BW2)  += bw2.o sbuslib.o
- obj-$(CONFIG_FB_CG14) += cg14.o sbuslib.o
- obj-$(CONFIG_FB_P9100)+= p9100.o sbuslib.o
- obj-$(CONFIG_FB_TCX)  += tcx.o sbuslib.o
- obj-$(CONFIG_FB_LEO)  += leo.o sbuslib.o
- obj-$(CONFIG_FB_SGIVW)+= sgivwfb.o
- obj-$(CONFIG_FB_ACORN)+= acornfb.o
- obj-$(CONFIG_FB_ATARI)+= atafb.o c2p_iplan2.o atafb_mfb.o \
-  atafb_iplan2p2.o atafb_iplan2p4.o 
atafb_iplan2p8.o
- obj-$(CONFIG_FB_MAC)  += macfb.o
- obj-$(CONFIG_FB_HECUBA)   += hecubafb.o
- obj-$(CONFIG_FB_N411) += n411.o
- obj-$(CONFIG_FB_HGA)  += hgafb.o
- obj-$(CONFIG_FB_XVR500)   += sunxvr500.o
- obj-$(CONFIG_FB_XVR2500)  += sunxvr2500.o
- obj-$(CONFIG_FB_XVR1000)  += sunxvr1000.o
- obj-$(CONFIG_FB_IGA)  += igafb.o
- obj-$(CONFIG_FB_APOLLO)   += dnfb.o
- obj-$(CONFIG_FB_Q40)  += q40fb.o
- obj-$(CONFIG_FB_TGA)  += tgafb.o
- obj-$(CONFIG_FB_HP300)+= hpfb.o
- obj-$(CONFIG_FB_G364) += g364fb.o
- obj-$(CONFIG_FB_EP93XX) += ep93xx-fb.o
- obj-$(CONFIG_FB_SA1100)   += sa1100fb.o
- obj-$(CONFIG_FB_HIT)  += hitfb.o
- obj-$(CONFIG_FB_ATMEL)  += atmel_lcdfb.o
- obj-$(CONFIG_FB_PVR2) += pvr2fb.o
- obj-$(CONFIG_FB_VOODOO1)  += sstfb.o
- obj-$(CONFIG_FB_ARMCLCD)+= amba-clcd.o
- obj-$(CONFIG_FB_GOLDFISH) += goldfishfb.o
- obj-$(CONFIG_FB_68328)+= 68328fb.o
- obj-$(CONFIG_FB_GBE)  += gbefb.o
- obj-$(CONFIG_FB_CIRRUS) += cirrusfb.o
- obj-$(CON

Re: [RFC PATCH] cifs: Fix possible deadlock with cifs and work queues

2014-03-20 Thread Steven Rostedt
On Thu, 20 Mar 2014 19:53:46 -0400
Jeff Layton  wrote:
 
> Wait...why does the work running on CPU1 end up blocked on down_read()?
> Is it really getting stuck on the down_write you mention?
> 

rwsems are fair locks. Readers will not block on a reader lock unless
there's a writer waiting. That's the key. As soon as a writer blocks on
a lock that is held by a reader (or multiple readers), new readers
coming in will also block to let the writer get a chance. Otherwise, it
is an unfair lock and the readers can starve the writer.

But people tend to forget that a waiting writer causes readers to block
on each other, and if the reader locks can deadlock each other, they
will deadlock with a writer waiting.

-- Steve

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [pci] WARNING: CPU: 0 PID: 1 at drivers/gpu/drm/drm_crtc.c:94 drm_warn_on_modeset_not_all_locked()

2014-03-20 Thread Fengguang Wu
// CC Stephane for RAPL related bug

Bjorn, sorry, this bug report is mis-titled. The only new bug that shows
up in aa11fc58dc is in rapl_pmu_init. And it showed up only once, so
it's hard to reproduce and the bisect is likely not accurate.  I'll
retry the bisect with a higher repeat count. Sorry for the disturbance!

[2.812392] Unpacking initramfs...
[2.812392] Unpacking initramfs...
[4.937582] Freeing initrd memory: 3276K (93cbd000 - 93ff)
[4.937582] Freeing initrd memory: 3276K (93cbd000 - 93ff)
[4.952113] BUG: unable to handle kernel
[4.952113] BUG: unable to handle kernel NULL pointer dereferenceNULL 
pointer dereference at 003c
 at 003c
[4.952871] IP:
[4.952871] IP: [<81c439fb>] rapl_pmu_init+0xed/0x165
 [<81c439fb>] rapl_pmu_init+0xed/0x165
[4.954190] *pde = 
[4.954190] *pde = 

[4.954619] Oops:  [#1]
[4.954619] Oops:  [#1]

[4.955440] CPU: 0 PID: 1 Comm: swapper Not tainted 
3.14.0-rc1-00023-gaa11fc5 #1
[4.955440] CPU: 0 PID: 1 Comm: swapper Not tainted 
3.14.0-rc1-00023-gaa11fc5 #1
[4.956050] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
   
[4.956050] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[4.956672] task: 80030c20 ti: 80032000 task.ti: 80032000
[4.956672] task: 80030c20 ti: 80032000 task.ti: 80032000
[4.957295] EIP: 0060:[<81c439fb>] EFLAGS: 0246 CPU: 0
[4.957295] EIP: 0060:[<81c439fb>] EFLAGS: 0246 CPU: 0
[4.957831] EIP is at rapl_pmu_init+0xed/0x165
[4.957831] EIP is at rapl_pmu_init+0xed/0x165

Full dmesg attached.

Thanks,
Fengguang

On Thu, Mar 20, 2014 at 04:50:08PM -0600, Bjorn Helgaas wrote:
> On Thu, Mar 20, 2014 at 6:41 AM, Fengguang Wu  wrote:
> > Greetings,
> >
> > I got the below dmesg and the first bad commit is
> >
> > git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git pci/resource
> >
> > commit aa11fc58dc71c27701b1f9a529a36a38d4337722
> > Author: Bjorn Helgaas 
> > AuthorDate: Fri Mar 7 13:39:01 2014 -0700
> > Commit: Bjorn Helgaas 
> > CommitDate: Wed Mar 19 15:00:16 2014 -0600
> >
> > PCI: Check all IORESOURCE_TYPE_BITS in pci_bus_alloc_from_region()
> >
> > When allocating space from a bus resource, i.e., from apertures leading 
> > to
> > this bus, make sure the entire resource type matches.  The previous code
> > assumed the IORESOURCE_TYPE_BITS field was a bitmask with only a single 
> > bit
> > set, but this is not true.  IORESOURCE_TYPE_BITS is really an 
> > enumeration,
> > and we have to check all the bits.
> >
> > See 72dcb1197228 ("resources: Add register address resource type").
> >
> > No functional change.  If we used this path for allocating IRQs, DMA
> > channels, or bus numbers, this would fix a bug because those types are
> > indistinguishable when masked by IORESOURCE_IO | IORESOURCE_MEM.  But we
> > don't, so this shouldn't make any difference.
> >
> > Signed-off-by: Bjorn Helgaas 
> 
> Thanks (I think).  I'm afraid I'm going to need some more help to
> debug this.  I built aa11fc58dc with the config you supplied and
> booted it on qemu with no real issues (it didn't boot all the way
> because the config doesn't include a driver for my root disk, but
> that's to be expected).
> 
> The dmesg you supplied is for some other commit 2d18516 that I don't
> have, so I'm confused about why it's not from aa11fc58dc.
> 
> I did reproduce what appears to be basically the same problem with
> a654dc797f3e, which is the 20140320 linux-next tree.  I backed up to
> 93ecdc077282, which is where pci/next was merged (this includes
> aa11fc58dc), but I could not reproduce the problem there.
> 
> So bottom line, I'm confused because your bisection doesn't match what
> I'm seeing, and I don't want to spend more time flailing around.
> 
> Bjorn
> 
> 
> > ++++
> > |   
> >  | aa11fc58dc | 2d18516523 |
> > ++++
> > | boot_successes
> >  | 19 | 0  |
> > | boot_failures 
> >  | 1  | 19 |
> &

Re: [PATCH 2/2] aio: fix the confliction of read events and migrating ring page

2014-03-20 Thread Gu Zheng
Hi Ben,
On 03/21/2014 12:30 AM, Benjamin LaHaise wrote:

> On Thu, Mar 20, 2014 at 10:32:07AM -0400, Dave Jones wrote:
>> On Thu, Mar 20, 2014 at 01:46:25PM +0800, Gu Zheng wrote:
>>
>>  > diff --git a/fs/aio.c b/fs/aio.c
>>  > index 88ad40c..e353085 100644
>>  > --- a/fs/aio.c
>>  > +++ b/fs/aio.c
>>  > @@ -319,6 +319,9 @@ static int aio_migratepage(struct address_space 
>> *mapping, struct page *new,
>>  >   ctx->ring_pages[old->index] = new;
>>  >   spin_unlock_irqrestore(&ctx->completion_lock, flags);
>>  >  
>>  > + /* Ensure read event is completed before putting old page */
>>  > + mutex_lock(&ctx->ring_lock);
>>  > + mutex_unlock(&ctx->ring_lock);
>>  >   put_page(old);
>>  >  
>>  >   return rc;
>>
>> This looks a bit weird. Would using a completion work here ?
> 
> Nope.  This is actually the most elegant fix I've seen for this approach, 
> as everything else has relied on adding additional spin locks (which only 
> end up being needed in the migration case) around access to the ring_pages 
> on the reader side.  That said, this patch is not a complete solution to 
> the problem, as the update of the ring's head pointer could still get lost 
> with this patch.  I think the right thing is just taking the ring_lock 
> mutex over the entire page migration operation.  That should be safe, as 
> nowhere else is the ring_lock mutex nested with any other locks.

This one is based on linux-next which has merged the following patch:
commit 692c9b8c5ee8d263bb8348171f0bebd3d84eb2c1
Author: Tang Chen 
Date:   Mon Mar 10 16:15:33 2014 +0800
aio, memory-hotplug: Fix confliction when migrating and accessing ring pages.

With this patch, the update of the ring's head pointer is safe because it is
protected by completion_lock, so we do not need to enlarge the ring_lock
protection region.
On the other hand, if we take the ring_lock over the entire page migration
operation, reading events will be affected while the page migration is in
progress.

Thanks,
Gu

> 
>   -ben
> 
>>  Dave
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3] f2fs: introduce fi->i_sem to protect fi's info

2014-03-20 Thread Jaegeuk Kim
This patch introduces fi->i_sem to protect fi's info, which includes xattr_ver,
pino, and i_nlink.
This makes it possible to drop i_mutex in f2fs_sync_file, resulting in a
performance improvement when a number of fsync calls are triggered from many
concurrent threads.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/dir.c   |  6 ++
 fs/f2fs/f2fs.h  |  1 +
 fs/f2fs/file.c  | 14 ++
 fs/f2fs/namei.c |  7 +++
 fs/f2fs/super.c |  1 +
 fs/f2fs/xattr.c |  3 +++
 6 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 7c9b17c..972fd0e 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -493,6 +493,7 @@ start:
 add_dentry:
f2fs_wait_on_page_writeback(dentry_page, DATA);
 
+   down_write(&F2FS_I(inode)->i_sem);
page = init_inode_metadata(inode, dir, name);
if (IS_ERR(page)) {
err = PTR_ERR(page);
@@ -515,6 +516,8 @@ add_dentry:
 
update_parent_metadata(dir, inode, current_depth);
 fail:
+   up_write(&F2FS_I(inode)->i_sem);
+
if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) {
update_inode_page(dir);
clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
@@ -559,6 +562,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, 
struct page *page,
if (inode) {
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
 
+   down_write(&F2FS_I(inode)->i_sem);
+
if (S_ISDIR(inode->i_mode)) {
drop_nlink(dir);
update_inode_page(dir);
@@ -569,6 +574,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, 
struct page *page,
drop_nlink(inode);
i_size_write(inode, 0);
}
+   up_write(&F2FS_I(inode)->i_sem);
update_inode_page(inode);
 
if (inode->i_nlink == 0)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 05c6524..469779a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -210,6 +210,7 @@ struct f2fs_inode_info {
 
/* Use below internally in f2fs*/
unsigned long flags;/* use to pass per-file flags */
+   struct rw_semaphore i_sem;  /* protect fi info */
atomic_t dirty_dents;   /* # of dirty dentry pages */
f2fs_hash_t chash;  /* hash value of given file name */
unsigned int clevel;/* maximum level of given file name */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index e755ee5..a9474cd 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -111,6 +111,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 {
struct inode *inode = file->f_mapping->host;
+   struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
int ret = 0;
bool need_cp = false;
@@ -133,7 +134,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t 
end, int datasync)
/* guarantee free sections for fsync */
f2fs_balance_fs(sbi);
 
-   mutex_lock(&inode->i_mutex);
+   down_read(&fi->i_sem);
 
/*
 * Both of fdatasync() and fsync() are able to be recovered from
@@ -150,21 +151,27 @@ int f2fs_sync_file(struct file *file, loff_t start, 
loff_t end, int datasync)
else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
need_cp = true;
 
+   up_read(&fi->i_sem);
+
if (need_cp) {
nid_t pino;
 
-   F2FS_I(inode)->xattr_ver = 0;
-
/* all the dirty node pages should be flushed for POR */
ret = f2fs_sync_fs(inode->i_sb, 1);
+
+   down_write(&fi->i_sem);
+   F2FS_I(inode)->xattr_ver = 0;
if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
get_parent_ino(inode, &pino)) {
F2FS_I(inode)->i_pino = pino;
file_got_pino(inode);
+   up_write(&fi->i_sem);
mark_inode_dirty_sync(inode);
ret = f2fs_write_inode(inode, NULL);
if (ret)
goto out;
+   } else {
+   up_write(&fi->i_sem);
}
} else {
/* if there is no written node page, write its inode page */
@@ -180,7 +187,6 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t 
end, int datasync)
ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
}
 out:
-   mutex_unlock(&inode->i_mutex);
trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
return ret;
 }
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 397d459..0cea874 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -424,12 +424,17 @@ static int f2fs_rename(struc

[PATCH 3/3] f2fs: avoid RECLAIM_FS-ON-W warning

2014-03-20 Thread Jaegeuk Kim
This patch should resolve the following possible bug.

RECLAIM_FS-ON-W at:
 mark_held_locks+0xb9/0x140
 lockdep_trace_alloc+0x85/0xf0
 __kmalloc+0x53/0x1d0
 read_all_xattrs+0x3d1/0x3f0 [f2fs]
 f2fs_getxattr+0x4f/0x100 [f2fs]
 f2fs_get_acl+0x4c/0x290 [f2fs]
 get_acl+0x4f/0x80
 posix_acl_create+0x72/0x180
 f2fs_init_acl+0x29/0xcc [f2fs]
 __f2fs_add_link+0x259/0x710 [f2fs]
 f2fs_create+0xad/0x1c0 [f2fs]
 vfs_create+0xed/0x150
 do_last+0xd36/0xed0
 path_openat+0xc5/0x680
 do_filp_open+0x43/0xa0
 do_sys_open+0x13c/0x230
 SyS_creat+0x1e/0x20
 system_call_fastpath+0x16/0x1b

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/acl.c   | 2 +-
 fs/f2fs/xattr.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index fa8da4c..a285715 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -174,7 +174,7 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int 
type)
 
retval = f2fs_getxattr(inode, name_index, "", NULL, 0);
if (retval > 0) {
-   value = kmalloc(retval, GFP_KERNEL);
+   value = kmalloc(retval, GFP_F2FS_ZERO);
if (!value)
return ERR_PTR(-ENOMEM);
retval = f2fs_getxattr(inode, name_index, "", value, retval);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 8419130..0121e45 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -275,7 +275,7 @@ static void *read_all_xattrs(struct inode *inode, struct 
page *ipage)
 
inline_size = inline_xattr_size(inode);
 
-   txattr_addr = kzalloc(inline_size + size, GFP_KERNEL);
+   txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO);
if (!txattr_addr)
return NULL;
 
-- 
1.8.4.474.g128a96c

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/3] f2fs: skip unnecessary node writes during fsync

2014-03-20 Thread Jaegeuk Kim
If multiple redundant fsync calls are triggered, we don't need to keep writing
the node pages with the fsync mark.

So, this patch adds FI_NEED_FSYNC to track whether the latest node block is
written with the fsync mark or not.
If the mark was set, a new fsync doesn't need to write a node block.
Otherwise, we should write a new node block with the mark for roll-forward
recovery.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/f2fs.h |  1 +
 fs/f2fs/file.c |  2 ++
 fs/f2fs/node.c | 37 -
 fs/f2fs/node.h |  1 +
 4 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 469779a..f83433e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1126,6 +1126,7 @@ struct dnode_of_data;
 struct node_info;
 
 int is_checkpointed_node(struct f2fs_sb_info *, nid_t);
+bool fsync_mark_done(struct f2fs_sb_info *, nid_t);
 void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
 int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
 int truncate_inode_blocks(struct inode *, pgoff_t);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index a9474cd..6ba2668 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -176,6 +176,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t 
end, int datasync)
} else {
/* if there is no written node page, write its inode page */
while (!sync_node_pages(sbi, inode->i_ino, &wbc)) {
+   if (fsync_mark_done(sbi, inode->i_ino))
+   goto out;
mark_inode_dirty_sync(inode);
ret = f2fs_write_inode(inode, NULL);
if (ret)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index daf644c..eced8d7 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -133,6 +133,20 @@ int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t 
nid)
return is_cp;
 }
 
+bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid)
+{
+   struct f2fs_nm_info *nm_i = NM_I(sbi);
+   struct nat_entry *e;
+   bool fsync_done = false;
+
+   read_lock(&nm_i->nat_tree_lock);
+   e = __lookup_nat_cache(nm_i, nid);
+   if (e)
+   fsync_done = e->fsync_done;
+   read_unlock(&nm_i->nat_tree_lock);
+   return fsync_done;
+}
+
 static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
 {
struct nat_entry *new;
@@ -173,7 +187,7 @@ retry:
 }
 
 static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
-   block_t new_blkaddr)
+   block_t new_blkaddr, bool fsync_done)
 {
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
@@ -217,6 +231,11 @@ retry:
/* change address */
nat_set_blkaddr(e, new_blkaddr);
__set_nat_cache_dirty(nm_i, e);
+
+   /* update fsync_mark if its inode nat entry is still alive */
+   e = __lookup_nat_cache(nm_i, ni->ino);
+   if (e)
+   e->fsync_done = fsync_done;
write_unlock(&nm_i->nat_tree_lock);
 }
 
@@ -483,7 +502,7 @@ static void truncate_node(struct dnode_of_data *dn)
/* Deallocate node address */
invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, dn->inode);
-   set_node_addr(sbi, &ni, NULL_ADDR);
+   set_node_addr(sbi, &ni, NULL_ADDR, false);
 
if (dn->nid == dn->inode->i_ino) {
remove_orphan_inode(sbi, dn->nid);
@@ -846,7 +865,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
f2fs_bug_on(old_ni.blk_addr != NULL_ADDR);
new_ni = old_ni;
new_ni.ino = dn->inode->i_ino;
-   set_node_addr(sbi, &new_ni, NEW_ADDR);
+   set_node_addr(sbi, &new_ni, NEW_ADDR, false);
 
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
@@ -1202,7 +1221,7 @@ static int f2fs_write_node_page(struct page *page,
mutex_lock(&sbi->node_write);
set_page_writeback(page);
write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr);
-   set_node_addr(sbi, &ni, new_addr);
+   set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
mutex_unlock(&sbi->node_write);
unlock_page(page);
@@ -1503,7 +1522,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct 
page *page,
block_t new_blkaddr)
 {
rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr);
-   set_node_addr(sbi, ni, new_blkaddr);
+   set_node_addr(sbi, ni, new_blkaddr, false);
clear_node_page_dirty(page);
 }
 
@@ -1559,7 +1578,7 @@ bool recover_xattr_data(struct inode *inode, struct page 
*page, block_t blkaddr)
f2fs_bug_on(ni.blk_addr == NULL_ADDR);
invalidate_blocks(sbi, ni.blk_addr);
dec_valid_node_count(sbi, inode);
-   set_node_addr(sbi, &ni, NULL_ADDR);
+   set_node_addr(sbi, &ni, NULL_ADDR, false);

[PATCH 2/2] x86: Finish removing VDSO32_PRELINK

2014-03-20 Thread Andy Lutomirski
It's a declaration of a nonexistent symbol.  We can get rid of the
64-bit versions, too, but that's more intrusive.

Signed-off-by: Andy Lutomirski 
---
 arch/x86/include/asm/vdso.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 7622a65..d1dc554 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -41,8 +41,6 @@ DECLARE_VDSO_IMAGE(vdso32_syscall);
 #endif
 DECLARE_VDSO_IMAGE(vdso32_sysenter);
 
-extern const char VDSO32_PRELINK[];
-
 /*
  * Given a pointer to the vDSO image, find the pointer to VDSO32_name
  * as that symbol is defined in the vDSO sources or linker script.
-- 
1.8.5.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] x86: vDSO fixes

2014-03-20 Thread Andy Lutomirski
Patch 1 fixes the Xen build.  Patch 2 fixes an embarrassment.

Andy Lutomirski (2):
  x86: Move more vdso definitions into vdso.h
  x86: Finish removing VDSO32_PRELINK

 arch/x86/include/asm/vdso.h  | 38 +-
 arch/x86/vdso/vdso.S |  2 +-
 arch/x86/vdso/vdso32-setup.c |  7 ---
 arch/x86/vdso/vdso32.S   |  2 +-
 arch/x86/vdso/vdso_image.h   | 30 --
 arch/x86/vdso/vdsox32.S  |  2 +-
 arch/x86/vdso/vma.c  |  1 -
 7 files changed, 40 insertions(+), 42 deletions(-)
 delete mode 100644 arch/x86/vdso/vdso_image.h

-- 
1.8.5.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] x86: Move more vdso definitions into vdso.h

2014-03-20 Thread Andy Lutomirski
This fixes the Xen build and gets rid of a silly header file.

Signed-off-by: Andy Lutomirski 
---
 arch/x86/include/asm/vdso.h  | 38 ++
 arch/x86/vdso/vdso.S |  2 +-
 arch/x86/vdso/vdso32-setup.c |  7 ---
 arch/x86/vdso/vdso32.S   |  2 +-
 arch/x86/vdso/vdso_image.h   | 30 --
 arch/x86/vdso/vdsox32.S  |  2 +-
 arch/x86/vdso/vma.c  |  1 -
 7 files changed, 41 insertions(+), 41 deletions(-)
 delete mode 100644 arch/x86/vdso/vdso_image.h

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index 0301d78..7622a65 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -1,10 +1,46 @@
 #ifndef _ASM_X86_VDSO_H
 #define _ASM_X86_VDSO_H
 
+#include 
+#include 
+
+#ifdef __ASSEMBLER__
+
+#define DEFINE_VDSO_IMAGE(symname, filename)   \
+__PAGE_ALIGNED_DATA ;  \
+   .globl symname##_start, symname##_end ; \
+   .align PAGE_SIZE ;  \
+   symname##_start: ;  \
+   .incbin filename ;  \
+   symname##_end: ;\
+   .align PAGE_SIZE /* extra data here leaks to userspace. */ ;\
+   \
+.previous ;\
+   \
+   .globl symname##_pages ;\
+   .bss ;  \
+   .align 8 ;  \
+   .type symname##_pages, @object ;\
+   symname##_pages: ;  \
+   .zero (symname##_end - symname##_start + PAGE_SIZE - 1) / PAGE_SIZE * 
(BITS_PER_LONG / 8) ; \
+   .size symname##_pages, .-symname##_pages
+
+#else
+
+#define DECLARE_VDSO_IMAGE(symname)\
+   extern char symname##_start[], symname##_end[]; \
+   extern struct page *symname##_pages[]
+
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
 
 #include 
 
+DECLARE_VDSO_IMAGE(vdso32_int80);
+#ifdef CONFIG_COMPAT
+DECLARE_VDSO_IMAGE(vdso32_syscall);
+#endif
+DECLARE_VDSO_IMAGE(vdso32_sysenter);
+
 extern const char VDSO32_PRELINK[];
 
 /*
@@ -27,4 +63,6 @@ extern void __user __kernel_rt_sigreturn;
 
 void __init patch_vdso32(void *vdso, size_t len);
 
+#endif /* __ASSEMBLER__ */
+
 #endif /* _ASM_X86_VDSO_H */
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index c749d15..be3f23b 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -1,3 +1,3 @@
-#include "vdso_image.h"
+#include 
 
 DEFINE_VDSO_IMAGE(vdso, "arch/x86/vdso/vdso.so")
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index b45528e..791c1cb 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -29,7 +29,6 @@
 #include 
 #include 
 #include 
-#include "vdso_image.h"
 
 #ifdef CONFIG_COMPAT_VDSO
 #define VDSO_DEFAULT   0
@@ -42,12 +41,6 @@
 #define arch_setup_additional_pagessyscall32_setup_pages
 #endif
 
-DECLARE_VDSO_IMAGE(vdso32_int80);
-#ifdef CONFIG_COMPAT
-DECLARE_VDSO_IMAGE(vdso32_syscall);
-#endif
-DECLARE_VDSO_IMAGE(vdso32_sysenter);
-
 /*
  * Should the kernel map a VDSO page into processes and pass its
  * address down to glibc upon exec()?
diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S
index cfa6add..018bcd9 100644
--- a/arch/x86/vdso/vdso32.S
+++ b/arch/x86/vdso/vdso32.S
@@ -1,4 +1,4 @@
-#include "vdso_image.h"
+#include 
 
 DEFINE_VDSO_IMAGE(vdso32_int80, "arch/x86/vdso/vdso32-int80.so")
 
diff --git a/arch/x86/vdso/vdso_image.h b/arch/x86/vdso/vdso_image.h
deleted file mode 100644
index 1baa6bc..000
--- a/arch/x86/vdso/vdso_image.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef _VDSO_IMAGE_H
-#define _VDSO_IMAGE_H
-
-#include 
-#include 
-
-#define DEFINE_VDSO_IMAGE(symname, filename)   \
-__PAGE_ALIGNED_DATA ;  \
-   .globl symname##_start, symname##_end ; \
-   .align PAGE_SIZE ;  \
-   symname##_start: ;  \
-   .incbin filename ;  \
-   symname##_end: ;\
-   .align PAGE_SIZE /* extra data here leaks to userspace. */ ;\
-   \
-.previous ;\
- 

Re: kernel BUG in munlock_vma_pages_range

2014-03-20 Thread Sasha Levin

On 12/13/2013 04:08 AM, Vlastimil Babka wrote:

On 12/13/2013 09:49 AM, Bob Liu wrote:

On 12/13/2013 05:05 AM, Sasha Levin wrote:

On 12/12/2013 07:41 AM, Vlastimil Babka wrote:

On 12/12/2013 06:03 AM, Bob Liu wrote:


On 12/12/2013 11:16 AM, Sasha Levin wrote:

On 12/11/2013 05:59 PM, Vlastimil Babka wrote:

On 12/09/2013 09:26 PM, Sasha Levin wrote:

On 12/09/2013 12:12 PM, Vlastimil Babka wrote:

On 12/09/2013 06:05 PM, Sasha Levin wrote:

On 12/09/2013 04:34 AM, Vlastimil Babka wrote:

Hello, I will look at it, thanks.
Do you have specific reproduction instructions?


Not really, the fuzzer hit it once and I've been unable to trigger
it again. Looking at
the piece of code involved it might have had something to do with
hugetlbfs, so I'll crank
up testing on that part.


Thanks. Do you have trinity log and the .config file? I'm currently
unable to even boot linux-next
with my config/setup due to a GPF.
Looking at code I wouldn't expect that it could encounter a tail
page, without first encountering a
head page and skipping the whole huge page. At least in THP case, as
TLB pages should be split when
a vma is split. As for hugetlbfs, it should be skipped for
mlock/munlock operations completely. One
of these assumptions is probably failing here...


If it helps, I've added a dump_page() in case we hit a tail page
there and got:

[  980.172299] page:ea003e5e8040 count:0 mapcount:1
mapping:  (null) index:0
x0
[  980.173412] page flags: 0x2f80008000(tail)

I can also add anything else in there to get other debug output if
you think of something else useful.


Please try the following. Thanks in advance.


[  428.499889] page:ea003e5c0040 count:0 mapcount:4
mapping:  (null) index:0x0
[  428.499889] page flags: 0x2f80008000(tail)
[  428.499889] start=140117131923456 pfn=16347137
orig_start=140117130543104 page_increm
=1 vm_start=140117130543104 vm_end=140117134688256 vm_flags=135266419
[  428.499889] first_page pfn=16347136
[  428.499889] page:ea003e5c count:204 mapcount:44
mapping:880fb5c466c1 inde
x:0x7f6f8fe00
[  428.499889] page flags:
0x2f80084068(uptodate|lru|active|head|swapbacked)


From this print, it looks like the page is still a huge page.
One situation I guess is a huge page which isn't PageMlocked and passed
to munlock_vma_page(). I'm not sure whether this will happen.


Yes that's quite likely the case. It's not illegal to happen I would say.


Please take a try this patch.


I've made a simpler version that does away with the ugly page_mask
thing completely.
Please try that as well. Thanks.

Also, when working on this I think I found another potential but much
rarer problem when munlock_vma_page races with a THP split. That would,
however, manifest such that some of the former tail pages would stay
PageMlocked. But that still needs more thought.
The bug at hand should however be fixed by this patch.


Yup, this patch seems to fix the issue previously reported.

However, I'll piggyback another thing that popped up now that the vm
could run for a while which
also seems to be caused by the original patch. It looks like a pretty
straightforward deadlock, but


Sigh, put one down, patch it around... :)


Looks like put_page() in __munlock_pagevec() need to get the
zone->lru_lock which is already held when entering __munlock_pagevec().


I've come to the same conclusion, however:


How about fix like this?


That unfortunately removes most of the purpose of this function which was to 
avoid repeated locking.

Please try this patch.


It seems that this one is back, not exactly sure why yet:

[ 2857.034927] kernel BUG at include/linux/page-flags.h:415!
[ 2857.035576] invalid opcode:  [#1] PREEMPT SMP DEBUG_PAGEALLOC
[ 2857.036702] Dumping ftrace buffer:
[ 2857.037447](ftrace buffer empty)
[ 2857.037937] Modules linked in:
[ 2857.038379] CPU: 25 PID: 21381 Comm: trinity-c61 Tainted: GW 
3.14.0-rc7-next-20140320-sasha-00015-gd752393-dirty #261
[ 2857.039854] task: 88080f91b000 ti: 8807fd106000 task.ti: 
8807fd106000
[ 2857.040328] RIP: 0010:[]  [] 
munlock_vma_pages_range+0x93/0x1d0
[ 2857.040328] RSP: :8807fd107e08  EFLAGS: 00010246
[ 2857.040328] RAX: 88052c955360 RBX: 41b36000 RCX: 009f
[ 2857.040328] RDX:  RSI: 88080f91bcf0 RDI: 04fd5360
[ 2857.040328] RBP: 8807fd107ec8 R08: 0001 R09: 
[ 2857.040328] R10: 0001 R11: 0001 R12: ea0013f54d80
[ 2857.040328] R13: 88068083c200 R14: 41b37000 R15: 8807fd107e94
[ 2857.040328] FS:  7fcd4bd02700() GS:8806acc0() 
knlGS:
[ 2857.040328] CS:  0010 DS:  ES:  CR0: 8005003b
[ 2857.040328] CR2: 027405a8 CR3: 000804ad4000 CR4: 06a0
[ 2857.040328] DR0: 00698000 DR1: 00698000 DR2: 

Re: [PATCH 08/11] madvise: redefine callback functions for page table walker

2014-03-20 Thread Sasha Levin

On 02/10/2014 04:44 PM, Naoya Horiguchi wrote:

swapin_walk_pmd_entry() is defined as pmd_entry(), but it has no code
for pmd handling (except pmd_none_or_trans_huge_or_clear_bad, but the
same check is now done in the core page table walk code).
So let's move this function to pte_entry() as swapin_walk_pte_entry().

Signed-off-by: Naoya Horiguchi


This patch seems to generate:

[  305.267354] =
[  305.268051] [ INFO: inconsistent lock state ]
[  305.268678] 3.14.0-rc7-next-20140320-sasha-00015-gd752393-dirty #261 
Tainted: GW
[  305.269992] -
[  305.270152] inconsistent {IN-RECLAIM_FS-W} -> {RECLAIM_FS-ON-W} usage.
[  305.270152] trinity-c57/13619 [HC0[0]:SC0[0]:HE1:SE1] takes:
[  305.270152]  (&(ptlock_ptr(page))->rlock#2){+.+.?.}, at: walk_pte_range 
(include/linux/spinlock.h:303 mm/pagewalk.c:33)
[  305.270152] {IN-RECLAIM_FS-W} state was registered at:
[  305.270152]   mark_irqflags (kernel/locking/lockdep.c:2821)
[  305.270152]   __lock_acquire (kernel/locking/lockdep.c:3138)
[  305.270152]   lock_acquire (arch/x86/include/asm/current.h:14 
kernel/locking/lockdep.c:3602)
[  305.270152]   _raw_spin_lock (include/linux/spinlock_api_smp.h:143 
kernel/locking/spinlock.c:151)
[  305.270152]   __page_check_address (include/linux/spinlock.h:303 
mm/rmap.c:624)
[  305.270152]   page_referenced_one (mm/rmap.c:706)
[  305.270152]   rmap_walk_anon (mm/rmap.c:1613)
[  305.270152]   rmap_walk (mm/rmap.c:1685)
[  305.270152]   page_referenced (mm/rmap.c:802)
[  305.270152]   shrink_active_list (mm/vmscan.c:1704)
[  305.270152]   balance_pgdat (mm/vmscan.c:2741 mm/vmscan.c:2996)
[  305.270152]   kswapd (mm/vmscan.c:3296)
[  305.270152]   kthread (kernel/kthread.c:216)
[  305.270152]   ret_from_fork (arch/x86/kernel/entry_64.S:555)
[  305.270152] irq event stamp: 20863
[  305.270152] hardirqs last  enabled at (20863): alloc_pages_vma 
(arch/x86/include/asm/paravirt.h:809 include/linux/seqlock.h:81 
include/linux/seqlock.h:146 include/linux/cpus
et.h:98 mm/mempolicy.c:1990)
[  305.270152] hardirqs last disabled at (20862): alloc_pages_vma 
(include/linux/seqlock.h:79 include/linux/seqlock.h:146 
include/linux/cpuset.h:98 mm/mempolicy.c:1990)
[  305.270152] softirqs last  enabled at (19858): __do_softirq 
(arch/x86/include/asm/preempt.h:22 kernel/softirq.c:298)
[  305.270152] softirqs last disabled at (19855): irq_exit 
(kernel/softirq.c:348 kernel/softirq.c:389)
[  305.270152]
[  305.270152] other info that might help us debug this:
[  305.270152]  Possible unsafe locking scenario:
[  305.270152]
[  305.270152]CPU0
[  305.270152]
[  305.270152]   lock(&(ptlock_ptr(page))->rlock#2);
[  305.270152]   
[  305.270152] lock(&(ptlock_ptr(page))->rlock#2);
[  305.270152]
[  305.270152]  *** DEADLOCK ***
[  305.270152]
[  305.270152] 2 locks held by trinity-c57/13619:
[  305.270152]  #0:  (&mm->mmap_sem){++}, at: SyS_madvise 
(arch/x86/include/asm/current.h:14 mm/madvise.c:492 mm/madvise.c:448)
[  305.270152]  #1:  (&(ptlock_ptr(page))->rlock#2){+.+.?.}, at: walk_pte_range 
(include/linux/spinlock.h:303 mm/pagewalk.c:33)
[  305.270152]
[  305.270152] stack backtrace:
[  305.270152] CPU: 23 PID: 13619 Comm: trinity-c57 Tainted: GW 
3.14.0-rc7-next-20140320-sasha-00015-gd752393-dirty #261
[  305.270152]  8804ab8e0d28 8804ab9c5968 844b76e7 
0001
[  305.270152]  8804ab8e 8804ab9c59c8 811a55f7 

[  305.270152]  0001 88040001 87e18ed8 
000a
[  305.270152] Call Trace:
[  305.270152]  dump_stack (lib/dump_stack.c:52)
[  305.270152]  print_usage_bug (kernel/locking/lockdep.c:2254)
[  305.270152]  ? check_usage_forwards (kernel/locking/lockdep.c:2371)
[  305.270152]  mark_lock_irq (kernel/locking/lockdep.c:2465)
[  305.270152]  mark_lock (kernel/locking/lockdep.c:2920)
[  305.270152]  mark_held_locks (kernel/locking/lockdep.c:2523)
[  305.270152]  lockdep_trace_alloc (kernel/locking/lockdep.c:2745 
kernel/locking/lockdep.c:2760)
[  305.270152]  __alloc_pages_nodemask (mm/page_alloc.c:2722)
[  305.270152]  ? mark_held_locks (kernel/locking/lockdep.c:2523)
[  305.270152]  ? alloc_pages_vma (arch/x86/include/asm/paravirt.h:809 
include/linux/seqlock.h:81 include/linux/seqlock.h:146 
include/linux/cpuset.h:98 mm/mempolicy.c:1990)
[  305.270152]  alloc_pages_vma (include/linux/mempolicy.h:76 
mm/mempolicy.c:2006)
[  305.270152]  ? read_swap_cache_async (mm/swap_state.c:328)
[  305.270152]  ? __const_udelay (arch/x86/lib/delay.c:126)
[  305.270152]  read_swap_cache_async (mm/swap_state.c:328)
[  305.270152]  ? walk_pte_range (include/linux/spinlock.h:303 mm/pagewalk.c:33)
[  305.270152]  swapin_walk_pte_entry (mm/madvise.c:152)
[  305.270152]  walk_pte_range (mm/pagewalk.c:47)
[  305.270152]  ? sched_clock (arch/x86/include/asm/paravirt.h:192 
arch/x86/kernel/tsc.c:305)
[  30

Re: [RFC PATCH v2 0/2] clk: Support for DT assigned clock parents and rates

2014-03-20 Thread Mike Turquette
Quoting Sylwester Nawrocki (2014-03-20 05:42:33)
> Hi Maxime,
> 
> On 06/03/14 14:45, Maxime Coquelin wrote:
> > Hi Sylwester,
> > 
> >   I like the principle of your implementation, but I have two questions:
> >   1 - How can we manage PM with this solution, as the parent/rate will 
> > be 
> > set only once at probe time?
> >   2 - How to set the parent of a parent clock (which can be shared with 
> > other devices)? Same question about the parent rates.
> 
> Thanks for your feedback and apologies for late reply.
> 
> IIUC your first concern is about a situation when clocks need to be
> reconfigured upon each resume from system sleep or runtime PM resume ?
> As I mentioned in v1 of the RFC I was considering having individual
> drivers calling explicitly the clocks set up routine. Presumably this
> would allow to resolve the power management related issue.
> One example I'm aware of where the approach in this RFC wouldn't work is
> when a device in a SoC belongs to a power domain, which needs to be
> switched on first before we can start setting up the clocks, and the
> clocks' configuration gets lost after the power domain is switched off.

I like Sylwester's approach of handling this one-time setup in the
driver core.

Any kind of fine grained power management should not be hidden by DT,
and by definition that logic belongs in the device driver. It can still
be nicely abstracted away by runtime pm[1].

Regards,
Mike

[1] Message-ID: <20140320114238.gq7...@n2100.arm.linux.org.uk>

> 
> OTOH I suspect devices for which one-time clocks setup is sufficient
> will be quite common. And for these there would need to be a single
> call to the setup routine in probe() I guess, since it wouldn't be
> possible to figure out just from the DT data when the actual call
> should be made.
> 
> For a global clocks configuration, I thought about specifying that
> in the clocks controller node, and then to have the setup routine
> called e.g. from of_clk_init(). I think that could work well enough,
> together with the patch [1], adding clock dependencies handling.
> But then the clock frequency set up function would need to be
> modified to respect the clock parent relationships, similarly as
> in patch series [2]. I just noticed [2] recently, after posting
> this RFC (adding Tero to Cc).
> 
> --
> Regards,
> Sylwester
> 
> [1] http://www.spinics.net/lists/arm-kernel/msg310507.html
> [2] http://www.spinics.net/lists/linux-omap/msg103069.html
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] staging: cxt1e1: replace kmalloc/kfree with OS_kmalloc/OS_kfree

2014-03-20 Thread Daeseok Youn

Replace OS_kmalloc/OS_kfree with k{m,z}alloc/kfree.
Also, some allocations don't need to use GFP_DMA,
so just use GFP_KERNEL.

c4_new() function is never called, remove it.

Signed-off-by: Daeseok Youn 
---
I'm not sure whether GFP_DMA is used correctly for k{m,z}alloc().
Please review this.
Also, this patch has coding style issues; they will be
fixed in a separate patch.

 drivers/staging/cxt1e1/hwprobe.c |2 +-
 drivers/staging/cxt1e1/linux.c   |   21 +--
 drivers/staging/cxt1e1/musycc.c  |   12 ---
 drivers/staging/cxt1e1/pmcc4_drv.c   |   47 +-
 drivers/staging/cxt1e1/sbecom_inline_linux.h |   23 +
 drivers/staging/cxt1e1/sbecrc.c  |5 ++-
 drivers/staging/cxt1e1/sbeproc.c |2 +-
 7 files changed, 32 insertions(+), 80 deletions(-)

diff --git a/drivers/staging/cxt1e1/hwprobe.c b/drivers/staging/cxt1e1/hwprobe.c
index 9b4198b..6e207f5 100644
--- a/drivers/staging/cxt1e1/hwprobe.c
+++ b/drivers/staging/cxt1e1/hwprobe.c
@@ -205,7 +205,7 @@ cleanup_devs(void)
 #ifdef CONFIG_SBE_PMCC4_NCOMM
free_irq(hi->pdev[1]->irq, hi->ndev);
 #endif
-   OS_kfree(hi->ndev);
+   kfree(hi->ndev);
}
 }
 
diff --git a/drivers/staging/cxt1e1/linux.c b/drivers/staging/cxt1e1/linux.c
index b02f5ade..4b4609d 100644
--- a/drivers/staging/cxt1e1/linux.c
+++ b/drivers/staging/cxt1e1/linux.c
@@ -60,7 +60,6 @@ status_tc4_chan_work_init(mpi_t *, mch_t *);
 voidmusycc_wq_chan_restart(void *);
 status_t __init c4_init(ci_t *, u_char *, u_char *);
 status_t __init c4_init2(ci_t *);
-ci_t   *__init c4_new(void *);
 int __init  c4hw_attach_all(void);
 void __init hdw_sn_get(hdw_info_t *, int);
 
@@ -418,7 +417,7 @@ create_chan(struct net_device *ndev, ci_t *ci,
struct c4_priv *priv;
 
/* allocate then fill in private data structure */
-   priv = OS_kmalloc(sizeof(struct c4_priv));
+   priv = kzalloc(sizeof(struct c4_priv), GFP_KERNEL);
if (!priv) {
pr_warning("%s: no memory for net_device !\n",
   ci->devname);
@@ -428,7 +427,7 @@ create_chan(struct net_device *ndev, ci_t *ci,
if (!dev) {
pr_warning("%s: no memory for hdlc_device !\n",
   ci->devname);
-   OS_kfree(priv);
+   kfree(priv);
return NULL;
}
priv->ci = ci;
@@ -972,8 +971,8 @@ c4_add_dev(hdw_info_t *hi, int brdno, unsigned long f0, 
unsigned long f1,
 
if (register_netdev(ndev) ||
(c4_init(ci, (u_char *) f0, (u_char *) f1) != 
SBE_DRVR_SUCCESS)) {
-   OS_kfree(netdev_priv(ndev));
-   OS_kfree(ndev);
+   kfree(netdev_priv(ndev));
+   kfree(ndev);
error_flag = -ENODEV;
return NULL;
}
@@ -998,8 +997,8 @@ c4_add_dev(hdw_info_t *hi, int brdno, unsigned long f0, 
unsigned long f1,
pr_warning("%s: MUSYCC could not get irq: %d\n",
   ndev->name, irq0);
unregister_netdev(ndev);
-   OS_kfree(netdev_priv(ndev));
-   OS_kfree(ndev);
+   kfree(netdev_priv(ndev));
+   kfree(ndev);
error_flag = -EIO;
return NULL;
}
@@ -1008,8 +1007,8 @@ c4_add_dev(hdw_info_t *hi, int brdno, unsigned long f0, 
unsigned long f1,
pr_warning("%s: EBUS could not get irq: %d\n", hi->devname, 
irq1);
unregister_netdev(ndev);
free_irq(irq0, ndev);
-   OS_kfree(netdev_priv(ndev));
-   OS_kfree(ndev);
+   kfree(netdev_priv(ndev));
+   kfree(ndev);
error_flag = -EIO;
return NULL;
}
@@ -1068,8 +1067,8 @@ c4_add_dev(hdw_info_t *hi, int brdno, unsigned long f0, 
unsigned long f1,
unregister_netdev(ndev);
free_irq(irq1, ndev);
free_irq(irq0, ndev);
-   OS_kfree(netdev_priv(ndev));
-   OS_kfree(ndev);
+   kfree(netdev_priv(ndev));
+   kfree(ndev);
/* failure, error_flag is set */
return NULL;
}
diff --git a/drivers/staging/cxt1e1/musycc.c b/drivers/staging/cxt1e1/musycc.c
index 7b4f6f2..c174f6d 100644
--- a/drivers/staging/cxt1e1/musycc.c
+++ b/drivers/staging/cxt1e1/musycc.c
@@ -744,7 +744,8 @@ musycc_init(ci_t *ci)
 
 #define INT_QUEUE_BOUNDARY  4
 
-regaddr = OS_kmalloc((INT_QUEUE_SIZE + 1) * sizeof(u_int32_t));
+   regaddr = kmalloc((INT_QUEUE_SIZE + 1) * sizeof(u_int32_t),
+ GFP_KERNEL | GFP_DMA);
 if (!regaddr)
return -ENOMEM;
 ci->iqd_p_saved = regaddr;  /* save orig val

SCSI: race condition between scsi_remove_target and scsi_probe_and_add_lun

2014-03-20 Thread Andrey Zonov
Hi,

I've got a kernel panic on my box, which works as a FibreChannel initiator.
I was able to reproduce this panic by setting dev_loss_tmo=2 and
enabling/disabling ports every 5 seconds on the switch in 5 minutes.  I
added some debug points in the kernel code and that's what I've got so far:

1. system is inserting new device into __devices list

DEBUG: scsi_sysfs_device_initialize(): sdev=88046a931000 7:0:5:0
Pid: 910, comm: kworker/u:2 Tainted: P   O 3.2.48-swt9004 #33
Call Trace:
[] ? scsi_alloc_sdev+0x1d2/0x240
[] ? scsi_device_lookup_by_target+0x8d/0xc0
[] ? scsi_probe_and_add_lun+0x42a/0xb20
[] ? kobject_set_name_vargs+0x6d/0x80
[] ? dev_set_name+0x3f/0x50
[] ? kobject_get+0x12/0x20
[] ? fc_host_match+0x14/0x70 [scsi_transport_fc]
[] ? attribute_container_add_device+0x4f/0x160
[] ? kobject_get+0x12/0x20
[] ? get_device+0x14/0x20
[] ? scsi_alloc_target+0x295/0x2d0
[] ? device_release+0x1a/0x80
[] ? __scsi_scan_target+0xce/0x5f0
[] ? dequeue_task_fair+0x52/0x150
[] ? __schedule+0x25d/0x7d0
[] ? scsi_scan_target+0xc6/0xe0
[] ? fc_scsi_scan_rport+0xaf/0xc0 [scsi_transport_fc]
[] ? process_one_work+0x116/0x3a0
[] ? worker_thread+0x14c/0x400
[] ? rescuer_thread+0x310/0x310
[] ? rescuer_thread+0x310/0x310
[] ? kthread+0x96/0xa0
[] ? kernel_thread_helper+0x4/0x10
[] ? kthread_worker_fn+0x120/0x120
[] ? gs_change+0xb/0xb

2. later in scsi_probe_and_add_lun() this device is being removed

DEBUG: __scsi_remove_device(): sdev=88046a931000 7:0:5:0
Pid: 910, comm: kworker/u:2 Tainted: P   O 3.2.48-swt9004 #33
Call Trace:
[] ? __scsi_remove_device+0x46/0x110
[] ? scsi_probe_and_add_lun+0x458/0xb20
[] ? dev_set_name+0x3f/0x50
[] ? kobject_get+0x12/0x20
[] ? scsi_alloc_target+0x295/0x2d0
[] ? device_release+0x1a/0x80
[] ? __scsi_scan_target+0xce/0x5f0
[] ? dequeue_task_fair+0x52/0x150
[] ? __schedule+0x25d/0x7d0
[] ? scsi_scan_target+0xc6/0xe0
[] ? fc_scsi_scan_rport+0xaf/0xc0 [scsi_transport_fc]
[] ? process_one_work+0x116/0x3a0
[] ? worker_thread+0x14c/0x400
[] ? rescuer_thread+0x310/0x310
[] ? rescuer_thread+0x310/0x310
[] ? kthread+0x96/0xa0
[] ? kernel_thread_helper+0x4/0x10
[] ? kthread_worker_fn+0x120/0x120
[] ? gs_change+0xb/0xb

3. another thread is trying to remove this device because of timeout

DEBUG: __scsi_remove_device(): sdev=88046a931000 7:0:5:0
Pid: 4, comm: kworker/0:0 Tainted: P   O 3.2.48-swt9004 #33
Call Trace:
[] ? __scsi_remove_device+0x46/0x110
[] ? mutex_lock+0x1a/0x40
[] ? scsi_remove_device+0x28/0x40
[] ? scsi_kmap_atomic_sg+0x180/0x180
[] ? scsi_remove_target+0x141/0x1e0
[] ? process_one_work+0x116/0x3a0
[] ? worker_thread+0x14c/0x400
[] ? rescuer_thread+0x310/0x310
[] ? rescuer_thread+0x310/0x310
[] ? kthread+0x96/0xa0
[] ? kernel_thread_helper+0x4/0x10
[] ? kthread_worker_fn+0x120/0x120
[] ? gs_change+0xb/0xb

and it gets a dead sdev object.  I don't understand how this can happen,
because __scsi_remove_target() iterates over __devices and takes the sdev
reference under host_lock, and that should be enough.

DEBUG: kref_put(): kref=88046a9312e0 val=-1
[ cut here ]
WARNING: at lib/kref.c:61 kref_put+0x88/0xc0()
Hardware name: X9DRi-LN4+/X9DR3-LN4+
Modules linked in: qla2xxx(O) igb ehci_hcd scsi_transport_fc
Pid: 4, comm: kworker/0:0 Tainted: P   O 3.2.48-swt9004 #33
Call Trace:
[] ? warn_slowpath_common+0x7b/0xc0
[] ? kobject_del+0x30/0x30
[] ? kref_put+0x88/0xc0
[] ? __scsi_remove_device+0x8c/0x110
[] ? mutex_lock+0x1a/0x40
[] ? scsi_remove_device+0x28/0x40
[] ? scsi_kmap_atomic_sg+0x180/0x180
[] ? scsi_remove_target+0x141/0x1e0
[] ? process_one_work+0x116/0x3a0
[] ? worker_thread+0x14c/0x400
[] ? rescuer_thread+0x310/0x310
[] ? rescuer_thread+0x310/0x310
[] ? kthread+0x96/0xa0
[] ? kernel_thread_helper+0x4/0x10
[] ? kthread_worker_fn+0x120/0x120
[] ? gs_change+0xb/0xb

Here is the patch which helped me:

diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 9117d0b..676e5ff 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1094,6 +1094,7 @@ static void __scsi_remove_target(struct
scsi_target *starget)
unsigned long flags;
struct scsi_device *sdev;

+   mutex_lock(&shost->scan_mutex);
spin_lock_irqsave(shost->host_lock, flags);
  restart:
list_for_each_entry(sdev, &shost->__devices, siblings) {
@@ -1102,12 +1103,13 @@ static void __scsi_remove_target(struct
scsi_target *starget)
scsi_device_get(sdev))
continue;
spin_unlock_irqrestore(shost->host_lock, flags);
-   scsi_remove_device(sdev);
+   __scsi_remove_device(sdev);
scsi_device_put(sdev);
spin_lock_irqsave(shost->host_lock, flags);
goto restart;
}
spin_unlock_irqrestore(shost->host_lock, flags);
+   mutex_unlock(&shost->scan_mutex);
 }

 /**

I'm not sure the fix is correct, but with it applied I was not able to
reproduce the panic.


P.S. Here is another p

Re: mxs-auart gives data from previous run after close and reopen

2014-03-20 Thread Peter Hurley

On 03/14/2014 04:11 PM, Stanislav Meduna wrote:

Hi,

following scenario:

- a Freescale i.MX28 machine
- RS232 AUART looped back Rx - Tx or two different ports cross-connected
- a test program sending data in one thread and receiving in the other:
   thread A periodically sends "Quick brown fox jumps over the lazy dog",
   thread B receives
- stop the program using ctrl-C
- restart

=> sometimes the receiving thread gets "dogQuick", receiving
characters from the previous transmission

The extra characters definitely come from the receiver - the transmission
is OK. This was verified by a scope.

My theory is that this behaviour is caused by mxs_auart_shutdown
function gating and mxs_auart_startup reenabling the clock instead
of doing a soft reset. If the clock is gated while the AUART already
has something in the FIFO, but did not generate the interrupt yet,
the internal state machine is frozen in this state. As soon as it
is reenabled, the characters are delivered to a new user.

I am using a 3.4.77 kernel, but the relevant code looks the same in
the recent kernels (when not using DMA). I also backported patches
waiting for the transmission FIFO to clear at mxs_auart_shutdown.

I did not find any possibility to fully clear the receiver - even
if one disables the AUART and reads everything from the FIFO,
the character currently in transmit might be still somewhere (the
reference manual states that disabling is effective after the
current character is received). Which might be 1 ms at 9600 and one
has no clue whether it is the case.

Also note that the function mxs_auart_reset is in fact not doing
a reset - it just makes sure that the AUART comes out of one after
the initialization. Maybe a full reset at startup is the solution...

Please Cc: me when answering.


Does disabling the fifo on shutdown clear the fifo?


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 002/149] mm: page_alloc: exempt GFP_THISNODE allocations from zone fairness

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Johannes Weiner 

commit 27329369c9ecf37771b2a65202cbf5578cff3331 upstream.

Jan Stancek reports manual page migration encountering allocation
failures after some pages when there is still plenty of memory free, and
bisected the problem down to commit 81c0a2bb515f ("mm: page_alloc: fair
zone allocator policy").

The problem is that GFP_THISNODE obeys the zone fairness allocation
batches on one hand, but doesn't reset them and wake kswapd on the other
hand.  After a few of those allocations, the batches are exhausted and
the allocations fail.

Fixing this means either having GFP_THISNODE wake up kswapd, or
GFP_THISNODE not participating in zone fairness at all.  The latter
seems safer as an acute bugfix, we can clean up later.

Reported-by: Jan Stancek 
Signed-off-by: Johannes Weiner 
Acked-by: Rik van Riel 
Acked-by: Mel Gorman 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 mm/page_alloc.c |   26 ++
 1 file changed, 22 insertions(+), 4 deletions(-)

--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1211,6 +1211,15 @@ void drain_zone_pages(struct zone *zone,
}
local_irq_restore(flags);
 }
+static bool gfp_thisnode_allocation(gfp_t gfp_mask)
+{
+   return (gfp_mask & GFP_THISNODE) == GFP_THISNODE;
+}
+#else
+static bool gfp_thisnode_allocation(gfp_t gfp_mask)
+{
+   return false;
+}
 #endif
 
 /*
@@ -1547,7 +1556,13 @@ again:
  get_pageblock_migratetype(page));
}
 
-   __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+   /*
+* NOTE: GFP_THISNODE allocations do not partake in the kswapd
+* aging protocol, so they can't be fair.
+*/
+   if (!gfp_thisnode_allocation(gfp_flags))
+   __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
+
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
local_irq_restore(flags);
@@ -1919,8 +1934,12 @@ zonelist_scan:
 * ultimately fall back to remote zones that do not
 * partake in the fairness round-robin cycle of this
 * zonelist.
+*
+* NOTE: GFP_THISNODE allocations do not partake in
+* the kswapd aging protocol, so they can't be fair.
 */
-   if (alloc_flags & ALLOC_WMARK_LOW) {
+   if ((alloc_flags & ALLOC_WMARK_LOW) &&
+   !gfp_thisnode_allocation(gfp_mask)) {
if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
continue;
if (!zone_local(preferred_zone, zone))
@@ -2486,8 +2505,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u
 * allowed per node queues are empty and that nodes are
 * over allocated.
 */
-   if (IS_ENABLED(CONFIG_NUMA) &&
-   (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
+   if (gfp_thisnode_allocation(gfp_mask))
goto nopage;
 
 restart:


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 011/149] net-tcp: fastopen: fix high order allocations

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Eric Dumazet 

[ Upstream commit f5ddcbbb40aa0ba7fbfe22355d287603dbeeaaac ]

This patch fixes two bugs in fastopen :

1) The tcp_sendmsg(...,  @size) argument was ignored.

   Code was relying on user not fooling the kernel with iovec mismatches

2) When MTU is about 64KB, tcp_send_syn_data() attempts order-5
allocations, which are likely to fail when memory gets fragmented.

Fixes: 783237e8daf13 ("net-tcp: Fast Open client - sending SYN-data")
Signed-off-by: Eric Dumazet 
Cc: Yuchung Cheng 
Acked-by: Yuchung Cheng 
Tested-by: Yuchung Cheng 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 include/net/tcp.h |3 ++-
 net/ipv4/tcp.c|8 +---
 net/ipv4/tcp_output.c |7 ++-
 3 files changed, 13 insertions(+), 5 deletions(-)

--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1312,7 +1312,8 @@ struct tcp_fastopen_request {
/* Fast Open cookie. Size 0 means a cookie request */
struct tcp_fastopen_cookie  cookie;
struct msghdr   *data;  /* data in MSG_FASTOPEN */
-   u16 copied; /* queued in tcp_connect() */
+   size_t  size;
+   int copied; /* queued in tcp_connect() */
 };
 void tcp_free_fastopen_req(struct tcp_sock *tp);
 
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1002,7 +1002,8 @@ void tcp_free_fastopen_req(struct tcp_so
}
 }
 
-static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
+static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+   int *copied, size_t size)
 {
struct tcp_sock *tp = tcp_sk(sk);
int err, flags;
@@ -1017,11 +1018,12 @@ static int tcp_sendmsg_fastopen(struct s
if (unlikely(tp->fastopen_req == NULL))
return -ENOBUFS;
tp->fastopen_req->data = msg;
+   tp->fastopen_req->size = size;
 
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
msg->msg_namelen, flags);
-   *size = tp->fastopen_req->copied;
+   *copied = tp->fastopen_req->copied;
tcp_free_fastopen_req(tp);
return err;
 }
@@ -1041,7 +1043,7 @@ int tcp_sendmsg(struct kiocb *iocb, stru
 
flags = msg->msg_flags;
if (flags & MSG_FASTOPEN) {
-   err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
+   err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
if (err == -EINPROGRESS && copied_syn > 0)
goto out;
else if (err)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2887,7 +2887,12 @@ static int tcp_send_syn_data(struct sock
space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
 
-   syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
+   space = min_t(size_t, space, fo->size);
+
+   /* limit to order-0 allocations */
+   space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
+
+   syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
   sk->sk_allocation);
if (syn_data == NULL)
goto fallback;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 004/149] ocfs2: fix quota file corruption

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Jan Kara 

commit 15c34a760630ca2c803848fba90ca0646a9907dd upstream.

Global quota files are accessed from different nodes.  Thus we cannot
cache offset of quota structure in the quota file after we drop our node
reference count to it because after that moment quota structure may be
freed and reallocated elsewhere by a different node resulting in
corruption of quota file.

Fix the problem by clearing dq_off when we are releasing dquot structure.
We also remove the DQ_READ_B handling because it is useless -
DQ_ACTIVE_B is set iff DQ_READ_B is set.

Signed-off-by: Jan Kara 
Cc: Goldwyn Rodrigues 
Cc: Joel Becker 
Reviewed-by: Mark Fasheh 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/ocfs2/quota_global.c |   27 +--
 fs/ocfs2/quota_local.c  |4 
 2 files changed, 17 insertions(+), 14 deletions(-)

--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -717,6 +717,12 @@ static int ocfs2_release_dquot(struct dq
 */
if (status < 0)
mlog_errno(status);
+   /*
+* Clear dq_off so that we search for the structure in quota file next
+* time we acquire it. The structure might be deleted and reallocated
+* elsewhere by another node while our dquot structure is on freelist.
+*/
+   dquot->dq_off = 0;
clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
 out_trans:
ocfs2_commit_trans(osb, handle);
@@ -756,16 +762,17 @@ static int ocfs2_acquire_dquot(struct dq
status = ocfs2_lock_global_qf(info, 1);
if (status < 0)
goto out;
-   if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
-   status = ocfs2_qinfo_lock(info, 0);
-   if (status < 0)
-   goto out_dq;
-   status = qtree_read_dquot(&info->dqi_gi, dquot);
-   ocfs2_qinfo_unlock(info, 0);
-   if (status < 0)
-   goto out_dq;
-   }
-   set_bit(DQ_READ_B, &dquot->dq_flags);
+   status = ocfs2_qinfo_lock(info, 0);
+   if (status < 0)
+   goto out_dq;
+   /*
+* We always want to read dquot structure from disk because we don't
+* know what happened with it while it was on freelist.
+*/
+   status = qtree_read_dquot(&info->dqi_gi, dquot);
+   ocfs2_qinfo_unlock(info, 0);
+   if (status < 0)
+   goto out_dq;
 
OCFS2_DQUOT(dquot)->dq_use_count++;
OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *
ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
 
 out:
-   /* Clear the read bit so that next time someone uses this
-* dquot he reads fresh info from disk and allocates local
-* dquot structure */
-   clear_bit(DQ_READ_B, &dquot->dq_flags);
return status;
 }
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 000/149] 3.13.7-stable review

2014-03-20 Thread Greg Kroah-Hartman
This is the start of the stable review cycle for the 3.13.7 release.
There are 149 patches in this series, all will be posted as a response
to this one.  If anyone has any issues with these being applied, please
let me know.

Responses should be made by Sun Mar 23 00:03:54 UTC 2014.
Anything received after that time might be too late.

The whole patch series can be found in one patch at:
kernel.org/pub/linux/kernel/v3.0/stable-review/patch-3.13.7-rc1.gz
and the diffstat can be found below.

thanks,

greg k-h

-
Pseudo-Shortlog of commits:

Greg Kroah-Hartman 
Linux 3.13.7-rc1

Filipe Brandenburger 
memcg: reparent charges of children before processing parent

Steve Capper 
arm64: mm: Add double logical invert to pte accessors

Nicholas Bellinger 
bio-integrity: Fix bio_integrity_verify segment start bug

Qais Yousef 
MIPS: include linux/types.h

Oleg Drokin 
Fix mountpoint reference leakage in linkat

Shuah Khan 
regulator: core: Change dummy supplies error message to a warning

Roman Volkov 
ALSA: oxygen: modify adjust_dg_dac_routing function

Dirk Brandewie 
intel_pstate: Add support for Baytrail turbo P states

Dirk Brandewie 
intel_pstate: Add setting voltage value for baytrail P states.

Gao feng 
audit: don't generate loginuid log when audit disabled

Filipe David Borba Manana 
Btrfs: fix data corruption when reading/updating compressed extents

Filipe David Borba Manana 
Btrfs: fix tree mod logging

Filipe David Borba Manana 
Btrfs: return immediately if tree log mod is not necessary

Suresh Siddha 
x86, fpu: Check tsk_used_math() in kernel_fpu_end() for eager FPU

Ales Novak 
SCSI: storvsc: NULL pointer dereference fix

Chad Dupuis 
SCSI: qla2xxx: Fix multiqueue MSI-X registration.

Giridhar Malavali 
SCSI: qla2xxx: Poll during initialization for ISP25xx and ISP83xx

Lukasz Dorau 
SCSI: isci: correct erroneous for_each_isci_host macro

Dan Williams 
SCSI: isci: fix reset timeout handling

Marc Kleine-Budde 
can: flexcan: flexcan_remove(): add missing netif_napi_del()

Marc Kleine-Budde 
can: flexcan: factor out transceiver {en,dis}able into seperate functions

Marc Kleine-Budde 
can: flexcan: fix transition from and to low power mode in chip_{en,dis}able

Marc Kleine-Budde 
can: flexcan: flexcan_open(): fix error path if flexcan_chip_start() fails

Marc Kleine-Budde 
can: flexcan: fix shutdown: first disable chip, then all interrupts

Anton Blanchard 
net: unix socket code abuses csum_partial

Heinz Mauelshagen 
dm cache: fix access beyond end of origin device

Heinz Mauelshagen 
dm cache: fix truncation bug when copying a block to/from >2TB fast device

Joe Thornber 
dm space map metadata: fix refcount decrement below 0 which caused 
corruption

Heinz Mauelshagen 
dm cache mq: fix memory allocation failure for large cache devices

Laura Abbott 
mm/compaction: break out of loop on !PageBuddy in isolate_freepages_block

Arnd Bergmann 
vmxnet3: fix building without CONFIG_PCI_MSI

Neil Horman 
vmxnet3: fix netpoll race condition

Bjorn Helgaas 
PCI: Enable INTx in pci_reenable_device() only when MSI/MSI-X not enabled

Anton Blanchard 
ibmveth: Fix endian issues with MAC addresses

Radim Krčmář 
KVM: SVM: fix cr8 intercept window

Michael Kerrisk 
ipc: Fix 2 bugs in msgrcv() MSG_COPY implementation

Richard Weinberger 
i2c: Remove usage of orphaned symbol OF_I2C

Lars-Peter Clausen 
ASoC: si476x: Fix IO setup

Lars-Peter Clausen 
ASoC: 88pm860: Fix IO setup

Patrick Lai 
ASoC: pcm: free path list before exiting from error conditions

Alex Deucher 
drm/radeon/si: fix typo in dpm sq ramping setup

Alex Deucher 
drm/radeon: fix minor typos in si_dpm.c

Alex Deucher 
drm/radeon/cik: properly set compute ring status on disable

Alex Deucher 
drm/radeon/cik: stop the sdma engines in the enable() function

Alex Deucher 
drm/radeon/cik: properly set sdma ring status on disable

Alex Deucher 
drm/radeon: fix runpm disabling on non-PX harder

Alex Deucher 
drm/radeon: re-order firmware loading in preparation for dpm rework

Ville Syrjälä 
drm/i915: Reject >165MHz modes w/ DVI monitors

Imre Deak 
drm/i915: fix pch pci device enumeration

Alex Deucher 
drm/radeon/dpm: fix typo in EVERGREEN_SMC_FIRMWARE_HEADER_softRegisters

Alex Deucher 
drm/radeon/atom: select the proper number of lanes in transmitter setup

Artem Fetishev 
fs/proc/base.c: fix GPF in /proc/$PID/map_files

Nicholas Bellinger 
iscsi-target: Fix iscsit_get_tpg_from_np tpg_state bug

Mark Rutland 
mm/readahead.c: fix do_readahead() for no readpage(s)

Nicholas Bellinger 
iser-target: Fix command leak for tx_desc->comp_llnode_batch

Nicholas Bellinger 
iser-target: Fix post_send_buf_count for RDMA READ/WRITE

Nicholas Bellinger 
iser-target: Ignore completions for FRWRs in isert_cq_tx_work

Nicholas Be

[PATCH 3.13 003/149] mm: include VM_MIXEDMAP flag in the VM_SPECIAL list to avoid m(un)locking

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Vlastimil Babka 

commit 9050d7eba40b3d79551668f54e68fd6f51945ef3 upstream.

Daniel Borkmann reported a VM_BUG_ON assertion failing:

  [ cut here ]
  kernel BUG at mm/mlock.c:528!
  invalid opcode:  [#1] SMP
  Modules linked in: ccm arc4 iwldvm [...]
   video
  CPU: 3 PID: 2266 Comm: netsniff-ng Not tainted 3.14.0-rc2+ #8
  Hardware name: LENOVO 2429BP3/2429BP3, BIOS G4ET37WW (1.12 ) 05/29/2012
  task: 8801f87f9820 ti: 88002cb44000 task.ti: 88002cb44000
  RIP: 0010:[]  [] 
munlock_vma_pages_range+0x2e0/0x2f0
  Call Trace:
do_munmap+0x18f/0x3b0
vm_munmap+0x41/0x60
SyS_munmap+0x22/0x30
system_call_fastpath+0x1a/0x1f
  RIP   munlock_vma_pages_range+0x2e0/0x2f0
  ---[ end trace a0088dcf07ae10f2 ]---

because munlock_vma_pages_range() thinks it's unexpectedly in the middle
of a THP page.  This can be reproduced with default config since 3.11
kernels.  A reproducer can be found in the kernel's selftest directory
for networking by running ./psock_tpacket [1].

The problem is that an order=2 compound page (allocated by
alloc_one_pg_vec_page()) is part of the munlocked VM_MIXEDMAP vma (mapped
by packet_mmap()) and mistaken for a THP page and assumed to be order=9.

The checks for THP in munlock came with commit ff6a6da60b89 ("mm:
accelerate munlock() treatment of THP pages"), i.e.  since 3.9, but did
not trigger a bug.  It just makes munlock_vma_pages_range() skip such
compound pages until the next 512-pages-aligned page, when it encounters
a head page.  This is however not a problem for vma's where mlocking has
no effect anyway, but it can distort the accounting.

Since commit 7225522bb429 ("mm: munlock: batch non-THP page isolation
and munlock+putback using pagevec") this can trigger a VM_BUG_ON in
PageTransHuge() check.

This patch fixes the issue by adding VM_MIXEDMAP flag to VM_SPECIAL, a
list of flags that make vma's non-mlockable and non-mergeable.  The
reasoning is that VM_MIXEDMAP vma's are similar to VM_PFNMAP, which is
already on the VM_SPECIAL list, and both are intended for non-LRU pages
where mlocking makes no sense anyway.  Related Lkml discussion can be
found in [2].

 [1] tools/testing/selftests/net/psock_tpacket
 [2] https://lkml.org/lkml/2014/1/10/427

Signed-off-by: Vlastimil Babka 
Signed-off-by: Daniel Borkmann 
Reported-by: Daniel Borkmann 
Tested-by: Daniel Borkmann 
Cc: Thomas Hellstrom 
Cc: John David Anglin 
Cc: HATAYAMA Daisuke 
Cc: Konstantin Khlebnikov 
Cc: Carsten Otte 
Cc: Jared Hulbert 
Tested-by: Hannes Frederic Sowa 
Cc: Kirill A. Shutemov 
Acked-by: Rik van Riel 
Cc: Andrea Arcangeli 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 include/linux/mm.h |2 +-
 mm/huge_memory.c   |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -165,7 +165,7 @@ extern unsigned int kobjsize(const void
  * Special vmas that are non-mergable, non-mlock()able.
  * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
  */
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP)
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
 
 /*
  * mapping from the currently active vm_flags protection bits (the
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1960,7 +1960,7 @@ out:
return ret;
 }
 
-#define VM_NO_THP (VM_SPECIAL|VM_MIXEDMAP|VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
+#define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
 
 int hugepage_madvise(struct vm_area_struct *vma,
 unsigned long *vm_flags, int advice)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 006/149] memcg: fix endless loop in __mem_cgroup_iter_next()

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Hugh Dickins 

commit ce48225fe3b1b0d1fc9fceb96ac3d8a879e45114 upstream.

Commit 0eef615665ed ("memcg: fix css reference leak and endless loop in
mem_cgroup_iter") got the interaction with the commit a few before it
d8ad30559715 ("mm/memcg: iteration skip memcgs not yet fully
initialized") slightly wrong, and we didn't notice at the time.

It's elusive, and harder to get than the original, but for a couple of
days before rc1, I several times saw an endless loop similar to that
supposedly being fixed.

This time it was a tighter loop in __mem_cgroup_iter_next(): because we
can get here when our root has already been offlined, and the ordering
of conditions was such that we then just cycled around forever.

Fixes: 0eef615665ed ("memcg: fix css reference leak and endless loop in 
mem_cgroup_iter").
Signed-off-by: Hugh Dickins 
Acked-by: Michal Hocko 
Cc: Johannes Weiner 
Cc: Greg Thelen 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 mm/memcontrol.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1108,8 +1108,8 @@ skip_node:
 * skipping css reference should be safe.
 */
if (next_css) {
-   if ((next_css->flags & CSS_ONLINE) &&
-   (next_css == &root->css || 
css_tryget(next_css)))
+   if ((next_css == &root->css) ||
+   ((next_css->flags & CSS_ONLINE) && css_tryget(next_css)))
return mem_cgroup_from_css(next_css);
 
prev_css = next_css;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 005/149] ocfs2 syncs the wrong range...

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Al Viro 

commit 1b56e98990bcdbb20b9fab163654b9315bf158e8 upstream.

Signed-off-by: Al Viro 
Signed-off-by: Greg Kroah-Hartman 

---
 fs/ocfs2/file.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2370,8 +2370,8 @@ out_dio:
 
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
((file->f_flags & O_DIRECT) && !direct_io)) {
-   ret = filemap_fdatawrite_range(file->f_mapping, pos,
-  pos + count - 1);
+   ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
+  *ppos + count - 1);
if (ret < 0)
written = ret;
 
@@ -2384,8 +2384,8 @@ out_dio:
}
 
if (!ret)
-   ret = filemap_fdatawait_range(file->f_mapping, pos,
- pos + count - 1);
+   ret = filemap_fdatawait_range(file->f_mapping, *ppos,
+ *ppos + count - 1);
}
 
/*


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 001/149] zram: avoid null access when fail to alloc meta

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Minchan Kim 

commit db5d711e2db776f18219b033e5dc4fb7e4264dd7 upstream.

zram_meta_alloc could fail so caller should check it.  Otherwise, your
system will hang.

Signed-off-by: Minchan Kim 
Acked-by: Jerome Marchand 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/staging/zram/zram_drv.c |2 ++
 1 file changed, 2 insertions(+)

--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -621,6 +621,8 @@ static ssize_t disksize_store(struct dev
 
disksize = PAGE_ALIGN(disksize);
meta = zram_meta_alloc(disksize);
+   if (!meta)
+   return -ENOMEM;
down_write(&zram->init_lock);
if (zram->init_done) {
up_write(&zram->init_lock);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 008/149] rapidio/tsi721: fix tasklet termination in dma channel release

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Alexandre Bounine 

commit 04379dffdd4da820d51a1566ad2e86f3b1ad97ed upstream.

This patch is a modification of the patch originally proposed by
Xiaotian Feng : https://lkml.org/lkml/2012/11/5/413
This new version disables DMA channel interrupts and ensures that the
tasklet will not be scheduled again before calling tasklet_kill().

Unfortunately the updated patch was not released at that time due to
planned rework of Tsi721 mport driver to use threaded interrupts (which
has yet to happen).  Recently the issue was reported again:
https://lkml.org/lkml/2014/2/19/762.

Description from the original Xiaotian's patch:

 "Some drivers use tasklet_disable in device remove/release process,
  tasklet_disable will inc tasklet->count and return.  If the tasklet is
  not handled yet under some softirq pressure, the tasklet will be
  placed on the tasklet_vec, never have a chance to be executed.  This
  might lead to a heavy loaded ksoftirqd, wakeup with pending_softirq,
  but tasklet is disabled.  tasklet_kill should be used in this case."

This patch is applicable to kernel versions starting from v3.5.

Signed-off-by: Alexandre Bounine 
Cc: Matt Porter 
Cc: Xiaotian Feng 
Reviewed-by: Thomas Gleixner 
Cc: Mike Galbraith 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Greg Kroah-Hartman 

---
 drivers/rapidio/devices/tsi721.h |1 +
 drivers/rapidio/devices/tsi721_dma.c |   27 ++-
 2 files changed, 19 insertions(+), 9 deletions(-)

--- a/drivers/rapidio/devices/tsi721.h
+++ b/drivers/rapidio/devices/tsi721.h
@@ -678,6 +678,7 @@ struct tsi721_bdma_chan {
struct list_headfree_list;
dma_cookie_tcompleted_cookie;
struct tasklet_struct   tasklet;
+   boolactive;
 };
 
 #endif /* CONFIG_RAPIDIO_DMA_ENGINE */
--- a/drivers/rapidio/devices/tsi721_dma.c
+++ b/drivers/rapidio/devices/tsi721_dma.c
@@ -206,8 +206,8 @@ void tsi721_bdma_handler(struct tsi721_b
 {
/* Disable BDMA channel interrupts */
iowrite32(0, bdma_chan->regs + TSI721_DMAC_INTE);
-
-   tasklet_schedule(&bdma_chan->tasklet);
+   if (bdma_chan->active)
+   tasklet_schedule(&bdma_chan->tasklet);
 }
 
 #ifdef CONFIG_PCI_MSI
@@ -562,7 +562,7 @@ static int tsi721_alloc_chan_resources(s
}
 #endif /* CONFIG_PCI_MSI */
 
-   tasklet_enable(&bdma_chan->tasklet);
+   bdma_chan->active = true;
tsi721_bdma_interrupt_enable(bdma_chan, 1);
 
return bdma_chan->bd_num - 1;
@@ -576,9 +576,7 @@ err_out:
 static void tsi721_free_chan_resources(struct dma_chan *dchan)
 {
struct tsi721_bdma_chan *bdma_chan = to_tsi721_chan(dchan);
-#ifdef CONFIG_PCI_MSI
struct tsi721_device *priv = to_tsi721(dchan->device);
-#endif
LIST_HEAD(list);
 
dev_dbg(dchan->device->dev, "%s: Entry\n", __func__);
@@ -589,14 +587,25 @@ static void tsi721_free_chan_resources(s
BUG_ON(!list_empty(&bdma_chan->active_list));
BUG_ON(!list_empty(&bdma_chan->queue));
 
-   tasklet_disable(&bdma_chan->tasklet);
+   tsi721_bdma_interrupt_enable(bdma_chan, 0);
+   bdma_chan->active = false;
+
+#ifdef CONFIG_PCI_MSI
+   if (priv->flags & TSI721_USING_MSIX) {
+   synchronize_irq(priv->msix[TSI721_VECT_DMA0_DONE +
+  bdma_chan->id].vector);
+   synchronize_irq(priv->msix[TSI721_VECT_DMA0_INT +
+  bdma_chan->id].vector);
+   } else
+#endif
+   synchronize_irq(priv->pdev->irq);
+
+   tasklet_kill(&bdma_chan->tasklet);
 
spin_lock_bh(&bdma_chan->lock);
list_splice_init(&bdma_chan->free_list, &list);
spin_unlock_bh(&bdma_chan->lock);
 
-   tsi721_bdma_interrupt_enable(bdma_chan, 0);
-
 #ifdef CONFIG_PCI_MSI
if (priv->flags & TSI721_USING_MSIX) {
free_irq(priv->msix[TSI721_VECT_DMA0_DONE +
@@ -790,6 +799,7 @@ int tsi721_register_dma(struct tsi721_de
bdma_chan->dchan.cookie = 1;
bdma_chan->dchan.chan_id = i;
bdma_chan->id = i;
+   bdma_chan->active = false;
 
spin_lock_init(&bdma_chan->lock);
 
@@ -799,7 +809,6 @@ int tsi721_register_dma(struct tsi721_de
 
tasklet_init(&bdma_chan->tasklet, tsi721_dma_tasklet,
 (unsigned long)bdma_chan);
-   tasklet_disable(&bdma_chan->tasklet);
list_add_tail(&bdma_chan->dchan.device_node,
  &mport->dma.channels);
}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 010/149] tun: remove bogus hardware vlan acceleration flags from vlan_features

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Fernando Luis Vazquez Cao 

[ Upstream commit 6671b2240c54585d4afb5286a29f1569fe5e40a8 ]

Even though only the outer vlan tag can be HW accelerated in the transmission
path, in the TUN/TAP driver vlan_features mirrors hw_features, which happens
to have the NETIF_F_HW_VLAN_?TAG_TX flags set. Because of this, during packet
transmission through a stacked vlan device, dev_hard_start_xmit, (incorrectly)
assuming that the vlan device supports hardware vlan acceleration, does not
add the vlan header to the skb payload and the inner vlan tags are lost
(vlan_tci contains the outer vlan tag when userspace reads the packet from
the tap device).

Signed-off-by: Fernando Luis Vazquez Cao 
Signed-off-by: Toshiaki Makita 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/tun.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1651,7 +1651,9 @@ static int tun_set_iff(struct net *net,
   TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
   NETIF_F_HW_VLAN_STAG_TX;
dev->features = dev->hw_features;
-   dev->vlan_features = dev->features;
+   dev->vlan_features = dev->features &
+~(NETIF_F_HW_VLAN_CTAG_TX |
+  NETIF_F_HW_VLAN_STAG_TX);
 
INIT_LIST_HEAD(&tun->disabled);
err = tun_attach(tun, file, false);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 022/149] macvlan: Add support for always_on offload features

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Vlad Yasevich 

[ Upstream commit 8b4703e9bd1172a5f8244276ebb94302e6153e26 ]

Macvlan currently inherits all of its features from the lower
device.  When lower device disables offload support, this causes
macvlan to disable offload support as well.  This causes
performance regression when using macvlan/macvtap in bridge
mode.

It can be easily demonstrated by creating 2 namespaces using
macvlan in bridge mode and running netperf between them:

MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.0.0.1 () 
port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    20.00    1204.61

To restore the performance, we add software offload features
to the list of "always_on" features for macvlan.  This way
when a namespace or a guest using macvtap initially sends a
packet, this packet will not be segmented at macvlan level.
It will only be segmented when macvlan sends the packet
to the lower device.

MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.0.0.1 () 
port 0 AF_INET
Recv   Send    Send
Socket Socket  Message  Elapsed
Size   Size    Size     Time     Throughput
bytes  bytes   bytes    secs.    10^6bits/sec

 87380  16384  16384    20.00    5507.35

Fixes: 6acf54f1cf0a6747bac9fea26f34cfc5a9029523 (macvtap: Add support of packet 
capture on macvtap device.)
Fixes: 797f87f83b60685ff8a13fa0572d2f10393c50d3 (macvlan: fix netdev feature 
propagation from lower device)
CC: Florian Westphal 
CC: Christian Borntraeger 
CC: Jason Wang 
CC: Michael S. Tsirkin 
Tested-by: Christian Borntraeger 
Signed-off-by: Vlad Yasevich 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/macvlan.c |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -507,6 +507,9 @@ static int macvlan_change_mtu(struct net
 static struct lock_class_key macvlan_netdev_xmit_lock_key;
 static struct lock_class_key macvlan_netdev_addr_lock_key;
 
+#define ALWAYS_ON_FEATURES \
+   (NETIF_F_SG | NETIF_F_GEN_CSUM | NETIF_F_GSO_SOFTWARE | NETIF_F_LLTX)
+
 #define MACVLAN_FEATURES \
(NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \
@@ -540,7 +543,7 @@ static int macvlan_init(struct net_devic
dev->state  = (dev->state & ~MACVLAN_STATE_MASK) |
  (lowerdev->state & MACVLAN_STATE_MASK);
dev->features   = lowerdev->features & MACVLAN_FEATURES;
-   dev->features   |= NETIF_F_LLTX;
+   dev->features   |= ALWAYS_ON_FEATURES;
dev->gso_max_size   = lowerdev->gso_max_size;
dev->iflink = lowerdev->ifindex;
dev->hard_header_len= lowerdev->hard_header_len;
@@ -700,7 +703,7 @@ static netdev_features_t macvlan_fix_fea
features = netdev_increment_features(vlan->lowerdev->features,
 features,
 mask);
-   features |= NETIF_F_LLTX;
+   features |= ALWAYS_ON_FEATURES;
 
return features;
 }


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 012/149] neigh: recompute reachabletime before returning from neigh_periodic_work()

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Duan Jiong 

[ Upstream commit feff9ab2e7fa773b6a3965f77375fe89f7fd85cf ]

If the number of entries in the neigh table is less than gc_thresh1, the
function returns directly and the reachabletime is not recomputed,
so the reachabletime can be guessed.

Signed-off-by: Duan Jiong 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/core/neighbour.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -764,9 +764,6 @@ static void neigh_periodic_work(struct w
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
 
-   if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
-   goto out;
-
/*
 *  periodically recompute ReachableTime from random function
 */
@@ -779,6 +776,9 @@ static void neigh_periodic_work(struct w
neigh_rand_reach_time(p->base_reachable_time);
}
 
+   if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
+   goto out;
+
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
np = &nht->hash_buckets[i];
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 009/149] veth: Fix vlan_features so as to be able to use stacked vlan interfaces

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Toshiaki Makita 

[ Upstream commit 8d0d21f4053c07714802cbe8b1fe26913ec296cc ]

Even if we create a stacked vlan interface such as veth0.10.20, it sends
single tagged frames (tagged with only vid 10).
Because vlan_features of a veth interface has the
NETIF_F_HW_VLAN_[CTAG/STAG]_TX bits, veth0.10 also has that feature, so
dev_hard_start_xmit(veth0.10) doesn't call __vlan_put_tag() and
vlan_dev_hard_start_xmit(veth0.10) overwrites vlan_tci.
This prevents us from using a combination of 802.1ad and 802.1Q
in containers, etc.

Signed-off-by: Toshiaki Makita 
Acked-by: Flavio Leitner 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/veth.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -285,7 +285,8 @@ static void veth_setup(struct net_device
dev->ethtool_ops = &veth_ethtool_ops;
dev->features |= NETIF_F_LLTX;
dev->features |= VETH_FEATURES;
-   dev->vlan_features = dev->features;
+   dev->vlan_features = dev->features &
+~(NETIF_F_HW_VLAN_CTAG_TX | 
NETIF_F_HW_VLAN_STAG_TX);
dev->destructor = veth_dev_free;
 
dev->hw_features = VETH_FEATURES;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 024/149] mac80211: fix AP powersave TX vs. wakeup race

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Emmanuel Grumbach 

commit 1d147bfa64293b2723c4fec50922168658e613ba upstream.

There is a race between the TX path and the STA wakeup: while
a station is sleeping, mac80211 buffers frames until it wakes
up, then the frames are transmitted. However, the RX and TX
path are concurrent, so the packet indicating wakeup can be
processed while a packet is being transmitted.

This can lead to a situation where the buffered frames list
is emptied on the one side, while a frame is being added on
the other side, as the station is still seen as sleeping in
the TX path.

As a result, the newly added frame will not be sent anytime
soon. It might be sent much later (and out of order) when the
station goes to sleep and wakes up the next time.

Additionally, it can lead to the crash below.

Fix all this by synchronising both paths with a new lock.
Both paths are not fastpath since they handle PS situations.

In a later patch we'll remove the extra skb queue locks to
reduce locking overhead.

BUG: unable to handle kernel
NULL pointer dereference at 00b0
IP: [] ieee80211_report_used_skb+0x11/0x3e0 [mac80211]
*pde = 
Oops:  [#1] SMP DEBUG_PAGEALLOC
EIP: 0060:[] EFLAGS: 00210282 CPU: 1
EIP is at ieee80211_report_used_skb+0x11/0x3e0 [mac80211]
EAX: e5900da0 EBX:  ECX: 0001 EDX: 
ESI: e41d00c0 EDI: e5900da0 EBP: ebe458e4 ESP: ebe458b0
 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
CR0: 8005003b CR2: 00b0 CR3: 25a78000 CR4: 000407d0
DR0:  DR1:  DR2:  DR3: 
DR6: 0ff0 DR7: 0400
Process iperf (pid: 3934, ti=ebe44000 task=e757c0b0 task.ti=ebe44000)
iwlwifi :02:00.0: I iwl_pcie_enqueue_hcmd Sending command LQ_CMD (#4e), 
seq: 0x0903, 92 bytes at 3[3]:9
Stack:
 e403b32c ebe458c4 0022 00200286 e403b338 ebe458cc c10960bb e5900da0
 ff76a6ec ebe458d8  e41d00c0 e5900da0 ebe458f0 ff6f1b75 e403b210
 ebe4598c ff723dc1  ff76a6ec e597c978 e403b758 0002 0002
Call Trace:
 [] ieee80211_free_txskb+0x15/0x20 [mac80211]
 [] invoke_tx_handlers+0x1661/0x1780 [mac80211]
 [] ieee80211_tx+0x75/0x100 [mac80211]
 [] ieee80211_xmit+0x8f/0xc0 [mac80211]
 [] ieee80211_subif_start_xmit+0x4fe/0xe20 [mac80211]
 [] dev_hard_start_xmit+0x450/0x950
 [] sch_direct_xmit+0xa9/0x250
 [] __qdisc_run+0x4b/0x150
 [] dev_queue_xmit+0x2c2/0xca0

Reported-by: Yaara Rozenblum 
Signed-off-by: Emmanuel Grumbach 
Reviewed-by: Stanislaw Gruszka 
[reword commit log, use a separate lock]
Signed-off-by: Johannes Berg 
Signed-off-by: Greg Kroah-Hartman 

---
 net/mac80211/sta_info.c |4 
 net/mac80211/sta_info.h |7 +++
 net/mac80211/tx.c   |   15 +++
 3 files changed, 22 insertions(+), 4 deletions(-)

--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -340,6 +340,7 @@ struct sta_info *sta_info_alloc(struct i
return NULL;
 
spin_lock_init(&sta->lock);
+   spin_lock_init(&sta->ps_lock);
INIT_WORK(&sta->drv_unblock_wk, sta_unblock);
INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
mutex_init(&sta->ampdu_mlme.mtx);
@@ -1073,6 +1074,8 @@ void ieee80211_sta_ps_deliver_wakeup(str
 
skb_queue_head_init(&pending);
 
+   /* sync with ieee80211_tx_h_unicast_ps_buf */
+   spin_lock(&sta->ps_lock);
/* Send all buffered frames to the station */
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
int count = skb_queue_len(&pending), tmp;
@@ -1092,6 +1095,7 @@ void ieee80211_sta_ps_deliver_wakeup(str
}
 
ieee80211_add_pending_skbs_fn(local, &pending, clear_sta_ps_flags, sta);
+   spin_unlock(&sta->ps_lock);
 
/* This station just woke up and isn't aware of our SMPS state */
if (!ieee80211_smps_is_restrictive(sta->known_smps_mode,
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -245,6 +245,7 @@ struct sta_ampdu_mlme {
  * @drv_unblock_wk: used for driver PS unblocking
  * @listen_interval: listen interval of this station, when we're acting as AP
  * @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly
+ * @ps_lock: used for powersave (when mac80211 is the AP) related locking
  * @ps_tx_buf: buffers (per AC) of frames to transmit to this station
  * when it leaves power saving state or polls
  * @tx_filtered: buffers (per AC) of frames we already tried to
@@ -330,10 +331,8 @@ struct sta_info {
/* use the accessors defined below */
unsigned long _flags;
 
-   /*
-* STA powersave frame queues, no more than the internal
-* locking required.
-*/
+   /* STA powersave lock and frame queues */
+   spinlock_t ps_lock;
struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS];
struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS];
unsigned long driver_buffered_tids;
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -

[PATCH 3.13 007/149] sched: Fix double normalization of vruntime

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: George McCollister 

commit 791c9e0292671a3bfa95286bb5c08129d8605618 upstream.

dequeue_entity() is called when p->on_rq and sets se->on_rq = 0
which appears to guarantee that the !se->on_rq condition is met.
If the task has done set_current_state(TASK_INTERRUPTIBLE) without
schedule() the second condition will be met and vruntime will be
incorrectly adjusted twice.

In certain cases this can result in the task's vruntime never increasing
past the vruntime of other tasks on the CFS' run queue, starving them of
CPU time.

This patch changes switched_from_fair() to use !p->on_rq instead of
!se->on_rq.

I'm able to cause a task with a priority of 120 to starve all other
tasks with the same priority on an ARM platform running 3.2.51-rt72
PREEMPT RT by writing one character at a time to a serial tty (16550 UART)
in a tight loop. I'm also able to verify making this change corrects the
problem on that platform and kernel version.

Signed-off-by: George McCollister 
Signed-off-by: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/1392767811-28916-1-git-send-email-george.mccollis...@gmail.com
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 kernel/sched/fair.c |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7012,15 +7012,15 @@ static void switched_from_fair(struct rq
struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
/*
-* Ensure the task's vruntime is normalized, so that when its
+* Ensure the task's vruntime is normalized, so that when it's
 * switched back to the fair class the enqueue_entity(.flags=0) will
 * do the right thing.
 *
-* If it was on_rq, then the dequeue_entity(.flags=0) will already
-* have normalized the vruntime, if it was !on_rq, then only when
+* If it's on_rq, then the dequeue_entity(.flags=0) will already
+* have normalized the vruntime, if it's !on_rq, then only when
 * the task is sleeping will it still have non-normalized vruntime.
 */
-   if (!se->on_rq && p->state != TASK_RUNNING) {
+   if (!p->on_rq && p->state != TASK_RUNNING) {
/*
 * Fix up our vruntime so that the current sleep doesn't
 * cause 'unlimited' sleep bonus.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 023/149] mac80211: send control port protocol frames to the VO queue

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Felix Fietkau 

commit 1bf4bbb4024dcdab5e57634dd8ae1072d42a53ac upstream.

Improves reliability of wifi connections with WPA, since authentication
frames are prioritized over normal traffic and also typically exempt
from aggregation.

Signed-off-by: Felix Fietkau 
Signed-off-by: Johannes Berg 
Signed-off-by: Greg Kroah-Hartman 

---
 net/mac80211/wme.c |5 +
 1 file changed, 5 insertions(+)

--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -153,6 +153,11 @@ u16 ieee80211_select_queue(struct ieee80
return IEEE80211_AC_BE;
}
 
+   if (skb->protocol == sdata->control_port_protocol) {
+   skb->priority = 7;
+   return ieee80211_downgrade_queue(sdata, skb);
+   }
+
/* use the data classifier to determine what 802.1d tag the
 * data frame has */
skb->priority = cfg80211_classify8021d(skb);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 015/149] ipv4: ipv6: better estimate tunnel header cut for correct ufo handling

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Hannes Frederic Sowa 

[ Upstream commit 91a48a2e85a3b70ce10ead34b4ab5347f8d215c9 ]

Currently the UFO fragmentation process does not correctly handle inner
UDP frames.

(The following tcpdumps are captured on the parent interface with ufo
disabled while tunnel has ufo enabled, 2000 bytes payload, mtu 1280,
both sit device):

IPv6:
16:39:10.031613 IP (tos 0x0, ttl 64, id 3208, offset 0, flags [DF], proto IPv6 
(41), length 1300)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload 
length: 1240) 2001::1 > 2001::8: frag (0x0001:0|1232) 44883 > distinct: 
UDP, length 2000
16:39:10.031709 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPv6 
(41), length 844)
192.168.122.151 > 1.1.1.1: IP6 (hlim 64, next-header Fragment (44) payload 
length: 784) 2001::1 > 2001::8: frag (0x0001:0|776) 58979 > 46366: UDP, 
length 5471

We can see that fragmentation header offset is not correctly updated.
(fragmentation id handling is corrected by 916e4cf46d0204 ("ipv6: reuse
ip6_frag_id from ip6_ufo_append_data")).

IPv4:
16:39:57.737761 IP (tos 0x0, ttl 64, id 3209, offset 0, flags [DF], proto IPIP 
(4), length 1296)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57034, offset 0, flags 
[none], proto UDP (17), length 1276)
192.168.99.1.35961 > 192.168.99.2.distinct: UDP, length 2000
16:39:57.738028 IP (tos 0x0, ttl 64, id 3210, offset 0, flags [DF], proto IPIP 
(4), length 792)
192.168.122.151 > 1.1.1.1: IP (tos 0x0, ttl 64, id 57035, offset 0, flags 
[none], proto UDP (17), length 772)
192.168.99.1.13531 > 192.168.99.2.20653: UDP, length 51109

In this case fragmentation id is incremented and offset is not updated.

First, I aligned inet_gso_segment and ipv6_gso_segment:
* align naming of flags
* ipv6_gso_segment: setting skb->encapsulation is unnecessary, as we
  always ensure that the state of this flag is left untouched when
  returning from upper gso segmentation function
* ipv6_gso_segment: move skb_reset_inner_headers below updating the
  fragmentation header data, we don't care for updating fragmentation
  header data
* remove currently unneeded comment indicating skb->encapsulation might
  get changed by upper gso_segment callback (gre and udp-tunnel reset
  encapsulation after segmentation on each fragment)

If we encounter an IPIP or SIT gso skb we now check for the protocol ==
IPPROTO_UDP and that we at least have already traversed another ip(6)
protocol header.

The reason why we have to special case GSO_IPIP and GSO_SIT is that
we reset skb->encapsulation to 0 while skb_mac_gso_segment the inner
protocol of GSO_UDP_TUNNEL or GSO_GRE packets.

Reported-by: Wolfgang Walter 
Cc: Cong Wang 
Cc: Tom Herbert 
Cc: Eric Dumazet 
Signed-off-by: Hannes Frederic Sowa 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/ipv4/af_inet.c |7 +--
 net/ipv6/ip6_offload.c |   20 
 2 files changed, 17 insertions(+), 10 deletions(-)

--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1299,8 +1299,11 @@ static struct sk_buff *inet_gso_segment(
 
segs = ERR_PTR(-EPROTONOSUPPORT);
 
-   /* Note : following gso_segment() might change skb->encapsulation */
-   udpfrag = !skb->encapsulation && proto == IPPROTO_UDP;
+   if (skb->encapsulation &&
+   skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+   udpfrag = proto == IPPROTO_UDP && encap;
+   else
+   udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
 
ops = rcu_dereference(inet_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment))
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -89,7 +89,7 @@ static struct sk_buff *ipv6_gso_segment(
unsigned int unfrag_ip6hlen;
u8 *prevhdr;
int offset = 0;
-   bool tunnel;
+   bool encap, udpfrag;
int nhoff;
 
if (unlikely(skb_shinfo(skb)->gso_type &
@@ -110,8 +110,8 @@ static struct sk_buff *ipv6_gso_segment(
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h
goto out;
 
-   tunnel = SKB_GSO_CB(skb)->encap_level > 0;
-   if (tunnel)
+   encap = SKB_GSO_CB(skb)->encap_level > 0;
+   if (encap)
features = skb->dev->hw_enc_features & netif_skb_features(skb);
SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h);
 
@@ -121,6 +121,12 @@ static struct sk_buff *ipv6_gso_segment(
 
proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
 
+   if (skb->encapsulation &&
+   skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+   udpfrag = proto == IPPROTO_UDP && encap;
+   else
+   udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
+
ops = rcu_dereference(inet6_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment)) {
skb_reset_transpo

[PATCH 3.13 016/149] sfc: check for NULL efx->ptp_data in efx_ptp_event

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Edward Cree 

[ Upstream commit 8f355e5cee63c2c0c145d8206c4245d0189f47ff ]

If we receive a PTP event from the NIC when we haven't set up PTP state
in the driver, we attempt to read through a NULL pointer efx->ptp_data,
triggering a panic.

Signed-off-by: Edward Cree 
Acked-by: Shradha Shah 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/ethernet/sfc/ptp.c |7 +++
 1 file changed, 7 insertions(+)

--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -1360,6 +1360,13 @@ void efx_ptp_event(struct efx_nic *efx,
struct efx_ptp_data *ptp = efx->ptp_data;
int code = EFX_QWORD_FIELD(*ev, MCDI_EVENT_CODE);
 
+   if (!ptp) {
+   if (net_ratelimit())
+   netif_warn(efx, drv, efx->net_dev,
+  "Received PTP event but PTP not set up\n");
+   return;
+   }
+
if (!ptp->enabled)
return;
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 014/149] ipv6: reuse ip6_frag_id from ip6_ufo_append_data

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Hannes Frederic Sowa 

[ Upstream commit 916e4cf46d0204806c062c8c6c4d1f633852c5b6 ]

Currently we generate a new fragmentation id on UFO segmentation. It
is pretty hairy to identify the correct net namespace and dst there.
Especially tunnels use IFF_XMIT_DST_RELEASE and thus have no skb_dst
available at all.

This causes unreliable or very predictable ipv6 fragmentation id
generation during segmentation.

Luckily we already have pregenerated the ip6_frag_id in
ip6_ufo_append_data and can use it here.

Signed-off-by: Hannes Frederic Sowa 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/ipv6/udp_offload.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -113,7 +113,7 @@ static struct sk_buff *udp6_ufo_fragment
fptr = (struct frag_hdr *)(skb_network_header(skb) + 
unfrag_ip6hlen);
fptr->nexthdr = nexthdr;
fptr->reserved = 0;
-   ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
+   fptr->identification = skb_shinfo(skb)->ip6_frag_id;
 
/* Fragment the skb. ipv6 header and the remaining fields of the
 * fragment header are updated in ipv6_gso_segment()


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 026/149] mac80211: fix association to 20/40 MHz VHT networks

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Johannes Berg 

commit cb664981607a6b5b3d670ad57bbda893b2528d96 upstream.

When a VHT network uses 20 or 40 MHz as per the HT operation
information, the channel center frequency segment 0 field in
the VHT operation information is reserved, so ignore it.

This fixes association with such networks when the AP puts 0
into the field, previously we'd disconnect due to an invalid
channel with the message
wlan0: AP VHT information is invalid, disable VHT

Fixes: f2d9d270c15ae ("mac80211: support VHT association")
Reported-by: Tim Nelson 
Signed-off-by: Johannes Berg 
Signed-off-by: Greg Kroah-Hartman 

---
 net/mac80211/mlme.c |1 +
 1 file changed, 1 insertion(+)

--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -222,6 +222,7 @@ ieee80211_determine_chantype(struct ieee
switch (vht_oper->chan_width) {
case IEEE80211_VHT_CHANWIDTH_USE_HT:
vht_chandef.width = chandef->width;
+   vht_chandef.center_freq1 = chandef->center_freq1;
break;
case IEEE80211_VHT_CHANWIDTH_80MHZ:
vht_chandef.width = NL80211_CHAN_WIDTH_80;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 025/149] mac80211: dont validate unchanged AP bandwidth while tracking

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Johannes Berg 

commit 963a1852fbac4f75a2d938fa2e734ef1e6d4c044 upstream.

The MLME code in mac80211 must track whether or not the AP changed
bandwidth, but if there's no change while tracking it shouldn't do
anything, otherwise regulatory updates can make it impossible to
connect to certain APs if the regulatory database doesn't match the
information from the AP. See the precise scenario described in the
code.

This still leaves some possible problems with CSA or if the AP
actually changed bandwidth, but those cases are less common and
won't completely prevent using it.

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=70881

Reported-and-tested-by: Nate Carlson 
Signed-off-by: Johannes Berg 
Signed-off-by: Greg Kroah-Hartman 

---
 net/mac80211/mlme.c |   22 ++
 1 file changed, 22 insertions(+)

--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -271,6 +271,28 @@ ieee80211_determine_chantype(struct ieee
ret = 0;
 
 out:
+   /*
+* When tracking the current AP, don't do any further checks if the
+* new chandef is identical to the one we're currently using for the
+* connection. This keeps us from playing ping-pong with regulatory,
+* without it the following can happen (for example):
+*  - connect to an AP with 80 MHz, world regdom allows 80 MHz
+*  - AP advertises regdom US
+*  - CRDA loads regdom US with 80 MHz prohibited (old database)
+*  - the code below detects an unsupported channel, downgrades, and
+*we disconnect from the AP in the caller
+*  - disconnect causes CRDA to reload world regdomain and the game
+*starts anew.
+* (see https://bugzilla.kernel.org/show_bug.cgi?id=70881)
+*
+* It seems possible that there are still scenarios with CSA or real
+* bandwidth changes where this could happen, but those cases are
+* less common and wouldn't completely prevent using the AP.
+*/
+   if (tracking &&
+   cfg80211_chandef_identical(chandef, &sdata->vif.bss_conf.chandef))
+   return ret;
+
/* don't print the message below for VHT mismatch if VHT is disabled */
if (ret & IEEE80211_STA_DISABLE_VHT)
vht_chandef = *chandef;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 017/149] sch_tbf: Fix potential memory leak in tbf_change().

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Hiroaki SHIMODA 

[ Upstream commit 724b9e1d75ab3401aaa081bd4efb440c1b3509db ]

The allocated child qdisc is not freed in error conditions.
Defer the allocation until after the user configuration turns out to be
valid and acceptable.

Fixes: cc106e441a63b ("net: sched: tbf: fix the calculation of max_size")
Signed-off-by: Hiroaki SHIMODA 
Cc: Yang Yingliang 
Acked-by: Eric Dumazet 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/sched/sch_tbf.c |   24 
 1 file changed, 12 insertions(+), 12 deletions(-)

--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -332,18 +332,6 @@ static int tbf_change(struct Qdisc *sch,
qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
  tb[TCA_TBF_PTAB]));
 
-   if (q->qdisc != &noop_qdisc) {
-   err = fifo_set_limit(q->qdisc, qopt->limit);
-   if (err)
-   goto done;
-   } else if (qopt->limit > 0) {
-   child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
-   if (IS_ERR(child)) {
-   err = PTR_ERR(child);
-   goto done;
-   }
-   }
-
buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
 
@@ -377,6 +365,18 @@ static int tbf_change(struct Qdisc *sch,
goto done;
}
 
+   if (q->qdisc != &noop_qdisc) {
+   err = fifo_set_limit(q->qdisc, qopt->limit);
+   if (err)
+   goto done;
+   } else if (qopt->limit > 0) {
+   child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
+   if (IS_ERR(child)) {
+   err = PTR_ERR(child);
+   goto done;
+   }
+   }
+
sch_tree_lock(sch);
if (child) {
qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 020/149] ip_tunnel:multicast process cause panic due to skb->_skb_refdst NULL pointer

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Xin Long 

[ Upstream commit 10ddceb22bab11dab10ba645c7df2e4a8e7a5db5 ]

When ip_tunnel processes multicast packets, it may check whether the packet is a
looped-back packet through 'rt_is_output_route(skb_rtable(skb))' in ip_tunnel_rcv(),
but before that, skb->_skb_refdst has already been dropped in iptunnel_pull_header(),
which leads to a panic.

fix the bug: https://bugzilla.kernel.org/show_bug.cgi?id=70681

Signed-off-by: Xin Long 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 net/ipv4/ip_tunnel_core.c |1 -
 1 file changed, 1 deletion(-)

--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -109,7 +109,6 @@ int iptunnel_pull_header(struct sk_buff
secpath_reset(skb);
if (!skb->l4_rxhash)
skb->rxhash = 0;
-   skb_dst_drop(skb);
skb->vlan_tci = 0;
skb_set_queue_mapping(skb, 0);
skb->pkt_type = PACKET_HOST;


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3.13 013/149] virtio-net: alloc big buffers also when guest can receive UFO

2014-03-20 Thread Greg Kroah-Hartman
3.13-stable review patch.  If anyone has any objections, please let me know.

--

From: Jason Wang 

[ Upstream commit 0e7ede80d929ff0f830c44a543daa1acd590c749 ]

We should alloc big buffers also when guest can receive UFO
packets to let the big packets fit into guest rx buffer.

Fixes 5c5167515d80f78f6bb538492c423adcae31ad65
(virtio-net: Allow UFO feature to be set and advertised.)

Cc: Rusty Russell 
Cc: Michael S. Tsirkin 
Cc: Sridhar Samudrala 
Signed-off-by: Jason Wang 
Acked-by: Michael S. Tsirkin 
Acked-by: Rusty Russell 
Signed-off-by: David S. Miller 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/net/virtio_net.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1645,7 +1645,8 @@ static int virtnet_probe(struct virtio_d
/* If we can receive ANY GSO packets, we must allocate large ones. */
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
-   virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN))
+   virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
+   virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
vi->big_packets = true;
 
if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


  1   2   3   4   5   6   7   8   9   10   >