date:20130219

[PATCH 4/4] dcache: don't need to take d_lock in prepend_path()

2013-02-19 Thread Waiman Long

The d_lock was used in prepend_path() to protect dentry->d_name from
being changed under the hood. As the caller of prepend_path() has
to take the rename_lock before calling into it, there is no chance
that d_name will be changed. The d_lock is only needed when the
rename_lock is not taken.

Signed-off-by: Waiman Long 
---
 fs/dcache.c |3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index b1487e2..0e911fc 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2547,6 +2547,7 @@ static int prepend_name(char **buffer, int *buflen, 
struct qstr *name)
  * @buflen: pointer to buffer length
  *
  * Caller holds the rename_lock.
+ * There is no need to lock the dentry as its name cannot be changed.
  */
 static int prepend_path(const struct path *path,
const struct path *root,
@@ -2573,9 +2574,7 @@ static int prepend_path(const struct path *path,
}
parent = dentry->d_parent;
prefetch(parent);
-   spin_lock(>d_lock);
error = prepend_name(buffer, buflen, >d_name);
-   spin_unlock(>d_lock);
if (!error)
error = prepend(buffer, buflen, "/", 1);
if (error)
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/4] dcache: change rename_lock to a sequence read/write lock

2013-02-19 Thread Waiman Long

The d_path() and related kernel functions currently take a writer
lock on rename_lock because they need to follow pointers. By changing
rename_lock to be the new sequence read/write lock, a reader lock
can be taken and multiple d_path() threads can proceed concurrently
without blocking each other.

It is unlikely that the frequency of filesystem changes and d_path()
name lookup will be high enough to cause writer starvation, so the current
limitation of the read/write lock should be acceptable in that case.

All the sites where rename_lock is referenced were modified to use the
sequence read/write lock declaration and access functions.

Signed-off-by: Waiman Long 
---
 fs/autofs4/waitq.c |6 ++--
 fs/ceph/mds_client.c   |4 +-
 fs/cifs/dir.c  |4 +-
 fs/dcache.c|   87 ---
 fs/nfs/namespace.c |6 ++--
 include/linux/dcache.h |4 +-
 kernel/auditsc.c   |5 ++-
 7 files changed, 59 insertions(+), 57 deletions(-)

diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 03bc1d3..95eee02 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -199,7 +199,7 @@ rename_retry:
buf = *name;
len = 0;
 
-   seq = read_seqbegin(_lock);
+   seq = read_seqrwbegin(_lock);
rcu_read_lock();
spin_lock(>fs_lock);
for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
@@ -208,7 +208,7 @@ rename_retry:
if (!len || --len > NAME_MAX) {
spin_unlock(>fs_lock);
rcu_read_unlock();
-   if (read_seqretry(_lock, seq))
+   if (read_seqrwretry(_lock, seq))
goto rename_retry;
return 0;
}
@@ -224,7 +224,7 @@ rename_retry:
}
spin_unlock(>fs_lock);
rcu_read_unlock();
-   if (read_seqretry(_lock, seq))
+   if (read_seqrwretry(_lock, seq))
goto rename_retry;
 
return len;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 9165eb8..da6bd2c 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1458,7 +1458,7 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int 
*plen, u64 *base,
 
 retry:
len = 0;
-   seq = read_seqbegin(_lock);
+   seq = read_seqrwbegin(_lock);
rcu_read_lock();
for (temp = dentry; !IS_ROOT(temp);) {
struct inode *inode = temp->d_inode;
@@ -1508,7 +1508,7 @@ retry:
temp = temp->d_parent;
}
rcu_read_unlock();
-   if (pos != 0 || read_seqretry(_lock, seq)) {
+   if (pos != 0 || read_seqrwretry(_lock, seq)) {
pr_err("build_path did not end path lookup where "
   "expected, namelen is %d, pos is %d\n", len, pos);
/* presumably this is only possible if racing with a
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 8719bbe..4842523 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -96,7 +96,7 @@ build_path_from_dentry(struct dentry *direntry)
dfsplen = 0;
 cifs_bp_rename_retry:
namelen = dfsplen;
-   seq = read_seqbegin(_lock);
+   seq = read_seqrwbegin(_lock);
rcu_read_lock();
for (temp = direntry; !IS_ROOT(temp);) {
namelen += (1 + temp->d_name.len);
@@ -136,7 +136,7 @@ cifs_bp_rename_retry:
}
}
rcu_read_unlock();
-   if (namelen != dfsplen || read_seqretry(_lock, seq)) {
+   if (namelen != dfsplen || read_seqrwretry(_lock, seq)) {
cFYI(1, "did not end path lookup where expected. namelen=%d "
"dfsplen=%d", namelen, dfsplen);
/* presumably this is only possible if racing with a rename
diff --git a/fs/dcache.c b/fs/dcache.c
index 20cc789..b1487e2 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -82,7 +83,7 @@ int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
-__cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
+__cacheline_aligned_in_smp DEFINE_SEQRWLOCK(rename_lock);
 
 EXPORT_SYMBOL(rename_lock);
 
@@ -1030,7 +1031,7 @@ static struct dentry *try_to_ascend(struct dentry *old, 
int locked, unsigned seq
 */
if (new != old->d_parent ||
 (old->d_flags & DCACHE_DENTRY_KILLED) ||
-(!locked && read_seqretry(_lock, seq))) {
+(!locked && read_seqrwretry(_lock, seq))) {
spin_unlock(>d_lock);
new = NULL;
}
@@ -1059,7 +1060,7 @@ int have_submounts(struct dentry *parent)
unsigned seq;
int locked = 0;
 
-   seq = read_seqbegin(_lock);
+   seq = read_seqrwbegin(_lock);
 again:
this_parent = parent;
 
@@ -1102,23 +1103,23 @@ resume:
goto resume;
}

[PATCH 38/81] drivers/rtc/rtc-pl031.c: fix the missing operation on enable

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Haojian Zhuang 

commit e7e034e18a0ab6bafb2425c3242cac311164f4d6 upstream.

The RTC control register should be enabled in the process of
initializing.

Without this patch, I failed to enable RTC in Hisilicon Hi3620 SoC.  The
register mapping section in RTC is always read as zero.  I suspect that the
ST guys already enable this register in the bootloader, so they won't
meet this issue.

Signed-off-by: Haojian Zhuang 
Cc: Srinidhi Kasagar 
Cc: Linus Walleij 
Cc: Alessandro Zummo 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
[ herton: adjust context ]
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/rtc/rtc-pl031.c |8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index cc05339..c42054b 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -44,6 +44,7 @@
 #define RTC_YMR0x34/* Year match register */
 #define RTC_YLR0x38/* Year data load register */
 
+#define RTC_CR_EN  (1 << 0)/* counter enable bit */
 #define RTC_CR_CWEN(1 << 26)   /* Clockwatch enable bit */
 
 #define RTC_TCR_EN (1 << 1) /* Periodic timer enable bit */
@@ -304,7 +305,7 @@ static int pl031_probe(struct amba_device *adev, const 
struct amba_id *id)
int ret;
struct pl031_local *ldata;
struct rtc_class_ops *ops = id->data;
-   unsigned long time;
+   unsigned long time, data;
 
ret = amba_request_regions(adev, NULL);
if (ret)
@@ -331,10 +332,11 @@ static int pl031_probe(struct amba_device *adev, const 
struct amba_id *id)
dev_dbg(>dev, "designer ID = 0x%02x\n", ldata->hw_designer);
dev_dbg(>dev, "revision = 0x%01x\n", ldata->hw_revision);
 
+   data = readl(ldata->base + RTC_CR);
/* Enable the clockwatch on ST Variants */
if (ldata->hw_designer == AMBA_VENDOR_ST)
-   writel(readl(ldata->base + RTC_CR) | RTC_CR_CWEN,
-  ldata->base + RTC_CR);
+   data |= RTC_CR_CWEN;
+   writel(data | RTC_CR_EN, ldata->base + RTC_CR);
 
/*
 * On ST PL031 variants, the RTC reset value does not provide correct
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Should a swapped out page be deleted from swap cache?

2013-02-19 Thread Hugh Dickins

On Tue, 19 Feb 2013, Will Huck wrote:
> 
> Another question:

I don't see the connection to deleting a swapped out page from swap cache.

> 
> Why kernel memory mapping use direct mapping instead of kmalloc/vmalloc which
> will setup mapping on demand?

I may misunderstand you, and "kernel memory mapping".

kmalloc does not set up a mapping, it uses the direct mapping already set up.

It would be circular if the basic page allocation primitives used kmalloc,
since kmalloc relies on the basic page allocation primitives.

vmalloc is less efficient than using the direct mapping (repeated setup
and teardown, no use of hugepages), but necessary when you want a larger
virtual array than you're likely to find from the buddy allocator.

Hugh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 34/81] rtlwifi: Fix scheduling while atomic bug

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Larry Finger 

commit a5ffbe0a1993a27072742ef7db6cf9839956fce9 upstream.

Kernel commits 41affd5 and 6539306 changed the locking in rtl_lps_leave()
from a spinlock to a mutex by doing the calls indirectly from a work queue
to reduce the time that interrupts were disabled. This change was fine for
most systems; however a scheduling while atomic bug was reported in
https://bugzilla.redhat.com/show_bug.cgi?id=903881. The backtrace indicates
that routine rtl_is_special_data(), which calls rtl_lps_leave() in three places,
was entered in atomic context. These direct calls are replaced by putting a
request on the appropriate work queue.

Signed-off-by: Larry Finger 
Reported-and-tested-by: Nathaniel Doherty 
Cc: Nathaniel Doherty 
Cc: Stanislaw Gruszka 
Signed-off-by: John W. Linville 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/wireless/rtlwifi/base.c |7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/rtlwifi/base.c 
b/drivers/net/wireless/rtlwifi/base.c
index f4c852c..4110a6d 100644
--- a/drivers/net/wireless/rtlwifi/base.c
+++ b/drivers/net/wireless/rtlwifi/base.c
@@ -980,7 +980,8 @@ u8 rtl_is_special_data(struct ieee80211_hw *hw, struct 
sk_buff *skb, u8 is_tx)
 is_tx ? "Tx" : "Rx");
 
if (is_tx) {
-   rtl_lps_leave(hw);
+   schedule_work(>
+ works.lps_leave_work);
ppsc->last_delaylps_stamp_jiffies =
jiffies;
}
@@ -990,7 +991,7 @@ u8 rtl_is_special_data(struct ieee80211_hw *hw, struct 
sk_buff *skb, u8 is_tx)
}
} else if (ETH_P_ARP == ether_type) {
if (is_tx) {
-   rtl_lps_leave(hw);
+   schedule_work(>works.lps_leave_work);
ppsc->last_delaylps_stamp_jiffies = jiffies;
}
 
@@ -1000,7 +1001,7 @@ u8 rtl_is_special_data(struct ieee80211_hw *hw, struct 
sk_buff *skb, u8 is_tx)
 "802.1X %s EAPOL pkt!!\n", is_tx ? "Tx" : "Rx");
 
if (is_tx) {
-   rtl_lps_leave(hw);
+   schedule_work(>works.lps_leave_work);
ppsc->last_delaylps_stamp_jiffies = jiffies;
}
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 40/81] HID: usbhid: quirk for Formosa IR receiver

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Nicholas Santos 

commit 320cde19a4e8f122b19d2df7a5c00636e11ca3fb upstream.

Patch to add the Formosa Industrial Computing, Inc. Infrared Receiver
[IR605A/Q] to hid-ids.h and hid-quirks.c.  This IR receiver causes about a 10
second timeout when the usbhid driver attempts to initialize the device.  Adding
this device to the quirks list with HID_QUIRK_NO_INIT_REPORTS removes the
delay.

Signed-off-by: Nicholas Santos 
[jkos...@suse.cz: fix ordering]
Signed-off-by: Jiri Kosina 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/hid/hid-ids.h   |3 +++
 drivers/hid/usbhid/hid-quirks.c |1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 5ddfcc7..dc48cd1 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -296,6 +296,9 @@
 #define USB_VENDOR_ID_EZKEY0x0518
 #define USB_DEVICE_ID_BTC_8193 0x0002
 
+#define USB_VENDOR_ID_FORMOSA  0x147a
+#define USB_DEVICE_ID_FORMOSA_IR_RECEIVER  0xe03e
+
 #define USB_VENDOR_ID_FREESCALE0x15A2
 #define USB_DEVICE_ID_FREESCALE_MX28   0x004F
 
diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c
index 8865fa3..1a4bc41 100644
--- a/drivers/hid/usbhid/hid-quirks.c
+++ b/drivers/hid/usbhid/hid-quirks.c
@@ -70,6 +70,7 @@ static const struct hid_blacklist {
{ USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_AXIS_295, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_DMI, USB_DEVICE_ID_DMI_ENC, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET },
+   { USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, 
HID_QUIRK_NO_INIT_REPORTS },
{ USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, 
HID_QUIRK_NOGET },
{ USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_NOGET },
{ USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN, 
HID_QUIRK_NO_INIT_REPORTS },
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: [PATCH RFC] video: Add Hyper-V Synthetic Video Frame Buffer Driver

2013-02-19 Thread Haiyang Zhang

> -Original Message-
> From: linux-fbdev-ow...@vger.kernel.org [mailto:linux-fbdev-
> ow...@vger.kernel.org] On Behalf Of Olaf Hering
> Sent: Tuesday, February 19, 2013 1:40 PM
> To: Haiyang Zhang
> Cc: florianschandi...@gmx.de; linux-fb...@vger.kernel.org; KY Srinivasan;
> jasow...@redhat.com; linux-kernel@vger.kernel.org;
> de...@linuxdriverproject.org
> Subject: Re: [PATCH RFC] video: Add Hyper-V Synthetic Video Frame Buffer
> Driver
> 
> On Tue, Feb 19, Haiyang Zhang wrote:
> 
> > The emulated video device is a separate device from the synthetic
> video.
> > The synthetic driver can only take control of the synthetic video, but
> not
> > the emulated video.
> 
> Please add this to the comment above.

Will do.

> > Actually, we already have a similar mechanism in ata/ata_piix.c to
> disable
> > emulated IDE drive on Hyper-V, so it won't conflict with the synthetic
> drive.
> 
> I haven't read the vesafb code, but I think it can kind of give up the
> hardware, something ata_piix can not do.

In my test, the vesafb doesn't automatically give up the emulated video device,
unless I add the DMI based mechanism to let it exit on Hyper-V.

Thanks,
- Haiyang

[PATCH 41/81] kernel/resource.c: fix stack overflow in __reserve_region_with_split()

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: T Makphaibulchoke 

commit 4965f5667f36a95b41cda6638875bc992bd7d18b upstream.

Using a recursive call to add a non-conflicting region in
__reserve_region_with_split() could result in a stack overflow in the case
that the recursive calls are too deep.  Convert the recursive calls to an
iterative loop to avoid the problem.

Tested on a machine containing 135 regions.  The kernel no longer panicked
with stack overflow.

Also tested with code arbitrarily adding regions with no conflict,
embedding two consecutive conflicts and embedding two non-consecutive
conflicts.

Signed-off-by: T Makphaibulchoke 
Reviewed-by: Ram Pai 
Cc: Paul Gortmaker 
Cc: Wei Yang 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Cc: Jiri Slaby 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 kernel/resource.c |   50 ++
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/kernel/resource.c b/kernel/resource.c
index e1d2b8e..7de982e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -758,6 +758,7 @@ static void __init __reserve_region_with_split(struct 
resource *root,
struct resource *parent = root;
struct resource *conflict;
struct resource *res = kzalloc(sizeof(*res), GFP_ATOMIC);
+   struct resource *next_res = NULL;
 
if (!res)
return;
@@ -767,21 +768,46 @@ static void __init __reserve_region_with_split(struct 
resource *root,
res->end = end;
res->flags = IORESOURCE_BUSY;
 
-   conflict = __request_resource(parent, res);
-   if (!conflict)
-   return;
+   while (1) {
 
-   /* failed, split and try again */
-   kfree(res);
+   conflict = __request_resource(parent, res);
+   if (!conflict) {
+   if (!next_res)
+   break;
+   res = next_res;
+   next_res = NULL;
+   continue;
+   }
 
-   /* conflict covered whole area */
-   if (conflict->start <= start && conflict->end >= end)
-   return;
+   /* conflict covered whole area */
+   if (conflict->start <= res->start &&
+   conflict->end >= res->end) {
+   kfree(res);
+   WARN_ON(next_res);
+   break;
+   }
+
+   /* failed, split and try again */
+   if (conflict->start > res->start) {
+   end = res->end;
+   res->end = conflict->start - 1;
+   if (conflict->end < end) {
+   next_res = kzalloc(sizeof(*next_res),
+   GFP_ATOMIC);
+   if (!next_res) {
+   kfree(res);
+   break;
+   }
+   next_res->name = name;
+   next_res->start = conflict->end + 1;
+   next_res->end = end;
+   next_res->flags = IORESOURCE_BUSY;
+   }
+   } else {
+   res->start = conflict->end + 1;
+   }
+   }
 
-   if (conflict->start > start)
-   __reserve_region_with_split(root, start, conflict->start-1, 
name);
-   if (conflict->end < end)
-   __reserve_region_with_split(root, conflict->end+1, end, name);
 }
 
 void __init reserve_region_with_split(struct resource *root,
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 45/81] MAINTAINERS: Stephen Hemminger email change

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Stephen Hemminger 

commit adbbf69d1a54abf424e91875746a610dcc80017d upstream.

I changed my email because the vyatta.com mail server is now
redirected to brocade.com; and the Brocade mail system
is not friendly to Linux desktop users.

Signed-off-by: Stephen Hemminger 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 MAINTAINERS |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0d67d1b..5927882 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2663,7 +2663,7 @@ S:Maintained
 F: drivers/net/ethernet/i825xx/eexpress.*
 
 ETHERNET BRIDGE
-M: Stephen Hemminger 
+M: Stephen Hemminger 
 L: bri...@lists.linux-foundation.org
 L: net...@vger.kernel.org
 W: http://www.linuxfoundation.org/en/Net:Bridge
@@ -4385,7 +4385,7 @@ S:Maintained
 
 MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2)
 M: Mirko Lindner 
-M: Stephen Hemminger 
+M: Stephen Hemminger 
 L: net...@vger.kernel.org
 S: Maintained
 F: drivers/net/ethernet/marvell/sk*
@@ -4630,7 +4630,7 @@ S:Supported
 F: drivers/infiniband/hw/nes/
 
 NETEM NETWORK EMULATOR
-M: Stephen Hemminger 
+M: Stephen Hemminger 
 L: ne...@lists.linux-foundation.org
 S: Maintained
 F: net/sched/sch_netem.c
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] of: use platform_device_add

2013-02-19 Thread Jason Gunthorpe

On Sun, Feb 17, 2013 at 10:49:08AM +, Grant Likely wrote:
> > > The patch introduce a regression on imx6q boot.  The IOMUXC block on
> > > imx6q is special.  It acts not only a pin controller but also a system
> > > controller with a bunch of system level registers in there.  That's why
> > > we currently have the following two nodes in imx6q device tree with the
> > > same start "reg" address, which work with drivers/mfd/syscon.c and
> > > drivers/pinctrl/pinctrl-imx6q.c respectively.
> > >
> > > gpr: iomuxc-gpr@020e {
> > > compatible = "fsl,imx6q-iomuxc-gpr", "syscon";
> > > reg = <0x020e 0x38>;
> > > };
> > >
> > > iomuxc: iomuxc@020e {
> > > compatible = "fsl,imx6q-iomuxc";
> > > reg = <0x020e 0x4000>;
> > > };
> > >
> > > With the patch in place, pinctrl-imx6q fails to register like below.
> > >
> > > syscon 20e.iomuxc: syscon regmap start 0x20e end 0x20e3fff 
> > > registered
> > > imx6q-pinctrl 20e.iomuxc: can't request region for resource [mem 
> > > 0x020e-0x020e3fff]
> > > imx6q-pinctrl: probe of 20e.iomuxc failed with error -16
> 
> Strictly you're not supposed to do that with the device tree. There
> shouldn't be two nodes using the same overlapping memory region unless
> they are parent/child. That rule has been around for a long time, but
> the core never checked for it. What /should/ happen is the two drivers
> should be cooperating for the register region and only one device
> driver probe sets up both behaviours.

This case was something we both discussed when this patch was first
brought up, and both our tests seemed like it was OK. What is going on
here that these non-staggered regions are failing? It looks like the
platform devices registered but the devm_request_and_iormap failed?

> >> It also breaks all of_amba_device users.
> >>
> >> of_amba_device_create() --> amba_device_add() --> request_resource()
> >> and fails.
> >
> > Presumably that's because we no longer know what the parent resource
> > is supposed to be?
> 
> Hmmm, it looks that way, yes. Currently the OF code is using
> iomem_resource as the parent for all amba_device_add() calls
> (driver/of/platform.c), but if the parent node gets registered as a
> platform device and it has the resources then the parenthood chain
> doesn't match up. It isn't immediately obvious to me how to fix this.
> I'm going to drop the patch from my tree. I could use some help
> figuring out what the correct behaviour really should be here.

I looked for a bit and it wasn't obvious to me where the colliding
request_resource was coming from. The DTs for amba busses seem to all
be placed under the root node, or within a simple bus, so there is no
parent platform device and the use of iomem_resource should still be OK?

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 50/81] net/mlx4_core: Set number of msix vectors under SRIOV mode to firmware defaults

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Or Gerlitz 

commit ca4c7b35f75492de7fbf5ee95be07481c348caee upstream.

The lines

if (mlx4_is_mfunc(dev)) {
nreq = 2;
} else {

which hard code the number of requested msi-x vectors under multi-function
mode to two can be removed completely, since the firmware sets num_eqs and
reserved_eqs appropriately. Thus, the code line:

nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs, nreq);

is by itself sufficient and correct for all cases. Currently, for mfunc
mode num_eqs = 32 and reserved_eqs = 28, hence four vectors will be enabled.

This triples (one vector is used for the async events and commands EQ) the
horse power provided for processing of incoming packets on netdev RSS scheme,
IO initiators/targets commands processing flows, etc.

Reviewed-by: Jack Morgenstein 
Signed-off-by: Amir Vadai 
Signed-off-by: Or Gerlitz 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/ethernet/mellanox/mlx4/main.c |   11 ++-
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c 
b/drivers/net/ethernet/mellanox/mlx4/main.c
index a0313de..c749b82 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1545,15 +1545,8 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
int i;
 
if (msi_x) {
-   /* In multifunction mode each function gets 2 msi-X vectors
-* one for data path completions anf the other for asynch events
-* or command completions */
-   if (mlx4_is_mfunc(dev)) {
-   nreq = 2;
-   } else {
-   nreq = min_t(int, dev->caps.num_eqs -
-dev->caps.reserved_eqs, nreq);
-   }
+   nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
+nreq);
 
entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
if (!entries)
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 53/81] r8169: remove the obsolete and incorrect AMD workaround

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Timo Teräs

commit 5d0feaff230c0abfe4a112e6f09f096ed99e0b2d upstream.

This was introduced in commit 6dccd16 "r8169: merge with version
6.001.00 of Realtek's r8169 driver". I did not find the version
6.001.00 online, but in 6.002.00 or any later r8169 from Realtek
this hunk is no longer present.

Also commit 05af214 "r8169: fix Ethernet Hangup for RTL8110SC
rev d" claims to have fixed this issue otherwise.

The magic compare mask of 0xfffe000 is dubious as it masks
parts of the Reserved part, and parts of the VLAN tag. But this
does not make much sense as the VLAN tag parts are perfectly
valid there. As a matter of fact, this seems to be triggered with
any VLAN tagged packet as RxVlanTag bit is matched. I would
suspect 0xfffe was intended to test reserved part only.

Finally, this hunk is evil as it can cause more packets to be
handled than the NAPI quota allows, causing net/core/dev.c:
net_rx_action(): WARN_ON_ONCE(work > weight) to trigger, and
mess up the NAPI state causing device to hang.

As a result, any system using VLANs and having high receive
traffic (so that NAPI poll budget limits rtl_rx) would result
in device hang.

Signed-off-by: Timo Teräs 
Acked-by: Francois Romieu 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/ethernet/realtek/r8169.c |7 ---
 1 file changed, 7 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c 
b/drivers/net/ethernet/realtek/r8169.c
index 0e09bb8..67f73ae 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -5812,13 +5812,6 @@ process_pkt:
tp->rx_stats.bytes += pkt_size;
u64_stats_update_end(>rx_stats.syncp);
}
-
-   /* Work around for AMD plateform. */
-   if ((desc->opts2 & cpu_to_le32(0xfffe000)) &&
-   (tp->mac_version == RTL_GIGA_MAC_VER_05)) {
-   desc->opts2 = 0;
-   cur_rx++;
-   }
}
 
count = cur_rx - tp->cur_rx;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 56/81] ipv6: do not create neighbor entries for local delivery

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Marcelo Ricardo Leitner 

commit bd30e947207e2ea0ff2c08f5b4a03025ddce48d3 upstream.

They will be created at output, if ever needed. This avoids creating
empty neighbor entries when TPROXYing/Forwarding packets for addresses
that are not even directly reachable.

Note that IPv4 already handles it this way. No neighbor entries are
created for local input.

Tested by myself and customer.

Signed-off-by: Jiri Pirko 
Signed-off-by: Marcelo Ricardo Leitner 
Signed-off-by: David S. Miller 
[ herton: adjust if condition, add additional RTF_LOCAL flag for the
  check ]
Signed-off-by: Herton Ronaldo Krzesinski 
---
 net/ipv6/route.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fd44184..08c149c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -846,7 +846,8 @@ restart:
dst_hold(>dst);
read_unlock_bh(>tb6_lock);
 
-   if (!dst_get_neighbour_noref_raw(>dst) && !(rt->rt6i_flags & 
RTF_NONEXTHOP))
+   if (!dst_get_neighbour_noref_raw(>dst) &&
+   !(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_LOCAL)))
nrt = rt6_alloc_cow(rt, >daddr, >saddr);
else if (!(rt->dst.flags & DST_HOST))
nrt = rt6_alloc_clone(rt, >daddr);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 54/81] net: loopback: fix a dst refcounting issue

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Eric Dumazet 

commit 794ed393b707f01858f5ebe2ae5eabaf89d00022 upstream.

Ben Greear reported crashes in ip_rcv_finish() on a stress
test involving many macvlans.

We tracked the bug to a dst use after free. ip_rcv_finish()
was calling dst->input() and got garbage for dst->input value.

It appears the bug is in loopback driver, lacking
a skb_dst_force() before calling netif_rx().

As a result, a non refcounted dst, normally protected by a
RCU read_lock section, was escaping this section and could
be freed before the packet being processed.

  [] loopback_xmit+0x64/0x83
  [] dev_hard_start_xmit+0x26c/0x35e
  [] dev_queue_xmit+0x2c4/0x37c
  [] ? dev_hard_start_xmit+0x35e/0x35e
  [] ? eth_header+0x28/0xb6
  [] neigh_resolve_output+0x176/0x1a7
  [] ip_finish_output2+0x297/0x30d
  [] ? ip_finish_output2+0x137/0x30d
  [] ip_finish_output+0x63/0x68
  [] ip_output+0x61/0x67
  [] dst_output+0x17/0x1b
  [] ip_local_out+0x1e/0x23
  [] ip_queue_xmit+0x315/0x353
  [] ? ip_send_unicast_reply+0x2cc/0x2cc
  [] tcp_transmit_skb+0x7ca/0x80b
  [] tcp_connect+0x53c/0x587
  [] ? getnstimeofday+0x44/0x7d
  [] ? ktime_get_real+0x11/0x3e
  [] tcp_v4_connect+0x3c2/0x431
  [] __inet_stream_connect+0x84/0x287
  [] ? inet_stream_connect+0x22/0x49
  [] ? _local_bh_enable_ip+0x84/0x9f
  [] ? local_bh_enable+0xd/0x11
  [] ? lock_sock_nested+0x6e/0x79
  [] ? inet_stream_connect+0x22/0x49
  [] inet_stream_connect+0x33/0x49
  [] sys_connect+0x75/0x98

This bug was introduced in linux-2.6.35, in commit
7fee226ad2397b (net: add a noref bit on skb dst)

skb_dst_force() is enforced in dev_queue_xmit() for devices having a
qdisc.

Reported-by: Ben Greear 
Signed-off-by: Eric Dumazet 
Tested-by: Ben Greear 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/loopback.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 32eb94e..a3d4707 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -77,6 +77,11 @@ static netdev_tx_t loopback_xmit(struct sk_buff *skb,
 
skb_orphan(skb);
 
+   /* Before queueing this packet to netif_rx(),
+* make sure dst is refcounted.
+*/
+   skb_dst_force(skb);
+
skb->protocol = eth_type_trans(skb, dev);
 
/* it's OK to use per_cpu_ptr() because BHs are off */
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 51/81] isdn/gigaset: fix zero size border case in debug dump

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Tilman Schmidt 

commit d721a1752ba544df8d7d36959038b26bc92bdf80 upstream.

If subtracting 12 from l leaves zero we'd do a zero size allocation,
leading to an oops later when we try to set the NUL terminator.

Reported-by: Dan Carpenter 
Signed-off-by: Tilman Schmidt 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/isdn/gigaset/capi.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/isdn/gigaset/capi.c b/drivers/isdn/gigaset/capi.c
index 27e4a3e..f45b5b0 100644
--- a/drivers/isdn/gigaset/capi.c
+++ b/drivers/isdn/gigaset/capi.c
@@ -248,6 +248,8 @@ static inline void dump_rawmsg(enum debuglevel level, const 
char *tag,
CAPIMSG_APPID(data), CAPIMSG_MSGID(data), l,
CAPIMSG_CONTROL(data));
l -= 12;
+   if (l <= 0)
+   return;
dbgline = kmalloc(3 * l, GFP_ATOMIC);
if (!dbgline)
return;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 48/81] net: calxedaxgmac: throw away overrun frames

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Rob Herring 

commit d6fb3be544b46a7611a3373fcaa62b5b0be01888 upstream.

The xgmac driver assumes 1 frame per descriptor. If a frame larger than
the descriptor's buffer size is received, the frame will spill over into
the next descriptor. So check for received frames that span more than one
descriptor and discard them. This prevents a crash if we receive erroneous
large packets.

Signed-off-by: Rob Herring 
Cc: net...@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/ethernet/calxeda/xgmac.c |4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/calxeda/xgmac.c 
b/drivers/net/ethernet/calxeda/xgmac.c
index 8b0a0e4..8a3cd87 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -546,6 +546,10 @@ static int desc_get_rx_status(struct xgmac_priv *priv, 
struct xgmac_dma_desc *p)
return -1;
}
 
+   /* All frames should fit into a single buffer */
+   if (!(status & RXDESC_FIRST_SEG) || !(status & RXDESC_LAST_SEG))
+   return -1;
+
/* Check if packet has checksum already */
if ((status & RXDESC_FRAME_TYPE) && (status & RXDESC_EXT_STATUS) &&
!(ext_status & RXDESC_IP_PAYLOAD_MASK))
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 47/81] macvlan: fix macvlan_get_size()

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Eric Dumazet 

commit 01fe944f1024bd4e5c327ddbe8d657656b66af2f upstream.

commit df8ef8f3aaa (macvlan: add FDB bridge ops and macvlan flags)
forgot to update macvlan_get_size() after the addition of
IFLA_MACVLAN_FLAGS

Signed-off-by: Eric Dumazet 
Cc: John Fastabend 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/macvlan.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 66a9bfe..62ce7b8 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -822,7 +822,10 @@ static int macvlan_changelink(struct net_device *dev,
 
 static size_t macvlan_get_size(const struct net_device *dev)
 {
-   return nla_total_size(4);
+   return (0
+   + nla_total_size(4) /* IFLA_MACVLAN_MODE */
+   + nla_total_size(2) /* IFLA_MACVLAN_FLAGS */
+   );
 }
 
 static int macvlan_fill_info(struct sk_buff *skb,
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 44/81] ipv6: fix the noflags test in addrconf_get_prefix_route

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Romain Kuntz 

commit 85da53bf1c336bb07ac038fb951403ab0478d2c5 upstream.

The tests on the flags in addrconf_get_prefix_route() do not make
much sense: the 'noflags' parameter contains the set of flags that
must not match with the route flags, so the test must be done
against 'noflags', and not against 'flags'.

Signed-off-by: Romain Kuntz 
Acked-by: YOSHIFUJI Hideaki 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 net/ipv6/addrconf.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0808ad5..fc9ac78 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1741,7 +1741,7 @@ static struct rt6_info *addrconf_get_prefix_route(const 
struct in6_addr *pfx,
continue;
if ((rt->rt6i_flags & flags) != flags)
continue;
-   if ((noflags != 0) && ((rt->rt6i_flags & flags) != 0))
+   if ((rt->rt6i_flags & noflags) != 0)
continue;
dst_hold(&rt->dst);
break;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 58/81] packet: fix leakage of tx_ring memory

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Phil Sutter 

commit 9665d5d62487e8e7b1f546c00e11107155384b9a upstream.

When releasing a packet socket, the routine packet_set_ring() is reused
to free rings instead of allocating them. But when calling it for the
first time, it fills req->tp_block_nr with the value of rb->pg_vec_len
which in the second invocation makes it bail out since req->tp_block_nr
is greater than zero but req->tp_block_size is zero.

This patch solves the problem by passing a zeroed auto-variable to
packet_set_ring() upon each invocation from packet_release().

As far as I can tell, this issue exists even since 69e3c75 (net: TX_RING
and packet mmap), i.e. the original inclusion of TX ring support into
af_packet, but applies only to sockets with both RX and TX ring
allocated, which is probably why this was unnoticed all the time.

Signed-off-by: Phil Sutter 
Cc: Johann Baudy 
Cc: Daniel Borkmann 
Acked-by: Daniel Borkmann 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 net/packet/af_packet.c |   10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 901cffd..02b1ef8 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2450,13 +2450,15 @@ static int packet_release(struct socket *sock)
 
packet_flush_mclist(sk);
 
-   memset(&req_u, 0, sizeof(req_u));
-
-   if (po->rx_ring.pg_vec)
+   if (po->rx_ring.pg_vec) {
+   memset(&req_u, 0, sizeof(req_u));
packet_set_ring(sk, &req_u, 1, 0);
+   }
 
-   if (po->tx_ring.pg_vec)
+   if (po->tx_ring.pg_vec) {
+   memset(&req_u, 0, sizeof(req_u));
packet_set_ring(sk, &req_u, 1, 1);
+   }
 
fanout_release(sk);
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 62/81] net: sctp: sctp_endpoint_free: zero out secret key data

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Daniel Borkmann 

commit b5c37fe6e24eec194bb29d22fdd55d73bcc709bf upstream.

On sctp_endpoint_destroy, previously used sensitive keying material
should be zeroed out before the memory is returned, as we already do
with e.g. auth keys when released.

Signed-off-by: Daniel Borkmann 
Acked-by: Vlad Yasevich 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 net/sctp/endpointola.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 68a385d..58cd035 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -248,6 +248,8 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
 /* Final destructor for endpoint.  */
 static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
 {
+   int i;
+
SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return);
 
/* Free up the HMAC transform. */
@@ -270,6 +272,9 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
sctp_inq_free(&ep->base.inqueue);
sctp_bind_addr_free(&ep->base.bind_addr);
 
+   for (i = 0; i < SCTP_HOW_MANY_SECRETS; ++i)
+   memset(&ep->secret_key[i], 0, SCTP_SECRET_SIZE);
+
/* Remove and free the port */
if (sctp_sk(ep->base.sk)->bind_hash)
sctp_put_port(ep->base.sk);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: WARNING: at drivers/pci/pci.c:1397 pci_disable_device

2013-02-19 Thread Bjorn Helgaas

On Tue, Feb 19, 2013 at 11:34 AM, Jiri Slaby  wrote:
> Hi,
>
> so I hit that one:
> +   dev_WARN_ONCE(&dev->dev, atomic_read(&dev->enable_cnt) <= 0,
> + "disabling already-disabled device");
>
> during suspend (to ram):
> WARNING: at drivers/pci/pci.c:1397 pci_disable_device+0x90/0xa0()
> Hardware name: To Be Filled By O.E.M.
> Device e1000e
> disabling already-disabled device
> Modules linked in: dvb_usb_dib0700 dib0090 dib7000p dib7000m dib0070
> dib8000 dib3000mc dibx000_common microcode
> Pid: 31027, comm: kworker/u:35 Not tainted 3.8.0-rc7-next-20130218_64+ #1768
> Call Trace:
>  [] ? do_pci_disable_device+0x30/0x60
>  [] warn_slowpath_common+0x7f/0xc0
>  [] warn_slowpath_fmt+0x46/0x50
>  [] pci_disable_device+0x90/0xa0
>  [] __e1000_shutdown+0x262/0x8b0
>  [] e1000_suspend+0x23/0x50
>  [] ? wait_for_completion+0x31/0x100
>  [] pci_pm_suspend+0x77/0x140
>  [] ? __pm_runtime_barrier+0x1a/0x130
>  [] ? pci_pm_poweroff+0xf0/0xf0
>  [] dpm_run_callback+0x58/0x90
>  [] __device_suspend+0xeb/0x280
>  [] async_suspend+0x1f/0xa0
>  [] async_run_entry_fn+0x3b/0x140
>  [] process_one_work+0x174/0x410
>  [] worker_thread+0x116/0x400
>  [] ? busy_worker_rebind_fn+0xc0/0xc0
>  [] kthread+0xc0/0xd0
>  [] ? kthread_create_on_node+0x130/0x130
>  [] ret_from_fork+0x7c/0xb0
>  [] ? kthread_create_on_node+0x130/0x130
> ---[ end trace 6c5060f8b8fb9175 ]---
> e1000e 0000:00:19.0: System wakeup enabled by ACPI

I think Konstantin posted an e1000e patch that would fix this, but
it's going via the e1000e folks.  Hopefully both my tree and the
e1000e patch will be merged soon.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 60/81] sctp: refactor sctp_outq_teardown to insure proper re-initalization

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Neil Horman 

commit 2f94aabd9f6c925d77aecb3ff020f1cc12ed8f86 upstream.

Jamie Parsons reported a problem recently, in which the re-initialization of an
association (The duplicate init case), resulted in a loss of receive window
space.  He tracked down the root cause to sctp_outq_teardown, which discarded
all the data on an outq during a re-initialization of the corresponding
association, but never reset the outq->outstanding_data field to zero.  I wrote,
and he tested this fix, which does a proper full re-initialization of the outq,
fixing this problem, and hopefully future proofing us from similar issues down
the road.

Signed-off-by: Neil Horman 
Reported-by: Jamie Parsons 
Tested-by: Jamie Parsons 
CC: Jamie Parsons 
CC: Vlad Yasevich 
CC: "David S. Miller" 
CC: net...@vger.kernel.org
Acked-by: Vlad Yasevich 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 net/sctp/outqueue.c |   12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index a0fa19f..0716290 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -223,7 +223,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct 
sctp_outq *q)
 
 /* Free the outqueue structure and any related pending chunks.
  */
-void sctp_outq_teardown(struct sctp_outq *q)
+static void __sctp_outq_teardown(struct sctp_outq *q)
 {
struct sctp_transport *transport;
struct list_head *lchunk, *temp;
@@ -276,8 +276,6 @@ void sctp_outq_teardown(struct sctp_outq *q)
sctp_chunk_free(chunk);
}
 
-   q->error = 0;
-
/* Throw away any leftover control chunks. */
list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) {
list_del_init(&chunk->list);
@@ -285,11 +283,17 @@ void sctp_outq_teardown(struct sctp_outq *q)
}
 }
 
+void sctp_outq_teardown(struct sctp_outq *q)
+{
+   __sctp_outq_teardown(q);
+   sctp_outq_init(q->asoc, q);
+}
+
 /* Free the outqueue structure and any related pending chunks.  */
 void sctp_outq_free(struct sctp_outq *q)
 {
/* Throw away leftover chunks. */
-   sctp_outq_teardown(q);
+   __sctp_outq_teardown(q);
 
/* If we were kmalloc()'d, free the memory.  */
if (q->malloced)
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: How to populate Battery information through ACPI tables

2013-02-19 Thread Pallala, Ramakrishna

> > Mika, I want to populate this characterization data as device
> > specific/custom data which could be anything And may not be entirely
> > related to battery. Is this is possible?
> 
> Yes, for example you could have a custom ACPI method with your device which
> then returns this information.
> 
> See for example chapter 10.2.2.1 from the ACPI spec. It describes _BIF method
> that returns some battery data to the caller.

Mika, the _BIF method returns a battery data struct, which is defined in Table 10-233.

If I want to pass some device-specific data, it seems I can use the _DSM (9.14.1) 
method and pass custom/device-specific data to the driver.
And I believe this data format can be anything and private to the device? Can 
you confirm?

Thanks,
Ram
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 66/81] netback: correct netbk_tx_err to handle wrap around.

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Ian Campbell 

commit b9149729ebdcfce63f853aa54a404c6a8f6ebbf3 upstream.

Signed-off-by: Ian Campbell 
Acked-by: Jan Beulich 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/xen-netback/netback.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/xen-netback/netback.c 
b/drivers/net/xen-netback/netback.c
index c503a58..f3c3a68 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -851,7 +851,7 @@ static void netbk_tx_err(struct xenvif *vif,
 
do {
make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
-   if (cons >= end)
+   if (cons == end)
break;
txp = RING_GET_REQUEST(&vif->tx, cons++);
} while (1);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] [media] stv090x: do not unlock unheld mutex in stv090x_sleep()

2013-02-19 Thread Alexey Khoroshilov

goto err and goto err_gateoff before mutex_lock(&state->internal->demod_lock)
lead to unlock of unheld mutex in stv090x_sleep().

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov 
---
 drivers/media/dvb-frontends/stv090x.c |   22 --
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/media/dvb-frontends/stv090x.c 
b/drivers/media/dvb-frontends/stv090x.c
index 13caec0..4f600ac 100644
--- a/drivers/media/dvb-frontends/stv090x.c
+++ b/drivers/media/dvb-frontends/stv090x.c
@@ -3906,12 +3906,12 @@ static int stv090x_sleep(struct dvb_frontend *fe)
reg = stv090x_read_reg(state, STV090x_TSTTNR1);
STV090x_SETFIELD(reg, ADC1_PON_FIELD, 0);
if (stv090x_write_reg(state, STV090x_TSTTNR1, reg) < 0)
-   goto err;
+   goto err_unlock;
/* power off DiSEqC 1 */
reg = stv090x_read_reg(state, STV090x_TSTTNR2);
STV090x_SETFIELD(reg, DISEQC1_PON_FIELD, 0);
if (stv090x_write_reg(state, STV090x_TSTTNR2, reg) < 0)
-   goto err;
+   goto err_unlock;
 
/* check whether path 2 is already sleeping, that is when
   ADC2 is off */
@@ -3930,7 +3930,7 @@ static int stv090x_sleep(struct dvb_frontend *fe)
if (full_standby)
STV090x_SETFIELD(reg, STOP_CLKFEC_FIELD, 1);
if (stv090x_write_reg(state, STV090x_STOPCLK1, reg) < 0)
-   goto err;
+   goto err_unlock;
reg = stv090x_read_reg(state, STV090x_STOPCLK2);
/* sampling 1 clock */
STV090x_SETFIELD(reg, STOP_CLKSAMP1_FIELD, 1);
@@ -3941,7 +3941,7 @@ static int stv090x_sleep(struct dvb_frontend *fe)
if (full_standby)
STV090x_SETFIELD(reg, STOP_CLKTS_FIELD, 1);
if (stv090x_write_reg(state, STV090x_STOPCLK2, reg) < 0)
-   goto err;
+   goto err_unlock;
break;
 
case STV090x_DEMODULATOR_1:
@@ -3949,12 +3949,12 @@ static int stv090x_sleep(struct dvb_frontend *fe)
reg = stv090x_read_reg(state, STV090x_TSTTNR3);
STV090x_SETFIELD(reg, ADC2_PON_FIELD, 0);
if (stv090x_write_reg(state, STV090x_TSTTNR3, reg) < 0)
-   goto err;
+   goto err_unlock;
/* power off DiSEqC 2 */
reg = stv090x_read_reg(state, STV090x_TSTTNR4);
STV090x_SETFIELD(reg, DISEQC2_PON_FIELD, 0);
if (stv090x_write_reg(state, STV090x_TSTTNR4, reg) < 0)
-   goto err;
+   goto err_unlock;
 
/* check whether path 1 is already sleeping, that is when
   ADC1 is off */
@@ -3973,7 +3973,7 @@ static int stv090x_sleep(struct dvb_frontend *fe)
if (full_standby)
STV090x_SETFIELD(reg, STOP_CLKFEC_FIELD, 1);
if (stv090x_write_reg(state, STV090x_STOPCLK1, reg) < 0)
-   goto err;
+   goto err_unlock;
reg = stv090x_read_reg(state, STV090x_STOPCLK2);
/* sampling 2 clock */
STV090x_SETFIELD(reg, STOP_CLKSAMP2_FIELD, 1);
@@ -3984,7 +3984,7 @@ static int stv090x_sleep(struct dvb_frontend *fe)
if (full_standby)
STV090x_SETFIELD(reg, STOP_CLKTS_FIELD, 1);
if (stv090x_write_reg(state, STV090x_STOPCLK2, reg) < 0)
-   goto err;
+   goto err_unlock;
break;
 
default:
@@ -3997,7 +3997,7 @@ static int stv090x_sleep(struct dvb_frontend *fe)
reg = stv090x_read_reg(state, STV090x_SYNTCTRL);
STV090x_SETFIELD(reg, STANDBY_FIELD, 0x01);
if (stv090x_write_reg(state, STV090x_SYNTCTRL, reg) < 0)
-   goto err;
+   goto err_unlock;
}
 
mutex_unlock(&state->internal->demod_lock);
@@ -4005,8 +4005,10 @@ static int stv090x_sleep(struct dvb_frontend *fe)
 
 err_gateoff:
stv090x_i2c_gate_ctrl(state, 0);
-err:
+   goto err;
+err_unlock:
mutex_unlock(&state->internal->demod_lock);
+err:
dprintk(FE_ERROR, 1, "I/O error");
return -1;
 }
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 68/81] tcp: fix for zero packets_in_flight was too broad

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Ilpo Järvinen

commit 6731d2095bd4aef18027c72ef845ab1087c3ba63 upstream.

There are transients during normal FRTO procedure during which
the packets_in_flight can go to zero between write_queue state
updates and firing the resulting segments out. As FRTO processing
occurs during that window the check must be more precise to
not match "spuriously" :-). More specifically, e.g., when
packets_in_flight is zero but FLAG_DATA_ACKED is true the problematic
branch that set cwnd into zero would not be taken and new segments
might be sent out later.

Signed-off-by: Ilpo Järvinen 
Tested-by: Eric Dumazet 
Acked-by: Neal Cardwell 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 net/ipv4/tcp_input.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6affa92..3b14d81 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3695,8 +3695,7 @@ static bool tcp_process_frto(struct sock *sk, int flag)
((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
tp->undo_marker = 0;
 
-   if (!before(tp->snd_una, tp->frto_highmark) ||
-   !tcp_packets_in_flight(tp)) {
+   if (!before(tp->snd_una, tp->frto_highmark)) {
tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
return true;
}
@@ -3716,6 +3715,11 @@ static bool tcp_process_frto(struct sock *sk, int flag)
}
} else {
if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+   if (!tcp_packets_in_flight(tp)) {
+   tcp_enter_frto_loss(sk, 2, flag);
+   return true;
+   }
+
/* Prevent sending of new data. */
tp->snd_cwnd = min(tp->snd_cwnd,
   tcp_packets_in_flight(tp));
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 69/81] bridge: Pull ip header into skb->data before looking into ip header.

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Sarveshwar Bandi 

commit 6caab7b0544e83e6c160b5e80f5a4a7dd69545c7 upstream.

If lower layer driver leaves the ip header in the skb fragment, it needs to
be first pulled into skb->data before inspecting ip header length or ip version
number.

Signed-off-by: Sarveshwar Bandi 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 net/bridge/br_netfilter.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index e41456b..ab52468 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -256,6 +256,9 @@ static int br_parse_ip_options(struct sk_buff *skb)
struct net_device *dev = skb->dev;
u32 len;
 
+   if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+   goto inhdr_error;
+
iph = ip_hdr(skb);
opt = &(IPCB(skb)->opt);
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 72/81] Revert "USB: Handle warm reset failure on empty port."

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Herton Ronaldo Krzesinski 

This reverts commit ff88c5021f17139d961478e40372f6bb028321bc, which is a
cherry-pick of commit 65bdac5effd15d6af619b3b7218627ef4d84ed6a upstream.

As discussed on a recent thread on stable/lkml/etc. ("[regression]
external HDD in USB3 enclosure cannot be dynamically removed"), this
change caused issues, and shall be reapplied later with the proper
fixes, once they go into Linus tree.

Cc: Sarah Sharp 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/usb/core/hub.c |   12 +---
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index c9590c6..1e8b3bd 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2488,11 +2488,6 @@ static int hub_port_wait_reset(struct usb_hub *hub, int 
port1,
return 0;
}
} else {
-   if (!(portstatus & USB_PORT_STAT_CONNECTION) ||
-   hub_port_warm_reset_required(hub,
-   portstatus))
-   return -ENOTCONN;
-
return 0;
}
 
@@ -4537,14 +4532,9 @@ static void hub_events(void)
 * SS.Inactive state.
 */
if (hub_port_warm_reset_required(hub, portstatus)) {
-   int status;
-
dev_dbg(hub_dev, "warm reset port %d\n", i);
-   status = hub_port_reset(hub, i, NULL,
+   hub_port_reset(hub, i, NULL,
HUB_BH_RESET_TIME, true);
-   if (status < 0)
-   hub_port_disable(hub, i, 1);
-   connect_change = 0;
}
 
if (connect_change)
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 75/81] x86/apic: Work around boot failure on HP ProLiant DL980 G7 Server systems

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Stoney Wang 

commit cb214ede7657db458fd0b2a25ea0b28dbf900ebc upstream.

When a HP ProLiant DL980 G7 Server boots a regular kernel,
there will be intermittent lost interrupts which could
result in a hang or (in extreme cases) data loss.

The reason is that this system only supports x2apic physical
mode, while the kernel boots with a logical-cluster default
setting.

This bug can be worked around by specifying the "x2apic_phys" or
"nox2apic" boot option, but we want to handle this system
without requiring manual workarounds.

The BIOS sets ACPI_FADT_APIC_PHYSICAL in the FADT table.
As all apicids are smaller than 255, the BIOS needs to pass
control to the OS in xapic mode, according to x2apic-spec,
chapter 2.9.

The current code handles x2apic as follows when the BIOS passes
control with xapic mode enabled:

When user specifies x2apic_phys, or FADT indicates PHYSICAL:

1. During madt oem check, apic driver is set with xapic logical
   or xapic phys driver at first.

2. enable_IR_x2apic() will enable x2apic_mode.

3. if user specifies x2apic_phys on the boot line, x2apic_phys_probe()
   will install the correct x2apic phys driver and use x2apic phys mode.
   Otherwise it will skip the driver and let x2apic_cluster_probe() take
   over and install the x2apic cluster driver (the wrong one) even though
   FADT indicates PHYSICAL, because x2apic_phys_probe() does not check
   FADT PHYSICAL.

Add checking x2apic_fadt_phys in x2apic_phys_probe() to fix the
problem.

Signed-off-by: Stoney Wang 
[ updated the changelog and simplified the code ]
Signed-off-by: Yinghai Lu 
Link: 
http://lkml.kernel.org/r/1360263182-16226-1-git-send-email-ying...@kernel.org
Signed-off-by: Ingo Molnar 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 arch/x86/kernel/apic/x2apic_phys.c |   21 +++--
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/apic/x2apic_phys.c 
b/arch/x86/kernel/apic/x2apic_phys.c
index c17e982..d14fee3 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -20,18 +20,19 @@ static int set_x2apic_phys_mode(char *arg)
 }
 early_param("x2apic_phys", set_x2apic_phys_mode);
 
-static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static bool x2apic_fadt_phys(void)
 {
-   if (x2apic_phys)
-   return x2apic_enabled();
-   else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
-   (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) &&
-   x2apic_enabled()) {
+   if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
+   (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
printk(KERN_DEBUG "System requires x2apic physical mode\n");
-   return 1;
+   return true;
}
-   else
-   return 0;
+   return false;
+}
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+   return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
 }
 
 static void
@@ -114,7 +115,7 @@ static void init_x2apic_ldr(void)
 
 static int x2apic_phys_probe(void)
 {
-   if (x2apic_mode && x2apic_phys)
+   if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
return 1;
 
return apic == &apic_x2apic_phys;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 71/81] tg3: Fix crc errors on jumbo frame receive

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Nithin Nayak Sujir 

commit daf3ec688e057f6060fb9bb0819feac7a8bbf45c upstream.

TG3_PHY_AUXCTL_SMDSP_ENABLE/DISABLE macros do a blind write to the phy
auxiliary control register and overwrite the EXT_PKT_LEN (bit 14) resulting
in intermittent crc errors on jumbo frames with some link partners. Change
the code to do a read/modify/write.

Signed-off-by: Nithin Nayak Sujir 
Signed-off-by: Michael Chan 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/ethernet/broadcom/tg3.c |   58 +--
 1 file changed, 35 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 82a8ed2..4da6a86 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -1136,14 +1136,26 @@ static int tg3_phy_auxctl_write(struct tg3 *tp, int 
reg, u32 set)
return tg3_writephy(tp, MII_TG3_AUX_CTRL, set | reg);
 }
 
-#define TG3_PHY_AUXCTL_SMDSP_ENABLE(tp) \
-   tg3_phy_auxctl_write((tp), MII_TG3_AUXCTL_SHDWSEL_AUXCTL, \
-MII_TG3_AUXCTL_ACTL_SMDSP_ENA | \
-MII_TG3_AUXCTL_ACTL_TX_6DB)
+static int tg3_phy_toggle_auxctl_smdsp(struct tg3 *tp, bool enable)
+{
+   u32 val;
+   int err;
+
+   err = tg3_phy_auxctl_read(tp, MII_TG3_AUXCTL_SHDWSEL_AUXCTL, &val);
 
-#define TG3_PHY_AUXCTL_SMDSP_DISABLE(tp) \
-   tg3_phy_auxctl_write((tp), MII_TG3_AUXCTL_SHDWSEL_AUXCTL, \
-MII_TG3_AUXCTL_ACTL_TX_6DB);
+   if (err)
+   return err;
+   if (enable)
+
+   val |= MII_TG3_AUXCTL_ACTL_SMDSP_ENA;
+   else
+   val &= ~MII_TG3_AUXCTL_ACTL_SMDSP_ENA;
+
+   err = tg3_phy_auxctl_write((tp), MII_TG3_AUXCTL_SHDWSEL_AUXCTL,
+  val | MII_TG3_AUXCTL_ACTL_TX_6DB);
+
+   return err;
+}
 
 static int tg3_bmcr_reset(struct tg3 *tp)
 {
@@ -2076,7 +2088,7 @@ static void tg3_phy_apply_otp(struct tg3 *tp)
 
otp = tp->phy_otp;
 
-   if (TG3_PHY_AUXCTL_SMDSP_ENABLE(tp))
+   if (tg3_phy_toggle_auxctl_smdsp(tp, true))
return;
 
phy = ((otp & TG3_OTP_AGCTGT_MASK) >> TG3_OTP_AGCTGT_SHIFT);
@@ -2101,7 +2113,7 @@ static void tg3_phy_apply_otp(struct tg3 *tp)
  ((otp & TG3_OTP_RCOFF_MASK) >> TG3_OTP_RCOFF_SHIFT);
tg3_phydsp_write(tp, MII_TG3_DSP_EXP97, phy);
 
-   TG3_PHY_AUXCTL_SMDSP_DISABLE(tp);
+   tg3_phy_toggle_auxctl_smdsp(tp, false);
 }
 
 static void tg3_phy_eee_adjust(struct tg3 *tp, u32 current_link_up)
@@ -2137,9 +2149,9 @@ static void tg3_phy_eee_adjust(struct tg3 *tp, u32 
current_link_up)
 
if (!tp->setlpicnt) {
if (current_link_up == 1 &&
-  !TG3_PHY_AUXCTL_SMDSP_ENABLE(tp)) {
+  !tg3_phy_toggle_auxctl_smdsp(tp, true)) {
tg3_phydsp_write(tp, MII_TG3_DSP_TAP26, 0x);
-   TG3_PHY_AUXCTL_SMDSP_DISABLE(tp);
+   tg3_phy_toggle_auxctl_smdsp(tp, false);
}
 
val = tr32(TG3_CPMU_EEE_MODE);
@@ -2155,11 +2167,11 @@ static void tg3_phy_eee_enable(struct tg3 *tp)
(GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5717 ||
 GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 ||
 tg3_flag(tp, 57765_CLASS)) &&
-   !TG3_PHY_AUXCTL_SMDSP_ENABLE(tp)) {
+   !tg3_phy_toggle_auxctl_smdsp(tp, true)) {
val = MII_TG3_DSP_TAP26_ALNOKO |
  MII_TG3_DSP_TAP26_RMRXSTO;
tg3_phydsp_write(tp, MII_TG3_DSP_TAP26, val);
-   TG3_PHY_AUXCTL_SMDSP_DISABLE(tp);
+   tg3_phy_toggle_auxctl_smdsp(tp, false);
}
 
val = tr32(TG3_CPMU_EEE_MODE);
@@ -2303,7 +2315,7 @@ static int tg3_phy_reset_5703_4_5(struct tg3 *tp)
tg3_writephy(tp, MII_CTRL1000,
 CTL1000_AS_MASTER | CTL1000_ENABLE_MASTER);
 
-   err = TG3_PHY_AUXCTL_SMDSP_ENABLE(tp);
+   err = tg3_phy_toggle_auxctl_smdsp(tp, true);
if (err)
return err;
 
@@ -2324,7 +2336,7 @@ static int tg3_phy_reset_5703_4_5(struct tg3 *tp)
tg3_writephy(tp, MII_TG3_DSP_ADDRESS, 0x8200);
tg3_writephy(tp, MII_TG3_DSP_CONTROL, 0x);
 
-   TG3_PHY_AUXCTL_SMDSP_DISABLE(tp);
+   tg3_phy_toggle_auxctl_smdsp(tp, false);
 
tg3_writephy(tp, MII_CTRL1000, phy9_orig);
 
@@ -2413,10 +2425,10 @@ static int tg3_phy_reset(struct tg3 *tp)
 
 out:
if ((tp->phy_flags & TG3_PHYFLG_ADC_BUG) &&
-   !TG3_PHY_AUXCTL_SMDSP_ENABLE(tp)) {
+   !tg3_phy_toggle_auxctl_smdsp(tp, true)) {
tg3_phydsp_write(tp, 0x201f, 0x2aaa);
tg3_phydsp_write(tp, 0x000a, 0x0323);
-

[PATCH 63/81] xen/netback: shutdown the ring if it contains garbage.

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Ian Campbell 

commit 48856286b64e4b66ec62b94e504d0b29c1ade664 upstream.

A buggy or malicious frontend should not be able to confuse netback.
If we spot anything which is not as it should be then shutdown the
device and don't try to continue with the ring in a potentially
hostile state. Well behaved and non-hostile frontends will not be
penalised.

As well as making the existing checks for such errors fatal also add a
new check that ensures that there isn't an insane number of requests
on the ring (i.e. more than would fit in the ring). If the ring
contains garbage then previously it was possible to loop over this
insane number, getting an error each time and therefore not generating
any more pending requests and therefore not exiting the loop in
xen_netbk_tx_build_gops for an extended period.

Also turn various netdev_dbg calls which now precipitate a fatal error
into netdev_err, they are rate limited because the device is shutdown
afterwards.

This fixes at least one known DoS/softlockup of the backend domain.

Signed-off-by: Ian Campbell 
Reviewed-by: Konrad Rzeszutek Wilk 
Acked-by: Jan Beulich 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/xen-netback/common.h|3 ++
 drivers/net/xen-netback/interface.c |   23 -
 drivers/net/xen-netback/netback.c   |   62 +--
 3 files changed, 62 insertions(+), 26 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 94b79c3..9d7f172 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -151,6 +151,9 @@ void xen_netbk_queue_tx_skb(struct xenvif *vif, struct 
sk_buff *skb);
 /* Notify xenvif that ring now has space to send an skb to the frontend */
 void xenvif_notify_tx_completion(struct xenvif *vif);
 
+/* Prevent the device from generating any further traffic. */
+void xenvif_carrier_off(struct xenvif *vif);
+
 /* Returns number of ring slots required to send an skb to the frontend */
 unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff 
*skb);
 
diff --git a/drivers/net/xen-netback/interface.c 
b/drivers/net/xen-netback/interface.c
index b7d41f8..b8c5193 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -343,17 +343,22 @@ err:
return err;
 }
 
-void xenvif_disconnect(struct xenvif *vif)
+void xenvif_carrier_off(struct xenvif *vif)
 {
struct net_device *dev = vif->dev;
-   if (netif_carrier_ok(dev)) {
-   rtnl_lock();
-   netif_carrier_off(dev); /* discard queued packets */
-   if (netif_running(dev))
-   xenvif_down(vif);
-   rtnl_unlock();
-   xenvif_put(vif);
-   }
+
+   rtnl_lock();
+   netif_carrier_off(dev); /* discard queued packets */
+   if (netif_running(dev))
+   xenvif_down(vif);
+   rtnl_unlock();
+   xenvif_put(vif);
+}
+
+void xenvif_disconnect(struct xenvif *vif)
+{
+   if (netif_carrier_ok(vif->dev))
+   xenvif_carrier_off(vif);
 
atomic_dec(>refcnt);
wait_event(vif->waiting_to_free, atomic_read(>refcnt) == 0);
diff --git a/drivers/net/xen-netback/netback.c 
b/drivers/net/xen-netback/netback.c
index f4a6fca..ae321c0 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -859,6 +859,13 @@ static void netbk_tx_err(struct xenvif *vif,
xenvif_put(vif);
 }
 
+static void netbk_fatal_tx_err(struct xenvif *vif)
+{
+   netdev_err(vif->dev, "fatal error; disabling device\n");
+   xenvif_carrier_off(vif);
+   xenvif_put(vif);
+}
+
 static int netbk_count_requests(struct xenvif *vif,
struct xen_netif_tx_request *first,
struct xen_netif_tx_request *txp,
@@ -872,19 +879,22 @@ static int netbk_count_requests(struct xenvif *vif,
 
do {
if (frags >= work_to_do) {
-   netdev_dbg(vif->dev, "Need more frags\n");
+   netdev_err(vif->dev, "Need more frags\n");
+   netbk_fatal_tx_err(vif);
return -frags;
}
 
if (unlikely(frags >= MAX_SKB_FRAGS)) {
-   netdev_dbg(vif->dev, "Too many frags\n");
+   netdev_err(vif->dev, "Too many frags\n");
+   netbk_fatal_tx_err(vif);
return -frags;
}
 
memcpy(txp, RING_GET_REQUEST(>tx, cons + frags),
   sizeof(*txp));
if (txp->size > first->size) {
-   netdev_dbg(vif->dev, "Frags galore\n");
+   netdev_err(vif->dev, "Frag is bigger than frame.\n");
+

Re: Should a swapped out page be deleted from swap cache?

2013-02-19 Thread Hugh Dickins

On Tue, 19 Feb 2013, Ric Mason wrote:
> 
> There is a call of try_to_free_swap in function swap_writepage, if
> swap_writepage is called from shrink_page_list path, PageSwapCache(page) ==
> true, PageWriteback(page) may be false, page_swapcount(page) == 0, then will
> delete the page from swap cache and free swap slot, where I miss?

That's correct.  PageWriteback is sure to be false there.  page_swapcount
usually won't be 0 there, but sometimes it will be, and in that case we
do want to delete from swap cache and free the swap slot.

Hugh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 77/81] mm: don't overwrite mm->def_flags in do_mlockall()

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Gerald Schaefer 

commit 9977f0f164d46613288e0b5778eae500dfe06f31 upstream.

With commit 8e72033f2a48 ("thp: make MADV_HUGEPAGE check for
mm->def_flags") the VM_NOHUGEPAGE flag may be set on s390 in
mm->def_flags for certain processes, to prevent future thp mappings.
This would be overwritten by do_mlockall(), which sets it back to 0 with
an optional VM_LOCKED flag set.

To fix this, instead of overwriting mm->def_flags in do_mlockall(), only
the VM_LOCKED flag should be set or cleared.

Signed-off-by: Gerald Schaefer 
Reported-by: Vivek Goyal 
Cc: Andrea Arcangeli 
Cc: Hugh Dickins 
Cc: Martin Schwidefsky 
Cc: Heiko Carstens 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 mm/mlock.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/mlock.c b/mm/mlock.c
index ef726e8..3283272 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -524,11 +524,11 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, 
len)
 static int do_mlockall(int flags)
 {
struct vm_area_struct * vma, * prev = NULL;
-   unsigned int def_flags = 0;
 
if (flags & MCL_FUTURE)
-   def_flags = VM_LOCKED;
-   current->mm->def_flags = def_flags;
+   current->mm->def_flags |= VM_LOCKED;
+   else
+   current->mm->def_flags &= ~VM_LOCKED;
if (flags == MCL_FUTURE)
goto out;
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 78/81] x86/mm: Check if PUD is large when validating a kernel address

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Mel Gorman 

commit 0ee364eb316348ddf3e0dfcd986f5f13f528f821 upstream.

A user reported the following oops when a backup process reads
/proc/kcore:

 BUG: unable to handle kernel paging request at bb00ff33b000
 IP: [] kern_addr_valid+0xbe/0x110
 [...]

 Call Trace:
  [] read_kcore+0x17a/0x370
  [] proc_reg_read+0x77/0xc0
  [] vfs_read+0xc7/0x130
  [] sys_read+0x53/0xa0
  [] system_call_fastpath+0x16/0x1b

Investigation determined that the bug triggered when reading
system RAM at the 4G mark. On this system, that was the first
address using 1G pages for the virt->phys direct mapping so the
PUD is pointing to a physical address, not a PMD page.

The problem is that the page table walker in kern_addr_valid() is
not checking pud_large() and treats the physical address as if
it was a PMD.  If it happens to look like pmd_none then it'll
silently fail, probably returning zeros instead of real data. If
the data happens to look like a present PMD though, it will be
walked resulting in the oops above.

This patch adds the necessary pud_large() check.

Unfortunately the problem was not readily reproducible and now
they are running the backup program without accessing
/proc/kcore so the patch has not been validated but I think it
makes sense.

Signed-off-by: Mel Gorman 
Reviewed-by: Rik van Riel 
Reviewed-by: Michal Hocko 
Acked-by: Johannes Weiner 
Cc: linux...@kvack.org
Link: http://lkml.kernel.org/r/20130211145236.gx21...@suse.de
Signed-off-by: Ingo Molnar 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 arch/x86/include/asm/pgtable.h |5 +
 arch/x86/mm/init_64.c  |3 +++
 2 files changed, 8 insertions(+)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index c3520d7..3f3dd52 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
 }
 
+static inline unsigned long pud_pfn(pud_t pud)
+{
+   return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
 #define pte_page(pte)  pfn_to_page(pte_pfn(pte))
 
 static inline int pmd_large(pmd_t pte)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3baff25..ce42da7 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -829,6 +829,9 @@ int kern_addr_valid(unsigned long addr)
if (pud_none(*pud))
return 0;
 
+   if (pud_large(*pud))
+   return pfn_valid(pud_pfn(*pud));
+
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
return 0;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 80/81] efi: Clear EFI_RUNTIME_SERVICES rather than EFI_BOOT by "noefi" boot parameter

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Satoru Takeuchi 

commit 1de63d60cd5b0d33a812efa455d5933bf1564a51 upstream.

There was a serious problem in samsung-laptop that its platform driver is
designed to run under BIOS and running under EFI can cause the machine to
become bricked or can cause Machine Check Exceptions.

Discussion about this problem:
https://bugs.launchpad.net/ubuntu-cdimage/+bug/1040557
https://bugzilla.kernel.org/show_bug.cgi?id=47121

The patches to fix this problem:
efi: Make 'efi_enabled' a function to query EFI facilities
83e68189745ad931c2afd45d8ee3303929233e7f

samsung-laptop: Disable on EFI hardware
e0094244e41c4d0c7ad69920681972fc45d8ce34

Unfortunately this problem comes back again if users specify "noefi" option.
This parameter clears EFI_BOOT and that driver continues to run even if running
under EFI. According to the documentation, this parameter should clear
EFI_RUNTIME_SERVICES instead.

Documentation/kernel-parameters.txt:
===
...
noefi   [X86] Disable EFI runtime services support.
...
===

Documentation/x86/x86_64/uefi.txt:
===
...
- If some or all EFI runtime services don't work, you can try following
  kernel command line parameters to turn off some or all EFI runtime
  services.
noefi   turn off all EFI runtime services
...
===

Signed-off-by: Satoru Takeuchi 
Link: http://lkml.kernel.org/r/511c2c04.2070...@jp.fujitsu.com
Cc: Matt Fleming 
Signed-off-by: H. Peter Anvin 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 arch/x86/platform/efi/efi.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 6fcd4ad..3705bb0 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -86,7 +86,7 @@ EXPORT_SYMBOL(efi_enabled);
 
 static int __init setup_noefi(char *arg)
 {
-   clear_bit(EFI_BOOT, _efi_facility);
+   clear_bit(EFI_RUNTIME_SERVICES, _efi_facility);
return 0;
 }
 early_param("noefi", setup_noefi);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 81/81] mm: fix pageblock bitmap allocation

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Linus Torvalds 

commit 7c45512df987c5619db041b5c9b80d281e26d3db upstream.

Commit c060f943d092 ("mm: use aligned zone start for pfn_to_bitidx
calculation") fixed our calculation of the index into the pageblock
bitmap when a !SPARSEMEM zone was not aligned to pageblock_nr_pages.

However, the _allocation_ of that bitmap had never taken this alignment
requirement into account, so depending on the exact size and alignment of
the zone, the use of that index could then access past the allocation,
resulting in some very subtle memory corruption.

This was reported (and bisected) by Ingo Molnar: one of his random
config builds would hang with certain very specific kernel command line
options.

In the meantime, commit c060f943d092 has been marked for stable, so this
fix needs to be back-ported to the stable kernels that backported the
commit to use the right alignment.

Bisected-and-tested-by: Ingo Molnar 
Acked-by: Mel Gorman 
Signed-off-by: Linus Torvalds 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 mm/page_alloc.c |   15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 007bf3b..0ed96c7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4272,10 +4272,11 @@ static void __meminit calculate_node_totalpages(struct 
pglist_data *pgdat,
  * round what is now in bits to nearest long in bits, then return it in
  * bytes.
  */
-static unsigned long __init usemap_size(unsigned long zonesize)
+static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned 
long zonesize)
 {
unsigned long usemapsize;
 
+   zonesize += zone_start_pfn & (pageblock_nr_pages-1);
usemapsize = roundup(zonesize, pageblock_nr_pages);
usemapsize = usemapsize >> pageblock_order;
usemapsize *= NR_PAGEBLOCK_BITS;
@@ -4285,17 +4286,19 @@ static unsigned long __init usemap_size(unsigned long 
zonesize)
 }
 
 static void __init setup_usemap(struct pglist_data *pgdat,
-   struct zone *zone, unsigned long zonesize)
+   struct zone *zone,
+   unsigned long zone_start_pfn,
+   unsigned long zonesize)
 {
-   unsigned long usemapsize = usemap_size(zonesize);
+   unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
zone->pageblock_flags = NULL;
if (usemapsize)
zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
   usemapsize);
 }
 #else
-static inline void setup_usemap(struct pglist_data *pgdat,
-   struct zone *zone, unsigned long zonesize) {}
+static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
+   unsigned long zone_start_pfn, unsigned long 
zonesize) {}
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -4414,7 +4417,7 @@ static void __paginginit free_area_init_core(struct 
pglist_data *pgdat,
continue;
 
set_pageblock_order();
-   setup_usemap(pgdat, zone, size);
+   setup_usemap(pgdat, zone, zone_start_pfn, size);
ret = init_currently_empty_zone(zone, zone_start_pfn,
size, MEMMAP_EARLY);
BUG_ON(ret);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 79/81] PCI/PM: Clean up PME state when removing a device

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: "Rafael J. Wysocki" 

commit 249bfb83cf8ba658955f0245ac3981d941f746ee upstream.

Devices are added to pci_pme_list when drivers use pci_enable_wake()
or pci_wake_from_d3(), but they aren't removed from the list unless
the driver explicitly disables wakeup.  Many drivers never disable
wakeup, so their devices remain on the list even after they are
removed, e.g., via hotplug.  A subsequent PME poll will oops when
it tries to touch the device.

This patch disables PME# on a device before removing it, which removes
the device from pci_pme_list.  This is safe even if the device never
had PME# enabled.

This oops can be triggered by unplugging a Thunderbolt ethernet adapter
on a Macbook Pro, as reported by Daniel below.

[bhelgaas: changelog]
Reference: 
http://lkml.kernel.org/r/camvg2svg21yim1wkh4_2pen2n+cr2-zv7tbh3gj+8mwevzj...@mail.gmail.com
Reported-and-tested-by: Daniel J Blueman 
Signed-off-by: Rafael J. Wysocki 
Signed-off-by: Bjorn Helgaas 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/pci/remove.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index fd77e2b..eae55c7 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -19,6 +19,8 @@ static void pci_free_resources(struct pci_dev *dev)
 
 static void pci_stop_dev(struct pci_dev *dev)
 {
+   pci_pme_active(dev, false);
+
if (dev->is_added) {
pci_proc_detach_device(dev);
pci_remove_sysfs_dev_files(dev);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 76/81] drivers/rtc/rtc-pl031.c: restore ST variant functionality

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Linus Walleij 

commit 3399cfb5df9594495b876d1843a7165f77366b2b upstream.

Commit e7e034e18a0a ("drivers/rtc/rtc-pl031.c: fix the missing operation
on enable") accidentally broke the ST variants of PL031.

The bit that is being poked as "clockwatch" enable bit for the ST
variants does the work of bit 0 on this variant.  Bit 0 is used for a
clock divider on the ST variants, and setting it to 1 will affect
timekeeping in a very bad way.

Signed-off-by: Linus Walleij 
Acked-by: Haojian Zhuang 
Cc: Mian Yousaf KAUKAB 
Cc: Srinidhi Kasagar 
Cc: Alessandro Zummo 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
[ herton: adjust context ]
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/rtc/rtc-pl031.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-pl031.c b/drivers/rtc/rtc-pl031.c
index c42054b..b232996 100644
--- a/drivers/rtc/rtc-pl031.c
+++ b/drivers/rtc/rtc-pl031.c
@@ -336,7 +336,9 @@ static int pl031_probe(struct amba_device *adev, const 
struct amba_id *id)
/* Enable the clockwatch on ST Variants */
if (ldata->hw_designer == AMBA_VENDOR_ST)
data |= RTC_CR_CWEN;
-   writel(data | RTC_CR_EN, ldata->base + RTC_CR);
+   else
+   data |= RTC_CR_EN;
+   writel(data, ldata->base + RTC_CR);
 
/*
 * On ST PL031 variants, the RTC reset value does not provide correct
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 74/81] x86: Do not leak kernel page mapping locations

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Kees Cook 

commit e575a86fdc50d013bf3ad3aa81d9100e8e6cc60d upstream.

Without this patch, it is trivial to determine kernel page
mappings by examining the error code reported to dmesg[1].
Instead, declare the entire kernel memory space as a violation
of a present page.

Additionally, since show_unhandled_signals is enabled by
default, switch branch hinting to the more realistic
expectation, and unobfuscate the setting of the PF_PROT bit to
improve readability.

[1] http://vulnfactory.org/blog/2013/02/06/a-linux-memory-trick/

Reported-by: Dan Rosenberg 
Suggested-by: Brad Spengler 
Signed-off-by: Kees Cook 
Acked-by: H. Peter Anvin 
Cc: Paul E. McKenney 
Cc: Frederic Weisbecker 
Cc: Eric W. Biederman 
Cc: Linus Torvalds 
Cc: Andrew Morton 
Cc: Peter Zijlstra 
Link: http://lkml.kernel.org/r/20130207174413.ga12...@www.outflux.net
Signed-off-by: Ingo Molnar 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 arch/x86/mm/fault.c |8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 76dcd9d..c6b10e2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -747,13 +747,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned 
long error_code,
return;
}
 #endif
+   /* Kernel addresses are always protection faults: */
+   if (address >= TASK_SIZE)
+   error_code |= PF_PROT;
 
-   if (unlikely(show_unhandled_signals))
+   if (likely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
 
-   /* Kernel addresses are always protection faults: */
tsk->thread.cr2 = address;
-   tsk->thread.error_code  = error_code | (address >= TASK_SIZE);
+   tsk->thread.error_code  = error_code;
tsk->thread.trap_nr = X86_TRAP_PF;
 
force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 70/81] tg3: Avoid null pointer dereference in tg3_interrupt in netconsole mode

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Nithin Nayak Sujir 

commit 9c13cb8bb477a83b9a3c9e5a5478a4e21294a760 upstream.

When netconsole is enabled, logging messages generated during tg3_open
can result in a null pointer dereference for the uninitialized tg3
status block. Use the irq_sync flag to disable polling in the early
stages. irq_sync is cleared when the driver is enabling interrupts after
all initialization is completed.

Signed-off-by: Nithin Nayak Sujir 
Signed-off-by: Michael Chan 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/ethernet/broadcom/tg3.c |4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/tg3.c 
b/drivers/net/ethernet/broadcom/tg3.c
index 15f8b00..82a8ed2 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6594,6 +6594,9 @@ static void tg3_poll_controller(struct net_device *dev)
int i;
struct tg3 *tp = netdev_priv(dev);
 
+   if (tg3_irq_sync(tp))
+   return;
+
for (i = 0; i < tp->irq_cnt; i++)
tg3_interrupt(tp->napi[i].irq_vec, >napi[i]);
 }
@@ -15556,6 +15559,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
tp->pm_cap = pm_cap;
tp->rx_mode = TG3_DEF_RX_MODE;
tp->tx_mode = TG3_DEF_TX_MODE;
+   tp->irq_sync = 1;
 
if (tg3_debug > 0)
tp->msg_enable = tg3_debug;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 73/81] s390/timer: avoid overflow when programming clock comparator

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Heiko Carstens 

commit d911e03d097bdc01363df5d81c43f69432eb785c upstream.

Since ed4f209 "s390/time: fix sched_clock() overflow" a new helper function
is used to avoid overflows when converting TOD format values to nanosecond
values.
The kvm interrupt code formerly however only worked by accident because of
an overflow. It tried to program a timer that would expire in more than ~29
years. Because of the old TOD-to-nanoseconds overflow bug the real expiry
value however was much smaller, but now it isn't anymore.
This however triggers yet another bug in the function that programs the clock
comparator s390_next_ktime(): if the absolute "expires" value is after 2042
this will result in an overflow and the programmed value is lower than the
current TOD value which immediately triggers a clock comparator (= timer)
interrupt.
Since the timer isn't expired it will be programmed immediately again and so
on... the result is a dead system.
To fix this simply program the maximum possible value if an overflow is
detected.

Reported-by: Christian Borntraeger 
Tested-by: Christian Borntraeger 
Signed-off-by: Heiko Carstens 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 arch/s390/kernel/time.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index c5531db..747ab28 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -121,6 +121,9 @@ static int s390_next_ktime(ktime_t expires,
nsecs = ktime_to_ns(ktime_add(timespec_to_ktime(ts), expires));
do_div(nsecs, 125);
S390_lowcore.clock_comparator = sched_clock_base_cc + (nsecs << 9);
+   /* Program the maximum value if we have an overflow (== year 2042) */
+   if (unlikely(S390_lowcore.clock_comparator < sched_clock_base_cc))
+   S390_lowcore.clock_comparator = -1ULL;
set_clock_comparator(S390_lowcore.clock_comparator);
return 0;
 }
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: slab: odd BUG on kzalloc

2013-02-19 Thread Dave Jones

On Tue, Feb 19, 2013 at 01:18:25PM -0500, Sasha Levin wrote:

 > >> [  169.930103] ---[ end trace 4d135f3def21b4bd ]---
 > >>
 > >> The code translates to the following in fs/pipe.c:alloc_pipe_info :
 > >>
 > >> pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
 > >> if (pipe) {
 > >> pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * 
 > >> PIPE_DEF_BUFFERS, GFP_KERNEL); <=== this
 > >> if (pipe->bufs) {
 > >> init_waitqueue_head(&pipe->wait);
 > 
 > Looks like it's not specific to pipe(). I've also got this one now:
 > 
 > Since I've managed to reproduce it, I'll go ahead and add slub_debug and see 
 > what it tells us.

I'm curious, did you recently upgrade gcc, or other parts of the toolchain ?
This, and one of the other 'weird' bugs you reported recently have me wondering
if perhaps you're seeing a compiler bug.

Dave

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 67/81] tcp: frto should not set snd_cwnd to 0

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Eric Dumazet 

commit 2e5f421211ff76c17130b4597bc06df4eeead24f upstream.

Commit 9dc274151a548 (tcp: fix ABC in tcp_slow_start())
uncovered a bug in FRTO code :
tcp_process_frto() is setting snd_cwnd to 0 if the number
of in flight packets is 0.

As Neal pointed out, if no packet is in flight we lost our
chance to disambiguate whether a loss timeout was spurious.

We should assume it was a proper loss.

Reported-by: Pasi Kärkkäinen 
Signed-off-by: Neal Cardwell 
Signed-off-by: Eric Dumazet 
Cc: Ilpo Järvinen 
Cc: Yuchung Cheng 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 net/ipv4/tcp_input.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 19c430c..6affa92 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3695,7 +3695,8 @@ static bool tcp_process_frto(struct sock *sk, int flag)
((tp->frto_counter >= 2) && (flag & FLAG_RETRANS_DATA_ACKED)))
tp->undo_marker = 0;
 
-   if (!before(tp->snd_una, tp->frto_highmark)) {
+   if (!before(tp->snd_una, tp->frto_highmark) ||
+   !tcp_packets_in_flight(tp)) {
tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 2 : 3), flag);
return true;
}
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 65/81] xen/netback: free already allocated memory on failure in xen_netbk_get_requests

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Ian Campbell 

commit 4cc7c1cb7b11b6f3515bd9075527576a1eecc4aa upstream.

Signed-off-by: Ian Campbell 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/xen-netback/netback.c |   13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/net/xen-netback/netback.c 
b/drivers/net/xen-netback/netback.c
index e7913e0..c503a58 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -949,7 +949,7 @@ static struct gnttab_copy *xen_netbk_get_requests(struct 
xen_netbk *netbk,
pending_idx = netbk->pending_ring[index];
page = xen_netbk_alloc_page(netbk, skb, pending_idx);
if (!page)
-   return NULL;
+   goto err;
 
gop->source.u.ref = txp->gref;
gop->source.domid = vif->domid;
@@ -971,6 +971,17 @@ static struct gnttab_copy *xen_netbk_get_requests(struct 
xen_netbk *netbk,
}
 
return gop;
+err:
+   /* Unwind, freeing all pages and sending error responses. */
+   while (i-- > start) {
+   xen_netbk_idx_release(netbk, frag_get_pending_idx(&frags[i]),
+ XEN_NETIF_RSP_ERROR);
+   }
+   /* The head too, if necessary. */
+   if (start)
+   xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);
+
+   return NULL;
 }
 
 static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 61/81] net: sctp: sctp_setsockopt_auth_key: use kzfree instead of kfree

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Daniel Borkmann 

commit 6ba542a291a5e558603ac51cda9bded347ce7627 upstream.

In sctp_setsockopt_auth_key, we create a temporary copy of the user
passed shared auth key for the endpoint or association and after
internal setup, we free it right away. Since it's sensitive data, we
should zero out the key before returning the memory back to the
allocator. Thus, use kzfree instead of kfree, just as we do in
sctp_auth_key_put().

Signed-off-by: Daniel Borkmann 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 net/sctp/socket.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 944cfce..957bb6e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3375,7 +3375,7 @@ static int sctp_setsockopt_auth_key(struct sock *sk,
 
ret = sctp_auth_set_key(sctp_sk(sk)->ep, asoc, authkey);
 out:
-   kfree(authkey);
+   kzfree(authkey);
return ret;
 }
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 64/81] xen/netback: don't leak pages on failure in xen_netbk_tx_check_gop.

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Matthew Daley 

commit 7d5145d8eb2b9791533ffe4dc003b129b9696c48 upstream.

Signed-off-by: Matthew Daley 
Reviewed-by: Konrad Rzeszutek Wilk 
Acked-by: Ian Campbell 
Acked-by: Jan Beulich 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/xen-netback/netback.c |   38 +
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c 
b/drivers/net/xen-netback/netback.c
index ae321c0..e7913e0 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -146,7 +146,8 @@ void xen_netbk_remove_xenvif(struct xenvif *vif)
atomic_dec(&netbk->netfront_count);
 }
 
-static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx);
+static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx,
+ u8 status);
 static void make_tx_response(struct xenvif *vif,
 struct xen_netif_tx_request *txp,
 s8   st);
@@ -978,30 +979,20 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk,
 {
struct gnttab_copy *gop = *gopp;
u16 pending_idx = *((u16 *)skb->data);
-   struct pending_tx_info *pending_tx_info = netbk->pending_tx_info;
-   struct xenvif *vif = pending_tx_info[pending_idx].vif;
-   struct xen_netif_tx_request *txp;
struct skb_shared_info *shinfo = skb_shinfo(skb);
int nr_frags = shinfo->nr_frags;
int i, err, start;
 
/* Check status of header. */
err = gop->status;
-   if (unlikely(err)) {
-   pending_ring_idx_t index;
-   index = pending_index(netbk->pending_prod++);
-   txp = &pending_tx_info[pending_idx].req;
-   make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
-   netbk->pending_ring[index] = pending_idx;
-   xenvif_put(vif);
-   }
+   if (unlikely(err))
+   xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);
 
/* Skip first skb fragment if it is on same page as header fragment. */
start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
 
for (i = start; i < nr_frags; i++) {
int j, newerr;
-   pending_ring_idx_t index;
 
pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
 
@@ -1010,16 +1001,12 @@ static int xen_netbk_tx_check_gop(struct xen_netbk 
*netbk,
if (likely(!newerr)) {
/* Had a previous error? Invalidate this fragment. */
if (unlikely(err))
-   xen_netbk_idx_release(netbk, pending_idx);
+   xen_netbk_idx_release(netbk, pending_idx, 
XEN_NETIF_RSP_OKAY);
continue;
}
 
/* Error on this fragment: respond to client with an error. */
-   txp = &netbk->pending_tx_info[pending_idx].req;
-   make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
-   index = pending_index(netbk->pending_prod++);
-   netbk->pending_ring[index] = pending_idx;
-   xenvif_put(vif);
+   xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR);
 
/* Not the first error? Preceding frags already invalidated. */
if (err)
@@ -1027,10 +1014,10 @@ static int xen_netbk_tx_check_gop(struct xen_netbk 
*netbk,
 
/* First error: invalidate header and preceding fragments. */
pending_idx = *((u16 *)skb->data);
-   xen_netbk_idx_release(netbk, pending_idx);
+   xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
for (j = start; j < i; j++) {
pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
-   xen_netbk_idx_release(netbk, pending_idx);
+   xen_netbk_idx_release(netbk, pending_idx, 
XEN_NETIF_RSP_OKAY);
}
 
/* Remember the error: invalidate all subsequent fragments. */
@@ -1064,7 +1051,7 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, 
struct sk_buff *skb)
 
/* Take an extra reference to offset xen_netbk_idx_release */
get_page(netbk->mmap_pages[pending_idx]);
-   xen_netbk_idx_release(netbk, pending_idx);
+   xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY);
}
 }
 
@@ -1447,7 +1434,7 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk)
txp->size -= data_len;
} else {
/* Schedule a response immediately. */
-   xen_netbk_idx_release(netbk, pending_idx);
+   xen_netbk_idx_release(netbk, pending_idx, 
XEN_NETIF_RSP_OKAY);

[PATCH 59/81] atm/iphase: rename fregt_t -> ffreg_t

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Heiko Carstens 

commit ab54ee80aa7585f9666ff4dd665441d7ce41f1e8 upstream.

We have conflicting type qualifiers for "freg_t" in s390's ptrace.h and the
iphase atm device driver, which causes the compile error below.
Unfortunately the s390 typedef can't be renamed, since it's a user visible api,
nor can I change the include order in s390 code to avoid the conflict.

So simply rename the iphase typedef to a new name. Fixes this compile error:

In file included from drivers/atm/iphase.c:66:0:
drivers/atm/iphase.h:639:25: error: conflicting type qualifiers for 'freg_t'
In file included from next/arch/s390/include/asm/ptrace.h:9:0,
 from next/arch/s390/include/asm/lowcore.h:12,
 from next/arch/s390/include/asm/thread_info.h:30,
 from include/linux/thread_info.h:54,
 from include/linux/preempt.h:9,
 from include/linux/spinlock.h:50,
 from include/linux/seqlock.h:29,
 from include/linux/time.h:5,
 from include/linux/stat.h:18,
 from include/linux/module.h:10,
 from drivers/atm/iphase.c:43:
next/arch/s390/include/uapi/asm/ptrace.h:197:3: note: previous declaration of 
'freg_t' was here

Signed-off-by: Heiko Carstens 
Acked-by: chas williams - CONTRACTOR 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/atm/iphase.h |  146 +-
 1 file changed, 73 insertions(+), 73 deletions(-)

diff --git a/drivers/atm/iphase.h b/drivers/atm/iphase.h
index 6a0955e..53ecac5 100644
--- a/drivers/atm/iphase.h
+++ b/drivers/atm/iphase.h
@@ -636,82 +636,82 @@ struct rx_buf_desc {
 #define SEG_BASE IPHASE5575_FRAG_CONTROL_REG_BASE  
 #define REASS_BASE IPHASE5575_REASS_CONTROL_REG_BASE  
 
-typedef volatile u_int  freg_t;
+typedef volatile u_int ffreg_t;
 typedef u_int   rreg_t;
 
 typedef struct _ffredn_t {
-freg_t  idlehead_high;  /* Idle cell header (high)  */
-freg_t  idlehead_low;   /* Idle cell header (low)   */
-freg_t  maxrate;/* Maximum rate */
-freg_t  stparms;/* Traffic Management Parameters*/
-freg_t  abrubr_abr; /* ABRUBR Priority Byte 1, TCR Byte 0   */
-freg_t  rm_type;/*  */
-u_int   filler5[0x17 - 0x06];
-freg_t  cmd_reg;/* Command register */
-u_int   filler18[0x20 - 0x18];
-freg_t  cbr_base;   /* CBR Pointer Base */
-freg_t  vbr_base;   /* VBR Pointer Base */
-freg_t  abr_base;   /* ABR Pointer Base */
-freg_t  ubr_base;   /* UBR Pointer Base */
-u_int   filler24;
-freg_t  vbrwq_base; /* VBR Wait Queue Base  */
-freg_t  abrwq_base; /* ABR Wait Queue Base  */
-freg_t  ubrwq_base; /* UBR Wait Queue Base  */
-freg_t  vct_base;   /* Main VC Table Base   */
-freg_t  vcte_base;  /* Extended Main VC Table Base  */
-u_int   filler2a[0x2C - 0x2A];
-freg_t  cbr_tab_beg;/* CBR Table Begin  */
-freg_t  cbr_tab_end;/* CBR Table End*/
-freg_t  cbr_pointer;/* CBR Pointer  */
-u_int   filler2f[0x30 - 0x2F];
-freg_t  prq_st_adr; /* Packet Ready Queue Start Address */
-freg_t  prq_ed_adr; /* Packet Ready Queue End Address   */
-freg_t  prq_rd_ptr; /* Packet Ready Queue read pointer  */
-freg_t  prq_wr_ptr; /* Packet Ready Queue write pointer */
-freg_t  tcq_st_adr; /* Transmit Complete Queue Start Address*/
-freg_t  tcq_ed_adr; /* Transmit Complete Queue End Address  */
-freg_t  tcq_rd_ptr; /* Transmit Complete Queue read pointer */
-freg_t  tcq_wr_ptr; /* Transmit Complete Queue write pointer*/
-u_int   filler38[0x40 - 0x38];
-freg_t  queue_base; /* Base address for PRQ and TCQ */
-freg_t  desc_base;  /* Base address of descriptor table */
-u_int   filler42[0x45 - 0x42];
-freg_t  mode_reg_0; /* Mode register 0  */
-freg_t  mode_reg_1; /* Mode register 1  */
-freg_t  intr_status_reg;/* Interrupt Status register*/
-freg_t  mask_reg;   /* Mask Register*/
-freg_t  cell_ctr_high1; /* Total cell transfer count (high) */
-freg_t  cell_ctr_lo1;   /* Total cell transfer count (low)  */
-

[PATCH 57/81] via-rhine: Fix bugs in NAPI support.

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: "David S. Miller" 

commit 559bcac35facfed49ab4f408e162971612dcfdf3 upstream.

1) rhine_tx() should use dev_kfree_skb() not dev_kfree_skb_irq()

2) rhine_slow_event_task's NAPI triggering logic is racy; it
   should just hit the interrupt mask register.  This is the
   same as commit 7dbb491878a2c51d372a8890fa45a8ff80358af1
   ("r8169: avoid NAPI scheduling delay.") made to fix the same
   problem in the r8169 driver.  From Francois Romieu.

Reported-by: Jamie Gloudon 
Tested-by: Jamie Gloudon 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/ethernet/via/via-rhine.c |8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/via/via-rhine.c 
b/drivers/net/ethernet/via/via-rhine.c
index 0459c09..046526e0 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -1802,7 +1802,7 @@ static void rhine_tx(struct net_device *dev)
 rp->tx_skbuff[entry]->len,
 PCI_DMA_TODEVICE);
}
-   dev_kfree_skb_irq(rp->tx_skbuff[entry]);
+   dev_kfree_skb(rp->tx_skbuff[entry]);
rp->tx_skbuff[entry] = NULL;
entry = (++rp->dirty_tx) % TX_RING_SIZE;
}
@@ -2011,11 +2011,7 @@ static void rhine_slow_event_task(struct work_struct 
*work)
if (intr_status & IntrPCIErr)
netif_warn(rp, hw, dev, "PCI error\n");
 
-   napi_disable(&rp->napi);
-   rhine_irq_disable(rp);
-   /* Slow and safe. Consider __napi_schedule as a replacement ? */
-   napi_enable(&rp->napi);
-   napi_schedule(&rp->napi);
+   iowrite16(RHINE_EVENT & 0xffff, rp->base + IntrEnable);
 
 out_unlock:
mutex_unlock(&rp->task_lock);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 55/81] pktgen: correctly handle failures when adding a device

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Cong Wang 

commit 604dfd6efc9b79bce432f2394791708d8e8f6efc upstream.

The return value of pktgen_add_device() is not checked, so
even if we fail to add some device, for example, a non-existent one,
we still see "OK:...". This patch fixes it.

After this patch, I got:

# echo "add_device non-exist" > /proc/net/pktgen/kpktgend_0
-bash: echo: write error: No such device
# cat /proc/net/pktgen/kpktgend_0
Running:
Stopped:
Result: ERROR: can not add device non-exist
# echo "add_device eth0" > /proc/net/pktgen/kpktgend_0
# cat /proc/net/pktgen/kpktgend_0
Running:
Stopped: eth0
Result: OK: add_device=eth0

(Candidate for -stable)

Cc: David S. Miller 
Signed-off-by: Cong Wang 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 net/core/pktgen.c |9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index aa278cd..2a42802 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1797,10 +1797,13 @@ static ssize_t pktgen_thread_write(struct file *file,
return -EFAULT;
i += len;
mutex_lock(&pktgen_thread_lock);
-   pktgen_add_device(t, f);
+   ret = pktgen_add_device(t, f);
mutex_unlock(&pktgen_thread_lock);
-   ret = count;
-   sprintf(pg_result, "OK: add_device=%s", f);
+   if (!ret) {
+   ret = count;
+   sprintf(pg_result, "OK: add_device=%s", f);
+   } else
+   sprintf(pg_result, "ERROR: can not add device %s", f);
goto out;
}
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 52/81] netxen: fix off by one bug in netxen_release_tx_buffer()

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Eric Dumazet 

commit a05948f296ce103989b28a2606e47d2e287c3c89 upstream.

Christoph Paasch found netxen could trigger a BUG in its dismantle
phase, in netxen_release_tx_buffer(), using full size TSO packets.

cmd_buf->frag_count includes the skb->data part, so the loop must
start at index 1 instead of 0, or else we can make an
out-of-bounds access to cmd_buf->frag_array[MAX_SKB_FRAGS + 2]

Christoph provided the fixes in netxen_map_tx_skb() function.
In case of a dma mapping error, it's better to clear the dma fields
so that we don't try to unmap them again in netxen_release_tx_buffer()

Reported-by: Christoph Paasch 
Signed-off-by: Eric Dumazet 
Tested-by: Christoph Paasch 
Cc: Sony Chacko 
Cc: Rajesh Borundia 
Signed-off-by: Christoph Paasch 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 .../net/ethernet/qlogic/netxen/netxen_nic_init.c   |2 +-
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c 
b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
index 8694124..fdddfcc 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_init.c
@@ -144,7 +144,7 @@ void netxen_release_tx_buffers(struct netxen_adapter 
*adapter)
 buffrag->length, PCI_DMA_TODEVICE);
buffrag->dma = 0ULL;
}
-   for (j = 0; j < cmd_buf->frag_count; j++) {
+   for (j = 1; j < cmd_buf->frag_count; j++) {
buffrag++;
if (buffrag->dma) {
pci_unmap_page(adapter->pdev, buffrag->dma,
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c 
b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index a77c558..d6a8218 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -1963,10 +1963,12 @@ unwind:
while (--i >= 0) {
nf = &pbuf->frag_array[i+1];
pci_unmap_page(pdev, nf->dma, nf->length, PCI_DMA_TODEVICE);
+   nf->dma = 0ULL;
}
 
nf = &pbuf->frag_array[0];
pci_unmap_single(pdev, nf->dma, skb_headlen(skb), PCI_DMA_TODEVICE);
+   nf->dma = 0ULL;
 
 out_err:
return -ENOMEM;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 46/81] ipv6: fix header length calculation in ip6_append_data()

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Romain KUNTZ 

commit 7efdba5bd9a2f3e2059beeb45c9fa55eefe1bced upstream.

Commit 299b0767 (ipv6: Fix IPsec slowpath fragmentation problem)
has introduced an error in the header length calculation that
provokes corrupted packets when non-fragmentable extensions
headers (Destination Option or Routing Header Type 2) are used.

rt->rt6i_nfheader_len is the length of the non-fragmentable
extension header, and it should be subtracted from
rt->dst.header_len, and not from exthdrlen, as was done before
commit 299b0767.

This patch reverts to the original and correct behavior. It has
been successfully tested with and without IPsec on packets
that include non-fragmentable extensions headers.

Signed-off-by: Romain Kuntz 
Acked-by: Steffen Klassert 
Signed-off-by: David S. Miller 
[ herton: adjust context ]
Signed-off-by: Herton Ronaldo Krzesinski 
---
 net/ipv6/ip6_output.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index decc21d1..4703c70 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1293,10 +1293,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void 
*from, char *to,
cork->length = 0;
sk->sk_sndmsg_page = NULL;
sk->sk_sndmsg_off = 0;
-   exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
+   exthdrlen = (opt ? opt->opt_flen : 0);
length += exthdrlen;
transhdrlen += exthdrlen;
-   dst_exthdrlen = rt->dst.header_len;
+   dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
} else {
rt = (struct rt6_info *)cork->dst;
fl6 = &inet->cork.fl.u.ip6;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 49/81] net/mlx4_en: Fix bridged vSwitch configuration for non SRIOV mode

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Yan Burman 

commit 213815a1e6ae70b9648483b110bc5081795f99e8 upstream.

Commit 5b4c4d36860e "mlx4_en: Allow communication between functions on
same host" introduced a regression under which a bridge acting as vSwitch
whose uplink is an mlx4 Ethernet device becomes non-operative in native
(non sriov) mode. This happens since broadcast ARP requests sent by VMs
were loopback-ed by the HW and hence the bridge learned VM source MACs
on both the VM and the uplink ports.

The fix is to place the DMAC in the send WQE only under SRIOV/eSwitch
configuration or when the device is in selftest.

Reviewed-by: Or Gerlitz 
Signed-off-by: Yan Burman 
Signed-off-by: Amir Vadai 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c |   13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c 
b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 019d856..8ba03c9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -633,10 +633,15 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct 
net_device *dev)
ring->tx_csum++;
}
 
-   /* Copy dst mac address to wqe */
-   ethh = (struct ethhdr *)skb->data;
-   tx_desc->ctrl.srcrb_flags16[0] = get_unaligned((__be16 *)ethh->h_dest);
-   tx_desc->ctrl.imm = get_unaligned((__be32 *)(ethh->h_dest + 2));
+   if (mlx4_is_mfunc(mdev->dev) || priv->validate_loopback) {
+   /* Copy dst mac address to wqe. This allows loopback in eSwitch,
+* so that VFs and PF can communicate with each other
+*/
+   ethh = (struct ethhdr *)skb->data;
+   tx_desc->ctrl.srcrb_flags16[0] = get_unaligned((__be16 
*)ethh->h_dest);
+   tx_desc->ctrl.imm = get_unaligned((__be32 *)(ethh->h_dest + 2));
+   }
+
/* Handle LSO (TSO) packets */
if (lso_header_size) {
/* Mark opcode as LSO */
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 43/81] net: prevent setting ttl=0 via IP_TTL

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Cong Wang 

commit c9be4a5c49cf51cc70a993f004c5bb30067a65ce upstream.

A regression is introduced by the following commit:

commit 4d52cfbef6266092d535237ba5a4b981458ab171
Author: Eric Dumazet 
Date:   Tue Jun 2 00:42:16 2009 -0700

net: ipv4/ip_sockglue.c cleanups

Pure cleanups

but it is not a pure cleanup...

-   if (val != -1 && (val < 1 || val>255))
+   if (val != -1 && (val < 0 || val > 255))

Since there is no reason provided to allow ttl=0, change it back.

Reported-by: nitin padalia 
Cc: nitin padalia 
Cc: Eric Dumazet 
Cc: David S. Miller 
Signed-off-by: Cong Wang 
Acked-by: Eric Dumazet 
Signed-off-by: David S. Miller 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 net/ipv4/ip_sockglue.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8285f00..aabeb7b 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -589,7 +589,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_TTL:
if (optlen < 1)
goto e_inval;
-   if (val != -1 && (val < 0 || val > 255))
+   if (val != -1 && (val < 1 || val > 255))
goto e_inval;
inet->uc_ttl = val;
break;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 42/81] be2net: Fix to trim skb for padded vlan packets to workaround an ASIC Bug

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Somnath Kotur 

commit 93040ae5cc8dcc893eca4a4366dc8415af278edf upstream.

Fixed spelling error in a comment as pointed out by DaveM.
Also refactored existing code a bit to provide placeholders for another ASIC
Bug workaround that will be checked-in soon after this.

Signed-off-by: Somnath Kotur 
Signed-off-by: David S. Miller 
Cc: Jacek Luczak 
[ herton: adjust context ]
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/net/ethernet/emulex/benet/be.h  |5 +++
 drivers/net/ethernet/emulex/benet/be_main.c |   56 ---
 2 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be.h 
b/drivers/net/ethernet/emulex/benet/be.h
index c5c4c0e..793ee6b 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -573,6 +573,11 @@ static inline u8 is_udp_pkt(struct sk_buff *skb)
return val;
 }
 
+static inline bool is_ipv4_pkt(struct sk_buff *skb)
+{
+   return skb->protocol == ntohs(ETH_P_IP) && ip_hdr(skb)->version == 4;
+}
+
 static inline void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
 {
u32 addr;
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c 
b/drivers/net/ethernet/emulex/benet/be_main.c
index bd5cf7e..dc36f5c 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -576,6 +576,11 @@ static inline u16 be_get_tx_vlan_tag(struct be_adapter 
*adapter,
return vlan_tag;
 }
 
+static int be_vlan_tag_chk(struct be_adapter *adapter, struct sk_buff *skb)
+{
+   return vlan_tx_tag_present(skb) || adapter->pvid;
+}
+
 static void wrb_fill_hdr(struct be_adapter *adapter, struct be_eth_hdr_wrb 
*hdr,
struct sk_buff *skb, u32 wrb_cnt, u32 len)
 {
@@ -703,33 +708,56 @@ dma_err:
return 0;
 }
 
+static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
+struct sk_buff *skb)
+{
+   u16 vlan_tag = 0;
+
+   skb = skb_share_check(skb, GFP_ATOMIC);
+   if (unlikely(!skb))
+   return skb;
+
+   if (vlan_tx_tag_present(skb)) {
+   vlan_tag = be_get_tx_vlan_tag(adapter, skb);
+   __vlan_put_tag(skb, vlan_tag);
+   skb->vlan_tci = 0;
+   }
+
+   return skb;
+}
+
 static netdev_tx_t be_xmit(struct sk_buff *skb,
struct net_device *netdev)
 {
struct be_adapter *adapter = netdev_priv(netdev);
struct be_tx_obj *txo = &adapter->tx_obj[skb_get_queue_mapping(skb)];
struct be_queue_info *txq = &txo->q;
+   struct iphdr *ip = NULL;
u32 wrb_cnt = 0, copied = 0;
-   u32 start = txq->head;
+   u32 start = txq->head, eth_hdr_len;
bool dummy_wrb, stopped = false;
 
-   /* For vlan tagged pkts, BE
-* 1) calculates checksum even when CSO is not requested
-* 2) calculates checksum wrongly for padded pkt less than
-* 60 bytes long.
-* As a workaround disable TX vlan offloading in such cases.
+   eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
+   VLAN_ETH_HLEN : ETH_HLEN;
+
+   /* HW has a bug which considers padding bytes as legal
+* and modifies the IPv4 hdr's 'tot_len' field
 */
-   if (unlikely(vlan_tx_tag_present(skb) &&
-(skb->ip_summed != CHECKSUM_PARTIAL || skb->len <= 60))) {
-   skb = skb_share_check(skb, GFP_ATOMIC);
-   if (unlikely(!skb))
-   goto tx_drop;
+   if (skb->len <= 60 && be_vlan_tag_chk(adapter, skb) &&
+   is_ipv4_pkt(skb)) {
+   ip = (struct iphdr *)ip_hdr(skb);
+   pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
+   }
 
-   skb = __vlan_put_tag(skb, be_get_tx_vlan_tag(adapter, skb));
+   /* HW has a bug wherein it will calculate CSUM for VLAN
+* pkts even though it is disabled.
+* Manually insert VLAN in pkt.
+*/
+   if (skb->ip_summed != CHECKSUM_PARTIAL &&
+   be_vlan_tag_chk(adapter, skb)) {
+   skb = be_insert_vlan_in_pkt(adapter, skb);
if (unlikely(!skb))
goto tx_drop;
-
-   skb->vlan_tci = 0;
}
 
wrb_cnt = wrb_cnt_for_skb(adapter, skb, &dummy_wrb);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 39/81] virtio_console: Don't access uninitialized data.

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: =?UTF-8?q?Sjur=20Br=C3=A6ndeland?= 

commit aded024a12b32fc1ed9a80639681daae2d07ec25 upstream.

Don't access uninitialized work-queue when removing device.
The work queue is initialized only if the device is multi-queue.
So don't call cancel_work unless this is a multi-queue device.

This fixes the following panic:

Kernel panic - not syncing: BUG!
Call Trace:
62031b28:  [<6026085d>] panic+0x16b/0x2d3
62031b30:  [<6004ef5e>] flush_work+0x0/0x1d7
62031b60:  [<602606f2>] panic+0x0/0x2d3
62031b68:  [<600333b0>] memcpy+0x0/0x140
62031b80:  [<6002d58a>] unblock_signals+0x0/0x84
62031ba0:  [<602609c5>] printk+0x0/0xa0
62031bd8:  [<60264e51>] __mutex_unlock_slowpath+0x13d/0x148
62031c10:  [<6004ef5e>] flush_work+0x0/0x1d7
62031c18:  [<60050234>] try_to_grab_pending+0x0/0x17e
62031c38:  [<6004e984>] get_work_gcwq+0x71/0x8f
62031c48:  [<60050539>] __cancel_work_timer+0x5b/0x115
62031c78:  [<628acc85>] unplug_port+0x0/0x191 [virtio_console]
62031c98:  [<6005061c>] cancel_work_sync+0x12/0x14
62031ca8:  [<628ace96>] virtcons_remove+0x80/0x15c [virtio_console]
62031ce8:  [<628191de>] virtio_dev_remove+0x1e/0x7e [virtio]
62031d08:  [<601cf242>] __device_release_driver+0x75/0xe4
62031d28:  [<601cf2dd>] device_release_driver+0x2c/0x40
62031d48:  [<601ce0dd>] driver_unbind+0x7d/0xc6
62031d88:  [<601cd5d9>] drv_attr_store+0x27/0x29
62031d98:  [<60115f61>] sysfs_write_file+0x100/0x14d
62031df8:  [<600b737d>] vfs_write+0xcb/0x184
62031e08:  [<600b58b8>] filp_close+0x88/0x94
62031e38:  [<600b7686>] sys_write+0x59/0x88
62031e88:  [<6001ced1>] handle_syscall+0x5d/0x80
62031ea8:  [<60030a74>] userspace+0x405/0x531
62031f08:  [<600d32cc>] sys_dup+0x0/0x5e
62031f28:  [<601b11d6>] strcpy+0x0/0x18
62031f38:  [<600be46c>] do_execve+0x10/0x12
62031f48:  [<600184c7>] run_init_process+0x43/0x45
62031fd8:  [<60019a91>] new_thread_handler+0xba/0xbc

Signed-off-by: Sjur Brændeland 
Signed-off-by: Rusty Russell 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/char/virtio_console.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index cdf2f54..f77e341 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1808,7 +1808,8 @@ static void virtcons_remove(struct virtio_device *vdev)
/* Disable interrupts for vqs */
vdev->config->reset(vdev);
/* Finish up work that's lined up */
-   cancel_work_sync(&portdev->control_work);
+   if (use_multiport(portdev))
+   cancel_work_sync(&portdev->control_work);
 
list_for_each_entry_safe(port, port2, &portdev->ports, list)
unplug_port(port);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 37/81] drivers/rtc/rtc-isl1208.c: call rtc_update_irq() from the alarm irq handler

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Jan Luebbe 

commit 72fca4a4b32dc778b5b885c3498700e42b610d49 upstream.

Previously the alarm event was not propagated into the RTC subsystem.
By adding a call to rtc_update_irq, this fixes a timeout problem with
the hwclock utility.

Signed-off-by: Jan Luebbe 
Cc: Alessandro Zummo 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/rtc/rtc-isl1208.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index dd2aeee..8f8c8ae 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -494,6 +494,7 @@ isl1208_rtc_interrupt(int irq, void *data)
 {
unsigned long timeout = jiffies + msecs_to_jiffies(1000);
struct i2c_client *client = data;
+   struct rtc_device *rtc = i2c_get_clientdata(client);
int handled = 0, sr, err;
 
/*
@@ -516,6 +517,8 @@ isl1208_rtc_interrupt(int irq, void *data)
if (sr & ISL1208_REG_SR_ALM) {
dev_dbg(&client->dev, "alarm!\n");
 
+   rtc_update_irq(rtc, 1, RTC_IRQF | RTC_AF);
+
/* Clear the alarm */
sr &= ~ISL1208_REG_SR_ALM;
sr = i2c_smbus_write_byte_data(client, ISL1208_REG_SR, sr);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 33/81] USB: storage: optimize to match the Huawei USB storage devices and support new switch command

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: fangxiaozhi 

commit 200e0d994d9d1919b28c87f1a5fb99a8e13b8a0f upstream.

1. Optimize the match rules with new macro for Huawei USB storage devices,
   to avoid loading the USB storage driver for the modem interface
   with Huawei devices.
2. Add support for a new switch command for new Huawei USB dongles.

Signed-off-by: fangxiaozhi 
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/usb/storage/initializers.c |   76 -
 drivers/usb/storage/initializers.h |4 +-
 drivers/usb/storage/unusual_devs.h |  329 +---
 3 files changed, 78 insertions(+), 331 deletions(-)

diff --git a/drivers/usb/storage/initializers.c 
b/drivers/usb/storage/initializers.c
index 105d900..16b0bf0 100644
--- a/drivers/usb/storage/initializers.c
+++ b/drivers/usb/storage/initializers.c
@@ -92,8 +92,8 @@ int usb_stor_ucr61s2b_init(struct us_data *us)
return 0;
 }
 
-/* This places the HUAWEI E220 devices in multi-port mode */
-int usb_stor_huawei_e220_init(struct us_data *us)
+/* This places the HUAWEI usb dongles in multi-port mode */
+static int usb_stor_huawei_feature_init(struct us_data *us)
 {
int result;
 
@@ -104,3 +104,75 @@ int usb_stor_huawei_e220_init(struct us_data *us)
US_DEBUGP("Huawei mode set result is %d\n", result);
return 0;
 }
+
+/*
+ * It will send a scsi switch command called rewind' to huawei dongle.
+ * When the dongle receives this command at the first time,
+ * it will reboot immediately. After rebooted, it will ignore this command.
+ * So it is  unnecessary to read its response.
+ */
+static int usb_stor_huawei_scsi_init(struct us_data *us)
+{
+   int result = 0;
+   int act_len = 0;
+   struct bulk_cb_wrap *bcbw = (struct bulk_cb_wrap *) us->iobuf;
+   char rewind_cmd[] = {0x11, 0x06, 0x20, 0x00, 0x00, 0x01, 0x01, 0x00,
+   0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+   bcbw->Signature = cpu_to_le32(US_BULK_CB_SIGN);
+   bcbw->Tag = 0;
+   bcbw->DataTransferLength = 0;
+   bcbw->Flags = bcbw->Lun = 0;
+   bcbw->Length = sizeof(rewind_cmd);
+   memset(bcbw->CDB, 0, sizeof(bcbw->CDB));
+   memcpy(bcbw->CDB, rewind_cmd, sizeof(rewind_cmd));
+
+   result = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, bcbw,
+   US_BULK_CB_WRAP_LEN, _len);
+   US_DEBUGP("transfer actual length=%d, result=%d\n", act_len, result);
+   return result;
+}
+
+/*
+ * It tries to find the supported Huawei USB dongles.
+ * In Huawei, they assign the following product IDs
+ * for all of their mobile broadband dongles,
+ * including the new dongles in the future.
+ * So if the product ID is not included in this list,
+ * it means it is not Huawei's mobile broadband dongles.
+ */
+static int usb_stor_huawei_dongles_pid(struct us_data *us)
+{
+   struct usb_interface_descriptor *idesc;
+   int idProduct;
+
+   idesc = >pusb_intf->cur_altsetting->desc;
+   idProduct = us->pusb_dev->descriptor.idProduct;
+   /* The first port is CDROM,
+* means the dongle in the single port mode,
+* and a switch command is required to be sent. */
+   if (idesc && idesc->bInterfaceNumber == 0) {
+   if ((idProduct == 0x1001)
+   || (idProduct == 0x1003)
+   || (idProduct == 0x1004)
+   || (idProduct >= 0x1401 && idProduct <= 0x1500)
+   || (idProduct >= 0x1505 && idProduct <= 0x1600)
+   || (idProduct >= 0x1c02 && idProduct <= 0x2202)) {
+   return 1;
+   }
+   }
+   return 0;
+}
+
+int usb_stor_huawei_init(struct us_data *us)
+{
+   int result = 0;
+
+   if (usb_stor_huawei_dongles_pid(us)) {
+   if (us->pusb_dev->descriptor.idProduct >= 0x1446)
+   result = usb_stor_huawei_scsi_init(us);
+   else
+   result = usb_stor_huawei_feature_init(us);
+   }
+   return result;
+}
diff --git a/drivers/usb/storage/initializers.h 
b/drivers/usb/storage/initializers.h
index 529327f..5376d4f 100644
--- a/drivers/usb/storage/initializers.h
+++ b/drivers/usb/storage/initializers.h
@@ -46,5 +46,5 @@ int usb_stor_euscsi_init(struct us_data *us);
  * flash reader */
 int usb_stor_ucr61s2b_init(struct us_data *us);
 
-/* This places the HUAWEI E220 devices in multi-port mode */
-int usb_stor_huawei_e220_init(struct us_data *us);
+/* This places the HUAWEI usb dongles in multi-port mode */
+int usb_stor_huawei_init(struct us_data *us);
diff --git a/drivers/usb/storage/unusual_devs.h 
b/drivers/usb/storage/unusual_devs.h
index dd2c64f..fff5d10 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1515,335 +1515,10 @@ UNUSUAL_DEV(  0x1210,

[PATCH 35/81] nilfs2: fix fix very long mount time issue

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Vyacheslav Dubeyko 

commit a9bae189542e71f91e61a4428adf6e5a7dfe8063 upstream.

There exists a situation when GC can work in background alone without
any other filesystem activity during significant time.

The nilfs_clean_segments() method calls nilfs_segctor_construct() that
updates superblocks in the case of NILFS_SC_SUPER_ROOT and
THE_NILFS_DISCONTINUED flags are set.  But when GC is working alone the
nilfs_clean_segments() is called with unset THE_NILFS_DISCONTINUED flag.
As a result, the update of superblocks doesn't occur all this time
and in the case of SPOR superblocks keep very old values of last super
root placement.

SYMPTOMS:

Trying to mount a NILFS2 volume after SPOR in such environment ends with
very long mounting time (it can achieve about several hours in some
cases).

REPRODUCING PATH:

1. It needs to use external USB HDD, disable automount and doesn't
   make any additional filesystem activity on the NILFS2 volume.

2. Generate temporary file with size about 100 - 500 GB (for example,
   dd if=/dev/zero of= bs=1073741824 count=200).  The size of
   file defines duration of GC working.

3. Then it needs to delete file.

4. Start GC manually by means of command "nilfs-clean -p 0".  When you
   start GC by means of such way then, at the end, superblocks is updated
   by once.  So, for simulation of SPOR, it needs to wait sometime (15 -
   40 minutes) and simply switch off USB HDD manually.

5. Switch on USB HDD again and try to mount NILFS2 volume.  As a
   result, NILFS2 volume will mount during very long time.

REPRODUCIBILITY: 100%

FIX:

This patch adds checking that superblocks need to update and set
THE_NILFS_DISCONTINUED flag before nilfs_clean_segments() call.

Reported-by: Sergey Alexandrov 
Signed-off-by: Vyacheslav Dubeyko 
Tested-by: Vyacheslav Dubeyko 
Acked-by: Ryusuke Konishi 
Tested-by: Ryusuke Konishi 
Signed-off-by: Andrew Morton 
Signed-off-by: Linus Torvalds 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 fs/nilfs2/ioctl.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 0b6387c..29990c9 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -666,8 +666,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, 
struct file *filp,
if (ret < 0)
printk(KERN_ERR "NILFS: GC failed during preparation: "
"cannot read source blocks: err=%d\n", ret);
-   else
+   else {
+   if (nilfs_sb_need_update(nilfs))
+   set_nilfs_discontinued(nilfs);
ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
+   }
 
nilfs_remove_all_gcinodes(nilfs);
clear_nilfs_gc_running(nilfs);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 31/81] USB: ftdi_sio: add Zolix FTDI PID

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: =?UTF-8?q?Petr=20Kub=C3=A1nek?= 

commit 0ba3b2ccc72b3df5c305d61f59d93ab0f0e87991 upstream.

Add support for Zolix Omni 1509 monochromator custom USB-RS232 converter.

Signed-off-by: Petr Kubánek 
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/usb/serial/ftdi_sio.c |1 +
 drivers/usb/serial/ftdi_sio_ids.h |5 +
 2 files changed, 6 insertions(+)

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 031b23c..4c1fa24 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -677,6 +677,7 @@ static struct usb_device_id id_table_combined [] = {
{ USB_DEVICE(FTDI_VID, XSENS_CONVERTER_5_PID) },
{ USB_DEVICE(FTDI_VID, XSENS_CONVERTER_6_PID) },
{ USB_DEVICE(FTDI_VID, XSENS_CONVERTER_7_PID) },
+   { USB_DEVICE(FTDI_VID, FTDI_OMNI1509) },
{ USB_DEVICE(MOBILITY_VID, MOBILITY_USB_SERIAL_PID) },
{ USB_DEVICE(FTDI_VID, FTDI_ACTIVE_ROBOTS_PID) },
{ USB_DEVICE(FTDI_VID, FTDI_MHAM_KW_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h 
b/drivers/usb/serial/ftdi_sio_ids.h
index 7c89cfc..9d359e18 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -147,6 +147,11 @@
 #define XSENS_CONVERTER_6_PID  0xD38E
 #define XSENS_CONVERTER_7_PID  0xD38F
 
+/**
+ * Zolix (www.zolix.com.cb) product ids
+ */
+#define FTDI_OMNI1509  0xD491  /* Omni1509 embedded USB-serial 
*/
+
 /*
  * NDI (www.ndigital.com) product ids
  */
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/4] dcache: Don't take unnecessary lock in d_count update

2013-02-19 Thread Waiman Long

The current code takes the dentry's d_lock lock whenever the d_count
reference count is being updated. In reality, nothing big really
happens until d_count goes to 0 in dput(). So it is not necessary to
take the lock if the reference count won't go to 0.

Without using a lock, multiple threads may update d_count
simultaneously.  Therefore, atomic instructions must be used to
ensure consistency except in shrink_dcache_for_umount*() where the
whole superblock is being dismounted and locking is not needed.

The worst case scenarios are:

1. d_lock taken in dput with d_count = 2 in one thread and another
   thread comes in to atomically decrement d_count without taking
   the lock. This may result in a d_count of 0 with no deleting
   action taken.

2. d_lock taken in dput with d_count = 1 in one thread and another
   thread comes in to atomically increment d_count without taking
   the lock. This may result in the dentry in the deleted state while
   having a d_count of 1.

Without taking a lock, we need to make sure the decrementing or
incrementing action should not be taken while other threads are
updating d_count simultaneously. This can be done by using the
atomic cmpxchg instruction which will fail if the underlying value
is changed.  If the lock is taken, it should be safe to use a simpler
atomic increment or decrement instruction.

To make sure that the above worst case scenarios will not happen,
the dget() function must take the lock if d_count <= 1. Similarly,
the dput() function must take the lock if d_count <= 2. The cmpxchg()
call to update d_count will be tried twice before falling back to
using the lock as there is a fairly good chance that the cmpxchg()
may fail in a busy situation.

Finally, the CPU must have an instruction-level cmpxchg instruction
or the emulated cmpxchg() function may be too expensive to
use. Therefore, the above mentioned changes will only be applied if
the __HAVE_ARCH_CMPXCHG flag is set. Most of the major architectures
supported by Linux have this flag set with the notable exception
of ARM.

As for the performance of the updated reference counting code, it
all depends on whether the cmpxchg instruction is used or not. The
original code has 2 atomic instructions to lock and unlock the
spinlock. The new code path has either 1 atomic cmpxchg instruction
or 3 atomic instructions if the lock has to be taken. Depending on
how frequently the cmpxchg instruction is used (d_count > 1 or 2),
the new code can be faster or slower than the original one.

Signed-off-by: Waiman Long 
---
 fs/dcache.c|   23 ++
 fs/namei.c |2 +-
 include/linux/dcache.h |  105 ++-
 3 files changed, 117 insertions(+), 13 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 19153a0..20cc789 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -484,7 +484,7 @@ relock:
}
 
if (ref)
-   dentry->d_count--;
+   dcount_dec(dentry);
/*
 * if dentry was on the d_lru list delete it from there.
 * inform the fs via d_prune that this dentry is about to be
@@ -530,10 +530,13 @@ void dput(struct dentry *dentry)
 repeat:
if (dentry->d_count == 1)
might_sleep();
+   if (dcount_dec_cmpxchg(dentry))
+   return;
+
spin_lock(>d_lock);
BUG_ON(!dentry->d_count);
if (dentry->d_count > 1) {
-   dentry->d_count--;
+   dcount_dec(dentry);
spin_unlock(>d_lock);
return;
}
@@ -550,7 +553,7 @@ repeat:
dentry->d_flags |= DCACHE_REFERENCED;
dentry_lru_add(dentry);
 
-   dentry->d_count--;
+   dcount_dec(dentry);
spin_unlock(>d_lock);
return;
 
@@ -621,11 +624,13 @@ EXPORT_SYMBOL(d_invalidate);
 /* This must be called with d_lock held */
 static inline void __dget_dlock(struct dentry *dentry)
 {
-   dentry->d_count++;
+   dcount_inc(dentry);
 }
 
 static inline void __dget(struct dentry *dentry)
 {
+   if (dcount_inc_cmpxchg(dentry))
+   return;
spin_lock(>d_lock);
__dget_dlock(dentry);
spin_unlock(>d_lock);
@@ -650,7 +655,7 @@ repeat:
}
rcu_read_unlock();
BUG_ON(!ret->d_count);
-   ret->d_count++;
+   dcount_inc(ret);
spin_unlock(>d_lock);
return ret;
 }
@@ -782,7 +787,7 @@ static void try_prune_one_dentry(struct dentry *dentry)
while (dentry) {
spin_lock(>d_lock);
if (dentry->d_count > 1) {
-   dentry->d_count--;
+   dcount_dec(dentry);
spin_unlock(>d_lock);
return;
}
@@ -1980,7 +1985,7 @@ struct dentry *__d_lookup(struct dentry *parent, struct 
qstr *name)
goto next;
}
 
-   dentry->d_count++;
+   dcount_inc(dentry);

[PATCH 0/4] dcache: make Oracle more scalable on large systems

2013-02-19 Thread Waiman Long

It was found that the Oracle database software issues a lot of calls
to the seq_path() kernel function which translates a (dentry, mnt)
pair to an absolute path. The seq_path() function will eventually
take the following two locks:

1. dentry->d_lock (spinlock) from dget()/dput()
2. rename_lock(seqlock)  from d_path()

With a lot of database activities, the spinning of the 2 locks takes
a major portion of the kernel time and slows down the database software.

This set of patches were designed to minimize the locking overhead of
this code path and improve Oracle performance on systems with a large
number of CPUs.

The current kernel takes the dentry->d_lock lock whenever it wants to
increment or decrement the d_count reference count. However, nothing
big will really happen until the reference count goes all the way to 1
or 0.  Actually, we don't need to take the lock when reference count
is bigger than 1. Instead, atomic cmpxchg() function can be used to
increment or decrement the count in these situations. For safety,
other reference count update operations have to be changed to use
atomic instruction as well.

The rename_lock is a sequence lock. The d_path() function takes the
writer lock because it needs to traverse different dentries through
pointers to get the full path name. Hence it can't tolerate changes
in those pointers. But taking the writer lock also prevents multiple
d_path() calls from proceeding concurrently.

A solution is to introduce a new lock type where there will be a
second type of reader which can block the writers - the sequence
read/write lock (seqrwlock). The d_path() and related functions will
then be changed to take the reader lock instead of the writer lock.
This will allow multiple d_path() operations to proceed concurrently.

Performance testing was done using the Oracle SLOB benchmark with the
latest 11.2.0.3 release of Oracle on a 3.8-rc3 kernel. Database files
were put in a tmpfs partition to minimize physical I/O overhead. Huge
pages were used with 30GB of SGA. The test machine was an 8-socket,
80-core HP Proliant DL980 with 1TB of memory and hyperthreading off.
The tests were run 5 times and the averages were taken.

The patch only has a slight positive impact on logical read
performance. The impact on write (redo size) performance, however,
is much greater. The redo size is a proxy of how much database write
has happened. So a larger value means a higher transaction rate.

+-+-+-++--+
| Readers | Writers | Redo Size   | Redo Size  | % Change |
| | | w/o patch   | with patch |  |
| | |   (MB/s)|   (MB/s)   |  |
+-+-+-++--+
|8|   64|802  |903 |  12.6%   |
|   32|   64|798  |892 |  11.8%   |
|   80|   64|658  |714 |   8.5%   |
|  128|   64|748  |907 |  21.3%   |
+-+-+-++--+

The table below shows the %system and %user times reported by Oracle's
AWR tool as well as the %time spent in the spinlocking code in kernel
with (inside parenthesis) and without (outside parenthesis) the patch.

+-+-++++
| Readers | Writers |  % System  |   % User   | % spinlock |
+-+-++++
|   32|0|  0.3(0.3)  | 39.0(39.0) |  6.3(17.4) |
|   80|0|  0.7(0.7)  | 97.4(94.2) |  2.9(31.7) |
|  128|0|  1.4(1.4)  | 34.4(32.2) | 43.5(62.2) |
|   32|   64|  3.8(3.5)  | 55.4(53.6) |  9.1(35.0) |
|   80|   64|  3.0(2.9)  | 94.4(93.9) |  4.5(38.8) |
|  128|   64|  4.7(4.3)  | 38.2(40.3) | 34.8(58.7) |
+-+-++++

The following tests with multiple threads were also run on kernels with
and without the patch on both DL980 and a PC with 4-core i5 processor:

1. find $HOME -size 0b
2. cat /proc/*/maps /proc/*/numa_maps
3. git diff

For both the find-size and cat-maps tests, the performance difference
with hot cache was within a few percentage points and hence within
the margin of error. Single-thread performance was slightly worse,
but multithread performance was generally a bit better. Apparently,
reference count update isn't a significant factor in those tests. Their
perf traces indicate that there was less spinlock contention in
functions like dput(), but the function itself ran a little bit longer
on average.

The git-diff test showed no difference in performance. There is a
slight increase in system time compensated by a slight decrease in
user time.

Signed-off-by: Waiman Long 

Waiman Long (4):
  dcache: Don't take unnecessary lock in d_count update
  dcache: introduce a new sequence read/write lock type
  dcache: change rename_lock to a sequence read/write lock
  dcache: don't need to take d_lock in prepend_path()

Re: [PATCH] ACPI, PCI: Get PRT entry during acpi_pci_enable_irq()

2013-02-19 Thread Bjorn Helgaas

On Fri, Feb 15, 2013 at 6:37 PM, Yinghai Lu  wrote:
> On Fri, Feb 15, 2013 at 5:26 PM, Yinghai Lu  wrote:
>> On Fri, Feb 15, 2013 at 4:39 PM, Bjorn Helgaas  wrote:
>>> On Thu, Feb 14, 2013 at 5:50 PM, Yinghai Lu  wrote:
 On Tue, Feb 12, 2013 at 12:22 PM, Rafael J. Wysocki  wrote:
> On Tuesday, February 12, 2013 11:11:23 AM Yinghai Lu wrote:
>> Peter Hurley found "irq 18 nobody cared" with pci-next, and dmesg has
>>
>> [8.983246] pci :00:1e.0: can't derive routing for PCI INT A
>> [8.983600] snd_ctxfi :09:02.0: PCI INT A: no GSI - using ISA IRQ 
>> 5
>>
>> bisect to
>> | commit 4f535093cf8f6da8cfda7c36c2c1ecd2e9586ee4
>> | PCI: Put pci_dev in device tree as early as possible
>>
>> It turns out we need to call acpi_pci_irq_add_prt() after the pci bridges
>> are scanned.
>>
>> Bjorn said:
>>  The bus number binding means acpi_pci_irq_add_prt() has to happen
>>  after enumerating everything below a bridge, and it will prevent us
>>  from doing any bus number reassignment for hotplug.
>>
>>  I think we should remove the bus numbers from the cached _PRT (or
>>  maybe even remove the _PRT caching completely).  When we enable a PCI
>>  device's IRQ, we should search up the PCI device tree looking for a
>>  _PRT associated with each node, and applying normal PCI bridge
>>  swizzling when we don't find a _PRT.  I think this can be done without
>>  using PCI bus numbers at all.
>>
>> So here we try to remove _PRT caching completely.
>>
>> -v2: check !handle early.
>>
>> Reported-and-tested-by: Peter Hurley 
>> Suggested-by: Bjorn Helgaas 
>> Signed-off-by: Yinghai Lu 
>
> Acked-by: Rafael J. Wysocki 
>
>> ---
>>  drivers/acpi/pci_irq.c  |   95 
>> +---
>>  drivers/acpi/pci_root.c |   18 
>>  drivers/pci/pci-acpi.c  |   24 ---
>>  include/acpi/acpi_drivers.h |5 --
>>  4 files changed, 38 insertions(+), 104 deletions(-)

 Bjorn,

 Can you put this one into pci/next?
>>>
>>> I'm not sure what this patch is based on or what the best way to merge
>>> it is.  It doesn't apply cleanly to my next or
>>> pci/yinghai-root-bus-hotplug branches.
>>
>> My fault, that is based on pci/next + pm/linux-next
>>
>> linux-next removed
>> acpi_power_resource_(un)register_device ...
>>
>>>
>>> I did apply it manually on top of pci/yinghai-root-bus-hotplug to try
>>> it out, but we need to tweak the messages a little bit.
>>>
>>> Previously we printed "ACPI: PCI Interrupt Routing Table [%s._PRT]"
>>> once when loading it, which was fine.  Now we print it every time we
>>> look at a _PRT, which is too much because it isn't really adding any
>>> information.
>>>
>>> We also print "ACPI Exception: AE_NOT_FOUND, Evaluating _PRT
>>> [AE_NOT_FOUND] (20121018/pci_irq-259)" if we find ACPI nodes without
>>> _PRTs, which we shouldn't do, because that's a common and normal
>>> situation.
>>
>> Sure. Can you have separated patch to do that ?
>>
>> Or want me to resend the patch.
>
> Please check attached updated version that remove print out ...
>
> and it could be applied cleanly on top of pci/yinghai-root-bus-hotplug

Thanks, I applied this to pci/yinghai-root-bus-hotplug and merged it
into my next branch.

Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 25/81] drm/radeon: prevent crash in the ring space allocation

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Alex Deucher 

commit fd5d93a0015ce1a7db881382022b2fcdfdc61760 upstream.

If the requested number of DWs on the ring is larger than
the size of the ring itself, return an error.

In testing with large VM updates, we've seen crashes when we
try and allocate more space on the ring than the total size
of the ring without checking.

This prevents the crash but for large VM updates or bo moves
of very large buffers, we will need to break the transaction
down into multiple batches.  I have patches to use IBs for
the next kernel.

Signed-off-by: Alex Deucher 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/gpu/drm/radeon/radeon_ring.c |3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_ring.c 
b/drivers/gpu/drm/radeon/radeon_ring.c
index 7843b36..727ebfe 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -219,6 +219,9 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct 
radeon_ring *ring, unsi
 {
int r;
 
+   /* make sure we aren't trying to allocate more space than there is on 
the ring */
+   if (ndw > (ring->ring_size / 4))
+   return -ENOMEM;
/* Align requested size with padding so unlock_commit can
 * pad safely */
ndw = (ndw + ring->align_mask) & ~ring->align_mask;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 20/81] USB: EHCI: fix bug in scheduling periodic split transfers

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Alan Stern 

commit 3e619d04159be54b3daa0b7036b0ce9e067f4b5d upstream.

This patch (as1654) fixes a very old bug in ehci-hcd, connected with
scheduling of periodic split transfers.  The calculations for
full/low-speed bus usage are all carried out after the correction for
bit-stuffing has been applied, but the values in the max_tt_usecs
array assume it hasn't been.  The array should allow for allocation of
up to 90% of the bus capacity, which is 900 us, not 780 us.

The symptom caused by this bug is that any isochronous transfer to a
full-speed device with a maxpacket size larger than about 980 bytes is
always rejected with a -ENOSPC error.

Signed-off-by: Alan Stern 
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/usb/host/ehci-sched.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c
index 33182c6..ccc9f70 100644
--- a/drivers/usb/host/ehci-sched.c
+++ b/drivers/usb/host/ehci-sched.c
@@ -236,7 +236,7 @@ static inline unsigned char tt_start_uframe(struct ehci_hcd 
*ehci, __hc32 mask)
 }
 
 static const unsigned char
-max_tt_usecs[] = { 125, 125, 125, 125, 125, 125, 30, 0 };
+max_tt_usecs[] = { 125, 125, 125, 125, 125, 125, 125, 25 };
 
 /* carryover low/fullspeed bandwidth that crosses uframe boundries */
 static inline void carryover_tt_bandwidth(unsigned short tt_usecs[8])
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 18/81] drm/radeon: protect against div by 0 in backend setup

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Mikko Tiihonen 

commit f689e3acbd2e48cc4101e0af454193f81af4baaf upstream.

Make sure at least one RB is enabled in
r6xx_remap_render_backend() to avoid a division by
zero in some corner cases.

See:
https://bugzilla.redhat.com/show_bug.cgi?id=892233

Signed-off-by: Alex Deucher 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/gpu/drm/radeon/r600.c |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index bff6272..0c7476d 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1383,12 +1383,15 @@ u32 r6xx_remap_render_backend(struct radeon_device 
*rdev,
  u32 disabled_rb_mask)
 {
u32 rendering_pipe_num, rb_num_width, req_rb_num;
-   u32 pipe_rb_ratio, pipe_rb_remain;
+   u32 pipe_rb_ratio, pipe_rb_remain, tmp;
u32 data = 0, mask = 1 << (max_rb_num - 1);
unsigned i, j;
 
/* mask out the RBs that don't exist on that asic */
-   disabled_rb_mask |= (0xff << max_rb_num) & 0xff;
+   tmp = disabled_rb_mask | ((0xff << max_rb_num) & 0xff);
+   /* make sure at least one RB is available */
+   if ((tmp & 0xff) != 0xff)
+   disabled_rb_mask = tmp;
 
rendering_pipe_num = 1 << tiling_pipe_num;
req_rb_num = total_max_rb_num - r600_count_pipe_bits(disabled_rb_mask);
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v7 00/12] Tegra114 clockframework

2013-02-19 Thread Mike Turquette

Quoting Stephen Warren (2013-02-19 10:39:35)
> On 02/15/2013 05:36 AM, Peter De Schrijver wrote:
> > This is the seventh version of the Tegra114 clockframework. It is based on 
> > the
> > for-next branch of
> > git://git.kernel.org/pub/scm/linux/kernel/git/swarren/linux-tegra.git and
> > http://www.spinics.net/lists/arm-kernel/msg220452.html.
> 
> Mike,
> 
> I think it'd be prudent to take this series through the Tegra tree again
> for 3.10; patch 11 needs to go in as part of the series due to internal
> dependencies, and I'm slightly worried that other Tegra DT changes might
> conflict with that patch (if only for context). Taking this series (and
> any other Tegra clk driver changes for 3.10) through the Tegra tree
> could resolve that easily. Do you have a problem with that?
> 

I was thinking the same thing.  I'll make time to review these now that
3.8 is out.

> If that's OK, can you stage the dependency Peter mentioned:
> http://www.spinics.net/lists/arm-kernel/msg220452.html
> into a stable branch for me to merge, obviously after 3.9-rc1 is out.
> 

I'll take another look at that one.  I haven't merged it yet for some
reason but I've forgotten why.  Assuming nothing is wrong then I'll pull
it in towards 3.10.

Regards,
Mike

> Thanks.
> 
> Prashant, could you provide a review/ack for this series too.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 14/81] drm/radeon: add quirk for RV100 board

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Alex Deucher 

commit 9200ee4941a6e5d1ec5df88982243686882dff3f upstream.

vbios says external TMDS while the board is actually
internal TMDS.

fixes:
https://bugs.freedesktop.org/show_bug.cgi?id=60037

Signed-off-by: Alex Deucher 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/gpu/drm/radeon/radeon_combios.c |8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_combios.c 
b/drivers/gpu/drm/radeon/radeon_combios.c
index 558e5c0..7139341 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -2455,6 +2455,14 @@ bool radeon_get_legacy_connector_info_from_bios(struct 
drm_device *dev)
   1),
  
ATOM_DEVICE_CRT1_SUPPORT);
}
+   /* RV100 board with external TDMS bit mis-set.
+* Actually uses internal TMDS, clear the bit.
+*/
+   if (dev->pdev->device == 0x5159 &&
+   dev->pdev->subsystem_vendor == 0x1014 &&
+   dev->pdev->subsystem_device == 0x029A) {
+   tmp &= ~(1 << 4);
+   }
if ((tmp >> 4) & 0x1) {
devices |= ATOM_DEVICE_DFP2_SUPPORT;
radeon_add_legacy_encoder(dev,
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 08/81] USB: EHCI: fix timer bug affecting port resume

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Alan Stern 

commit ee74290b7853db9d5fd64db70e5c175241c59fba upstream.

This patch (as1652) fixes a long-standing bug in ehci-hcd.  The driver
relies on status polls to know when to stop port-resume signalling.
It uses the root-hub status timer to schedule these status polls.  But
when the driver for the root hub is resumed, the timer is rescheduled
to go off immediately -- before the port is ready.  When this happens
the timer does not get re-enabled, which prevents the port resume from
finishing until some other event occurs.

The symptom is that when a new device is plugged in, it doesn't get
recognized or enumerated until lsusb is run or something else happens.

The solution is to re-enable the root-hub status timer after every
status poll while a port resume is in progress.

This bug hasn't surfaced before now because we never used to try to
suspend the root hub in the middle of a port resume (except by
coincidence).

Signed-off-by: Alan Stern 
Reported-and-tested-by: Norbert Preining 
Tested-by: Ming Lei 
Signed-off-by: Greg Kroah-Hartman 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/usb/host/ehci-hub.c |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index fc9e7cc..349d3fe 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -613,7 +613,11 @@ ehci_hub_status_data (struct usb_hcd *hcd, char *buf)
status = STS_PCD;
}
}
-   /* FIXME autosuspend idle root hubs */
+
+   /* If a resume is in progress, make sure it can finish */
+   if (ehci->resuming_ports)
+   mod_timer(>rh_timer, jiffies + msecs_to_jiffies(25));
+
spin_unlock_irqrestore (>lock, flags);
return status ? retval : 0;
 }
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 04/81] USB: XHCI: fix memory leak of URB-private data

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Alan Stern 

commit 48c3375c5f69b1c2ef3d1051a0009cb9bce0ce24 upstream.

This patch (as1640) fixes a memory leak in xhci-hcd.  The urb_priv
data structure isn't always deallocated in the handle_tx_event()
routine for non-control transfers.  The patch adds a kfree() call so
that all paths end up freeing the memory properly.

This patch should be backported to kernels as old as 2.6.36, that
contain the commit 8e51adccd4c4b9ffcd509d7f2afce0a906139f75 "USB: xHCI:
Introduce urb_priv structure"

Signed-off-by: Alan Stern 
Signed-off-by: Sarah Sharp 
Reported-and-tested-by: Martin Mokrejs 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/usb/host/xhci-ring.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 5530c76..6f3043f 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2587,6 +2587,8 @@ cleanup:
(trb_comp_code != COMP_STALL &&
trb_comp_code != COMP_BABBLE))
xhci_urb_free_priv(xhci, urb_priv);
+   else
+   kfree(urb_priv);
 
usb_hcd_unlink_urb_from_ep(bus_to_hcd(urb->dev->bus), 
urb);
if ((urb->actual_length != urb->transfer_buffer_length 
&&
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 06/81] usb: Using correct way to clear usb3.0 device's remote wakeup feature.

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Lan Tianyu 

commit 54a3ac0c9e5b7213daa358ce74d154352657353a upstream.

Usb3.0 device defines function remote wakeup which is only for interface
recipient rather than device recipient. This is different from usb2.0 device's
remote wakeup feature which is defined for device recipient. According to usb3.0
spec 9.4.5, the function remote wakeup can be modified by the SetFeature()
requests using the FUNCTION_SUSPEND feature selector. This patch is to use
correct way to disable usb3.0 device's function remote wakeup after suspend
error and resuming.

This should be backported to kernels as old as 3.4, that contain the
commit 623bef9e03a60adc623b09673297ca7a1cdfb367 "USB/xhci: Enable remote
wakeup for USB3 devices."

Signed-off-by: Lan Tianyu 
Signed-off-by: Sarah Sharp 
[ herton: include/uapi/linux/usb/ch9.h -> include/linux/usb/ch9.h,
  adjust context ]
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/usb/core/hub.c  |   70 +++
 include/linux/usb/ch9.h |6 
 2 files changed, 58 insertions(+), 18 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 92c0229..c9590c6 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2685,6 +2685,23 @@ static int check_port_resume_type(struct usb_device 
*udev,
 }
 
 #ifdef CONFIG_USB_SUSPEND
+/*
+ * usb_disable_function_remotewakeup - disable usb3.0
+ * device's function remote wakeup
+ * @udev: target device
+ *
+ * Assume there's only one function on the USB 3.0
+ * device and disable remote wake for the first
+ * interface. FIXME if the interface association
+ * descriptor shows there's more than one function.
+ */
+static int usb_disable_function_remotewakeup(struct usb_device *udev)
+{
+   return usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
+   USB_REQ_CLEAR_FEATURE, USB_RECIP_INTERFACE,
+   USB_INTRF_FUNC_SUSPEND, 0, NULL, 0,
+   USB_CTRL_SET_TIMEOUT);
+}
 
 /*
  * usb_port_suspend - suspend a usb device's upstream port
@@ -2797,12 +2814,19 @@ int usb_port_suspend(struct usb_device *udev, 
pm_message_t msg)
dev_dbg(hub->intfdev, "can't suspend port %d, status %d\n",
port1, status);
/* paranoia:  "should not happen" */
-   if (udev->do_remote_wakeup)
-   (void) usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
-   USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE,
-   USB_DEVICE_REMOTE_WAKEUP, 0,
-   NULL, 0,
-   USB_CTRL_SET_TIMEOUT);
+   if (udev->do_remote_wakeup) {
+   if (!hub_is_superspeed(hub->hdev)) {
+   (void) usb_control_msg(udev,
+   usb_sndctrlpipe(udev, 0),
+   USB_REQ_CLEAR_FEATURE,
+   USB_RECIP_DEVICE,
+   USB_DEVICE_REMOTE_WAKEUP, 0,
+   NULL, 0,
+   USB_CTRL_SET_TIMEOUT);
+   } else
+   (void) usb_disable_function_remotewakeup(udev);
+
+   }
 
/* Try to enable USB2 hardware LPM again */
if (udev->usb2_hw_lpm_capable == 1)
@@ -2892,20 +2916,30 @@ static int finish_port_resume(struct usb_device *udev)
 * udev->reset_resume
 */
} else if (udev->actconfig && !udev->reset_resume) {
-   le16_to_cpus();
-   if (devstatus & (1 << USB_DEVICE_REMOTE_WAKEUP)) {
-   status = usb_control_msg(udev,
-   usb_sndctrlpipe(udev, 0),
-   USB_REQ_CLEAR_FEATURE,
+   if (!hub_is_superspeed(udev->parent)) {
+   le16_to_cpus();
+   if (devstatus & (1 << USB_DEVICE_REMOTE_WAKEUP))
+   status = usb_control_msg(udev,
+   usb_sndctrlpipe(udev, 0),
+   USB_REQ_CLEAR_FEATURE,
USB_RECIP_DEVICE,
-   USB_DEVICE_REMOTE_WAKEUP, 0,
-   NULL, 0,
-   USB_CTRL_SET_TIMEOUT);
-   if (status)
-   dev_dbg(>dev,
-   "disable remote wakeup, status %d\n",
-   status);
+

[PATCH 02/81] xhci: Fix TD size for isochronous URBs.

2013-02-19 Thread Herton Ronaldo Krzesinski

3.5.7.6 -stable review patch.  If anyone has any objections, please let me know.

--

From: Sarah Sharp 

commit f18f8ed2a9adc41c2d9294b85b6af115829d2af1 upstream.

To calculate the TD size for a particular TRB in an isoc TD, we need
to know the endpoint's max packet size.  Isochronous endpoints also encode
the number of additional service opportunities in their wMaxPacketSize
field.  The TD size calculation did not mask off those bits before using
the field.  This resulted in incorrect TD size information for
isochronous TRBs when an URB frame buffer crossed a 64KB boundary.

For example:
 - an isoc endpoint has 2 additional service opportunities and
   a max packet size of 1020 bytes
 - a frame transfer buffer contains 3060 bytes
 - one frame buffer crosses a 64KB boundary, and must be split into
   one 1276 byte TRB, and one 1784 byte TRB.

The TD size is the number of packets that remain to be transferred
for a TD after processing all the max packet sized packets in the
current TRB and all previous TRBs.

For this TD, the number of packets to be transferred is (3060 / 1020),
or 3.  The first TRB contains 1276 bytes, which means it contains one
full packet, and a 256 byte remainder.  After processing all the max
packet-sized packets in the first TRB, the host will have 2 packets left
to transfer.

The old code would calculate the TD size for the first TRB as:

total packet count = DIV_ROUND_UP (TD length / endpoint wMaxPacketSize)
total packet count - (first TRB length / endpoint wMaxPacketSize)

The math should have been:

total packet count = DIV_ROUND_UP (3060 / 1020) = 3
3 - (1276 / 1020) = 2

Since the old code didn't mask off the additional service interval bits
from the wMaxPacketSize field, the math ended up as

total packet count = DIV_ROUND_UP (3060 / 5116) = 1
1 - (1276 / 5116) = 1

Fix this by masking off the number of additional service opportunities
in the wMaxPacketSize field.

This patch should be backported to stable kernels as old as 3.0, that
contain the commit 4da6e6f247a2601ab9f1e63424e4d944ed4124f3 "xhci 1.0:
Update TD size field format."  It may not apply well to kernels older
than 3.2 because of commit 29cc88979a8818cd8c5019426e945aed118b400e
"USB: use usb_endpoint_maxp() instead of le16_to_cpu()".

Signed-off-by: Sarah Sharp 
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/usb/host/xhci-ring.c |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 3bf13f8..3dc8e3f 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3106,7 +3106,7 @@ static u32 xhci_v1_0_td_remainder(int running_total, int 
trb_buff_len,
 * running_total.
 */
packets_transferred = (running_total + trb_buff_len) /
-   usb_endpoint_maxp(>ep->desc);
+   GET_MAX_PACKET(usb_endpoint_maxp(>ep->desc));
 
if ((total_packet_count - packets_transferred) > 31)
return 31 << 17;
@@ -3640,7 +3640,8 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, 
gfp_t mem_flags,
td_len = urb->iso_frame_desc[i].length;
td_remain_len = td_len;
total_packet_count = DIV_ROUND_UP(td_len,
-   usb_endpoint_maxp(>ep->desc));
+   GET_MAX_PACKET(
+   usb_endpoint_maxp(>ep->desc)));
/* A zero-length transfer still involves at least one packet. */
if (total_packet_count == 0)
total_packet_count++;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH LINUX v4] xen: event channel arrays are xen_ulong_t and not unsigned long

2013-02-19 Thread Konrad Rzeszutek Wilk

On Tue, Feb 19, 2013 at 06:12:35PM +0000, Stefano Stabellini wrote:
> On Tue, 19 Feb 2013, Ian Campbell wrote:
> > On ARM we want these to be the same size on 32- and 64-bit.
> > 
> > This is an ABI change on ARM. X86 does not change.
> > 
> > Signed-off-by: Ian Campbell 
> > Cc: Jan Beulich 
> > Cc: Keir (Xen.org) 
> > Cc: Tim Deegan 
> > Cc: Stefano Stabellini 
> > Cc: linux-arm-ker...@lists.infradead.org
> > Cc: xen-de...@lists.xen.org
> > Cc: Konrad Rzeszutek Wilk 
> > ---
> > Changes since V3
> >   s/read_evtchn_pending_sel/xchg_xen_ulong/ in a comment.
> > Changes since V2
> >   Add comments about the correct bitops to use, and on the ordering/barrier
> >   requirements on xchg_xen_ulong.
> > Changes since V1
> >   use find_first_set not __ffs
> >   fix some more unsigned long -> xen_ulong_t
> >   use more generic xchg_xen_ulong instead of ...read_evtchn...
> 
> still doesn't apply to 3.8

Weird. It applied to my tree (stable/for-linus-3.9) without fuss.
> 
> 
> > do {
> > -   unsigned long pending_words;
> > +   xen_ulong_t pending_words;
> > 
> > vcpu_info->evtchn_upcall_pending = 0;
> > 
> > if (__this_cpu_inc_return(xed_nesting_count) - 1)
> > goto out;
> > 
> > -#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
> > -   /* Clear master flag /before/ clearing selector flag. */
> > -   wmb();
> > -#endif
> > -   pending_words = xchg(_info->evtchn_pending_sel, 0);
> > +   /*
> > +* Master flag must be /before/ clearing selector
> > +* flag. xchg_xen_ulong must contain an appropriate
> > +* barrier.
> > +*/
> 
> Master flag must be *cleared* ...
> 
> > +   pending_words = 
> > xchg_xen_ulong(_info->evtchn_pending_sel, 0);
> > 
> > start_word_idx = __this_cpu_read(current_word_idx);
> > start_bit_idx = __this_cpu_read(current_bit_idx);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] Input: psmouse - retry getid command in psmouse_probe()

2013-02-19 Thread Dmitry Torokhov

Hi Chung-yih,

On Mon, Feb 18, 2013 at 05:45:07PM +0800, Chung-Yih Wang (王崇懿) wrote:
> Yes, I could add CONFIG_MOUSE_PS2_SYNAPTICS for the change as we only
> need it for synaptics touchpad/touchpoint on lenovo's machines.
> 

I do not think it will solve anything as all distributions have
CONFIG_MOUSE_PS2_SYNAPTICS enabled.

Could you tell me what the response is to the initial GETID command that
you see on these laptops?

Thanks.

-- 
Dmitry
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH linux-next v2] firewire: fw_device_init: 'minor' may be used uninitialized

2013-02-19 Thread Tejun Heo

On Tue, Feb 19, 2013 at 10:09:07AM +0100, Stefan Richter wrote:
> Date: Mon, 18 Feb 2013 14:24:36 -0700
> From: Tim Gardner 
> 
> 'firewire: convert to idr_alloc()' accidentally orphaned 'minor'.
> 
> drivers/firewire/core-device.c: In function ‘fw_device_init’:
> drivers/firewire/core-device.c:1029:24: warning: ‘minor’ may be used 
> uninitialized in this function [-Wuninitialized]
> 
> Signed-off-by: Tim Gardner 
> Signed-off-by: Stefan Richter 

Acked-by: Tejun Heo 

Sorry about that. :)

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Question about git branches, features, reverts, etc on subsystem maintainers tree?

2013-02-19 Thread Konrad Rzeszutek Wilk

Hey Linus,

I am hoping you can help out. I've a branch for 3.9 which has some
code that depends on the changes to the Xen hypervisor. The changes
to the Xen hypervisor are still in flux - aka they are not baked. The
code on the Linux side that uses this is marked with EXPERIMENTAL to
ward off novices.

To give you a 3.9 branch I am thinking to either:

 a). revert the merges I've for this new feature altogether and
 merge it later in v3.10 time-frame. They make about 50% of the
 code in this branch, so it's a big chunk of code movement.
 For 3.10 I could do a git revert of a revert and get everything
 in at once :-)

 b). create a new branch for you without the new features and
 just live with the shame of having the timestamp of patches
 being after the merge window.

 c). Rip out the Kconfig entry so there is not even a build option
 to build it. And then if the Xen hypervisor parts are baked,
 add the Kconfig entry back and only deal with bug-fixes.
 A bit like adding #ifdef 0 .

The end result for a) and b) is the same - the amount of code that
would end up in the 'git diff --stat' is the same. It is just that
there are these abhorrent git reverts in case a). The pedantic part
of me screams at the uncleanliness of a) option.

The b) is a bit like git rebase in spirit, except the only "rebase"
is that I've slimmed it down and not added new patches.

The c) is .. well, ignores the part of development where we might
need to re-engineer big parts of it (though I doubt it, but you
never know). But those redevelopment parts can be part of v3.10.

Thoughts?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[GIT PULL] cpuset changes for 3.9-rc1

2013-02-19 Thread Tejun Heo

Hello, Linus.

This is cpuset changes for 3.9-rc1.

* Synchronization has seen a lot of changes with focus on decoupling
  cpuset synchronization from cgroup internal locking.  After this
  change, there only remain a couple of mostly trivial dependencies on
  cgroup_lock outside cgroup core proper.  cgroup_lock is scheduled to
  be unexported in this devel cycle.  This will finally remove the
  fragile locking order around cgroup (cgroup locking wants to /
  should be one of the outermost but yet has been acquired from deep
  inside individual controllers).

* At this point, Li is most knowledgeable with cpuset and taking over
  the maintainership of cpuset.

This depends on cgroup_rightmost_descendant() implemented in
cgroup/for-3.9 and this pull request assumes that cgroup/for-3.9 is
already pulled in.

The changes are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git for-3.9-cpuset

for you to fetch changes up to d127027baf98dce3ca31bec18c2c0e048ceda7c4:

  cpuset: drop spurious retval assignment in proc_cpuset_show() (2013-01-15 
08:38:55 -0800)


Li Zefan (3):
  cpuset: update MAINTAINERS
  cpuset: fix RCU lockdep splat
  cpuset: drop spurious retval assignment in proc_cpuset_show()

Tejun Heo (15):
  cpuset: remove unused cpuset_unlock()
  cpuset: remove fast exit path from remove_tasks_in_empty_cpuset()
  cpuset: introduce ->css_on/offline()
  cpuset: introduce CS_ONLINE
  cpuset: introduce cpuset_for_each_child()
  cpuset: cleanup cpuset[_can]_attach()
  cpuset: reorganize CPU / memory hotplug handling
  cpuset: don't nest cgroup_mutex inside get_online_cpus()
  cpuset: drop async_rebuild_sched_domains()
  cpuset: make CPU / memory hotplug propagation asynchronous
  cpuset: pin down cpus and mems while a task is being attached
  cpuset: schedule hotplug propagation from cpuset_attach() if the cpuset 
is empty
  cpuset: replace cgroup_mutex locking with cpuset internal locking
  cpuset: replace cpuset->stack_list with cpuset_for_each_descendant_pre()
  cpuset: remove cpuset->parent

 MAINTAINERS |   4 +-
 kernel/cpuset.c | 872 +++-
 2 files changed, 485 insertions(+), 391 deletions(-)

Thanks.

--
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH RFC] video: Add Hyper-V Synthetic Video Frame Buffer Driver

2013-02-19 Thread Olaf Hering

On Tue, Feb 19, Haiyang Zhang wrote:

> The emulated video device is a separate device from the synthetic video.
> The synthetic driver can only take control of the synthetic video, but not
> the emulated video.

Please add this to the comment above.

> Actually, we already have a similar mechanism in ata/ata_piix.c to disable
> emulated IDE drive on Hyper-V, so it won't conflict with the synthetic drive.

I haven't read the vesafb code, but I think it can kind of give up the
hardware, something ata_piix can not do.

Olaf
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v7 00/12] Tegra114 clockframework

2013-02-19 Thread Stephen Warren

On 02/15/2013 05:36 AM, Peter De Schrijver wrote:
> This is the seventh version of the Tegra114 clockframework. It is based on the
> for-next branch of
> git://git.kernel.org/pub/scm/linux/kernel/git/swarren/linux-tegra.git and
> http://www.spinics.net/lists/arm-kernel/msg220452.html.

Mike,

I think it'd be prudent to take this series through the Tegra tree again
for 3.10; patch 11 needs to go in as part of the series due to internal
dependencies, and I'm slightly worried that other Tegra DT changes might
conflict with that patch (if only for context). Taking this series (and
any other Tegra clk driver changes for 3.10) through the Tegra tree
could resolve that easily. Do you have a problem with that?

If that's OK, can you stage the dependency Peter mentioned:
http://www.spinics.net/lists/arm-kernel/msg220452.html
into a stable branch for me to merge, obviously after 3.9-rc1 is out.

Thanks.

Prashant, could you provide a review/ack for this series too.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

WARNING: at drivers/ata/libata-core.c:5049 ata_qc_issue+0x1c7/0x3a0()

2013-02-19 Thread Tommi Rantala

Hello,

Hit this WARNING once while fuzzing the kernel with trinity in a qemu
virtual machine as the root user.

Does this make any sense? I have occasionally seen some ATA related
troubles while fuzzing in a VM, but this warning is new to me.

[  490.717030] WARNING: at
/home/ttrantal/git/linux-2.6/drivers/ata/libata-core.c:5049
ata_qc_issue+0x1c7/0x3a0()
[  490.717030] Hardware name: Bochs
[  490.717030] Pid: 2548, comm: trinity-child6 Not tainted 3.8.0+ #87
[  490.717030] Call Trace:
[  490.717030]  [] warn_slowpath_common+0x86/0xb0
[  490.717030]  [] warn_slowpath_null+0x15/0x20
[  490.717030]  [] ata_qc_issue+0x1c7/0x3a0
[  490.717030]  [] ? ata_scsi_set_sense.constprop.13+0x30/0x30
[  490.717030]  [] ata_scsi_translate+0x120/0x190
[  490.717030]  [] ? ata_scsi_queuecmd+0x2e/0x2d0
[  490.717030]  [] ata_scsi_queuecmd+0x253/0x2d0
[  490.717030]  [] scsi_dispatch_cmd+0x161/0x230
[  490.717030]  [] scsi_request_fn+0x544/0x580
[  490.717030]  [] ? cfq_dispatch_requests+0x56/0xb30
[  490.717030]  [] ? __lock_is_held+0x5a/0x80
[  490.717030]  [] __blk_run_queue+0x32/0x40
[  490.717030]  [] __elv_add_request+0x10a/0x280
[  490.717030]  [] blk_execute_rq_nowait+0xb6/0xf0
[  490.717030]  [] ? __init_waitqueue_head+0x41/0x60
[  490.717030]  [] blk_execute_rq+0xa8/0x110
[  490.717030]  [] ? lock_release_non_nested+0xde/0x310
[  490.717030]  [] ? selinux_capable+0x34/0x50
[  490.717030]  [] ? security_capable+0x13/0x20
[  490.717030]  [] ? ns_capable+0x53/0x80
[  490.717030]  [] sg_scsi_ioctl+0x2b1/0x3a0
[  490.717030]  [] scsi_cmd_ioctl+0x412/0x4a0
[  490.717030]  [] ? __lock_acquire+0x957/0x1c20
[  490.717030]  [] ? kvm_clock_read+0x1f/0x30
[  490.717030]  [] bsg_ioctl+0x146/0x270
[  490.717030]  [] ? trace_hardirqs_off_caller+0x28/0xd0
[  490.717030]  [] ? trace_hardirqs_off+0xd/0x10
[  490.717030]  [] ? local_clock+0x4a/0x70
[  490.717030]  [] ? lock_release_holdtime+0x28/0x170
[  490.717030]  [] ? avc_has_perm_flags+0x1d0/0x2a0
[  490.717030]  [] ? avc_has_perm_flags+0x28/0x2a0
[  490.717030]  [] ? trace_hardirqs_off_caller+0x28/0xd0
[  490.717030]  [] ? trace_hardirqs_off+0xd/0x10
[  490.717030]  [] do_vfs_ioctl+0x532/0x580
[  490.717030]  [] ? file_has_perm+0x83/0xa0
[  490.717030]  [] sys_ioctl+0x5d/0xa0
[  490.717030]  [] ? trace_hardirqs_on_thunk+0x3a/0x3f
[  490.717030]  [] system_call_fastpath+0x16/0x1b
[  490.717030] ---[ end trace fce35d2b40bd0565 ]---
[  490.810874] ata1.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x0
[  490.812538] ata1.00: failed command: READ DMA
[  490.813715] ata1.00: cmd c8/00:2c:00:01:00/00:00:00:00:00/e0 tag 0
[  490.813715]  res 50/01:00:b0:16:04/00:00:00:00:00/a0 Emask
0x40 (internal error)
[  490.817269] ata1.00: status: { DRDY }
[watchdog] 333615 iterations. [F:326712 S:6891]
[watchdog] kernel became tainted! Last seed was 71022097
[  491.266158] ata1.00: configured for MWDMA2
[  491.267358] ata1: EH complete
child 2548 exitting
child 2492 exitting
child 2500 exitting
[2351] Bailing main loop. Exit reason: kernel became tainted
[2350] Watchdog exiting

Ran 333617 syscalls. Successes: 6892  Failures: 326714

Tommi
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: False-positive RCU stall warnings on large systems...

2013-02-19 Thread Paul E. McKenney

On Wed, Feb 20, 2013 at 12:34:12AM +0800, Daniel J Blueman wrote:
> Hi Paul,
> 
> On some of our larger servers with many hundreds of cores and when
> under high duress, we can see scheduler RCU stall warnings [1], so
> find we have to increase the hardcoded RCU_STALL_RAT_DELAY up from 2
> and RCU_JIFFIES_TILL_FORCE_QS up from 3.
> 
> Is there a more sustainable way to account for this to avoid it
> being hard-coded, such as making it and dependent timeouts a
> fraction of CONFIG_RCU_CPU_STALL_TIMEOUT?
> 
> On the other hand, perhaps this is just caused by clock jitter (eg
> due to distance from a contended clock source)? So increasing these
> a bit may just be adequate in general...

Hmmm...  What version of the kernel are you running?

Thanx, Paul

> Many thanks,
>   Daniel
> 
> --- [1]
> 
> [ 3939.010085] INFO: rcu_sched detected stalls on CPUs/tasks: {}
> (detected by 1, t=29662 jiffies, g=3053, c=3052, q=598)
> [ 3939.020008] INFO: Stall ended before state dump start
> -- 
> Daniel J Blueman
> Principal Software Engineer, Numascale Asia
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: How to populate Battery information through ACPI tables

2013-02-19 Thread Westerberg, Mika

On Tue, Feb 19, 2013 at 08:22:01PM +0200, Pallala, Ramakrishna wrote:
> Mika, I want to populate this characterization data as device
> specific/custom data which could be anything and may not be entirely
> specific/custom data which could be anything And may not be entirely
> related to battery. Is this is possible?

Yes, for example you could have a custom ACPI method with your device which
then returns this information.

See for example chapter 10.2.2.1 from the ACPI spec. It describes _BIF
method that returns some battery data to the caller.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

WARNING: at drivers/pci/pci.c:1397 pci_disable_device

2013-02-19 Thread Jiri Slaby

Hi,

so I hit that one:
+   dev_WARN_ONCE(&dev->dev, atomic_read(&dev->enable_cnt) <= 0,
+ "disabling already-disabled device");

during suspend (to ram):
WARNING: at drivers/pci/pci.c:1397 pci_disable_device+0x90/0xa0()
Hardware name: To Be Filled By O.E.M.
Device e1000e
disabling already-disabled device
Modules linked in: dvb_usb_dib0700 dib0090 dib7000p dib7000m dib0070
dib8000 dib3000mc dibx000_common microcode
Pid: 31027, comm: kworker/u:35 Not tainted 3.8.0-rc7-next-20130218_64+ #1768
Call Trace:
 [] ? do_pci_disable_device+0x30/0x60
 [] warn_slowpath_common+0x7f/0xc0
 [] warn_slowpath_fmt+0x46/0x50
 [] pci_disable_device+0x90/0xa0
 [] __e1000_shutdown+0x262/0x8b0
 [] e1000_suspend+0x23/0x50
 [] ? wait_for_completion+0x31/0x100
 [] pci_pm_suspend+0x77/0x140
 [] ? __pm_runtime_barrier+0x1a/0x130
 [] ? pci_pm_poweroff+0xf0/0xf0
 [] dpm_run_callback+0x58/0x90
 [] __device_suspend+0xeb/0x280
 [] async_suspend+0x1f/0xa0
 [] async_run_entry_fn+0x3b/0x140
 [] process_one_work+0x174/0x410
 [] worker_thread+0x116/0x400
 [] ? busy_worker_rebind_fn+0xc0/0xc0
 [] kthread+0xc0/0xd0
 [] ? kthread_create_on_node+0x130/0x130
 [] ret_from_fork+0x7c/0xb0
 [] ? kthread_create_on_node+0x130/0x130
---[ end trace 6c5060f8b8fb9175 ]---
e1000e 0000:00:19.0: System wakeup enabled by ACPI


-- 
js
suse labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: slab: odd BUG on kzalloc

2013-02-19 Thread Sasha Levin

On 02/19/2013 01:29 PM, Dave Jones wrote:
> On Tue, Feb 19, 2013 at 01:18:25PM -0500, Sasha Levin wrote:
> 
>  > >> [  169.930103] ---[ end trace 4d135f3def21b4bd ]---
>  > >>
>  > >> The code translates to the following in fs/pipe.c:alloc_pipe_info :
>  > >>
>  > >> pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
>  > >> if (pipe) {
>  > >> pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * 
> PIPE_DEF_BUFFERS, GFP_KERNEL); <=== this
>  > >> if (pipe->bufs) {
>  > >> init_waitqueue_head(>wait);
>  > 
>  > Looks like it's not specific to pipe(). I've also got this one now:
>  > 
>  > Since I've managed to reproduce it, I'll go ahead and add slub_debug and 
> see what it tells us.
> 
> I'm curious, did you recently upgrade gcc, or other parts of the toolchain ?
> This, and one of the other 'weird' bugs you reported recently have me 
> wondering
> if perhaps you're seeing a compiler bug.

It happened once on a kernel built on my gentoo box which is generally up to 
date,
but the other time the kernel was built on my mini-server running ubuntu, which
isn't updated that often.

So I don't think compiler trickery is involved.


Thanks,
Sasha

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/2] x86, kvm: Add MSR_AMD64_BU_CFG2 to the list of ignored MSRs

2013-02-19 Thread Borislav Petkov

From: Borislav Petkov 

The "x86, AMD: Enable WC+ memory type on family 10 processors" patch
currently in -tip added a workaround for AMD F10h CPUs which #GPs my
guest when booted in kvm. This is because it accesses MSR_AMD64_BU_CFG2
which is not currently ignored by kvm. Do that because this MSR is only
baremetal-relevant anyway. While at it, move the ignored MSRs to the
beginning of kvm_set_msr_common so that we exit then and there.

Acked-by: Gleb Natapov 
Cc: Boris Ostrovsky 
Cc: Andre Przywara 
Cc: Marcelo Tosatti 
Signed-off-by: Borislav Petkov 
---
 arch/x86/kvm/x86.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c243b81e3c74..37040079cd6b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1881,6 +1881,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
u64 data = msr_info->data;
 
switch (msr) {
+   case MSR_AMD64_NB_CFG:
+   case MSR_IA32_UCODE_REV:
+   case MSR_IA32_UCODE_WRITE:
+   case MSR_VM_HSAVE_PA:
+   case MSR_AMD64_PATCH_LOADER:
+   case MSR_AMD64_BU_CFG2:
+   break;
+
case MSR_EFER:
return set_efer(vcpu, data);
case MSR_K7_HWCR:
@@ -1900,8 +1908,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
return 1;
}
break;
-   case MSR_AMD64_NB_CFG:
-   break;
case MSR_IA32_DEBUGCTLMSR:
if (!data) {
/* We support the non-activated case already */
@@ -1914,11 +1920,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct 
msr_data *msr_info)
vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
__func__, data);
break;
-   case MSR_IA32_UCODE_REV:
-   case MSR_IA32_UCODE_WRITE:
-   case MSR_VM_HSAVE_PA:
-   case MSR_AMD64_PATCH_LOADER:
-   break;
case 0x200 ... 0x2ff:
return set_msr_mtrr(vcpu, msr, data);
case MSR_IA32_APICBASE:
@@ -2253,6 +2254,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, 
u64 *pdata)
case MSR_K8_INT_PENDING_MSG:
case MSR_AMD64_NB_CFG:
case MSR_FAM10H_MMIO_CONF_BASE:
+   case MSR_AMD64_BU_CFG2:
data = 0;
break;
case MSR_P6_PERFCTR0:
-- 
1.8.1.3.535.ga923c31

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/2] x86, CPU, AMD: Fix WC+ workaround for older hosts

2013-02-19 Thread Borislav Petkov

From: Borislav Petkov 

The WC+ workaround for F10h introduces a new MSR and kvm host #GPs
on accesses to unknown MSRs if paravirt is not compiled in. Use the
exception-handling MSR accessors so as not to break 3.8 and later guests
booting on older hosts.

Remove a redundant family check while at it.

Cc: Gleb Natapov 
Cc: Boris Ostrovsky 
Link: http://lkml.kernel.org/r/20130219153655.gd26...@pd.tnic
Signed-off-by: Borislav Petkov 
---
 arch/x86/kernel/cpu/amd.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 721ef3208eb5..163af4a91d09 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -723,12 +723,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 * performance degradation for certain nested-paging guests.
 * Prevent this conversion by clearing bit 24 in
 * MSR_AMD64_BU_CFG2.
+*
+* NOTE: we want to use the _safe accessors so as not to #GP kvm
+* guests on older kvm hosts.
 */
-   if (c->x86 == 0x10) {
-   rdmsrl(MSR_AMD64_BU_CFG2, value);
-   value &= ~(1ULL << 24);
-   wrmsrl(MSR_AMD64_BU_CFG2, value);
-   }
+
+   rdmsrl_safe(MSR_AMD64_BU_CFG2, &value);
+   value &= ~(1ULL << 24);
+   wrmsrl_safe(MSR_AMD64_BU_CFG2, value);
}
 
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
-- 
1.8.1.3.535.ga923c31

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[GIT PULL] cgroup changes for 3.9-rc1

2013-02-19 Thread Tejun Heo

Hello, Linus.

These are cgroup changes for 3.9-rc1.  Nothing too drastic.

* Removal of synchronize_rcu() from userland visible paths.

* Various fixes and cleanups from Li.

* cgroup_rightmost_descendant() added which will be used by cpuset
  changes (it will be a separate pull request).

The changes are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git for-3.9

for you to fetch changes up to f169007b2773f285e098cb84c74aac0154d65ff7:

  cgroup: fail if monitored file and event_control are in different cgroup 
(2013-02-18 09:31:35 -0800)


Greg Thelen (2):
  cgroups: move cgroup_event_listener.c to tools/cgroup
  cgroups: fix cgroup_event_listener error handling

Li Zefan (15):
  cgroup: use new hashtable implementation
  cgroup: remove synchronize_rcu() from cgroup_attach_{task|proc}()
  cgroup: remove synchronize_rcu() from rebind_subsystems()
  cgroup: fix bogus kernel warnings when cgroup_create() failed
  cgroup: remove a NULL check in cgroup_exit()
  cgroup: initialize cgrp->dentry before css_alloc()
  sched: split out css_online/css_offline from tg creation/destruction
  sched: remove redundant NULL cgroup check in task_group_path()
  cgroup: remove duplicate RCU free on struct cgroup
  cgroup: remove synchronize_rcu() from cgroup_diput()
  cgroup: remove bogus comments in cgroup_diput()
  cgroup: fix exit() vs rmdir() race
  cpuset: fix cpuset_print_task_mems_allowed() vs rename() race
  cgroup: fix cgroup_rmdir() vs close(eventfd) race
  cgroup: fail if monitored file and event_control are in different cgroup

Tejun Heo (2):
  cgroup: remove unused dummy cgroup_fork_callbacks()
  cgroup: implement cgroup_rightmost_descendant()

 Documentation/cgroups/00-INDEX |   2 -
 Documentation/cgroups/memcg_test.txt   |   3 +-
 include/linux/cgroup.h |   3 +-
 include/linux/sched.h  |   3 +
 kernel/cgroup.c| 288 -
 kernel/cpuset.c|  12 +-
 kernel/sched/auto_group.c  |   3 +
 kernel/sched/core.c|  49 +++-
 kernel/sched/debug.c   |   7 -
 tools/Makefile |  19 +-
 tools/cgroup/.gitignore|   1 +
 tools/cgroup/Makefile  |  11 +
 .../cgroup}/cgroup_event_listener.c|  72 ++
 13 files changed, 270 insertions(+), 203 deletions(-)
 create mode 100644 tools/cgroup/.gitignore
 create mode 100644 tools/cgroup/Makefile
 rename {Documentation/cgroups => tools/cgroup}/cgroup_event_listener.c (54%)

Thanks.

--
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v5 29/45] x86/xen: Use get/put_online_cpus_atomic() to prevent CPU offline

2013-02-19 Thread Srivatsa S. Bhat

On 02/19/2013 11:40 PM, Konrad Rzeszutek Wilk wrote:
> On Tue, Jan 22, 2013 at 01:10:51PM +0530, Srivatsa S. Bhat wrote:
>> Once stop_machine() is gone from the CPU offline path, we won't be able to
>> depend on preempt_disable() or local_irq_disable() to prevent CPUs from
>> going offline from under us.
>>
>> Use the get/put_online_cpus_atomic() APIs to prevent CPUs from going offline,
>> while invoking from atomic context.
>>
>> Cc: Konrad Rzeszutek Wilk 
> 
> Weird. I see this in the patch but I don't see it in the header?

Meaning, you didn't get this email at all?

> Did you
> explicitly suppress the CC part?
> 

No.. I sent the entire patchset to a set of email ids and in addition to
that I CC'ed individual patches to the respective maintainers/lists (the
CC: list in the changelog). I used the --auto knob from stgit to do that.

> 
> Anyhow, the patch looks sane enough, though I need to run it through
> a test framework just to be on a sure side.
>

Sure, thank you. But you might want to test the v6 that I sent out
yesterday instead of v5. Oh, wait a min, you didn't get the v6 mail also?

Here it is, for your reference:
http://marc.info/?l=linux-kernel&m=136119260122255&w=2

Regards,
Srivatsa S. Bhat

>> Cc: Jeremy Fitzhardinge 
>> Cc: "H. Peter Anvin" 
>> Cc: x...@kernel.org
>> Cc: xen-de...@lists.xensource.com
>> Cc: virtualizat...@lists.linux-foundation.org
>> Signed-off-by: Srivatsa S. Bhat 
>> ---

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [patch 1/1] early_printk: Consolidate random copies of identical code

2013-02-19 Thread Mike Frysinger

On Tuesday 19 February 2013 07:44:29 Thomas Gleixner wrote:
> The early console implementations are the same all over the
> place. Move the print function to kernel/printk and get rid of the
> copies.

Acked-by: Mike Frysinger 
-mike


signature.asc
Description: This is a digitally signed message part.

Re: [patch 1/2] kcmp: Make it to depend on CONFIG_KCMP

2013-02-19 Thread Cyrill Gorcunov

On Tue, Feb 19, 2013 at 09:53:47AM -0800, H. Peter Anvin wrote:
> On 02/19/2013 01:31 AM, Cyrill Gorcunov wrote:
> >+
> >+  If unsure, say N.
> >+
> 
> Wrong advice.  In this particular case, Y is the safe alternative.
---
From: Cyrill Gorcunov 
Subject: kcmp: Make it to depend on CONFIG_KCMP

Since kcmp syscall has been implemented (initially on
x86 architecture) a number of other archs wire it up
as well: xtensa, sparc, sh, s390, mips, microblaze,
m68k (not taking into account those who use
<asm-generic/unistd.h> for syscall numbers
definitions).

But the Makefile, which turns kcmp.o generation on
still depends on former config-x86. Thus get rid
of this limitation and make kcmp.o depend on CONFIG_KCMP
option.

v2:
 - As Michal pointed out, old configs might already use
   CHECKPOINT_RESTORE, so set the "default" accordingly.
 - Advise Y if unsure

Signed-off-by: Cyrill Gorcunov 
Cc: KOSAKI Motohiro 
Cc: "Eric W. Biederman" 
Cc: Pavel Emelyanov 
Cc: Andrey Vagin 
Cc: Ingo Molnar 
Cc: H. Peter Anvin 
Cc: Thomas Gleixner 
Cc: Glauber Costa 
Cc: Andi Kleen 
Cc: Tejun Heo 
Cc: Matt Helsley 
Cc: Pekka Enberg 
Cc: Eric Dumazet 
Cc: Vasiliy Kulikov 
Cc: Alexey Dobriyan 
Cc: valdis.kletni...@vt.edu
Cc: Michal Marek 
Cc: Frederic Weisbecker 
Cc: Andrew Morton 
---
 init/Kconfig|9 +
 kernel/Makefile |4 +---
 2 files changed, 10 insertions(+), 3 deletions(-)

Index: linux-2.6.git/init/Kconfig
===
--- linux-2.6.git.orig/init/Kconfig
+++ linux-2.6.git/init/Kconfig
@@ -279,6 +279,15 @@ config FHANDLE
  get renamed. Enables open_by_handle_at(2) and name_to_handle_at(2)
  syscalls.
 
+config KCMP
+   bool "kcmp syscall"
+   default CHECKPOINT_RESTORE
+   help
+ If you say Y here, a user level program will be able to use
+ kcmp(2) syscall.
+
+ If unsure, say Y.
+
 config AUDIT
bool "Auditing support"
depends on NET
Index: linux-2.6.git/kernel/Makefile
===
--- linux-2.6.git.orig/kernel/Makefile
+++ linux-2.6.git/kernel/Makefile
@@ -25,9 +25,7 @@ endif
 obj-y += sched/
 obj-y += power/
 
-ifeq ($(CONFIG_CHECKPOINT_RESTORE),y)
-obj-$(CONFIG_X86) += kcmp.o
-endif
+obj-$(CONFIG_KCMP) += kcmp.o
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] regulator: tps6586x: Having slew rate settings for other than SM0/1 is not fatal

2013-02-19 Thread Mark Brown

On Tue, Feb 19, 2013 at 09:47:29AM -0700, Stephen Warren wrote:
> On 02/16/2013 04:50 AM, Axel Lin wrote:

> > Ignore the setting and show "Only SM0/SM1 can set slew rate" warning is 
> > enough,
> > then we can return 0 instead of -EINVAL in 
> > tps6586x_regulator_set_slew_rate().
> > 
> > Otherwise, probe() fails.

> Why does probe() fail; what is trying to set a slew rate on a regulator
> that doesn't support it? At least a few days ago in linux-next, this
> patch wasn't needed AFAIK. Is the problem something new?

I rather suspect Axel is doing this based on code inspection and review
rather than testing (either that or he has an enormous lab somewhere
full of all sorts of hardware!) - what he's saying is that the error
handling here seems excessive.


signature.asc
Description: Digital signature

Re: [GIT PULL] x86/cpu changes for v3.9

2013-02-19 Thread Boris Ostrovsky


On 02/19/2013 01:21 PM, H. Peter Anvin wrote:

On 02/19/2013 10:19 AM, Boris Ostrovsky wrote:

On 02/19/2013 12:57 PM, Konrad Rzeszutek Wilk wrote:

On Tue, Feb 19, 2013 at 06:47:58PM +0100, Borislav Petkov wrote:

On Tue, Feb 19, 2013 at 09:38:31AM -0800, H. Peter Anvin wrote:

My fault... I was tracking the fix and lost track of the thread.

The problem is that the fix is necessary but not sufficient, as it
introduces an undesirable host-guest dependency.  In order to allow
newer guests to work on older hosts we also should use the
{rd,wr}msr_safe() functions to manipulate this MSR, with a comment
as to why.

Boris, could you prepare such a patch, please?
I don't think Boris O. is at AMD anymore. Want me to add that to my 
fix

for kvm or prep a separate patch?

CC-ing Boris.


BorisP's patch is what I should have done. Can you take it?



As I stated:

 The problem is that the fix is necessary but not sufficient, as it
 introduces an undesirable host-guest dependency. In order to allow
 newer guests to work on older hosts we also should use the
 {rd,wr}msr_safe() functions to manipulate this MSR, with a comment
 as to why.


Ah, sorry --- I missed that part.

-boris

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [RFC] perf: need to expose sched_clock to correlate user samples with kernel samples

2013-02-19 Thread John Stultz


On 02/18/2013 12:35 PM, Thomas Gleixner wrote:

On Tue, 5 Feb 2013, John Stultz wrote:

On 02/05/2013 02:13 PM, Stephane Eranian wrote:

But if people are strongly opposed to the clock_gettime() approach, then
I can go with the ioctl() because the functionality is definitively needed
ASAP.

I prefer the ioctl method, since it's less likely to be re-purposed/misused.

Urgh. No! With a dedicated CLOCK_PERF we might have a decent chance to
put this into a vsyscall. With an ioctl not so much.
  

Though I'd be most comfortable with finding some way for perf-timestamps to be
CLOCK_MONOTONIC based (or maybe CLOCK_MONOTONIC_RAW if it would be easier),
and just avoid all together adding another time domain that doesn't really
have clear definition (other then "what perf uses").

What's wrong with that. We already have the infrastructure to create
dynamic time domains which can be completely disconnected from
everything else.


Right, but those are for actual hardware domains that we had no other 
way of interacting with.




Tracing/perf/instrumentation is a different domain and the main issue
there is performance. So going for a vsyscall enabled clock_gettime()
approach is definitely the best thing to do.


So describe how the perf time domain is different from CLOCK_MONOTONIC_RAW.


My concern here is that we're basically creating a kernel interface that 
exports implementation-defined semantics (again: whatever perf does 
right now). And I think folks want to do this, because adding CLOCK_PERF 
is easier than trying to:


1) Get a lock-free method for accessing CLOCK_MONOTONIC_RAW

2) Having perf interpolate its timestamps to CLOCK_MONOTONIC, or 
CLOCK_MONOTONIC_RAW when it exports the data



The semantics on sched_clock() have been very flexible and hand-wavy in 
the past. And I agree with the need for the kernel to have a 
"fast-and-loose" clock as well as the benefits to that flexibility as 
the scheduler code has evolved.  But nonetheless, the changes in its 
semantics have bitten us badly a few times.


So I totally understand why the vsyscall is attractive. I'm just very 
cautious about exporting a similarly fuzzily defined interface to 
userland. So until it's clear what the semantics will need to be going 
forward (forever!), my preference will be that we not add it.



thanks
-john

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCHv5 1/8] zsmalloc: add to mm/

2013-02-19 Thread Seth Jennings

On 02/19/2013 03:18 AM, Joonsoo Kim wrote:
> Hello, Seth.
> I'm not sure that this is right time to review, because I already have
> seen many effort of various people to promote zxxx series. I don't want to
> be a stopper to promote these. :)

Any time is good review time :)  Thanks for your review!

> 
> But, I read the code, now, and then some comments below.
> 
> On Wed, Feb 13, 2013 at 12:38:44PM -0600, Seth Jennings wrote:
>> =
>> DO NOT MERGE, FOR REVIEW ONLY
>> This patch introduces zsmalloc as new code, however, it already
>> exists in drivers/staging.  In order to build successfully, you
>> must select EITHER the drivers/staging version OR this version.
>> Once zsmalloc is reviewed in this format (and hopefully accepted),
>> I will create a new patchset that properly promotes zsmalloc from
>> staging.
>> =
>>
>> This patchset introduces a new slab-based memory allocator,
>> zsmalloc, for storing compressed pages.  It is designed for
>> low fragmentation and high allocation success rate on
>> large object, but <= PAGE_SIZE allocations.
>>
>> zsmalloc differs from the kernel slab allocator in two primary
>> ways to achieve these design goals.
>>
>> zsmalloc never requires high order page allocations to back
>> slabs, or "size classes" in zsmalloc terms. Instead it allows
>> multiple single-order pages to be stitched together into a
>> "zspage" which backs the slab.  This allows for higher allocation
>> success rate under memory pressure.
>>
>> Also, zsmalloc allows objects to span page boundaries within the
>> zspage.  This allows for lower fragmentation than could be had
>> with the kernel slab allocator for objects between PAGE_SIZE/2
>> and PAGE_SIZE.  With the kernel slab allocator, if a page compresses
>> to 60% of its original size, the memory savings gained through
>> compression is lost in fragmentation because another object of
>> the same size can't be stored in the leftover space.
>>
>> This ability to span pages results in zsmalloc allocations not being
>> directly addressable by the user.  The user is given a
>> non-dereferenceable handle in response to an allocation request.
>> That handle must be mapped, using zs_map_object(), which returns
>> a pointer to the mapped region that can be used.  The mapping is
>> necessary since the object data may reside in two different
>> noncontiguous pages.
>>
>> zsmalloc fulfills the allocation needs for zram and zswap.
>>
>> Acked-by: Nitin Gupta 
>> Acked-by: Minchan Kim 
>> Signed-off-by: Seth Jennings 
>> ---
>>  include/linux/zsmalloc.h |   49 ++
>>  mm/Kconfig   |   24 +
>>  mm/Makefile  |1 +
>>  mm/zsmalloc.c| 1124 
>> ++
>>  4 files changed, 1198 insertions(+)
>>  create mode 100644 include/linux/zsmalloc.h
>>  create mode 100644 mm/zsmalloc.c
>>
>> diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
>> new file mode 100644
>> index 000..eb6efb6
>> --- /dev/null
>> +++ b/include/linux/zsmalloc.h
>> @@ -0,0 +1,49 @@
>> +/*
>> + * zsmalloc memory allocator
>> + *
>> + * Copyright (C) 2011  Nitin Gupta
>> + *
>> + * This code is released using a dual license strategy: BSD/GPL
>> + * You can choose the license that better fits your requirements.
>> + *
>> + * Released under the terms of 3-clause BSD License
>> + * Released under the terms of GNU General Public License Version 2.0
>> + */
>> +
>> +#ifndef _ZS_MALLOC_H_
>> +#define _ZS_MALLOC_H_
>> +
>> +#include 
>> +#include 
>> +
>> +/*
>> + * zsmalloc mapping modes
>> + *
>> + * NOTE: These only make a difference when a mapped object spans pages
>> +*/
>> +enum zs_mapmode {
>> +ZS_MM_RW, /* normal read-write mapping */
>> +ZS_MM_RO, /* read-only (no copy-out at unmap time) */
>> +ZS_MM_WO /* write-only (no copy-in at map time) */
>> +};
> 
> 
> These makes no difference for PGTABLE_MAPPING.
> Please add some comment for this.

Yes. Will do.

> 
>> +struct zs_ops {
>> +struct page * (*alloc)(gfp_t);
>> +void (*free)(struct page *);
>> +};
>> +
>> +struct zs_pool;
>> +
>> +struct zs_pool *zs_create_pool(gfp_t flags, struct zs_ops *ops);
>> +void zs_destroy_pool(struct zs_pool *pool);
>> +
>> +unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags);
>> +void zs_free(struct zs_pool *pool, unsigned long obj);
>> +
>> +void *zs_map_object(struct zs_pool *pool, unsigned long handle,
>> +enum zs_mapmode mm);
>> +void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
>> +
>> +u64 zs_get_total_size_bytes(struct zs_pool *pool);
>> +
>> +#endif
>> diff --git a/mm/Kconfig b/mm/Kconfig
>> index 278e3ab..25b8f38 100644
>> --- a/mm/Kconfig
>> +++ b/mm/Kconfig
>> @@ -446,3 +446,27 @@ config FRONTSWAP
>>and swap data is stored as normal on the matching swap device.
>>  
>>If unsure, say Y to enable frontswap.
>> +
>> +config ZSMALLOC
>> +tristate "Memory allocator for compressed pages"
>> +default n
>> +

Re: [GIT PULL] x86/cpu changes for v3.9

2013-02-19 Thread H. Peter Anvin


On 02/19/2013 10:19 AM, Boris Ostrovsky wrote:

On 02/19/2013 12:57 PM, Konrad Rzeszutek Wilk wrote:

On Tue, Feb 19, 2013 at 06:47:58PM +0100, Borislav Petkov wrote:

On Tue, Feb 19, 2013 at 09:38:31AM -0800, H. Peter Anvin wrote:

My fault... I was tracking the fix and lost track of the thread.

The problem is that the fix is necessary but not sufficient, as it
introduces an undesirable host-guest dependency.  In order to allow
newer guests to work on older hosts we also should use the
{rd,wr}msr_safe() functions to manipulate this MSR, with a comment
as to why.

Boris, could you prepare such a patch, please?

I don't think Boris O. is at AMD anymore. Want me to add that to my fix
for kvm or prep a separate patch?

CC-ing Boris.


BorisP's patch is what I should have done. Can you take it?



As I stated:

 The problem is that the fix is necessary but not sufficient, as it
 introduces an undesirable host-guest dependency.  In order to allow
 newer guests to work on older hosts we also should use the
 {rd,wr}msr_safe() functions to manipulate this MSR, with a comment
 as to why.

-hpa

--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: How to populate Battery information through ACPI tables

2013-02-19 Thread Pallala, Ramakrishna

> > > > > > I am trying to populate battery related information through
> > > > > > ACPI tables and do battery management through non acpi
> > > > > > drivers.  Can you tell or point me on how to populate the ACPI
> > > > > > tables in FW/BIOS and get them in OS? I am new to ACPI world.
> > > > >
> > > > > You should start by reading Documentation/acpi/enumeration.txt.
> > > > >
> > > > > Also can you describe with bit more details, what you are trying
> > > > > to do? A new battery driver that is enumerated from ACPI namespace,
> perhaps?
> > > > >
> > > > > Do you have a DSDT table in hand which we could take a peek?
> > > >
> > > > Thanks Mika for the pointers. I have looked at the documentation.
> > > >
> > > > we already have i2c driver for battery monitoring but the chip
> > > > needs initialized with characterization data. We wanted to
> > > > populate/pass this data through ACPI table to the i2c slave driver.
> > > >
> > > > How can I do that? Can I pass characterization data through ACPI
> > > > table to the i2c slave device?
> > >
> > > Is the characterization data available in DSDT (or SSDT) table? If
> > > yes, then you can just use the fact that
> > > ACPI_HANDLE(_client->dev) returns a valid ACPI handle for your
> > > device. You can use this handle to call some ACPI method or whatever is
> needed to extract that information.
> >
> > As of now this data is not there in any table. I want to put this data in 
> > DSDT
> table.
> > The data I want to put is specific to the device  and will be around 1K 
> > bytes.
> >
> > One question, How the table and device are linked? Is it by device name/id?
> 
> In principle, yes. We create devices based on the IDs on the DSDT table.
> 
> > Next, how to put this characterization data into the DSDT table?
> 
> Typically this is done by BIOS team but you can experiment yourself just by
> taking an existing DSDT, disassembling it with iasl, do the changes and
> reassembling it again. You can then include this with your kernel or initrd 
> image.
> 
> I have no idea how that data is supposed to look like or be represented in the
> DSDT.
> 
> You should also familiarize yourself with ACPI by reading the ACPI 5.0 spec 
> from
> acpi.info.

Mika, I want to populate this characterization data as device specific/custom 
data which could be anything
And may not be entirely related to battery. Is this possible?

I will also go through the spec but this info would help me to resolve some 
initial hurdles in my work.

Note: I am not going to use ACPI based battery monitoring but I need this data 
from acpi tables.

Thanks,
Ram
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] arm: Set the page table freeing ceiling to TASK_SIZE

2013-02-19 Thread Hugh Dickins

On Mon, 18 Feb 2013, Catalin Marinas wrote:

> ARM processors with LPAE enabled use 3 levels of page tables, with an
> entry in the top level (pgd) covering 1GB of virtual space. Because of
> the branch relocation limitations on ARM, the loadable modules are
> mapped 16MB below PAGE_OFFSET, making the corresponding 1GB pgd shared
> between kernel modules and user space.
> 
> If free_pgtables() is called with the default ceiling 0,
> free_pgd_range() (and subsequently called functions) also frees the page
> table shared between user space and kernel modules (which is normally
> handled by the ARM-specific pgd_free() function). This patch
> defines the ARM USER_PGTABLES_CEILING as TASK_SIZE.

I don't have an ARM to test on, so I won't ack or nack this,
but I am a little worried or puzzled.

I thought CONFIG_ARM_LPAE came in v3.3: so I would expect these
patches to need "Cc: sta...@vger.kernel.org" for porting back there.

But then, did v3.3..v3.8 have the appropriate arch/arm code to handle
the freeing of the user+kernel pgd?  I'm not asserting that it could
not, but when doing the similar arch/x86 thing, I had to make changes
down there, so it's not necessarily something that works automatically.

And does the ARM !LPAE case work correctly (not leaking page tables
at any level) with this change from 0 to TASK_SIZE?  Again, I'm not
asserting that it does not, but your commit description doesn't give
enough confidence that you've tried that.

Perhaps you have some other patches to arch/arm, that of course I
wouldn't have noticed, which make this all work together; and it's
accepted that CONFIG_ARM_LPAE is broken on v3.3..v3.8, and too
much risk to backport it all for -stable.

Maybe all I'm asking for is a more reassuring commit description.

Hugh

> 
> Signed-off-by: Catalin Marinas 
> Cc: Russell King 
> Cc: Hugh Dickins 
> Cc: Andrew Morton 
> ---
>  arch/arm/include/asm/pgtable.h | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
> index c094749..8f06ee5 100644
> --- a/arch/arm/include/asm/pgtable.h
> +++ b/arch/arm/include/asm/pgtable.h
> @@ -61,6 +61,13 @@ extern void __pgd_error(const char *file, int line, pgd_t);
>  #define FIRST_USER_ADDRESS   PAGE_SIZE
>  
>  /*
> + * Use TASK_SIZE as the ceiling argument for free_pgtables() and
> + * free_pgd_range() to avoid freeing the modules pmd when LPAE is enabled 
> (pmd
> + * page shared between user and kernel).
> + */
> +#define USER_PGTABLES_CEILING TASK_SIZE
> +
> +/*
>   * The pgprot_* and protection_map entries will be fixed up in runtime
>   * to include the cachable and bufferable bits based on memory policy,
>   * as well as any architecture dependent bits like global/ASID and SMP
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [resend] Timer broadcast question

2013-02-19 Thread Daniel Lezcano

On 02/19/2013 07:10 PM, Thomas Gleixner wrote:
> On Tue, 19 Feb 2013, Daniel Lezcano wrote:
>> I am working on identifying the different wakeup sources from the
>> interrupts and I have a question regarding the timer broadcast.
>>
>> The broadcast timer is setup to the next event and that will wake up any
>> idle cpu belonging to the "broadcast cpumask", right ?
>>
>> The cpu which has been woken up will look for each cpu the next-event
>> and send an IPI to wake it up.
>>  
>> Although, it is possible the sender of this IPI may not be concerned by
>> the timer expiration and has been woken up just for sending the IPI, right ?
> 
> Correct.
>  
>> If this is correct, is it possible to setup the timer irq affinity to a
>> cpu which will be concerned by the timer expiration ? so we prevent an
>> unnecessary wake up for a cpu.
> 
> It is possible, but we never implemented it.
> 
> If we go there, we want to make that conditional on a property flag,
> because some interrupt controllers especially on x86 only allow to
> move the affinity from interrupt context, which is pointless.

Thanks Thomas for your quick answer. I will write a RFC patchset.

  -- Daniel

-- 
  Linaro.org │ Open source software for ARM SoCs

Follow Linaro:   Facebook |
 Twitter |
 Blog

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [GIT PULL] x86/cpu changes for v3.9

2013-02-19 Thread Boris Ostrovsky


On 02/19/2013 12:57 PM, Konrad Rzeszutek Wilk wrote:

On Tue, Feb 19, 2013 at 06:47:58PM +0100, Borislav Petkov wrote:

On Tue, Feb 19, 2013 at 09:38:31AM -0800, H. Peter Anvin wrote:

My fault... I was tracking the fix and lost track of the thread.

The problem is that the fix is necessary but not sufficient, as it
introduces an undesirable host-guest dependency.  In order to allow
newer guests to work on older hosts we also should use the
{rd,wr}msr_safe() functions to manipulate this MSR, with a comment
as to why.

Boris, could you prepare such a patch, please?

I don't think Boris O. is at AMD anymore. Want me to add that to my fix
for kvm or prep a separate patch?

CC-ing Boris.


BorisP's patch is what I should have done. Can you take it?

-boris


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] atl1c: restore buffer state

2013-02-19 Thread David Miller

From: xiong 
Date: Wed, 20 Feb 2013 01:23:09 +0800

> In the previous commit, f1f220ea1dda078, the BUSY state of the buffer was wrongly
> deleted. This patch just restores it.
> 
> Signed-off-by: xiong 

Applied.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] NET/PHY: Eliminate the forced speed reduction algorithm.

2013-02-19 Thread David Miller

From: Kirill Kapranov 
Date: Tue, 19 Feb 2013 13:53:48 +0400

> Tested at 2.6.38.7, applicable up to for 3.0.4. 
> Signed-off-by: Kirill Kapranov ,
>  --- linux/drivers/net/phy/phy.c.orig 2011-05-22 02:13:59.0 +0400
> +++ linux/drivers/net/phy/phy.c   2012-04-28 12:49:37.0 +0400

Your patches are continually poorly formatted, and corrupted by
your email client, which means that the patches cannot be applied
properly and all of our automated tools for patch tracking do not
recognize your submissions as a patch.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

< 1 2 3 4 5 6 7 8 9 10 >

301 - 400 of 1522 matches

Mail list logo