date:20120824

[PATCH v9 3/5] virtio_balloon: introduce migration primitives to balloon pages

2012-08-24 Thread Rafael Aquini

Memory fragmentation introduced by ballooning might reduce significantly
the number of 2MB contiguous memory blocks that can be used within a guest,
thus imposing performance penalties associated with the reduced number of
transparent huge pages that could be used by the guest workload.

Besides making balloon pages movable at allocation time and introducing
the necessary primitives to perform balloon page migration/compaction,
the patch changes the balloon bookkeeping pages counter into an atomic
counter, and it introduces the following locking scheme, in order to
enhance the synchronization methods for accessing elements of struct
virtio_balloon, thus providing protection against the concurrent accesses
introduced by parallel memory compaction threads.

 - balloon_lock (mutex) : synchronizes the access demand to elements of
  struct virtio_balloon and its queue operations;
 - pages_lock (spinlock): special protection to balloon's pages bookkeeping
  elements (list and atomic counters) against the
  potential memory compaction concurrency;

Signed-off-by: Rafael Aquini 
---
 drivers/virtio/virtio_balloon.c | 286 +---
 1 file changed, 265 insertions(+), 21 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 0908e60..9b0bc46 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -27,6 +27,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 /*
  * Balloon device works in 4K page units.  So each page is pointed to by
@@ -34,6 +36,7 @@
  * page units.
  */
 #define VIRTIO_BALLOON_PAGES_PER_PAGE (PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT)
+#define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256
 
 struct virtio_balloon
 {
@@ -46,11 +49,24 @@ struct virtio_balloon
/* The thread servicing the balloon. */
struct task_struct *thread;
 
+   /* balloon special page->mapping */
+   struct address_space *mapping;
+
+   /* Synchronize access/update to this struct virtio_balloon elements */
+   struct mutex balloon_lock;
+
/* Waiting for host to ack the pages we released. */
wait_queue_head_t acked;
 
+   /* Number of balloon pages isolated from 'pages' list for compaction */
+   atomic_t num_isolated_pages;
+
/* Number of balloon pages we've told the Host we're not using. */
-   unsigned int num_pages;
+   atomic_t num_pages;
+
+   /* Protect pages list, and pages bookkeeping counters */
+   spinlock_t pages_lock;
+
/*
 * The pages we've told the Host we're not using.
 * Each page on this list adds VIRTIO_BALLOON_PAGES_PER_PAGE
@@ -60,7 +76,7 @@ struct virtio_balloon
 
/* The array of pfns we tell the Host about. */
unsigned int num_pfns;
-   u32 pfns[256];
+   u32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX];
 
/* Memory statistics */
int need_stats_update;
@@ -122,13 +138,17 @@ static void set_page_pfns(u32 pfns[], struct page *page)
 
 static void fill_balloon(struct virtio_balloon *vb, size_t num)
 {
+   /* Get the proper GFP alloc mask from vb->mapping flags */
+   gfp_t vb_gfp_mask = mapping_gfp_mask(vb->mapping);
+
/* We can only do one array worth at a time. */
num = min(num, ARRAY_SIZE(vb->pfns));
 
+   mutex_lock(>balloon_lock);
for (vb->num_pfns = 0; vb->num_pfns < num;
 vb->num_pfns += VIRTIO_BALLOON_PAGES_PER_PAGE) {
-   struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY |
-   __GFP_NOMEMALLOC | __GFP_NOWARN);
+   struct page *page = alloc_page(vb_gfp_mask | __GFP_NORETRY |
+  __GFP_NOWARN | __GFP_NOMEMALLOC);
if (!page) {
if (printk_ratelimit())
dev_printk(KERN_INFO, >vdev->dev,
@@ -139,9 +159,15 @@ static void fill_balloon(struct virtio_balloon *vb, size_t 
num)
break;
}
set_page_pfns(vb->pfns + vb->num_pfns, page);
-   vb->num_pages += VIRTIO_BALLOON_PAGES_PER_PAGE;
totalram_pages--;
+
+   BUG_ON(!trylock_page(page));
+   spin_lock(>pages_lock);
list_add(>lru, >pages);
+   assign_balloon_mapping(page, vb->mapping);
+   atomic_add(VIRTIO_BALLOON_PAGES_PER_PAGE, >num_pages);
+   spin_unlock(>pages_lock);
+   unlock_page(page);
}
 
/* Didn't get any?  Oh well. */
@@ -149,6 +175,7 @@ static void fill_balloon(struct virtio_balloon *vb, size_t 
num)
return;
 
tell_host(vb, vb->inflate_vq);
+   mutex_unlock(>balloon_lock);
 }
 
 static void release_pages_by_pfn(const u32 pfns[], unsigned int num)
@@ -162,19 +189,97 @@ static void release_pages_by_pfn(const

[PATCH v9 5/5] mm: add vm event counters for balloon pages compaction

2012-08-24 Thread Rafael Aquini

This patch introduces a new set of vm event counters to keep track of
ballooned pages compaction activity.

Signed-off-by: Rafael Aquini 
---
 drivers/virtio/virtio_balloon.c |  1 +
 include/linux/vm_event_item.h   |  8 +++-
 mm/balloon_compaction.c |  2 ++
 mm/migrate.c|  1 +
 mm/vmstat.c | 10 +-
 5 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 9b0bc46..e1e8e30 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -528,6 +528,7 @@ int virtballoon_migratepage(struct address_space *mapping,
 
mutex_unlock(>balloon_lock);
wake_up(>config_change);
+   count_balloon_event(COMPACTBALLOONMIGRATED);
 
return BALLOON_MIGRATION_RETURN;
 }
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 57f7b10..13573fe 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -41,7 +41,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #ifdef CONFIG_COMPACTION
COMPACTBLOCKS, COMPACTPAGES, COMPACTPAGEFAILED,
COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
-#endif
+#ifdef CONFIG_BALLOON_COMPACTION
+   COMPACTBALLOONISOLATED, /* isolated from balloon pagelist */
+   COMPACTBALLOONMIGRATED, /* balloon page sucessfully migrated */
+   COMPACTBALLOONRELEASED, /* old-page released after migration */
+   COMPACTBALLOONRETURNED, /* putback to pagelist, not-migrated */
+#endif /* CONFIG_BALLOON_COMPACTION */
+#endif /* CONFIG_COMPACTION */
 #ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
 #endif
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index 86a3692..00e7ea9 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -110,6 +110,7 @@ bool isolate_balloon_page(struct page *page)
if (__is_movable_balloon_page(page) &&
(page_count(page) == 2)) {
__isolate_balloon_page(page);
+   count_balloon_event(COMPACTBALLOONISOLATED);
unlock_page(page);
return true;
} else if (unlikely(!__is_movable_balloon_page(page))) {
@@ -139,6 +140,7 @@ void putback_balloon_page(struct page *page)
if (__is_movable_balloon_page(page)) {
__putback_balloon_page(page);
put_page(page);
+   count_balloon_event(COMPACTBALLOONRETURNED);
} else {
dump_page(page);
__WARN();
diff --git a/mm/migrate.c b/mm/migrate.c
index e47daf5..124b16b 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -896,6 +896,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned 
long private,
list_del(>lru);
put_page(page);
__free_page(page);
+   count_balloon_event(COMPACTBALLOONRELEASED);
return 0;
}
 out:
diff --git a/mm/vmstat.c b/mm/vmstat.c
index df7a674..5824ad2 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -768,7 +768,15 @@ const char * const vmstat_text[] = {
"compact_stall",
"compact_fail",
"compact_success",
-#endif
+
+#ifdef CONFIG_BALLOON_COMPACTION
+   "compact_balloon_isolated",
+   "compact_balloon_migrated",
+   "compact_balloon_released",
+   "compact_balloon_returned",
+#endif /* CONFIG_BALLOON_COMPACTION */
+
+#endif /* CONFIG_COMPACTION */
 
 #ifdef CONFIG_HUGETLB_PAGE
"htlb_buddy_alloc_success",
-- 
1.7.11.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v9 4/5] mm: introduce putback_movable_pages()

2012-08-24 Thread Rafael Aquini

The PATCH "mm: introduce compaction and migration for virtio ballooned pages"
hacks around putback_lru_pages() in order to allow ballooned pages to be
re-inserted on balloon page list as if a ballooned page was like a LRU page.

As ballooned pages are not legitimate LRU pages, this patch introduces
putback_movable_pages() to properly cope with cases where the isolated
pageset contains ballooned pages and LRU pages, thus fixing the mentioned
inelegant hack around putback_lru_pages().

Signed-off-by: Rafael Aquini 
---
 include/linux/migrate.h |  2 ++
 mm/compaction.c |  4 ++--
 mm/migrate.c| 20 
 mm/page_alloc.c |  2 +-
 4 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index ce7e667..ff103a1 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -10,6 +10,7 @@ typedef struct page *new_page_t(struct page *, unsigned long 
private, int **);
 #ifdef CONFIG_MIGRATION
 
 extern void putback_lru_pages(struct list_head *l);
+extern void putback_movable_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
@@ -33,6 +34,7 @@ extern int migrate_huge_page_move_mapping(struct 
address_space *mapping,
 #else
 
 static inline void putback_lru_pages(struct list_head *l) {}
+static inline void putback_movable_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
unsigned long private, bool offlining,
enum migrate_mode mode) { return -ENOSYS; }
diff --git a/mm/compaction.c b/mm/compaction.c
index e50836b..409b2f5 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -817,9 +817,9 @@ static int compact_zone(struct zone *zone, struct 
compact_control *cc)
trace_mm_compaction_migratepages(nr_migrate - nr_remaining,
nr_remaining);
 
-   /* Release LRU pages not migrated */
+   /* Release isolated pages not migrated */
if (err) {
-   putback_lru_pages(>migratepages);
+   putback_movable_pages(>migratepages);
cc->nr_migratepages = 0;
if (err == -ENOMEM) {
ret = COMPACT_PARTIAL;
diff --git a/mm/migrate.c b/mm/migrate.c
index ec439f8..e47daf5 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -80,6 +80,26 @@ void putback_lru_pages(struct list_head *l)
list_del(>lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
+   putback_lru_page(page);
+   }
+}
+
+/*
+ * Put previously isolated pages back onto the appropriate lists
+ * from where they were once taken off for compaction/migration.
+ *
+ * This function shall be used instead of putback_lru_pages(),
+ * whenever the isolated pageset has been built by isolate_migratepages_range()
+ */
+void putback_movable_pages(struct list_head *l)
+{
+   struct page *page;
+   struct page *page2;
+
+   list_for_each_entry_safe(page, page2, l, lru) {
+   list_del(>lru);
+   dec_zone_page_state(page, NR_ISOLATED_ANON +
+   page_is_file_cache(page));
if (unlikely(movable_balloon_page(page)))
putback_balloon_page(page);
else
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c66fb87..a0c2cc5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5675,7 +5675,7 @@ static int __alloc_contig_migrate_range(unsigned long 
start, unsigned long end)
0, false, MIGRATE_SYNC);
}
 
-   putback_lru_pages();
+   putback_movable_pages();
return ret > 0 ? 0 : ret;
 }
 
-- 
1.7.11.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v9 1/5] mm: introduce a common interface for balloon pages mobility

2012-08-24 Thread Rafael Aquini

Memory fragmentation introduced by ballooning might reduce significantly
the number of 2MB contiguous memory blocks that can be used within a guest,
thus imposing performance penalties associated with the reduced number of
transparent huge pages that could be used by the guest workload.

This patch introduces a common interface to help a balloon driver
make its page set movable by compaction, thus allowing the system
to better leverage the compaction efforts on memory defragmentation.

Signed-off-by: Rafael Aquini 
---
 include/linux/balloon_compaction.h | 137 +
 include/linux/pagemap.h|  18 
 mm/Kconfig |  15 
 mm/Makefile|   2 +-
 mm/balloon_compaction.c| 172 +
 5 files changed, 343 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/balloon_compaction.h
 create mode 100644 mm/balloon_compaction.c

diff --git a/include/linux/balloon_compaction.h 
b/include/linux/balloon_compaction.h
new file mode 100644
index 000..7afb0ae
--- /dev/null
+++ b/include/linux/balloon_compaction.h
@@ -0,0 +1,137 @@
+/*
+ * include/linux/balloon_compaction.h
+ *
+ * Common interface definitions for making balloon pages movable to compaction.
+ *
+ * Copyright (C) 2012, Red Hat, Inc.  Rafael Aquini 
+ */
+#ifndef _LINUX_BALLOON_COMPACTION_H
+#define _LINUX_BALLOON_COMPACTION_H
+#ifdef __KERNEL__
+
+#include 
+#include 
+#include 
+
+#ifdef CONFIG_BALLOON_COMPACTION
+#define count_balloon_event(e) count_vm_event(e)
+extern bool isolate_balloon_page(struct page *);
+extern void putback_balloon_page(struct page *);
+extern int migrate_balloon_page(struct page *newpage,
+   struct page *page, enum migrate_mode mode);
+
+static inline gfp_t balloon_mapping_gfp_mask(void)
+{
+   return GFP_HIGHUSER_MOVABLE;
+}
+
+/*
+ * movable_balloon_page - test page->mapping->flags to identify balloon pages
+ *   that can be moved by compaction/migration.
+ *
+ * This function is used at core compaction's page isolation scheme and so it's
+ * exposed to several system pages which may, or may not, be part of a memory
+ * balloon, and thus we cannot afford to hold a page locked to perform tests.
+ *
+ * Therefore, as we might return false positives in the case a balloon page
+ * is just released under us, the page->mapping->flags need to be retested
+ * with the proper page lock held, on the functions that will cope with the
+ * balloon page later.
+ */
+static inline bool movable_balloon_page(struct page *page)
+{
+   /*
+* Before dereferencing and testing mapping->flags, lets make sure
+* this is not a page that uses ->mapping in a different way
+*/
+   if (!PageSlab(page) && !PageSwapCache(page) &&
+   !PageAnon(page) && !page_mapped(page)) {
+   /*
+* While doing compaction core work, we cannot afford to hold
+* page lock as it might cause very undesirable side effects.
+*/
+   struct address_space *mapping;
+   mapping = rcu_dereference_raw(page->mapping);
+   if (mapping)
+   return mapping_balloon(mapping);
+   }
+   return false;
+}
+
+/*
+ * __page_balloon_device - return the balloon device owning the page.
+ *
+ * This shall only be used at driver callbacks under proper page lock,
+ * to get access to the balloon device structure that owns @page.
+ */
+static inline void *__page_balloon_device(struct page *page)
+{
+   struct address_space *mapping;
+   mapping = rcu_dereference_protected(page->mapping, PageLocked(page));
+   if (mapping)
+   mapping = mapping->assoc_mapping;
+   return (void *)mapping;
+}
+
+/*
+ * DEFINE_BALLOON_MAPPING_AOPS - declare and instantiate a callback descriptor
+ *  to be used as balloon page->mapping->a_ops.
+ *
+ * @label : declaration identifier (var name)
+ * @isolatepg : callback symbol name for performing the page isolation step
+ * @migratepg : callback symbol name for performing the page migration step
+ * @putbackpg : callback symbol name for performing the page putback step
+ *
+ * address_space_operations utilized methods for ballooned pages:
+ *   .migratepage- used to perform balloon's page migration (as is)
+ *   .invalidatepage - used to isolate a page from balloon's page list
+ *   .freepage   - used to reinsert an isolated page to balloon's page list
+ */
+#define DEFINE_BALLOON_MAPPING_AOPS(label, isolatepg, migratepg, putbackpg) \
+   const struct address_space_operations (label) = {   \
+   .migratepage= (migratepg),  \
+   .invalidatepage = (isolatepg),  \
+   .freepage   = (putbackpg),  \
+   }
+

[PATCH v9 0/5] make balloon pages movable by compaction

2012-08-24 Thread Rafael Aquini

Memory fragmentation introduced by ballooning might reduce significantly
the number of 2MB contiguous memory blocks that can be used within a guest,
thus imposing performance penalties associated with the reduced number of
transparent huge pages that could be used by the guest workload.

This patch-set follows the main idea discussed at 2012 LSFMMS session:
"Ballooning for transparent huge pages" -- http://lwn.net/Articles/490114/
to introduce the required changes to the virtio_balloon driver, as well as
the changes to the core compaction & migration bits, in order to make those
subsystems aware of ballooned pages and allow memory balloon pages to become
movable within a guest, thus avoiding the aforementioned fragmentation issue.

Rafael Aquini (5):
  mm: introduce a common interface for balloon pages mobility
  mm: introduce compaction and migration for ballooned pages
  virtio_balloon: introduce migration primitives to balloon pages
  mm: introduce putback_movable_pages()
  mm: add vm event counters for balloon pages compaction

 drivers/virtio/virtio_balloon.c| 287 ++---
 include/linux/balloon_compaction.h | 137 ++
 include/linux/migrate.h|   2 +
 include/linux/pagemap.h|  18 +++
 include/linux/vm_event_item.h  |   8 +-
 mm/Kconfig |  15 ++
 mm/Makefile|   2 +-
 mm/balloon_compaction.c| 174 ++
 mm/compaction.c|  51 ---
 mm/migrate.c   |  57 +++-
 mm/page_alloc.c|   2 +-
 mm/vmstat.c|  10 +-
 12 files changed, 715 insertions(+), 48 deletions(-)
 create mode 100644 include/linux/balloon_compaction.h
 create mode 100644 mm/balloon_compaction.c


Change log:
v9:
 * Adjust rcu_dereference usage to leverage page lock protection  (Paul, Peter);
 * Enhance doc on compaction interface introduced to balloon driver   (Michael);
 * Fix issue with isolated pages breaking leak_balloon() logic    (Michael);
v8:
 * introduce a common MM interface for balloon driver page compaction (Michael);
 * remove the global state preventing multiple balloon device support (Michael);
 * introduce RCU protection/synchronization to balloon page->mapping  (Michael);
v7:
 * fix a potential page leak case at 'putback_balloon_page'   (Mel);
 * adjust vm-events-counter patch and remove its drop-on-merge message(Rik);
 * add 'putback_movable_pages' to avoid hacks on 'putback_lru_pages'  (Minchan);
v6:
 * rename 'is_balloon_page()' to 'movable_balloon_page()' (Rik);
v5:
 * address Andrew Morton's review comments on the patch series;
 * address a couple extra nitpick suggestions on PATCH 01 (Minchan);
v4: 
 * address Rusty Russell's review comments on PATCH 02;
 * re-base virtio_balloon patch on 9c378abc5c0c6fc8e3acf5968924d274503819b3;
V3: 
 * address reviewers nitpick suggestions on PATCH 01 (Mel, Minchan);
V2: 
 * address Mel Gorman's review comments on PATCH 01;


Preliminary test results:
(2 VCPU 2048mB RAM KVM guest running 3.6.0_rc3+ -- after a reboot)

* 64mB balloon:
[root@localhost ~]# awk '/compact/ {print}' /proc/vmstat
compact_blocks_moved 0
compact_pages_moved 0
compact_pagemigrate_failed 0
compact_stall 0
compact_fail 0
compact_success 0
compact_balloon_isolated 0
compact_balloon_migrated 0
compact_balloon_released 0
compact_balloon_returned 0
[root@localhost ~]# 
[root@localhost ~]# for i in $(seq 1 6); do echo 1 > 
/proc/sys/vm/compact_memory & done &>/dev/null 
[1]   Doneecho 1 > /proc/sys/vm/compact_memory
[2]   Doneecho 1 > /proc/sys/vm/compact_memory
[3]   Doneecho 1 > /proc/sys/vm/compact_memory
[4]   Doneecho 1 > /proc/sys/vm/compact_memory
[5]-  Doneecho 1 > /proc/sys/vm/compact_memory
[6]+  Doneecho 1 > /proc/sys/vm/compact_memory
[root@localhost ~]# 
[root@localhost ~]# awk '/compact/ {print}' /proc/vmstat
compact_blocks_moved 3108
compact_pages_moved 43169
compact_pagemigrate_failed 95
compact_stall 0
compact_fail 0
compact_success 0
compact_balloon_isolated 16384
compact_balloon_migrated 16384
compact_balloon_released 16384
compact_balloon_returned 0


* 128 mB balloon:
[root@localhost ~]# awk '/compact/ {print}' /proc/vmstat
compact_blocks_moved 0
compact_pages_moved 0
compact_pagemigrate_failed 0
compact_stall 0
compact_fail 0
compact_success 0
compact_balloon_isolated 0
compact_balloon_migrated 0
compact_balloon_released 0
compact_balloon_returned 0
[root@localhost ~]# 
[root@localhost ~]# for i in $(seq 1 6); do echo 1 > 
/proc/sys/vm/compact_memory & done &>/dev/null  
[1]   Doneecho 1 > /proc/sys/vm/compact_memory
[2]   Doneecho 1 > /proc/sys/vm/compact_memory
[3]   Doneecho 1 > /proc/sys/vm/compact_memory
[4]   Done

[PATCH v9 2/5] mm: introduce compaction and migration for ballooned pages

2012-08-24 Thread Rafael Aquini

Memory fragmentation introduced by ballooning might reduce significantly
the number of 2MB contiguous memory blocks that can be used within a guest,
thus imposing performance penalties associated with the reduced number of
transparent huge pages that could be used by the guest workload.

This patch introduces the helper functions as well as the necessary changes
to teach compaction and migration bits how to cope with pages which are
part of a guest memory balloon, in order to make them movable by memory
compaction procedures.

Signed-off-by: Rafael Aquini 
---
 mm/compaction.c | 47 ---
 mm/migrate.c| 36 ++--
 2 files changed, 62 insertions(+), 21 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 7fcd3a5..e50836b 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "internal.h"
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
@@ -358,32 +359,40 @@ isolate_migratepages_range(struct zone *zone, struct 
compact_control *cc,
continue;
}
 
-   if (!PageLRU(page))
-   continue;
-
/*
-* PageLRU is set, and lru_lock excludes isolation,
-* splitting and collapsing (collapsing has already
-* happened if PageLRU is set).
+* It is possible to migrate LRU pages and balloon pages.
+* Skip any other type of page.
 */
-   if (PageTransHuge(page)) {
-   low_pfn += (1 << compound_order(page)) - 1;
-   continue;
-   }
+   if (PageLRU(page)) {
+   /*
+* PageLRU is set, and lru_lock excludes isolation,
+* splitting and collapsing (collapsing has already
+* happened if PageLRU is set).
+*/
+   if (PageTransHuge(page)) {
+   low_pfn += (1 << compound_order(page)) - 1;
+   continue;
+   }
 
-   if (!cc->sync)
-   mode |= ISOLATE_ASYNC_MIGRATE;
+   if (!cc->sync)
+   mode |= ISOLATE_ASYNC_MIGRATE;
 
-   lruvec = mem_cgroup_page_lruvec(page, zone);
+   lruvec = mem_cgroup_page_lruvec(page, zone);
 
-   /* Try isolate the page */
-   if (__isolate_lru_page(page, mode) != 0)
-   continue;
+   /* Try isolate the page */
+   if (__isolate_lru_page(page, mode) != 0)
+   continue;
 
-   VM_BUG_ON(PageTransCompound(page));
+   VM_BUG_ON(PageTransCompound(page));
+
+   /* Successfully isolated */
+   del_page_from_lru_list(page, lruvec, page_lru(page));
+   } else if (unlikely(movable_balloon_page(page))) {
+   if (!isolate_balloon_page(page))
+   continue;
+   } else
+   continue;
 
-   /* Successfully isolated */
-   del_page_from_lru_list(page, lruvec, page_lru(page));
list_add(>lru, migratelist);
cc->nr_migratepages++;
nr_isolated++;
diff --git a/mm/migrate.c b/mm/migrate.c
index 77ed2d7..ec439f8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -35,6 +35,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -79,7 +80,10 @@ void putback_lru_pages(struct list_head *l)
list_del(>lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
-   putback_lru_page(page);
+   if (unlikely(movable_balloon_page(page)))
+   putback_balloon_page(page);
+   else
+   putback_lru_page(page);
}
 }
 
@@ -799,6 +803,18 @@ static int __unmap_and_move(struct page *page, struct page 
*newpage,
goto skip_unmap;
}
 
+   if (unlikely(movable_balloon_page(page))) {
+   /*
+* A ballooned page does not need any special attention from
+* physical to virtual reverse mapping procedures.
+* Skip any attempt to unmap PTEs or to remap swap cache,
+* in order to avoid burning cycles at rmap level, and perform
+* the page migration right away (protected by page lock).
+*/
+   rc = migrate_balloon_page(newpage, page, mode);
+   goto uncharge;
+   }
+
/* Establish migration ptes or remove ptes */
try_to_unmap(page,

Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM

2012-08-24 Thread Yinghai Lu

On Fri, Aug 24, 2012 at 9:24 PM, Jacob Shin  wrote:
> On Fri, Aug 24, 2012 at 06:07:01PM -0700, Yinghai Lu wrote:
>> On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin  wrote:
>> > Currently direct mappings are created for [ 0 to max_low_pfn<<PAGE_SHIFT )
>> > and [ 4GB to max_pfn<<PAGE_SHIFT ), which may include regions that are not
>> > backed by actual DRAM. This is fine for holes under 4GB which are covered
>> > by fixed and variable range MTRRs to be UC. However, we run into trouble
>> > on higher memory addresses which cannot be covered by MTRRs.
>> >
>> > Our system with 1TB of RAM has an e820 that looks like this:
>> >
>> >  BIOS-e820: [mem 0x-0x000983ff] usable
>> >  BIOS-e820: [mem 0x00098400-0x0009] reserved
>> >  BIOS-e820: [mem 0x000d-0x000f] reserved
>> >  BIOS-e820: [mem 0x0010-0xc7eb] usable
>> >  BIOS-e820: [mem 0xc7ec-0xc7ed7fff] ACPI data
>> >  BIOS-e820: [mem 0xc7ed8000-0xc7ed9fff] ACPI NVS
>> >  BIOS-e820: [mem 0xc7eda000-0xc7ff] reserved
>> >  BIOS-e820: [mem 0xfec0-0xfec0] reserved
>> >  BIOS-e820: [mem 0xfee0-0xfee00fff] reserved
>> >  BIOS-e820: [mem 0xfff0-0x] reserved
>> >  BIOS-e820: [mem 0x0001-0x00e037ff] usable
>> >  BIOS-e820: [mem 0x00e03800-0x00fc] reserved
>> >  BIOS-e820: [mem 0x0100-0x011ffeff] usable
>> >
>> > and so direct mappings are created for huge memory hole between
>> > 0x00e03800 to 0x0100. Even though the kernel never
>> > generates memory accesses in that region, since the page tables mark
>> > them incorrectly as being WB, our (AMD) processor ends up causing a MCE
>> > while doing some memory bookkeeping/optimizations around that area.
>> >
>> > This patch iterates through e820 and only direct maps ranges that are
>> > marked as E820_RAM, and keeps track of those pfn ranges. Depending on
>> > the alignment of E820 ranges, this may possibly result in using smaller
>> > size (i.e. 4K instead of 2M or 1G) page tables.
>> >
>> > Signed-off-by: Jacob Shin 
>> > ---
>> >  arch/x86/include/asm/page_types.h |9 +++
>> >  arch/x86/kernel/setup.c   |  125 
>> > +
>> >  arch/x86/mm/init.c|2 +
>> >  arch/x86/mm/init_64.c |6 +-
>> >  4 files changed, 112 insertions(+), 30 deletions(-)
>> >
>> > diff --git a/arch/x86/include/asm/page_types.h 
>> > b/arch/x86/include/asm/page_types.h
>> > index e21fdd1..409047a 100644
>> > --- a/arch/x86/include/asm/page_types.h
>> > +++ b/arch/x86/include/asm/page_types.h
>> > @@ -3,6 +3,7 @@
>> >
>> >  #include 
>> >  #include 
>> > +#include 
>> >
>> >  /* PAGE_SHIFT determines the page size */
>> >  #define PAGE_SHIFT 12
>> > @@ -40,12 +41,20 @@
>> >  #endif /* CONFIG_X86_64 */
>> >
>> >  #ifndef __ASSEMBLY__
>> > +#include 
>> >
>> >  extern int devmem_is_allowed(unsigned long pagenr);
>> >
>> >  extern unsigned long max_low_pfn_mapped;
>> >  extern unsigned long max_pfn_mapped;
>> >
>> > +extern struct range pfn_mapped[E820_X_MAX];
>> > +extern int nr_pfn_mapped;
>> > +
>> > +extern void add_pfn_range_mapped(unsigned long start_pfn, unsigned long 
>> > end_pfn);
>> > +extern bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long 
>> > end_pfn);
>> > +extern bool pfn_is_mapped(unsigned long pfn);
>> > +
>> >  static inline phys_addr_t get_max_mapped(void)
>> >  {
>> > return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
>> > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>> > index 751e020..4217fb4 100644
>> > --- a/arch/x86/kernel/setup.c
>> > +++ b/arch/x86/kernel/setup.c
>> > @@ -115,13 +115,46 @@
>> >  #include 
>> >
>> >  /*
>> > - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 
>> > entries.
>> > - * The direct mapping extends to max_pfn_mapped, so that we can directly 
>> > access
>> > - * apertures, ACPI and other tables without having to play with fixmaps.
>> > + * max_low_pfn_mapped: highest direct mapped pfn under 4GB
>> > + * max_pfn_mapped: highest direct mapped pfn over 4GB
>> > + *
>> > + * The direct mapping only covers E820_RAM regions, so the ranges and 
>> > gaps are
>> > + * represented by pfn_mapped
>> >   */
>> >  unsigned long max_low_pfn_mapped;
>> >  unsigned long max_pfn_mapped;
>> >
>> > +struct range pfn_mapped[E820_X_MAX];
>> > +int nr_pfn_mapped;
>> > +
>> > +void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
>> > +{
>> > +   nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX,
>> > +nr_pfn_mapped, start_pfn, 
>> > end_pfn);
>> > +
>> > +   max_pfn_mapped = max(max_pfn_mapped, end_pfn);
>> > +
>> > +   if (end_pfn <= (1UL << (32 - PAGE_SHIFT)))
>> > +   max_low_pfn_mapped = max(max_low_pfn_mapped, end_pfn);
>> > +}
>> > +
>> > +bool

Re: [PATCH 5/5] x86: if kernel .text .data .bss are not marked as E820_RAM, complain and fix

2012-08-24 Thread Jacob Shin

On Fri, Aug 24, 2012 at 06:23:48PM -0700, Yinghai Lu wrote:
> On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin  wrote:
> > There could be cases where user supplied memmap=exactmap memory
> > mappings do not mark the region where the kernel .text .data and
> > .bss reside as E820_RAM as reported here:
> >
> > https://lkml.org/lkml/2012/8/14/86
> >
> > Handle it by complaining, and adding the range back into the e820.
> >
> > Signed-off-by: Jacob Shin 
> > ---
> >  arch/x86/kernel/setup.c |   15 +++
> >  1 file changed, 15 insertions(+)
> >
> > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> > index 4217fb4..b84aceb5 100644
> > --- a/arch/x86/kernel/setup.c
> > +++ b/arch/x86/kernel/setup.c
> > @@ -926,6 +926,21 @@ void __init setup_arch(char **cmdline_p)
> > insert_resource(_resource, _resource);
> > insert_resource(_resource, _resource);
> >
> > +   /*
> > +* Complain if .text .data and .bss are not marked as E820_RAM and
> > +* attempt to fix it by adding the range. We may have a confused 
> > BIOS,
> > +* or the user may have incorrectly supplied it via 
> > memmap=exactmap. If
> > +* we really are running on top non-RAM, we will crash later 
> > anyways.
> > +*/
> > +   if (!e820_all_mapped(code_resource.start, bss_resource.end, 
> > E820_RAM)) {
> > +   pr_warn(".text .data .bss are not marked as E820_RAM!\n");
> > +
> > +   e820_add_region(code_resource.start,
> > +   bss_resource.end - code_resource.start + 1,
> > +   E820_RAM);
> > +   sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), 
> > _map);
> 
> this sanitize_e820_map call could be spared; trim_bios_range will
> always do that.

Ah. okay

> 
> > +   }
> > +
> > trim_bios_range();
> >  #ifdef CONFIG_X86_32
> > if (ppro_with_ram_bug()) {
> 
> also should use brk_limit instead of bss_resource.end. aka need to
> keep the map for brk area.

Okay.. will fix on Monday

> 
> Thanks
> 
> Yinghai
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3 01/17] hashtable: introduce a small and naive hashtable

2012-08-24 Thread Mathieu Desnoyers

* Tejun Heo (t...@kernel.org) wrote:
> Hello,
> 
> On Sat, Aug 25, 2012 at 12:59:25AM +0200, Sasha Levin wrote:
> > That's the thing, the amount of things you can do with a given 
> > bucket
> > is very limited. You can't add entries to any point besides the head 
> > (without
> > walking the entire list).
> 
> Kinda my point.  We already have all the hlist*() interface to deal
> with such cases.  Having something which is evidently the trivial
> hlist hashtable and advertises as such in the interface can be
> helpful.  I think we need that more than we need anything fancy.
> 
> Heh, this is a debate about which one is less insignificant.  I can
> see your point.  I'd really like to hear what others think on this.
> 
> Guys, do we want something which is evidently trivial hlist hashtable
> which can use hlist_*() API directly or do we want something better
> encapsulated?

My 2 cents, FWIW: I think this specific effort should target a trivially
understandable API and implementation, for use-cases where one would be
tempted to reimplement his own trivial hash table anyway. So here
exposing hlist internals, with which kernel developers are already
familiar, seems like a good approach in my opinion, because hiding stuff
behind new abstraction might make the target users go away.

Then, as we see the need, we can eventually merge a more elaborate hash
table with ponies and whatnot, but I would expect that the trivial hash
table implementation would still be useful. There are of course very
compelling reasons to use a more featureful hash table: automatic
resize, RT-aware updates, scalable updates, etc... but I see a purpose
for a trivial implementation. Its primary strong points being:

- it's trivially understandable, so anyone who wants to be really sure
  they won't end up debugging the hash table instead of their
  work-in-progress code can have a full understanding of it,
- it has few dependencies, which makes it easier to understand and
  easier to use in some contexts (e.g. early boot).

So I'm in favor of not overdoing the abstraction for this trivial hash
table, and honestly I would rather prefer that this trivial hash table
stays trivial. A more elaborate hash table should probably come as a
separate API.

Thanks,

Mathieu

-- 
Mathieu Desnoyers
Operating System Efficiency R&D Consultant
EfficiOS Inc.
http://www.efficios.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM

2012-08-24 Thread Jacob Shin

On Fri, Aug 24, 2012 at 06:07:01PM -0700, Yinghai Lu wrote:
> On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin  wrote:
> > Currently direct mappings are created for [ 0 to max_low_pfn<<PAGE_SHIFT )
> > and [ 4GB to max_pfn<<PAGE_SHIFT ), which may include regions that are not
> > backed by actual DRAM. This is fine for holes under 4GB which are covered
> > by fixed and variable range MTRRs to be UC. However, we run into trouble
> > on higher memory addresses which cannot be covered by MTRRs.
> >
> > Our system with 1TB of RAM has an e820 that looks like this:
> >
> >  BIOS-e820: [mem 0x-0x000983ff] usable
> >  BIOS-e820: [mem 0x00098400-0x0009] reserved
> >  BIOS-e820: [mem 0x000d-0x000f] reserved
> >  BIOS-e820: [mem 0x0010-0xc7eb] usable
> >  BIOS-e820: [mem 0xc7ec-0xc7ed7fff] ACPI data
> >  BIOS-e820: [mem 0xc7ed8000-0xc7ed9fff] ACPI NVS
> >  BIOS-e820: [mem 0xc7eda000-0xc7ff] reserved
> >  BIOS-e820: [mem 0xfec0-0xfec0] reserved
> >  BIOS-e820: [mem 0xfee0-0xfee00fff] reserved
> >  BIOS-e820: [mem 0xfff0-0x] reserved
> >  BIOS-e820: [mem 0x0001-0x00e037ff] usable
> >  BIOS-e820: [mem 0x00e03800-0x00fc] reserved
> >  BIOS-e820: [mem 0x0100-0x011ffeff] usable
> >
> > and so direct mappings are created for huge memory hole between
> > 0x00e03800 to 0x0100. Even though the kernel never
> > generates memory accesses in that region, since the page tables mark
> > them incorrectly as being WB, our (AMD) processor ends up causing a MCE
> > while doing some memory bookkeeping/optimizations around that area.
> >
> > This patch iterates through e820 and only direct maps ranges that are
> > marked as E820_RAM, and keeps track of those pfn ranges. Depending on
> > the alignment of E820 ranges, this may possibly result in using smaller
> > size (i.e. 4K instead of 2M or 1G) page tables.
> >
> > Signed-off-by: Jacob Shin 
> > ---
> >  arch/x86/include/asm/page_types.h |9 +++
> >  arch/x86/kernel/setup.c   |  125 
> > +
> >  arch/x86/mm/init.c|2 +
> >  arch/x86/mm/init_64.c |6 +-
> >  4 files changed, 112 insertions(+), 30 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/page_types.h 
> > b/arch/x86/include/asm/page_types.h
> > index e21fdd1..409047a 100644
> > --- a/arch/x86/include/asm/page_types.h
> > +++ b/arch/x86/include/asm/page_types.h
> > @@ -3,6 +3,7 @@
> >
> >  #include 
> >  #include 
> > +#include 
> >
> >  /* PAGE_SHIFT determines the page size */
> >  #define PAGE_SHIFT 12
> > @@ -40,12 +41,20 @@
> >  #endif /* CONFIG_X86_64 */
> >
> >  #ifndef __ASSEMBLY__
> > +#include 
> >
> >  extern int devmem_is_allowed(unsigned long pagenr);
> >
> >  extern unsigned long max_low_pfn_mapped;
> >  extern unsigned long max_pfn_mapped;
> >
> > +extern struct range pfn_mapped[E820_X_MAX];
> > +extern int nr_pfn_mapped;
> > +
> > +extern void add_pfn_range_mapped(unsigned long start_pfn, unsigned long 
> > end_pfn);
> > +extern bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long 
> > end_pfn);
> > +extern bool pfn_is_mapped(unsigned long pfn);
> > +
> >  static inline phys_addr_t get_max_mapped(void)
> >  {
> > return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
> > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> > index 751e020..4217fb4 100644
> > --- a/arch/x86/kernel/setup.c
> > +++ b/arch/x86/kernel/setup.c
> > @@ -115,13 +115,46 @@
> >  #include 
> >
> >  /*
> > - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 
> > entries.
> > - * The direct mapping extends to max_pfn_mapped, so that we can directly 
> > access
> > - * apertures, ACPI and other tables without having to play with fixmaps.
> > + * max_low_pfn_mapped: highest direct mapped pfn under 4GB
> > + * max_pfn_mapped: highest direct mapped pfn over 4GB
> > + *
> > + * The direct mapping only covers E820_RAM regions, so the ranges and gaps 
> > are
> > + * represented by pfn_mapped
> >   */
> >  unsigned long max_low_pfn_mapped;
> >  unsigned long max_pfn_mapped;
> >
> > +struct range pfn_mapped[E820_X_MAX];
> > +int nr_pfn_mapped;
> > +
> > +void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
> > +{
> > +   nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX,
> > +nr_pfn_mapped, start_pfn, 
> > end_pfn);
> > +
> > +   max_pfn_mapped = max(max_pfn_mapped, end_pfn);
> > +
> > +   if (end_pfn <= (1UL << (32 - PAGE_SHIFT)))
> > +   max_low_pfn_mapped = max(max_low_pfn_mapped, end_pfn);
> > +}
> > +
> > +bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
> > +{
> > +   int i;
> > +
> > +   for (i = 0; i < nr_pfn_mapped; i++)
> > +   if ((start_pfn >=

Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM

2012-08-24 Thread H. Peter Anvin


On 08/24/2012 09:20 PM, Jacob Shin wrote:


What is the benefit?


So that in the case where we have E820_RAM right above 1MB, we don't
call init_memory_mapping twice, first on 0 ~ 1MB and then 1MB ~ something

we only call it once. 0 ~ something.



So what is the benefit?

-hpa


--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM

2012-08-24 Thread Jacob Shin

On Fri, Aug 24, 2012 at 06:13:02PM -0700, H. Peter Anvin wrote:
> On 08/24/2012 05:49 PM, Jacob Shin wrote:
> > 
> > Right, I think what I was attempting to do was to merge the 1MB
> > with E820_RAM right above 1MB:
> > 
> > So instead of:
> > 
> > init_memory_mapping(0, 1MB)
> > init_memory_mapping(1MB, 2GB)
> > 
> > It would be:
> > 
> > init_memory_mapping(0, 2GB)
> > 
> > While taking care of the odd case where there is a gap right after
> > 1MB.
> > 
> > But if it's not worth it, I can move it out of the loop.
> > 
> 
> What is the benefit?

So that in the case where we have E820_RAM right above 1MB, we don't
call init_memory_mapping twice, first on 0 ~ 1MB and then 1MB ~ something

we only call it once. 0 ~ something.

I'll get it out of the loop if you don't think it's a good idea.

> 
>   -hpa
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/5] x86: Move enabling of PSE and PGE out of init_memory_mapping

2012-08-24 Thread Jacob Shin

On Fri, Aug 24, 2012 at 07:06:42PM -0700, Yinghai Lu wrote:
> On Fri, Aug 24, 2012 at 6:49 PM, Yinghai Lu  wrote:
> > On Fri, Aug 24, 2012 at 6:25 PM, Yinghai Lu  wrote:
> >> On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin  wrote:
> >>> Depending on the platform, init_memory_mapping() may be called multiple
> >>> times. Move it out to setup_arch() to avoid writing to cr4 on every call.
> >>>
> >>> Signed-off-by: Jacob Shin 
> >>> ---
> >>>  arch/x86/kernel/setup.c |   10 ++
> >>>  arch/x86/mm/init.c  |   10 --
> >>>  2 files changed, 10 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> >>> index f4b9b80..751e020 100644
> >>> --- a/arch/x86/kernel/setup.c
> >>> +++ b/arch/x86/kernel/setup.c
> >>> @@ -913,6 +913,16 @@ void __init setup_arch(char **cmdline_p)
> >>>
> >>> init_gbpages();
> >>>
> >>> +   /* Enable PSE if available */
> >>> +   if (cpu_has_pse)
> >>> +   set_in_cr4(X86_CR4_PSE);
> >>> +
> >>> +   /* Enable PGE if available */
> >>> +   if (cpu_has_pge) {
> >>> +   set_in_cr4(X86_CR4_PGE);
> >>> +   __supported_pte_mask |= _PAGE_GLOBAL;
> >>> +   }
> >>> +
> >>
> >> please don't put it directly in setup_arch().
> >>
> >> and another function.
> >>
> >
> > Jacob, hpa
> >
> > can you use attached one to replace the first patch?
> 
> Please use attached two instead.

Hmm .. okay I'll test with these two patches applied on Monday ..

> 
> Thanks
> 
> Yinghai




--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/5] x86: Move enabling of PSE and PGE out of init_memory_mapping

2012-08-24 Thread Jacob Shin

On Fri, Aug 24, 2012 at 06:25:38PM -0700, Yinghai Lu wrote:
> On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin  wrote:
> > Depending on the platform, init_memory_mapping() may be called multiple
> > times. Move it out to setup_arch() to avoid writing to cr4 on every call.
> >
> > Signed-off-by: Jacob Shin 
> > ---
> >  arch/x86/kernel/setup.c |   10 ++
> >  arch/x86/mm/init.c  |   10 --
> >  2 files changed, 10 insertions(+), 10 deletions(-)
> >
> > diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> > index f4b9b80..751e020 100644
> > --- a/arch/x86/kernel/setup.c
> > +++ b/arch/x86/kernel/setup.c
> > @@ -913,6 +913,16 @@ void __init setup_arch(char **cmdline_p)
> >
> > init_gbpages();
> >
> > +   /* Enable PSE if available */
> > +   if (cpu_has_pse)
> > +   set_in_cr4(X86_CR4_PSE);
> > +
> > +   /* Enable PGE if available */
> > +   if (cpu_has_pge) {
> > +   set_in_cr4(X86_CR4_PGE);
> > +   __supported_pte_mask |= _PAGE_GLOBAL;
> > +   }
> > +
> 
> please don't put it directly in setup_arch().
> 
> and another function.

It actually gets moved out to another function in patch 3/5

> 
> Thanks
> 
> Yinghai
> 
> > /* max_pfn_mapped is updated here */
> > max_low_pfn_mapped = init_memory_mapping(0, 
> > max_low_pfn<<PAGE_SHIFT);
> > max_pfn_mapped = max_low_pfn_mapped;
> > diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
> > index e0e6990..2f07e09 100644
> > --- a/arch/x86/mm/init.c
> > +++ b/arch/x86/mm/init.c
> > @@ -149,16 +149,6 @@ unsigned long __init_refok 
> > init_memory_mapping(unsigned long start,
> > use_gbpages = direct_gbpages;
> >  #endif
> >
> > -   /* Enable PSE if available */
> > -   if (cpu_has_pse)
> > -   set_in_cr4(X86_CR4_PSE);
> > -
> > -   /* Enable PGE if available */
> > -   if (cpu_has_pge) {
> > -   set_in_cr4(X86_CR4_PGE);
> > -   __supported_pte_mask |= _PAGE_GLOBAL;
> > -   }
> > -
> > if (use_gbpages)
> > page_size_mask |= 1 << PG_LEVEL_1G;
> > if (use_pse)
> > --
> > 1.7.9.5
> >
> >
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RT 2/2] Linux 3.0.41-rt62-rc1

2012-08-24 Thread Steven Rostedt

---
 localversion-rt |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/localversion-rt b/localversion-rt
index 9b7de93..fef6b3c 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt61
+-rt62-rc1
-- 
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 00/11] rcu: Add missing RCU idle APIs on idle loop v2

2012-08-24 Thread Paul E. McKenney

On Sat, Aug 25, 2012 at 02:19:14AM +0100, Ben Hutchings wrote:
> On Fri, 2012-08-24 at 14:26 -0700, Paul E. McKenney wrote:
> > On Thu, Aug 23, 2012 at 04:58:24PM +0200, Frederic Weisbecker wrote:
> > > Hi,
> > > 
> > > Changes since v1:
> > > 
> > > - Fixed preempt handling in alpha idle loop
> > > - added ack from Geert
> > > - fixed stable email address, sorry :-/
> > > 
> > > This time I built tested everywhere but: h8300 (compiler internal error),
> > > and mn10300, parisc, score (cross compilers not available in
> > > ftp://ftp.kernel.org/pub/tools/crosstool/files/bin/x86_64/4.6.3/)
> > > 
> > > For testing, you can pull from:
> > > 
> > > git://github.com/fweisbec/linux-dynticks.git
> > >   rcu/idle-fix-v2 
> > > 
> > > Thanks.
> > 
> > I have queued these on -rcu branch rcu/idle:
> > 
> > git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
> > 
> > This problem has been in place since 3.3, so it is hard to argue that
> > it is a regression for this merge window.  I have therefore queued it
> > for 3.7.
> 
> I don't follow that; I would expect any serious bug fix (serious enough
> for a stable update) to be acceptable for 3.6 at this point.

OK, if any of the arch maintainers wishes to submit the patch to 3.6,
they are free to do so -- just let me know and I will drop the patch from
my tree.

That said, all this does is cause spurious warnings to be printed, so
not sure it really qualifies as serious.  But I am happy to leave that
decision with the individual arch maintainers -- it is their arch,
after all, so their decision.

Thanx, Paul

> If the regression occurred in 3.3, then the cc lines should be something
> like:
> 
> Cc:  # 3.3+
> 
> and not the current:
> 
> Cc: 3.2.x.. 
> 
> (Note, version annotations should be on the right of the address, not in
> the 'real name' position on the left.)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RT 1/2] fix printk flush of messages

2012-08-24 Thread Steven Rostedt

Updates console-make-rt-friendly.patch

#ifdef CONFIG_PREEMPT_RT_FULL, printk() output is never flushed by
printk() because:

   # some liberties taken in this pseudo-code to make it easier to follow
   printk()
  vprintk()
 raw_spin_lock(&logbuf_lock)
# increment preempt_count():
preempt_disable()
  result = console_trylock_for_printk()
 retval = 0
 # lock will always be false, because preempt_count() will be >= 1
 lock = ... && !preempt_count()
 if (lock)
retval = 1
 return retval
  # result will always be false since lock will always be false
  if (result)
 console_unlock()
# this is where the printk() output would be flushed

On system boot some printk() output is flushed because register_console()
and tty_open() call console_unlock().

This change also fixes the problem that was previously fixed by
preempt-rt-allow-immediate-magic-sysrq-output-for-preempt_rt_full.patch

Signed-off-by: Frank Rowand 
Cc: Frank 
Link: http://lkml.kernel.org/r/4fb44fd0.4090...@am.sony.com
Signed-off-by: Thomas Gleixner 
---
 kernel/printk.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/printk.c b/kernel/printk.c
index 60f4290..f2c459f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -835,7 +835,7 @@ static int console_trylock_for_printk(unsigned int cpu, 
unsigned long flags)
 {
 #ifdef CONFIG_PREEMPT_RT_FULL
int lock = (!early_boot_irqs_disabled && !irqs_disabled_flags(flags) &&
-   !preempt_count()) || sysrq_in_progress;
+   (preempt_count() <= 1)) || sysrq_in_progress;
 #else
int lock = 1;
 #endif
-- 
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RT 0/2] [ANNOUNCE] 3.0.41-rt62-rc1 stable review

2012-08-24 Thread Steven Rostedt


Dear RT Folks,

This is the RT stable review cycle of patch 3.0.41-rt62-rc1.

Please scream at me if I messed something up. Please test the patches too.

The -rc release will be uploaded to kernel.org and will be deleted when
the final release is out. This is just a review release (or release candidate).

The pre-releases will not be pushed to the git repository, only the
final release is.

If all goes well, this patch will be converted to the next main release
on 8/29/2012 (again on the late Michael Jackson's B-day, and my
buddy Derek's)

Enjoy,

-- Steve


To build 3.0.41-rt62-rc1 directly, the following patches should be applied:

  http://www.kernel.org/pub/linux/kernel/v3.0/linux-3.0.tar.xz

  http://www.kernel.org/pub/linux/kernel/v3.0/patch-3.0.41.xz

  
http://www.kernel.org/pub/linux/kernel/projects/rt/3.0/patch-3.0.41-rt62-rc1.patch.xz

You can also build from 3.0.41-rt61 by applying the incremental patch:

http://www.kernel.org/pub/linux/kernel/projects/rt/3.0/incr/patch-3.0.41-rt61-rt62-rc1.patch.xz


Changes from 3.0.41-rt61:

---


Frank Rowand (1):
  fix printk flush of messages

Steven Rostedt (1):
  Linux 3.0.41-rt62-rc1


 kernel/printk.c |2 +-
 localversion-rt |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/1 v1] leds: Add new LED driver for lm355x chips

2012-08-24 Thread Bryan Wu

On Fri, Aug 24, 2012 at 12:06 PM, G.Shark Jeong  wrote:
> From: "G.Shark Jeong" 
>
> LM3554 and LM3556 have similar functions but very different register map.
> This driver is a general version for LM355x,lm3554 and lm3556,led chips of TI.
> lm3556 driver can be replaced by this driver.
>
> LM3554 :
> The LM3554 is a 2 MHz fixed-frequency synchronous boost
> converter with 1.2A dual high side led drivers.
> Datasheet: www.ti.com/lit/ds/symlink/lm3554.pdf
>
> LM3556 :
> The LM3556 is a 4 MHz fixed-frequency synchronous boost
> converter plus 1.5A constant current driver for a high-current white LED.
> Datasheet: www.national.com/ds/LM/LM3556.pdf
>
> Signed-off-by: G.Shark Jeong 
> ---
>  drivers/leds/Kconfig  |8 +-
>  drivers/leds/Makefile |2 +-
>  drivers/leds/leds-lm3556.c|  512 
>  drivers/leds/leds-lm355x.c|  529 
> +
>  include/linux/platform_data/leds-lm3556.h |   50 ---
>  include/linux/platform_data/leds-lm355x.h |   66 
>  6 files changed, 600 insertions(+), 567 deletions(-)
>  delete mode 100644 drivers/leds/leds-lm3556.c
>  create mode 100644 drivers/leds/leds-lm355x.c
>  delete mode 100644 include/linux/platform_data/leds-lm3556.h
>  create mode 100644 include/linux/platform_data/leds-lm355x.h
>
> diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
> index c96bbaa..4f6ced2 100644
> --- a/drivers/leds/Kconfig
> +++ b/drivers/leds/Kconfig
> @@ -422,13 +422,13 @@ config LEDS_MAX8997
>   This option enables support for on-chip LED drivers on
>   MAXIM MAX8997 PMIC.
>
> -config LEDS_LM3556
> -   tristate "LED support for LM3556 Chip"
> +config LEDS_LM355x
> +   tristate "LED support for LM355x Chips, LM3554 and LM3556"
> depends on LEDS_CLASS && I2C
> select REGMAP_I2C
> help
> - This option enables support for LEDs connected to LM3556.
> - LM3556 includes Torch, Flash and Indicator functions.
> + This option enables support for LEDs connected to LM355x.
> + LM355x includes Torch, Flash and Indicator functions.
>
>  config LEDS_OT200
> tristate "LED support for the Bachmann OT200"
> diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
> index a4429a9..b57a021 100644
> --- a/drivers/leds/Makefile
> +++ b/drivers/leds/Makefile
> @@ -48,7 +48,7 @@ obj-$(CONFIG_LEDS_NETXBIG)+= leds-netxbig.o
>  obj-$(CONFIG_LEDS_ASIC3)   += leds-asic3.o
>  obj-$(CONFIG_LEDS_RENESAS_TPU) += leds-renesas-tpu.o
>  obj-$(CONFIG_LEDS_MAX8997) += leds-max8997.o
> -obj-$(CONFIG_LEDS_LM3556)  += leds-lm3556.o
> +obj-$(CONFIG_LEDS_LM355x)  += leds-lm355x.o
>  obj-$(CONFIG_LEDS_BLINKM)  += leds-blinkm.o
>
>  # LED SPI Drivers
> diff --git a/drivers/leds/leds-lm3556.c b/drivers/leds/leds-lm3556.c
> deleted file mode 100644
> index 3062abd..000
> --- a/drivers/leds/leds-lm3556.c
> +++ /dev/null
> @@ -1,512 +0,0 @@
> -/*
> - * Simple driver for Texas Instruments LM3556 LED Flash driver chip (Rev0x03)
> - * Copyright (C) 2012 Texas Instruments
> - *
> - * This program is free software; you can redistribute it and/or modify
> - * it under the terms of the GNU General Public License version 2 as
> - * published by the Free Software Foundation.
> - *
> - * Please refer Documentation/leds/leds-lm3556.txt file.
> - */
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -#include 
> -
> -#define REG_FILT_TIME  (0x0)
> -#define REG_IVFM_MODE  (0x1)
> -#define REG_NTC(0x2)
> -#define REG_INDIC_TIME (0x3)
> -#define REG_INDIC_BLINK(0x4)
> -#define REG_INDIC_PERIOD   (0x5)
> -#define REG_TORCH_TIME (0x6)
> -#define REG_CONF   (0x7)
> -#define REG_FLASH  (0x8)
> -#define REG_I_CTRL (0x9)
> -#define REG_ENABLE (0xA)
> -#define REG_FLAG   (0xB)
> -#define REG_MAX(0xB)
> -
> -#define IVFM_FILTER_TIME_SHIFT (3)
> -#define UVLO_EN_SHIFT  (7)
> -#define HYSTERSIS_SHIFT(5)
> -#define IVM_D_TH_SHIFT (2)
> -#define IVFM_ADJ_MODE_SHIFT(0)
> -#define NTC_EVENT_LVL_SHIFT(5)
> -#define NTC_TRIP_TH_SHIFT  (2)
> -#define NTC_BIAS_I_LVL_SHIFT   (0)
> -#define INDIC_RAMP_UP_TIME_SHIFT   (3)
> -#define INDIC_RAMP_DN_TIME_SHIFT   (0)
> -#define INDIC_N_BLANK_SHIFT(4)
> -#define INDIC_PULSE_TIME_SHIFT (0)
> -#define INDIC_N_PERIOD_SHIFT   (0)
> -#define TORCH_RAMP_UP_TIME_SHIFT   (3)
> -#define TORCH_RAMP_DN_TIME_SHIFT   (0)
> -#define STROBE_USUAGE_SHIFT(7)
> -#define

Re: [PATCH v2 1/2] mfd: dt: tps6586x: Add power off control

2012-08-24 Thread Stephen Warren

On 08/24/2012 06:36 PM, Bill Huang wrote:
>>> On Sun, Aug 19, 2012 at 06:07:55PM -0700, Bill Huang wrote:
 Add DT property "ti,system-power-controller" telling whether or not
 this pmic is in charge of controlling the system power, so the power
 off routine can be hooked up to system call "pm_power_off".
...
>>> I've seen the following while trying this patch applied on top of 
>>> next-20120817:
>>>
>>> [   40.581151] Power down.
>>> [   41.583160] [ cut here ]
>>> [   41.587784] WARNING: at 
>>> /home/thierry.reding/src/kernel/linux-ipmp.git/drivers/i2c/busses/i2c-
>>> tegra.c:525 tegra_i2c_xfer+0x21c/0x29c()
...
>> Thanks Thierry, I can repro this on Tegra20 inconsistently and found, if 
>> current cpu is not cpu0 when
>> doing "machine_shutdown" (it will call "smp_send_stop"), i2c controller will 
>> failed to do any
>> transaction (looks like gic interrupt will be disabled), I'll debug further 
>> to find out the root cause.
>>
>> By the way, Tegra30 is good since it will always be cpu0 when doing 
>> "machine_shutdown", I still don't
>> know why it makes the difference against Tegra20 since I'm not familiar with 
>> those cpu stuffs and what
>> make it behave differently, I'll study a bit, thanks.
>
> I've sent the shutdown issue for discussion in ARM list: Shutdown problem in 
> SMP system happened on Tegra20.
> The cause of the i2c timeout is pretty clear now and it is not directly 
> related to this patch, so is this
> patch series acceptable? Any thoughts or comment? Thanks.

I tend to agree; power off never worked without this patch, and
sometimes does with the patch, due to nothing wrong with this patch.

Bill, please do follow up on getting the underlying Tegra issue solved
somehow though. IIRC, Joseph Lo or Prashant has a patch which enabled
the config option that Russell mentioned, so the fix may just be to wait
for that patch to get finalized, but please double-check that solves it.
Thanks!
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCHv4 0/9] * ARM: Update arch-vt8500 to Devicetree *

2012-08-24 Thread Stephen Warren

On 08/23/2012 01:35 AM, Tony Prisk wrote:
> This patchset updates arch-vt8500 to devicetree and removes all the old-style
> code. Support for WM8650 has also been added.
> 
> Example dts/dtsi files are given for the three currently supported models.
> 
> Major changes:
> 
> GPIO code has been converted to a platform_device and rewritten as WM8505
> support was broken. Add support for WM8650 gpio controller.
> 
> UHCI support was missing. Added this as a generic non-pci uhci controller as
> it doesn't require anything special. Should be usable by any system that 
> doesn't
> have special requirements to get the UHCI controller working.
> 
> Framebuffer code patched to support WM8650. The bindings for this are of 
> concern
> but there doesn't seem to be a formalized binding yet. This patch is based off
> Sascha Hauer's current patch on the dri-devel mailing list and should be 
> easily
> patched out when its finalized.
> 
> Patchset based on Arnd's arm-soc/for-next branch.

I believe all the issues I pointed out are fixed in this series. I'm not
sure I reviewed it in enough detail to ack it, but I'm fine with what I saw.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net-next v1 3/3] forcedeth: prevent TX timeouts after reboot

2012-08-24 Thread David Decotigny

This complements patch "net-forcedeth: fix TX timeout caused by TX
pause on down link" which ensures that a lock-up sequence is not sent
to the NIC. Present patch ensures that if a NIC is already locked-up,
the driver will recover from it when initializing the device.

It does the equivalent of the following recovery sequence:
 - write NVREG_TX_PAUSEFRAME_ENABLE_V1 to eth1's register
   NvRegTxPauseFrame
 - write NVREG_XMITCTL_START to eth1's register
   NvRegTransmitterControl
 - write 0 to eth1's register NvRegTransmitterControl
(this is at the heart of the "unbricking" sequence mentioned in patch
 "net-forcedeth: fix TX timeout caused by TX pause on down link")

Tested:
 - hardware is MCP55 device id 10de:0373 (rev a3), dual-port
 - reboot a kernel without any of patches mentioned
 - freeze the NIC (details on description for commit "net-forcedeth:
   fix TX timeout caused by TX pause on down link")
 - wait 5mn until ping hangs & TX timeout in dmesg
 - reboot on kernel with present patch
 - host is immediately operational, no TX timeout



Signed-off-by: David Decotigny 
---
 drivers/net/ethernet/nvidia/forcedeth.c |   11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c 
b/drivers/net/ethernet/nvidia/forcedeth.c
index 8b82457..edd6221 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -5905,11 +5905,18 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, 
const struct pci_device_i
goto out_error;
}
 
+   netif_carrier_off(dev);
+
+   /* Some NICs freeze when TX pause is enabled while NIC is
+* down, and this stays across warm reboots. The sequence
+* below should be enough to recover from that state. */
+   nv_update_pause(dev, 0);
+   nv_start_tx(dev);
+   nv_stop_tx(dev);
+
if (id->driver_data & DEV_HAS_VLAN)
nv_vlan_mode(dev, dev->features);
 
-   netif_carrier_off(dev);
-
dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n",
 dev->name, np->phy_oui, np->phyaddr, dev->dev_addr);
 
-- 
1.7.10.2.5.g20d7bc9

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net-next v1 2/3] forcedeth: fix TX timeout caused by TX pause on down link

2012-08-24 Thread David Decotigny

On some dual-port forcedeth devices such as MCP55 10de:0373 (rev a3),
when autoneg & TX pause are enabled while port is connected but
interface is down, the NIC will eventually freeze (TX timeouts,
network unreachable).

This patch ensures that TX pause is not configured in hardware when
interface is down. The TX pause request will be honored when interface
is later configured.

Tested:
 - hardware is MCP55 device id 10de:0373 (rev a3), dual-port
 - eth0 connected and UP, eth1 connected but DOWN
 - without this patch, following sequence would brick NIC:
  ifconfig eth0 down
  ifconfig eth1 up
  ifconfig eth1 down
  ethtool -A eth1 autoneg off rx on tx off
  ifconfig eth1 up
  ifconfig eth1 down
  ethtool -A eth1 autoneg on rx on tx on
  ifconfig eth1 up
  ifconfig eth1 down
  ifup eth0
  sleep 120  # or longer
  ethtool eth1
   Just in case, sequence to un-brick:
  ifconfig eth0 down
  ethtool -A eth1 autoneg off rx on tx off
  ifconfig eth1 up
  ifconfig eth1 down
  ifup eth0
 - with this patch: no TX timeout after "bricking" sequence above

Details:
 - The following register accesses have been identified as the ones
   causing the NIC to freeze in "bricking" sequence above:
- write NVREG_TX_PAUSEFRAME_ENABLE_V1 to eth1's register NvRegTxPauseFrame
- write NVREG_MISC1_PAUSE_TX | NVREG_MISC1_FORCE to eth1's register 
NvRegMisc1
- write 0 to eth1's register NvRegTransmitterControl
   This is what this patch avoids.



Signed-off-by: David Decotigny 
---
 drivers/net/ethernet/nvidia/forcedeth.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c 
b/drivers/net/ethernet/nvidia/forcedeth.c
index 51d19d8..8b82457 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -3409,7 +3409,7 @@ set_speed:
 
pause_flags = 0;
/* setup pause frame */
-   if (np->duplex != 0) {
+   if (netif_running(dev) && (np->duplex != 0)) {
if (np->autoneg && np->pause_flags & NV_PAUSEFRAME_AUTONEG) {
adv_pause = adv & (ADVERTISE_PAUSE_CAP | 
ADVERTISE_PAUSE_ASYM);
lpa_pause = lpa & (LPA_PAUSE_CAP | LPA_PAUSE_ASYM);
@@ -5455,6 +5455,7 @@ static int nv_close(struct net_device *dev)
 
netif_stop_queue(dev);
spin_lock_irq(&np->lock);
+   nv_update_pause(dev, 0); /* otherwise stop_tx bricks NIC */
nv_stop_rxtx(dev);
nv_txrx_reset(dev);
 
-- 
1.7.10.2.5.g20d7bc9

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net-next v1 1/3] forcedeth: fix buffer overflow

2012-08-24 Thread David Decotigny

Found by manual code inspection.

Tested: compile, reboot, ethtool -d ethX


Signed-off-by: David Decotigny 
---
 drivers/net/ethernet/nvidia/forcedeth.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/nvidia/forcedeth.c 
b/drivers/net/ethernet/nvidia/forcedeth.c
index f45def0..51d19d8 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -4435,7 +4435,7 @@ static void nv_get_regs(struct net_device *dev, struct 
ethtool_regs *regs, void
 
regs->version = FORCEDETH_REGS_VER;
spin_lock_irq(&np->lock);
-   for (i = 0; i <= np->register_size/sizeof(u32); i++)
+   for (i = 0; i < np->register_size/sizeof(u32); i++)
rbuf[i] = readl(base + i*sizeof(u32));
spin_unlock_irq(&np->lock);
 }
-- 
1.7.10.2.5.g20d7bc9

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH net-next v1 0/3] forcedeth: fix device lock-up for dual-port NICs

2012-08-24 Thread David Decotigny

On a dual port MCP55 10de:0373 (rev a3) NIC with both ports connected,
we identified a configuration that does freeze the whole NIC: having
autoneg & TX pause turned on while one port is physically connected
but interface is down (eg. eth1) eventually causes the whole NIC to
freeze (eth1 and... eth0). This triggers TX timeouts on the UP
interface and, more generally, an unreachable network.

In order to avoid the bug, all we have to do is make sure not to
configure TX pause on the hardware while NIC is down. This is what the
2nd patch of the series does (details included).

And, in case the NIC is in a bad state at reboot (should not happen
anymore thanks to patch above), third patch basically always makes
sure to fix the NIC when module is loaded.

I could only test this with a MCP55 10de:0373 (rev a3) PCI device on a
x86_64 host.

Any feedback on these patches welcome! In particular, please let me
know if this should not apply to other hardware.



# Patch Set Summary:

David Decotigny (3):
  forcedeth: fix buffer overflow
  forcedeth: fix TX timeout caused by TX pause on down link
  forcedeth: prevent TX timeouts after reboot

 drivers/net/ethernet/nvidia/forcedeth.c |   16 
 1 file changed, 12 insertions(+), 4 deletions(-)

-- 
1.7.10.2.5.g20d7bc9

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RT 2/2] Linux 3.2.27-rt41-rc1

2012-08-24 Thread Steven Rostedt

---
 localversion-rt |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/localversion-rt b/localversion-rt
index 629e0b4..31c892a 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt41
+-rt42-rc1
-- 
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RT 1/2] fix printk flush of messages

2012-08-24 Thread Steven Rostedt

Updates console-make-rt-friendly.patch

#ifdef CONFIG_PREEMPT_RT_FULL, printk() output is never flushed by
printk() because:

   # some liberties taken in this pseudo-code to make it easier to follow
   printk()
  vprintk()
 raw_spin_lock(&logbuf_lock)
# increment preempt_count():
preempt_disable()
  result = console_trylock_for_printk()
 retval = 0
 # lock will always be false, because preempt_count() will be >= 1
 lock = ... && !preempt_count()
 if (lock)
retval = 1
 return retval
  # result will always be false since lock will always be false
  if (result)
 console_unlock()
# this is where the printk() output would be flushed

On system boot some printk() output is flushed because register_console()
and tty_open() call console_unlock().

This change also fixes the problem that was previously fixed by
preempt-rt-allow-immediate-magic-sysrq-output-for-preempt_rt_full.patch

Signed-off-by: Frank Rowand 
Cc: Frank 
Link: http://lkml.kernel.org/r/4fb44fd0.4090...@am.sony.com
Signed-off-by: Thomas Gleixner 
---
 kernel/printk.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/printk.c b/kernel/printk.c
index 9ea..66e83e5 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -836,7 +836,7 @@ static int console_trylock_for_printk(unsigned int cpu, 
unsigned long flags)
int retval = 0, wake = 0;
 #ifdef CONFIG_PREEMPT_RT_FULL
int lock = (!early_boot_irqs_disabled && !irqs_disabled_flags(flags) &&
-   !preempt_count()) || sysrq_in_progress;
+   (preempt_count() <= 1)) || sysrq_in_progress;
 #else
int lock = 1;
 #endif
-- 
1.7.10.4


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH RT 0/2] [ANNOUNCE] 3.2.28-rt42-rc1 stable review

2012-08-24 Thread Steven Rostedt


Dear RT Folks,

This is the RT stable review cycle of patch 3.2.28-rt42-rc1.

Please scream at me if I messed something up. Please test the patches too.

The -rc release will be uploaded to kernel.org and will be deleted when
the final release is out. This is just a review release (or release candidate).

The pre-releases will not be pushed to the git repository, only the
final release is.

If all goes well, this patch will be converted to the next main release
on 8/29/2012 (The late Michael Jackson's B-Day, and also one of
my friends from high school, who hated Michael Jackson, and also
hated to be reminded that he shared the same B-Day).

Enjoy,

-- Steve


To build 3.2.28-rt42-rc1 directly, the following patches should be applied:

  http://www.kernel.org/pub/linux/kernel/v3.x/linux-3.2.tar.xz

  http://www.kernel.org/pub/linux/kernel/v3.x/patch-3.2.28.xz

  
http://www.kernel.org/pub/linux/kernel/projects/rt/3.2/patch-3.2.28-rt42-rc1.patch.xz

You can also build from 3.2.28-rt41 by applying the incremental patch:

http://www.kernel.org/pub/linux/kernel/projects/rt/3.2/incr/patch-3.2.28-rt41-rt42-rc1.patch.xz


Changes from 3.2.28-rt41:

---


Frank Rowand (1):
  fix printk flush of messages

Steven Rostedt (1):
  Linux 3.2.27-rt41-rc1


 kernel/printk.c |2 +-
 localversion-rt |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[ANNOUNCE] 3.2.28-rt41 (this is for real)

2012-08-24 Thread Steven Rostedt


Dear RT Folks,

I'm pleased to announce the 3.2.28-rt41 stable release.


This release is just an update to the new stable 3.2.28 version
and no RT specific changes have been made.


You can get this release via the git tree at:

  git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git

  Head SHA1: 31f85ffc3a341b9377ac66702947bbc0e5ca008d


Or to build 3.2.28-rt41 directly, the following patches should be applied:

  http://www.kernel.org/pub/linux/kernel/v3.x/linux-3.2.tar.xz

  http://www.kernel.org/pub/linux/kernel/v3.x/patch-3.2.28.xz

  
http://www.kernel.org/pub/linux/kernel/projects/rt/3.2/patch-3.2.28-rt41.patch.xz



Enjoy,

-- Steve



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ANNOUNCE] 3.2.27-rt40

2012-08-24 Thread Steven Rostedt

On Fri, 2012-08-24 at 22:37 -0400, Steven Rostedt wrote:
> Dear RT Folks,
> 
> I'm pleased to announce the 3.2.27-rt40 stable release.

Bah, Evolution is crashing on my /tmp directory (where my scripts place
the files). There's a bug in the gtk4 file manager (I'm using xfce),
where if the directory changes it crashes the entire app :-p. Thus I
moved the output file to another directory to post. But I copied an old
version (we're at 3.2.28-rt41 now).

Ignore this announcement, I'll send out another one :-(

-- Steve

> 
> 
> This release is just an update to the new stable 3.2.27 version
> and no RT specific changes have been made.
> 
> 
> You can get this release via the git tree at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git
> 
>   Head SHA1: 31f85ffc3a341b9377ac66702947bbc0e5ca008d
> 
> 
> Or to build 3.2.27-rt40 directly, the following patches should be applied:
> 
>   http://www.kernel.org/pub/linux/kernel/v3.x/linux-3.2.tar.xz
> 
>   http://www.kernel.org/pub/linux/kernel/v3.x/patch-3.2.27.xz
> 
>   
> http://www.kernel.org/pub/linux/kernel/projects/rt/3.2/patch-3.2.27-rt40.patch.xz
> 
> 
> 
> Enjoy,
> 
> -- Steve
> 
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[ANNOUNCE] 3.2.27-rt40

2012-08-24 Thread Steven Rostedt


Dear RT Folks,

I'm pleased to announce the 3.2.27-rt40 stable release.


This release is just an update to the new stable 3.2.27 version
and no RT specific changes have been made.


You can get this release via the git tree at:

  git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-stable-rt.git

  Head SHA1: 31f85ffc3a341b9377ac66702947bbc0e5ca008d


Or to build 3.2.27-rt40 directly, the following patches should be applied:

  http://www.kernel.org/pub/linux/kernel/v3.x/linux-3.2.tar.xz

  http://www.kernel.org/pub/linux/kernel/v3.x/patch-3.2.27.xz

  
http://www.kernel.org/pub/linux/kernel/projects/rt/3.2/patch-3.2.27-rt40.patch.xz



Enjoy,

-- Steve



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v2] mtd: cmdlinepart: fix the wrong partitions number when truncating occurs

2012-08-24 Thread Huang Shijie

Assume we have a 1GB(8Gb) nand chip, and we set the partitions
in the command line like this:
#gpmi-nand:100m(boot),100m(kernel),1g(rootfs)

In this case, the partition truncating occurs. The current code will
get the following result:

 --
root@freescale ~$ cat /proc/mtd
dev:size   erasesize  name
mtd0: 0640 0004 "boot"
mtd1: 0640 0004 "kernel"
 --

It is obvious that we lost the truncated partition `rootfs` which should
be 824M in this case.

Why? The old code sets the wrong partitions number when the truncating
occurs. This patch fixes it. Also add a `break` to shortcut the code in this
case.

After applying this patch, the result becomes:
 --
root@freescale ~$ cat /proc/mtd
dev:size   erasesize  name
mtd0: 0640 0004 "boot"
mtd1: 0640 0004 "kernel"
mtd2: 3380 0004 "rootfs"
 --

We get the right result.

Signed-off-by: Huang Shijie 
---
v1 --> v2:
[1] add more commit info.
---
 drivers/mtd/cmdlinepart.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/drivers/mtd/cmdlinepart.c b/drivers/mtd/cmdlinepart.c
index 4558e0f..fc960a3 100644
--- a/drivers/mtd/cmdlinepart.c
+++ b/drivers/mtd/cmdlinepart.c
@@ -344,7 +344,8 @@ static int parse_cmdline_partitions(struct mtd_info *master,
   "%s: partitioning exceeds flash 
size, truncating\n",
   part->mtd_id);
part->parts[i].size = master->size - 
offset;
-   part->num_parts = i;
+   part->num_parts = i + 1;
+   break;
}
offset += part->parts[i].size;
}
-- 
1.7.4.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] kdump: remove unused including

2012-08-24 Thread Wei Yongjun

From: Wei Yongjun 


Remove including  that don't need it.

Signed-off-by: Wei Yongjun 
---
 kernel/kexec.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/kexec.c b/kernel/kexec.c
index 0668d58..5e4bd78 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -21,7 +21,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 00/11] rcu: Add missing RCU idle APIs on idle loop v2

2012-08-24 Thread Michael Cree

On 25/08/12 13:19, Ben Hutchings wrote:
> On Fri, 2012-08-24 at 14:26 -0700, Paul E. McKenney wrote:
>> On Thu, Aug 23, 2012 at 04:58:24PM +0200, Frederic Weisbecker wrote:
>>> Hi,
>>>
>>> Changes since v1:
>>>
>>> - Fixed preempt handling in alpha idle loop
>>> - added ack from Geert
>>> - fixed stable email address, sorry :-/
>>>
>>> This time I built tested everywhere but: h8300 (compiler internal error),
>>> and mn10300, parisc, score (cross compilers not available in
>>> ftp://ftp.kernel.org/pub/tools/crosstool/files/bin/x86_64/4.6.3/)
>>>
>>> For testing, you can pull from:
>>>
>>> git://github.com/fweisbec/linux-dynticks.git
>>> rcu/idle-fix-v2 
>>>
>>> Thanks.
>>
>> I have queued these on -rcu branch rcu/idle:
>>
>>  git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
>>
>> This problem has been in place since 3.3, so it is hard to argue that
>> it is a regression for this merge window.  I have therefore queued it
>> for 3.7.
> 
> I don't follow that; I would expect any serious bug fix (serious enough
> for a stable update) to be acceptable for 3.6 at this point.
> 
> If the regression occurred in 3.3, then the cc lines should be something
> like:
> 
> Cc:  # 3.3+
> 
> and not the current:
> 
> Cc: 3.2.x.. 

The Alpha patches fix an even earlier regression resulting in RCU CPU
stalls on an SMP kernel built for generic Alpha (which includes the
current Debian 3.2-alpha-smp kernel) and renders the kernel pretty much
unuseable.  I've only tested the two alpha patches together but maybe
just the first patch (1/11 alpha: Fix preemption handling in idle loop)
might be needed to fix the problem in 3.2.   I'll test and let you know.

Cheers
Michael.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/5] x86: Move enabling of PSE and PGE out of init_memory_mapping

2012-08-24 Thread Yinghai Lu

On Fri, Aug 24, 2012 at 6:49 PM, Yinghai Lu  wrote:
> On Fri, Aug 24, 2012 at 6:25 PM, Yinghai Lu  wrote:
>> On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin  wrote:
>>> Depending on the platform, init_memory_mapping() may be called multiple
>>> times. Move it out to setup_arch() to avoid writing to cr4 on every call.
>>>
>>> Signed-off-by: Jacob Shin 
>>> ---
>>>  arch/x86/kernel/setup.c |   10 ++
>>>  arch/x86/mm/init.c  |   10 --
>>>  2 files changed, 10 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>>> index f4b9b80..751e020 100644
>>> --- a/arch/x86/kernel/setup.c
>>> +++ b/arch/x86/kernel/setup.c
>>> @@ -913,6 +913,16 @@ void __init setup_arch(char **cmdline_p)
>>>
>>> init_gbpages();
>>>
>>> +   /* Enable PSE if available */
>>> +   if (cpu_has_pse)
>>> +   set_in_cr4(X86_CR4_PSE);
>>> +
>>> +   /* Enable PGE if available */
>>> +   if (cpu_has_pge) {
>>> +   set_in_cr4(X86_CR4_PGE);
>>> +   __supported_pte_mask |= _PAGE_GLOBAL;
>>> +   }
>>> +
>>
>> please don't put it directly in setup_arch().
>>
>> and another function.
>>
>
> Jacob, hpa
>
> can you use attached one to replace the first patch?

Please use attached two instead.

Thanks

Yinghai


get_page_size_mask_v3.patch
Description: Binary data


mr_cal.patch
Description: Binary data

Re: [PATCH 1/5] x86: Move enabling of PSE and PGE out of init_memory_mapping

2012-08-24 Thread Yinghai Lu

On Fri, Aug 24, 2012 at 6:25 PM, Yinghai Lu  wrote:
> On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin  wrote:
>> Depending on the platform, init_memory_mapping() may be called multiple
>> times. Move it out to setup_arch() to avoid writing to cr4 on every call.
>>
>> Signed-off-by: Jacob Shin 
>> ---
>>  arch/x86/kernel/setup.c |   10 ++
>>  arch/x86/mm/init.c  |   10 --
>>  2 files changed, 10 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>> index f4b9b80..751e020 100644
>> --- a/arch/x86/kernel/setup.c
>> +++ b/arch/x86/kernel/setup.c
>> @@ -913,6 +913,16 @@ void __init setup_arch(char **cmdline_p)
>>
>> init_gbpages();
>>
>> +   /* Enable PSE if available */
>> +   if (cpu_has_pse)
>> +   set_in_cr4(X86_CR4_PSE);
>> +
>> +   /* Enable PGE if available */
>> +   if (cpu_has_pge) {
>> +   set_in_cr4(X86_CR4_PGE);
>> +   __supported_pte_mask |= _PAGE_GLOBAL;
>> +   }
>> +
>
> please don't put it directly in setup_arch().
>
> and another function.
>

Jacob, hpa

can you use attached one to replace the first patch?

Thanks

Yinghai


get_page_size_mask.patch
Description: Binary data

[PATCH] staging: csr: use is_zero_ether_addr() instead of memcmp()

2012-08-24 Thread Wei Yongjun

From: Wei Yongjun 

Using is_zero_ether_addr() instead of directly use
memcmp() to determine if the ethernet address is all
zeros.

spatch with a semantic match is used to find this problem.
(http://coccinelle.lip6.fr/)

Signed-off-by: Wei Yongjun 
---
 drivers/staging/csr/sme_wext.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/csr/sme_wext.c b/drivers/staging/csr/sme_wext.c
index 7e85907..f09a738 100644
--- a/drivers/staging/csr/sme_wext.c
+++ b/drivers/staging/csr/sme_wext.c
@@ -1191,8 +1191,6 @@ unifi_siwap(struct net_device *dev, struct 
iw_request_info *info,
 netInterface_priv_t *interfacePriv = (netInterface_priv_t 
*)netdev_priv(dev);
 unifi_priv_t *priv = interfacePriv->privPtr;
 int err = 0;
-const unsigned char zero_bssid[ETH_ALEN] = {0x00, 0x00, 0x00,
-0x00, 0x00, 0x00};
 
 func_enter();
 
@@ -1213,7 +1211,7 @@ unifi_siwap(struct net_device *dev, struct 
iw_request_info *info,
unifi_trace(priv, UDBG1, "unifi_siwap: asked for %pM\n",
wrqu->ap_addr.sa_data);
 
-if (!memcmp(wrqu->ap_addr.sa_data, zero_bssid, ETH_ALEN)) {
+if (is_zero_ether_addr(wrqu->ap_addr.sa_data)) {
 priv->ignore_bssid_join = FALSE;
 err = sme_mgt_disconnect(priv);
 if (err) {


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/5] x86: Move enabling of PSE and PGE out of init_memory_mapping

2012-08-24 Thread Yinghai Lu

On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin  wrote:
> Depending on the platform, init_memory_mapping() may be called multiple
> times. Move it out to setup_arch() to avoid writing to cr4 on every call.
>
> Signed-off-by: Jacob Shin 
> ---
>  arch/x86/kernel/setup.c |   10 ++
>  arch/x86/mm/init.c  |   10 --
>  2 files changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index f4b9b80..751e020 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -913,6 +913,16 @@ void __init setup_arch(char **cmdline_p)
>
> init_gbpages();
>
> +   /* Enable PSE if available */
> +   if (cpu_has_pse)
> +   set_in_cr4(X86_CR4_PSE);
> +
> +   /* Enable PGE if available */
> +   if (cpu_has_pge) {
> +   set_in_cr4(X86_CR4_PGE);
> +   __supported_pte_mask |= _PAGE_GLOBAL;
> +   }
> +

please don't put it directly in setup_arch().

and another function.

Thanks

Yinghai

> /* max_pfn_mapped is updated here */
> max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
> max_pfn_mapped = max_low_pfn_mapped;
> diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
> index e0e6990..2f07e09 100644
> --- a/arch/x86/mm/init.c
> +++ b/arch/x86/mm/init.c
> @@ -149,16 +149,6 @@ unsigned long __init_refok init_memory_mapping(unsigned 
> long start,
> use_gbpages = direct_gbpages;
>  #endif
>
> -   /* Enable PSE if available */
> -   if (cpu_has_pse)
> -   set_in_cr4(X86_CR4_PSE);
> -
> -   /* Enable PGE if available */
> -   if (cpu_has_pge) {
> -   set_in_cr4(X86_CR4_PGE);
> -   __supported_pte_mask |= _PAGE_GLOBAL;
> -   }
> -
> if (use_gbpages)
> page_size_mask |= 1 << PG_LEVEL_1G;
> if (use_pse)
> --
> 1.7.9.5
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 5/5] x86: if kernel .text .data .bss are not marked as E820_RAM, complain and fix

2012-08-24 Thread Yinghai Lu

On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin  wrote:
> There could be cases where user supplied memmap=exactmap memory
> mappings do not mark the region where the kernel .text .data and
> .bss reside as E820_RAM as reported here:
>
> https://lkml.org/lkml/2012/8/14/86
>
> Handle it by complaining, and adding the range back into the e820.
>
> Signed-off-by: Jacob Shin 
> ---
>  arch/x86/kernel/setup.c |   15 +++
>  1 file changed, 15 insertions(+)
>
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 4217fb4..b84aceb5 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -926,6 +926,21 @@ void __init setup_arch(char **cmdline_p)
> insert_resource(_resource, _resource);
> insert_resource(_resource, _resource);
>
> +   /*
> +* Complain if .text .data and .bss are not marked as E820_RAM and
> +* attempt to fix it by adding the range. We may have a confused BIOS,
> +* or the user may have incorrectly supplied it via memmap=exactmap. 
> If
> +* we really are running on top non-RAM, we will crash later anyways.
> +*/
> +   if (!e820_all_mapped(code_resource.start, bss_resource.end, 
> E820_RAM)) {
> +   pr_warn(".text .data .bss are not marked as E820_RAM!\n");
> +
> +   e820_add_region(code_resource.start,
> +   bss_resource.end - code_resource.start + 1,
> +   E820_RAM);
> +   sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), 
> _map);

   this sanitize_e820_map could be spared. trim_bios_range will
do that always.

> +   }
> +
> trim_bios_range();
>  #ifdef CONFIG_X86_32
> if (ppro_with_ram_bug()) {

also should use brk_limit instead of bss_resource.end. aka need to
keep the map for brk area.

Thanks

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 00/11] rcu: Add missing RCU idle APIs on idle loop v2

2012-08-24 Thread Ben Hutchings

On Fri, 2012-08-24 at 14:26 -0700, Paul E. McKenney wrote:
> On Thu, Aug 23, 2012 at 04:58:24PM +0200, Frederic Weisbecker wrote:
> > Hi,
> > 
> > Changes since v1:
> > 
> > - Fixed preempt handling in alpha idle loop
> > - added ack from Geert
> > - fixed stable email address, sorry :-/
> > 
> > This time I built tested everywhere but: h8300 (compiler internal error),
> > and mn10300, parisc, score (cross compilers not available in
> > ftp://ftp.kernel.org/pub/tools/crosstool/files/bin/x86_64/4.6.3/)
> > 
> > For testing, you can pull from:
> > 
> > git://github.com/fweisbec/linux-dynticks.git
> > rcu/idle-fix-v2 
> > 
> > Thanks.
> 
> I have queued these on -rcu branch rcu/idle:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git
> 
> This problem has been in place since 3.3, so it is hard to argue that
> it is a regression for this merge window.  I have therefore queued it
> for 3.7.

I don't follow that; I would expect any serious bug fix (serious enough
for a stable update) to be acceptable for 3.6 at this point.

If the regression occurred in 3.3, then the cc lines should be something
like:

Cc:  # 3.3+

and not the current:

Cc: 3.2.x.. 

(Note, version annotations should be on the right of the address, not in
the 'real name' position on the left.)

Ben.

-- 
Ben Hutchings
Experience is what causes a person to make new mistakes instead of old ones.


signature.asc
Description: This is a digitally signed message part

Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM

2012-08-24 Thread H. Peter Anvin

On 08/24/2012 05:49 PM, Jacob Shin wrote:
> 
> Right, I think what I was attempting to do was to merge the 1MB
> with E820_RAM right above 1MB:
> 
> So instead of:
> 
> init_memory_mapping(0, 1MB)
> init_memory_mapping(1MB, 2GB)
> 
> It would be:
> 
> init_memory_mapping(0, 2GB)
> 
> While taking care of the odd case where there is a gap right after
> 1MB.
> 
> But if its not worth it, I can move it out of the loop.
> 

What is the benefit?

-hpa

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM

2012-08-24 Thread Yinghai Lu

On Fri, Aug 24, 2012 at 4:55 PM, Jacob Shin  wrote:
> Currently direct mappings are created for [ 0 to max_low_pfn<<PAGE_SHIFT )
> and [ 4GB to max_pfn<<PAGE_SHIFT ), which may include regions that are not
> backed by actual DRAM. This is fine for holes under 4GB which are covered
> by fixed and variable range MTRRs to be UC. However, we run into trouble
> on higher memory addresses which cannot be covered by MTRRs.
>
> Our system with 1TB of RAM has an e820 that looks like this:
>
>  BIOS-e820: [mem 0x-0x000983ff] usable
>  BIOS-e820: [mem 0x00098400-0x0009] reserved
>  BIOS-e820: [mem 0x000d-0x000f] reserved
>  BIOS-e820: [mem 0x0010-0xc7eb] usable
>  BIOS-e820: [mem 0xc7ec-0xc7ed7fff] ACPI data
>  BIOS-e820: [mem 0xc7ed8000-0xc7ed9fff] ACPI NVS
>  BIOS-e820: [mem 0xc7eda000-0xc7ff] reserved
>  BIOS-e820: [mem 0xfec0-0xfec0] reserved
>  BIOS-e820: [mem 0xfee0-0xfee00fff] reserved
>  BIOS-e820: [mem 0xfff0-0x] reserved
>  BIOS-e820: [mem 0x0001-0x00e037ff] usable
>  BIOS-e820: [mem 0x00e03800-0x00fc] reserved
>  BIOS-e820: [mem 0x0100-0x011ffeff] usable
>
> and so direct mappings are created for huge memory hole between
> 0x00e03800 to 0x0100. Even though the kernel never
> generates memory accesses in that region, since the page tables mark
> them incorrectly as being WB, our (AMD) processor ends up causing a MCE
> while doing some memory bookkeeping/optimizations around that area.
>
> This patch iterates through e820 and only direct maps ranges that are
> marked as E820_RAM, and keeps track of those pfn ranges. Depending on
> the alignment of E820 ranges, this may possibly result in using smaller
> size (i.e. 4K instead of 2M or 1G) page tables.
>
> Signed-off-by: Jacob Shin 
> ---
>  arch/x86/include/asm/page_types.h |9 +++
>  arch/x86/kernel/setup.c   |  125 
> +
>  arch/x86/mm/init.c|2 +
>  arch/x86/mm/init_64.c |6 +-
>  4 files changed, 112 insertions(+), 30 deletions(-)
>
> diff --git a/arch/x86/include/asm/page_types.h 
> b/arch/x86/include/asm/page_types.h
> index e21fdd1..409047a 100644
> --- a/arch/x86/include/asm/page_types.h
> +++ b/arch/x86/include/asm/page_types.h
> @@ -3,6 +3,7 @@
>
>  #include 
>  #include 
> +#include 
>
>  /* PAGE_SHIFT determines the page size */
>  #define PAGE_SHIFT 12
> @@ -40,12 +41,20 @@
>  #endif /* CONFIG_X86_64 */
>
>  #ifndef __ASSEMBLY__
> +#include 
>
>  extern int devmem_is_allowed(unsigned long pagenr);
>
>  extern unsigned long max_low_pfn_mapped;
>  extern unsigned long max_pfn_mapped;
>
> +extern struct range pfn_mapped[E820_X_MAX];
> +extern int nr_pfn_mapped;
> +
> +extern void add_pfn_range_mapped(unsigned long start_pfn, unsigned long 
> end_pfn);
> +extern bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long 
> end_pfn);
> +extern bool pfn_is_mapped(unsigned long pfn);
> +
>  static inline phys_addr_t get_max_mapped(void)
>  {
> return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 751e020..4217fb4 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -115,13 +115,46 @@
>  #include 
>
>  /*
> - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
> - * The direct mapping extends to max_pfn_mapped, so that we can directly 
> access
> - * apertures, ACPI and other tables without having to play with fixmaps.
> + * max_low_pfn_mapped: highest direct mapped pfn under 4GB
> + * max_pfn_mapped: highest direct mapped pfn over 4GB
> + *
> + * The direct mapping only covers E820_RAM regions, so the ranges and gaps 
> are
> + * represented by pfn_mapped
>   */
>  unsigned long max_low_pfn_mapped;
>  unsigned long max_pfn_mapped;
>
> +struct range pfn_mapped[E820_X_MAX];
> +int nr_pfn_mapped;
> +
> +void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
> +{
> +   nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX,
> +nr_pfn_mapped, start_pfn, 
> end_pfn);
> +
> +   max_pfn_mapped = max(max_pfn_mapped, end_pfn);
> +
> +   if (end_pfn <= (1UL << (32 - PAGE_SHIFT)))
> +   max_low_pfn_mapped = max(max_low_pfn_mapped, end_pfn);
> +}
> +
> +bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
> +{
> +   int i;
> +
> +   for (i = 0; i < nr_pfn_mapped; i++)
> +   if ((start_pfn >= pfn_mapped[i].start) &&
> +   (end_pfn <= pfn_mapped[i].end))
> +   return true;
> +
> +   return false;
> +}
> +
> +bool pfn_is_mapped(unsigned long pfn)
> +{
> +   return pfn_range_is_mapped(pfn, pfn + 1);
> +}
> +

looks like you could avoid add pfn_mapped[]

Re: BUG: Kprobe smoke test: 2 out of 6 tests failed

2012-08-24 Thread Steven Rostedt

On Fri, 2012-08-24 at 09:41 -0400, Steven Rostedt wrote:

> On Fri, 2012-08-24 at 15:15 +0800, Fengguang Wu wrote:
> > Hi Steven,
> > 
> > The following test fails are mostly due to this commit, or one of the
> > last 4 commits in
> > 
> > tree:   
> > git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git 
> > tip/perf/core
> > head:   d57c5d51a30152f3175d2344cb6395f08bf8ee0c
> > commit: d57c5d51a30152f3175d2344cb6395f08bf8ee0c [100/100] ftrace/x86: Add 
> > support for -mfentry to x86_64
> > 
> > [9.084881] Kprobe smoke test failed: register_jprobe returned -22
> > [9.086786] Kprobe smoke test failed: register_jprobes returned -22

Masami,

Seems that when we use fentry, we break jprobes. I thought the patches
that we added would just move the call to the next op, not fail totally?

Are jprobes deprecated?

-- Steve

> > [9.121281] BUG: Kprobe smoke test: 2 out of 6 tests failed
> > [9.171132] Testing tracer function: PASSED
> > [9.408938] Testing dynamic ftrace: PASSED
> > 
> > Thanks,
> > Fengguang
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Drop support for x86-32

2012-08-24 Thread Cruz Julian Bishop

On 25/08/12 02:36, Alan Cox wrote:
>> almost all x86-32 boxes will be trash in 2017, remaining boxes will
>> use long term tree
> People will still be manufacturing 32bit x86 processors in 2017 I'm quite
> sure. You appear entirely out of touch. There are already serious
> discussions going on about things like the kernel modifications needed to
> make 32bit systems run past 2038. 
>
> Besides which what Linux supports is defined by what people chose to
> contribute code for. We support 32bit 680x0 machines that have been
> obsolete for nigh on 20 years because someone chooses to support them.
>
> For that matter if someone comes along with DEC-10 port and it works as
> was clean without messing up the core I'm sure we'd add that too!
Is that a hint? :P
> Alan
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] kernel.h: Introduce IDIV_ROUND_CLOSEST

2012-08-24 Thread Guenter Roeck

DIV_ROUND_CLOSEST returns a bad result for negative dividends:
DIV_ROUND_CLOSEST(-2, 2) = 0

Most of the time this does not matter. However, in the hardware monitoring
subsystem, it is often used on integers which can be negative (such as
temperatures). Introduce new macro IDIV_ROUND_CLOSEST which also supports
negative dividends.

Signed-off-by: Guenter Roeck 
---
I can take this patch through my hwmon tree, but would like to get an Ack first.
Alternative would be to put it into include/linux/hwmon.h, but I would prefer
to avoid that.

Also, if someone has an idea for a simpler implementation, I would really like
to know about it.

 include/linux/kernel.h |9 +
 1 file changed, 9 insertions(+)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 6043821..a89483c 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -89,6 +89,15 @@
 }  \
 )
 
+#define IDIV_ROUND_CLOSEST(x, divisor)(\
+{  \
+   typeof(x) __x = x;  \
+   typeof(divisor) __d1 = divisor; \
+   typeof(divisor) __d2 = (__x) < 0 ? -(__d1) : (__d1);\
+   (((__x) + ((__d2) / 2)) / (__d1));  \
+}  \
+)
+
 /*
  * Multiplies an integer by a fraction, while avoiding unnecessary
  * overflow or loss of precision.
-- 
1.7.9.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Drop support for x86-32

2012-08-24 Thread Cruz Julian Bishop

On 25/08/12 03:05, wbrana wrote:
> On 8/24/12, Martin Nybo Andersen  wrote:
>> What I'd hate even more is rendering my old working hardware useless by
>> removing x86-32 support from the kernel. To reason the removal by saying
>> "Microsoft plans to do it" just makes me go bonkers...
> Your old hardware will work fine with long term kernel.
People won't want to be forced to stick with an old version of the
kernel which,
as you said, will not have any backported features.

People deserve the choice to use whatever they have, however they want.
That's the way it works. The way it has been, currently is, and always
will be.

...Unless someone at Microsoft* holds Linus hostage** in order to take over
Linux kernel development. Not that it's likely to ever happen


*Not being a troll or hurling personal insults at Microsoft - It's just
that they
currently have the majority share on the desktop (and made the original
announcement for W9)

**If this ever happens, even if it's by a terrorist group and not a company,
please don't sue me for conspiracy to kidnapping. It was just an example :)

>
>> These legacy apps will most likely be compiled for x86-32 and not x32 (an
>> argument for not removing x86-32 support on a running x86-64 kernel).
> Which legacy apps do you mean?
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM

2012-08-24 Thread Jacob Shin

On Fri, Aug 24, 2012 at 05:30:21PM -0700, H. Peter Anvin wrote:
> On 08/24/2012 04:55 PM, Jacob Shin wrote:
> >+
> >+for (i = 0; i < e820.nr_map; i++) {
> >+struct e820entry *ei = [i];
> >+u64 start = ei->addr;
> >+u64 end = ei->addr + ei->size;
> >+
> >+/* we only map E820_RAM */
> >+if (ei->type != E820_RAM)
> >+continue;
> >+
> >+if (end <= ISA_END_ADDRESS)
> >+continue;
> >+
> >+if (start <= ISA_END_ADDRESS)
> >+start = 0;
> >+#ifdef CONFIG_X86_32
> >+/* on 32 bit, we only map up to max_low_pfn */
> >+if ((start >> PAGE_SHIFT) >= max_low_pfn)
> >+continue;
> >+
> >+if ((end >> PAGE_SHIFT) > max_low_pfn)
> >+end = max_low_pfn << PAGE_SHIFT;
> >+#endif
> >+/* the ISA range is always mapped regardless of holes */
> >+if (!pfn_range_is_mapped(0, ISA_END_ADDRESS << PAGE_SHIFT) &&
> >+start != 0)
> >+init_memory_mapping(0, ISA_END_ADDRESS);
> >+
> >+init_memory_mapping(start, end);
> >+}
> >+
> 
> The ISA range mapping doesn't really make sense *inside* the loop,
> no? It seems you could do that before you enter the loop and then
> simply have:
> 
> + if (end <= ISA_END_ADDRESS)
> + continue;
> +
> + if (start <= ISA_END_ADDRESS)
> + start = ISA_END_ADDRESS;
> 
> ... no?

Right, I think what I was attempting to do was to merge the 1MB
with E820_RAM right above 1MB:

So instead of:

init_memory_mapping(0, 1MB)
init_memory_mapping(1MB, 2GB)

It would be:

init_memory_mapping(0, 2GB)

While taking care of the odd case where there is a gap right after
1MB.

But if it's not worth it, I can move it out of the loop.

> 
>   -hpa
> 
> -- 
> H. Peter Anvin, Intel Open Source Technology Center
> I work for Intel.  I don't speak on their behalf.
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: [PATCH v2 1/2] mfd: dt: tps6586x: Add power off control

2012-08-24 Thread Bill Huang

nvpublic
> > On Sun, Aug 19, 2012 at 06:07:55PM -0700, Bill Huang wrote:
> > > Add DT property "ti,system-power-controller" telling whether or not
> > > this pmic is in charge of controlling the system power, so the power
> > > off routine can be hooked up to system call "pm_power_off".
> > >
> > > Based on the work by:
> > > Dan Willemsen 
> > >
> > > Signed-off-by: Bill Huang 
> > > Tested-by: Stephen Warren 
> > > ---
> > >  .../devicetree/bindings/regulator/tps6586x.txt |6 ++
> > >  drivers/mfd/tps6586x.c |   19 
> > > +++
> > >  include/linux/mfd/tps6586x.h   |1 +
> > >  3 files changed, 26 insertions(+), 0 deletions(-)
> >
> > Hi,
> >
> > I've seen the following while trying this patch applied on top of 
> > next-20120817:
> >
> > [   40.581151] Power down.
> > [   41.583160] [ cut here ]
> > [   41.587784] WARNING: at 
> > /home/thierry.reding/src/kernel/linux-ipmp.git/drivers/i2c/busses/i2c-
> > tegra.c:525 tegra_i2c_xfer+0x21c/0x29c()
> > [   41.599850] Modules linked in:
> > [   41.602927] [] (unwind_backtrace+0x0/0xf8) from []
> > (warn_slowpath_common+0x4c/0x64)
> > [   41.612304] [] (warn_slowpath_common+0x4c/0x64) from 
> > []
> > (warn_slowpath_null+0x1c/0x24)
> > [   41.621947] [] (warn_slowpath_null+0x1c/0x24) from []
> > (tegra_i2c_xfer+0x21c/0x29c)
> > [   41.631244] [] (tegra_i2c_xfer+0x21c/0x29c) from [] 
> > (__i2c_transfer+0x44/0x80)
> > [   41.640192] [] (__i2c_transfer+0x44/0x80) from [] 
> > (i2c_transfer+0x7c/0xb8)
> > [   41.648796] [] (i2c_transfer+0x7c/0xb8) from [] 
> > (regmap_i2c_read+0x48/0x64)
> > [   41.657485] [] (regmap_i2c_read+0x48/0x64) from []
> (_regmap_raw_read+0x90/0x98)
> > [   41.666518] [] (_regmap_raw_read+0x90/0x98) from [] 
> > (_regmap_read+0x50/0xa8)
> > [   41.675290] [] (_regmap_read+0x50/0xa8) from []
> (_regmap_update_bits+0x24/0x64)
> > [   41.684322] [] (_regmap_update_bits+0x24/0x64) from 
> > []
> > (regmap_update_bits+0x3c/0x58)
> > [   41.693885] [] (regmap_update_bits+0x3c/0x58) from []
> > (tps6586x_power_off+0x18/0x38)
> > [   41.703362] [] (tps6586x_power_off+0x18/0x38) from []
> > (machine_power_off+0x1c/0x24)
> > [   41.712749] [] (machine_power_off+0x1c/0x24) from [] 
> > (sys_reboot+0x138/0x1b0)
> > [   41.721612] [] (sys_reboot+0x138/0x1b0) from [] 
> > (ret_fast_syscall+0x0/0x30)
> > [   41.730293] ---[ end trace 9af366974fefa459 ]---
> > [   41.734906] tegra-i2c tegra-i2c.3: i2c transfer timed out
> > [   41.740689] Kernel panic - not syncing: Attempted to kill init! 
> > exitcode=0x
> > [   41.740689]
> > [   41.749823] [] (unwind_backtrace+0x0/0xf8) from [] 
> > (panic+0x8c/0x1d8)
> > [   41.757993] [] (panic+0x8c/0x1d8) from [] 
> > (do_exit+0x694/0x750)
> > [   41.765636] [] (do_exit+0x694/0x750) from [] 
> > (do_group_exit+0x3c/0xb0)
> > [   41.773884] [] (do_group_exit+0x3c/0xb0) from [] 
> > (__wake_up_parent+0x0/0x18)
> 
> Thanks Thierry, I can repro this on Tegra20 inconsistently and found, if 
> current cpu is not cpu0 when
> doing "machine_shutdown" (it will call "smp_send_stop"), i2c controller will 
> fail to do any
> transaction (looks like gic interrupt will be disabled), I'll debug further 
> to find out the root cause.
> 
> By the way, Tegra30 is good since it will always be cpu0 when doing 
> "machine_shutdown", I still don't
> know why it makes the difference against Tegra20 since I'm not familiar with 
> those cpu stuffs and what
> make it behave differently, I'll study a bit, thanks.
> 
I've sent the shutdown issue for discussion in ARM list: Shutdown problem in 
SMP system happened on Tegra20.
The cause of the i2c timeout is pretty clear now and it is not directly related 
to this patch, so is this
patch series acceptable? Any thoughts or comments? Thanks.

> >
> > Thierry
> >
> > * Unknown Key
> > * 0x7F3EB3A1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 02/14] aoe: kernel thread handles I/O completions for simple locking

2012-08-24 Thread Ed Cashin

Andrew Morton  writes:

> On Fri, 17 Aug 2012 21:24:08 -0400
> Ed Cashin  wrote:
...
>> +sigfillset();
>> +sigprocmask(SIG_BLOCK, , NULL);
>> +flush_signals(current);
>
> This is a kernel thread - it shouldn't need to fiddle with signals.
...

Thanks for the feedback.  I'll try out your suggestions and return with
changes and explanations.

-- 
  Ed

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM

2012-08-24 Thread H. Peter Anvin


On 08/24/2012 04:55 PM, Jacob Shin wrote:

+
+   for (i = 0; i < e820.nr_map; i++) {
+   struct e820entry *ei = [i];
+   u64 start = ei->addr;
+   u64 end = ei->addr + ei->size;
+
+   /* we only map E820_RAM */
+   if (ei->type != E820_RAM)
+   continue;
+
+   if (end <= ISA_END_ADDRESS)
+   continue;
+
+   if (start <= ISA_END_ADDRESS)
+   start = 0;
+#ifdef CONFIG_X86_32
+   /* on 32 bit, we only map up to max_low_pfn */
+   if ((start >> PAGE_SHIFT) >= max_low_pfn)
+   continue;
+
+   if ((end >> PAGE_SHIFT) > max_low_pfn)
+   end = max_low_pfn << PAGE_SHIFT;
+#endif
+   /* the ISA range is always mapped regardless of holes */
+   if (!pfn_range_is_mapped(0, ISA_END_ADDRESS << PAGE_SHIFT) &&
+   start != 0)
+   init_memory_mapping(0, ISA_END_ADDRESS);
+
+   init_memory_mapping(start, end);
+   }
+


The ISA range mapping doesn't really make sense *inside* the loop, no? 
It seems you could do that before you enter the loop and then simply have:


+   if (end <= ISA_END_ADDRESS)
+   continue;
+
+   if (start <= ISA_END_ADDRESS)
+   start = ISA_END_ADDRESS;

... no?

-hpa

--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH V5 0/5] clk: mmp: add clock framework for mmp

2012-08-24 Thread Mike Turquette

Quoting Chao Xie (2012-08-19 19:55:10)
> From: Chao Xie 
>  arch/arm/mach-mmp/Kconfig|3 +
>  drivers/clk/Makefile |3 +
>  drivers/clk/mmp/Makefile |9 +
>  drivers/clk/mmp/clk-apbc.c   |  152 ++
>  drivers/clk/mmp/clk-apmu.c   |   97 +
>  drivers/clk/mmp/clk-frac.c   |  153 ++
>  drivers/clk/mmp/clk-mmp2.c   |  449 
> ++
>  drivers/clk/mmp/clk-pxa168.c |  346 
>  drivers/clk/mmp/clk-pxa910.c |  320 ++
>  drivers/clk/mmp/clk.h|   35 

Looks like you are not removing your arch/arm/mach-mmp/clock.c.  Is that
intentional?

When I apply your series against v3.6-rc3 I find that compilation breaks
with mmp2_defconfig due to conflicting definitions for the clk api
(clk_enable, clk_set_rate, etc).  This is not surprising since your
legacy clock code is neither deleted nor removed from compilation
conditionally by checking for CONFIG_COMMON_CLK.

Did I somehow manage to misapply your patches or should your patches
have removed the arch-specific clock framework as well?

Regards,
Mike

>  10 files changed, 1567 insertions(+), 0 deletions(-)
>  create mode 100644 drivers/clk/mmp/Makefile
>  create mode 100644 drivers/clk/mmp/clk-apbc.c
>  create mode 100644 drivers/clk/mmp/clk-apmu.c
>  create mode 100644 drivers/clk/mmp/clk-frac.c
>  create mode 100644 drivers/clk/mmp/clk-mmp2.c
>  create mode 100644 drivers/clk/mmp/clk-pxa168.c
>  create mode 100644 drivers/clk/mmp/clk-pxa910.c
>  create mode 100644 drivers/clk/mmp/clk.h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM

2012-08-24 Thread Jacob Shin

On Fri, Aug 24, 2012 at 06:55:14PM -0500, Jacob Shin wrote:
> Currently direct mappings are created for [ 0 to max_low_pfn<<PAGE_SHIFT )
> and [ 4GB to max_pfn<<PAGE_SHIFT ), which may include regions that are not
> backed by actual DRAM. This is fine for holes under 4GB which are covered
> by fixed and variable range MTRRs to be UC. However, we run into trouble
> on higher memory addresses which cannot be covered by MTRRs.
> 
> Our system with 1TB of RAM has an e820 that looks like this:
> 
>  BIOS-e820: [mem 0x-0x000983ff] usable
>  BIOS-e820: [mem 0x00098400-0x0009] reserved
>  BIOS-e820: [mem 0x000d-0x000f] reserved
>  BIOS-e820: [mem 0x0010-0xc7eb] usable
>  BIOS-e820: [mem 0xc7ec-0xc7ed7fff] ACPI data
>  BIOS-e820: [mem 0xc7ed8000-0xc7ed9fff] ACPI NVS
>  BIOS-e820: [mem 0xc7eda000-0xc7ff] reserved
>  BIOS-e820: [mem 0xfec0-0xfec0] reserved
>  BIOS-e820: [mem 0xfee0-0xfee00fff] reserved
>  BIOS-e820: [mem 0xfff0-0x] reserved
>  BIOS-e820: [mem 0x0001-0x00e037ff] usable
>  BIOS-e820: [mem 0x00e03800-0x00fc] reserved
>  BIOS-e820: [mem 0x0100-0x011ffeff] usable
> 
> and so direct mappings are created for huge memory hole between
> 0x00e03800 to 0x0100. Even though the kernel never
> generates memory accesses in that region, since the page tables mark
> them incorrectly as being WB, our (AMD) processor ends up causing a MCE
> while doing some memory bookkeeping/optimizations around that area.
> 
> This patch iterates through e820 and only direct maps ranges that are
> marked as E820_RAM, and keeps track of those pfn ranges. Depending on
> the alignment of E820 ranges, this may possibly result in using smaller
> size (i.e. 4K instead of 2M or 1G) page tables.
> 
> Signed-off-by: Jacob Shin 
> ---
>  arch/x86/include/asm/page_types.h |9 +++
>  arch/x86/kernel/setup.c   |  125 
> +
>  arch/x86/mm/init.c|2 +
>  arch/x86/mm/init_64.c |6 +-
>  4 files changed, 112 insertions(+), 30 deletions(-)

> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 751e020..4217fb4 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -115,13 +115,46 @@
>  #include 
>  
>  /*
> - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
> - * The direct mapping extends to max_pfn_mapped, so that we can directly 
> access
> - * apertures, ACPI and other tables without having to play with fixmaps.
> + * max_low_pfn_mapped: highest direct mapped pfn under 4GB
> + * max_pfn_mapped: highest direct mapped pfn over 4GB
> + *
> + * The direct mapping only covers E820_RAM regions, so the ranges and gaps 
> are
> + * represented by pfn_mapped
>   */
>  unsigned long max_low_pfn_mapped;
>  unsigned long max_pfn_mapped;
>  
> +struct range pfn_mapped[E820_X_MAX];
> +int nr_pfn_mapped;
> +
> +void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
> +{
> + nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX,
> +  nr_pfn_mapped, start_pfn, end_pfn);
> +
> + max_pfn_mapped = max(max_pfn_mapped, end_pfn);
> +
> + if (end_pfn <= (1UL << (32 - PAGE_SHIFT)))
> + max_low_pfn_mapped = max(max_low_pfn_mapped, end_pfn);
> +}
> +
> +bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
> +{
> + int i;
> +
> + for (i = 0; i < nr_pfn_mapped; i++)
> + if ((start_pfn >= pfn_mapped[i].start) &&
> + (end_pfn <= pfn_mapped[i].end))
> + return true;
> +
> + return false;
> +}
> +
> +bool pfn_is_mapped(unsigned long pfn)
> +{
> + return pfn_range_is_mapped(pfn, pfn + 1);
> +}
> +
>  #ifdef CONFIG_DMI
>  RESERVE_BRK(dmi_alloc, 65536);
>  #endif
> @@ -296,6 +329,68 @@ static void __init cleanup_highmap(void)
>  }
>  #endif
>  
> +/*
> + * Iterate through E820 memory map and create direct mappings for only 
> E820_RAM
> + * regions. We cannot simply create direct mappings for all pfns from
> + * [0 to max_low_pfn) and [4GB to max_pfn) because of possible memory holes 
> in
> + * high addresses that cannot be marked as UC by fixed/variable range MTRRs.
> + * Depending on the alignment of E820 ranges, this may possibly result in 
> using
> + * smaller size (i.e. 4K instead of 2M or 1G) page tables.
> + */
> +static void __init init_memory(void)
> +{
> + int i;
> +
> + init_gbpages();
> +
> + /* Enable PSE if available */
> + if (cpu_has_pse)
> + set_in_cr4(X86_CR4_PSE);
> +
> + /* Enable PGE if available */
> + if (cpu_has_pge) {
> + set_in_cr4(X86_CR4_PGE);
> + __supported_pte_mask |= _PAGE_GLOBAL;
> + }
> +
> + for (i = 0; i < e820.nr_map; i++) {
> +

RE: Shutdown problem in SMP system happened on Tegra20

2012-08-24 Thread Bill Huang

nvpublic
> On Fri, Aug 24, 2012 at 04:23:39PM +0800, Bill Huang wrote:
> > When doing shutdown on Tegra20/Tegra30, we need to read/write PMIC
> > registers through I2C to perform the power off sequence.
> > Unfortunately, sometimes we'll fail to shutdown due to I2C timeout on
> > Tegra20. And the cause of the timeout is due to the CPU which I2C
> > controller IRQ affined to will have chance to be offlined without
> > migrating all irqs affined to it, so the following I2C transactions
> > will fail (no any CPU will handle that interrupt since then).
> 
> > Some snippet of the shutdown codes:
> >
> > void kernel_power_off(void)
> > {
> > kernel_shutdown_prepare(SYSTEM_POWER_OFF);
> > :
> > disable_nonboot_cpus();
> > :
> > machine_power_off();
> > }
> >
> > void machine_power_off(void)
> > {
> > machine_shutdown();
> > if (pm_power_off)
> > pm_power_off(); /* this is where we send I2C write to shutdown 
> > */ }
> >
> > void machine_shutdown(void)
> > {
> > #ifdef CONFIG_SMP
> > smp_send_stop();
> > #endif
> > }
> >
> > In "smp_send_stop()", it will send "IPI_CPU_STOPS" to offline other
> > cpus except current cpu (smp_processor_id()), however, current cpu
> > will not always be cpu0 at least at Tegra20, that said for example
> > cpu1 might be the current cpu and cpu0 will be offlined and this is the 
> > case why the I2C transaction
> will timeout.
> >
> > For normal case, "disable_nonboot_cpus()" call will disable all other
> > Cpus except cpu0, that means we won't hit the problem mentioned here
> > since cpu0 will always be the current cpu in the call "smp_send_stop", but 
> > the call to
> "disable_nonboot_cpus"
> > will happen only when "CONFIG_PM_SLEEP_SMP" is enabled which is not
> > the case for Tegra20/Tegra30, we don't support suspend yet so this can't be 
> > enabled.
> 
> So what you're asking for is a feature to do what CONFIG_PM_SLEEP_SMP does, 
> but without
> CONFIG_PM_SLEEP_SMP enabled?

Yeah pretty much, I'm actually asking should we take care of this since maybe 
not all platforms 
will have this config enabled?
> 
> Why not just ensure that CONFIG_PM_SLEEP_SMP is enabled if your platform 
> requires that the lowest CPU
> number be the CPU dealing with reboot?

Someday we will have it enabled, but before that we'll hit the issue, so you 
don't think
this should be taken care of? Thanks.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 00/14] aoe driver v49 performance and usability improvements

2012-08-24 Thread Ed Cashin

[second send after HTML part made vger reject my first email]

On 32 Aug 2012, Ed Cashin writes:

> These patches go a long way to updating the in-kernel aoe driver with
> the changes that have been in the coraid.com-distributed version,
> bringing it from (aoe internal) version 47 to version 49.  They apply
> to commit 23dcfa61bac244e1 of the mainline git tree.

Just a heads up: A colleague found a list_del corruption warning using
3.5.2 with these patches, but the warning also occurs without these
patches, so I plan to do a git bisect to find out when the regression
occurred.  The warning appears to be for the events of the bdi->completions
when blk_cleanup_queue is called during "rmmod aoe".

-- 
  Ed
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/5] x86: Only direct map addresses that are marked as E820_RAM

2012-08-24 Thread Jacob Shin

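Currently direct mappings are created for [ 0 to max_low_pfn<<PAGE_SHIFT )
and [ 4GB to max_pfn<<PAGE_SHIFT ), which may include regions that are not
backed by actual DRAM. This is fine for holes under 4GB which are covered
by fixed and variable range MTRRs to be UC. However, we run into trouble
on higher memory addresses which cannot be covered by MTRRs.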
---
 arch/x86/include/asm/page_types.h |9 +++
 arch/x86/kernel/setup.c   |  125 +
 arch/x86/mm/init.c|2 +
 arch/x86/mm/init_64.c |6 +-
 4 files changed, 112 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/page_types.h 
b/arch/x86/include/asm/page_types.h
index e21fdd1..409047a 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -3,6 +3,7 @@
 
 #include 
 #include 
+#include 
 
 /* PAGE_SHIFT determines the page size */
 #define PAGE_SHIFT 12
@@ -40,12 +41,20 @@
 #endif /* CONFIG_X86_64 */
 
 #ifndef __ASSEMBLY__
+#include 
 
 extern int devmem_is_allowed(unsigned long pagenr);
 
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
+extern struct range pfn_mapped[E820_X_MAX];
+extern int nr_pfn_mapped;
+
+extern void add_pfn_range_mapped(unsigned long start_pfn, unsigned long 
end_pfn);
+extern bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long 
end_pfn);
+extern bool pfn_is_mapped(unsigned long pfn);
+
 static inline phys_addr_t get_max_mapped(void)
 {
return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 751e020..4217fb4 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -115,13 +115,46 @@
 #include 
 
 /*
- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
- * The direct mapping extends to max_pfn_mapped, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
+ * max_low_pfn_mapped: highest direct mapped pfn under 4GB
+ * max_pfn_mapped: highest direct mapped pfn over 4GB
+ *
+ * The direct mapping only covers E820_RAM regions, so the ranges and gaps are
+ * represented by pfn_mapped
  */
 unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
+struct range pfn_mapped[E820_X_MAX];
+int nr_pfn_mapped;
+
+void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn)
+{
+   nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX,
+nr_pfn_mapped, start_pfn, end_pfn);
+
+   max_pfn_mapped = max(max_pfn_mapped, end_pfn);
+
+   if (end_pfn <= (1UL << (32 - PAGE_SHIFT)))
+   max_low_pfn_mapped = max(max_low_pfn_mapped, end_pfn);
+}
+
+bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
+{
+   int i;
+
+   for (i = 0; i < nr_pfn_mapped; i++)
+   if ((start_pfn >= pfn_mapped[i].start) &&
+   (end_pfn <= pfn_mapped[i].end))
+   return true;
+
+   return false;
+}
+
+bool pfn_is_mapped(unsigned long pfn)
+{
+   return pfn_range_is_mapped(pfn, pfn + 1);
+}
+
 #ifdef CONFIG_DMI
 RESERVE_BRK(dmi_alloc, 65536);
 #endif
@@ -296,6 +329,68 @@ static void __init cleanup_highmap(void)
 }
 #endif
 
+/*
+ * Iterate through E820 memory map and create direct mappings for only E820_RAM
+ * regions. We cannot simply create direct mappings for all pfns from
+ * [0 to max_low_pfn) and [4GB to max_pfn) because of possible memory holes in
+ * high addresses that cannot be marked as UC by fixed/variable range MTRRs.
+ * Depending on the alignment of E820 ranges, this may possibly result in using
+ * smaller size (i.e. 4K instead of 2M or 1G) page tables.
+ */
+static void __init init_memory(void)
+{
+   int i;
+
+   init_gbpages();
+
+   /* Enable PSE if available */
+   if (cpu_has_pse)
+   set_in_cr4(X86_CR4_PSE);
+
+   /* Enable PGE if available */
+   if (cpu_has_pge) {
+   set_in_cr4(X86_CR4_PGE);
+   __supported_pte_mask |= _PAGE_GLOBAL;
+   }
+
+   for (i = 0; i < e820.nr_map; i++) {
+   struct e820entry *ei = [i];
+   u64 start = ei->addr;
+   u64 end = ei->addr + ei->size;
+
+   /* we only map E820_RAM */
+   if (ei->type != E820_RAM)
+   continue;
+
+   if (end <= ISA_END_ADDRESS)
+   continue;
+
+   if (start <= ISA_END_ADDRESS)
+   start = 0;
+#ifdef CONFIG_X86_32
+   /* on 32 bit, we only map up to max_low_pfn */
+   if ((start >> PAGE_SHIFT) >= max_low_pfn)
+   continue;
+
+   if ((end >> PAGE_SHIFT) > max_low_pfn)
+   end = max_low_pfn << PAGE_SHIFT;
+#endif
+   /* the ISA range is always mapped regardless of holes */
+   if (!pfn_range_is_mapped(0, ISA_END_ADDRESS << PAGE_SHIFT) &&
+   start != 0)
+   init_memory_mapping(0, ISA_END_ADDRESS);
+
+   init_memory_mapping(start, end);
+   }
+
+#ifdef CONFIG_X86_64
+   if (max_pfn > max_low_pfn) {
+   /* can we

[PATCH 2/5] x86: find_early_table_space based on memory ranges that are being mapped

2012-08-24 Thread Jacob Shin

Current logic finds enough space for direct mapping page tables from 0
to end. Instead, we only need to find enough space to cover mr[0].start
to mr[nr_range].end -- the range that is actually being mapped by
init_memory_mapping()

This patch also reportedly fixes suspend/resume issue reported in:

https://lkml.org/lkml/2012/8/11/83

Signed-off-by: Jacob Shin 
---
 arch/x86/mm/init.c |   62 +---
 1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 2f07e09..e2b21e0 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -35,40 +35,48 @@ struct map_range {
unsigned page_size_mask;
 };
 
-static void __init find_early_table_space(struct map_range *mr, unsigned long 
end,
- int use_pse, int use_gbpages)
+/*
+ * First calculate space needed for kernel direct mapping page tables to cover
+ * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 
1GB
+ * pages. Then find enough contiguous space for those page tables.
+ */
+static void __init find_early_table_space(struct map_range *mr, int nr_range)
 {
-   unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
+   int i;
+   unsigned long puds = 0, pmds = 0, ptes = 0, tables;
+   unsigned long start = 0, good_end;
phys_addr_t base;
 
-   puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
-   tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
-
-   if (use_gbpages) {
-   unsigned long extra;
-
-   extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
-   pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
-   } else
-   pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+   for (i = 0; i < nr_range; i++) {
+   unsigned long range, extra;
 
-   tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
+   range = mr[i].end - mr[i].start;
+   puds += (range + PUD_SIZE - 1) >> PUD_SHIFT;
 
-   if (use_pse) {
-   unsigned long extra;
+   if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) {
+   extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT);
+   pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT;
+   } else {
+   pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT;
+   }
 
-   extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
+   if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) {
+   extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT);
 #ifdef CONFIG_X86_32
-   extra += PMD_SIZE;
+   extra += PMD_SIZE;
 #endif
-   /* The first 2/4M doesn't use large pages. */
-   if (mr->start < PMD_SIZE)
-   extra += mr->end - mr->start;
-
-   ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
-   } else
-   ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+   /* The first 2/4M doesn't use large pages. */
+   if (mr[i].start < PMD_SIZE)
+   extra += range;
+
+   ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
+   } else {
+   ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT;
+   }
+   }
 
+   tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
+   tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
 
 #ifdef CONFIG_X86_32
@@ -86,7 +94,7 @@ static void __init find_early_table_space(struct map_range 
*mr, unsigned long en
pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT);
 
printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem 
%#010lx-%#010lx]\n",
-   end - 1, pgt_buf_start << PAGE_SHIFT,
+   mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT,
(pgt_buf_top << PAGE_SHIFT) - 1);
 }
 
@@ -257,7 +265,7 @@ unsigned long __init_refok init_memory_mapping(unsigned 
long start,
 * nodes are discovered.
 */
if (!after_bootmem)
-   find_early_table_space([0], end, use_pse, use_gbpages);
+   find_early_table_space(mr, nr_range);
 
for (i = 0; i < nr_range; i++)
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH V4 0/5] x86: Create direct mappings for E820_RAM only

2012-08-24 Thread Jacob Shin

Currently kernel direct mappings are created for all pfns between
[ 0 to max_low_pfn ) and [ 4GB to max_pfn ). When we introduce memory
holes, we end up mapping memory ranges that are not backed by physical
DRAM. This is fine for lower memory addresses which can be marked as UC
by fixed/variable range MTRRs, however we run into trouble with high
addresses.

The following patchset creates direct mappings only for E820_RAM regions
between 0 ~ max_low_pfn and 4GB ~ max_pfn. And leaves non-E820_RAM and
memory holes unmapped.

This fourth revision of the patchset attempts to resolve comments and
concerns from the following threads:

* https://lkml.org/lkml/2012/8/22/680
* https://lkml.org/lkml/2012/8/13/512
* https://lkml.org/lkml/2012/8/9/536
* https://lkml.org/lkml/2011/10/20/323

Jacob Shin (5):
  x86: Move enabling of PSE and PGE out of init_memory_mapping
  x86: find_early_table_space based on memory ranges that are being
mapped
  x86: Only direct map addresses that are marked as E820_RAM
  x86: Fixup code testing if a pfn is direct mapped
  x86: if kernel .text .data .bss are not marked as E820_RAM, complain
and fix

 arch/x86/include/asm/page_types.h |9 +++
 arch/x86/kernel/cpu/amd.c |6 +-
 arch/x86/kernel/setup.c   |  130 -
 arch/x86/mm/init.c|   74 ++---
 arch/x86/mm/init_64.c |6 +-
 arch/x86/platform/efi/efi.c   |8 +--
 6 files changed, 167 insertions(+), 66 deletions(-)

-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 5/5] x86: if kernel .text .data .bss are not marked as E820_RAM, complain and fix

2012-08-24 Thread Jacob Shin

There could be cases where user supplied memmap=exactmap memory
mappings do not mark the region where the kernel .text .data and
.bss reside as E820_RAM as reported here:

https://lkml.org/lkml/2012/8/14/86

Handle it by complaining, and adding the range back into the e820.

Signed-off-by: Jacob Shin 
---
 arch/x86/kernel/setup.c |   15 +++
 1 file changed, 15 insertions(+)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4217fb4..b84aceb5 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -926,6 +926,21 @@ void __init setup_arch(char **cmdline_p)
insert_resource(_resource, _resource);
insert_resource(_resource, _resource);
 
+   /*
+* Complain if .text .data and .bss are not marked as E820_RAM and
+* attempt to fix it by adding the range. We may have a confused BIOS,
+* or the user may have incorrectly supplied it via memmap=exactmap. If
+* we really are running on top non-RAM, we will crash later anyways.
+*/
+   if (!e820_all_mapped(code_resource.start, bss_resource.end, E820_RAM)) {
+   pr_warn(".text .data .bss are not marked as E820_RAM!\n");
+
+   e820_add_region(code_resource.start,
+   bss_resource.end - code_resource.start + 1,
+   E820_RAM);
+   sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), _map);
+   }
+
trim_bios_range();
 #ifdef CONFIG_X86_32
if (ppro_with_ram_bug()) {
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 4/5] x86: Fixup code testing if a pfn is direct mapped

2012-08-24 Thread Jacob Shin

Update code that previously assumed pfns [ 0 - max_low_pfn_mapped ) and
[ 4GB - max_pfn_mapped ) were always direct mapped, to now look up
pfn_mapped ranges instead.

Signed-off-by: Jacob Shin 
---
 arch/x86/kernel/cpu/amd.c   |6 +-
 arch/x86/platform/efi/efi.c |8 
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9d92e19..554ccfc 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -677,11 +677,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 */
if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, )) {
printk(KERN_DEBUG "tseg: %010llx\n", tseg);
-   if ((tseg>>PMD_SHIFT) <
-   (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
-   ((tseg>>PMD_SHIFT) <
-   (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
-   (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT
+   if (pfn_is_mapped(tseg))
set_memory_4k((unsigned long)__va(tseg), 1);
}
}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 92660eda..f1facde 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -776,7 +776,7 @@ void __init efi_enter_virtual_mode(void)
efi_memory_desc_t *md, *prev_md = NULL;
efi_status_t status;
unsigned long size;
-   u64 end, systab, addr, npages, end_pfn;
+   u64 end, systab, addr, npages, start_pfn, end_pfn;
void *p, *va, *new_memmap = NULL;
int count = 0;
 
@@ -827,10 +827,10 @@ void __init efi_enter_virtual_mode(void)
size = md->num_pages << EFI_PAGE_SHIFT;
end = md->phys_addr + size;
 
+   start_pfn = PFN_DOWN(md->phys_addr);
end_pfn = PFN_UP(end);
-   if (end_pfn <= max_low_pfn_mapped
-   || (end_pfn > (1UL << (32 - PAGE_SHIFT))
-   && end_pfn <= max_pfn_mapped))
+
+   if (pfn_range_is_mapped(start_pfn, end_pfn))
va = __va(md->phys_addr);
else
va = efi_ioremap(md->phys_addr, size, md->type);
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/5] x86: Move enabling of PSE and PGE out of init_memory_mapping

2012-08-24 Thread Jacob Shin

Depending on the platform, init_memory_mapping() may be called multiple
times. Move it out to setup_arch() to avoid writing to cr4 on every call.

Signed-off-by: Jacob Shin 
---
 arch/x86/kernel/setup.c |   10 ++
 arch/x86/mm/init.c  |   10 --
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80..751e020 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -913,6 +913,16 @@ void __init setup_arch(char **cmdline_p)
 
init_gbpages();
 
+   /* Enable PSE if available */
+   if (cpu_has_pse)
+   set_in_cr4(X86_CR4_PSE);
+
+   /* Enable PGE if available */
+   if (cpu_has_pge) {
+   set_in_cr4(X86_CR4_PGE);
+   __supported_pte_mask |= _PAGE_GLOBAL;
+   }
+
/* max_pfn_mapped is updated here */
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn

Re: [PATCH] regulator: disable supply regulator if it is enabled for boot-on

2012-08-24 Thread Rabin Vincent

On Fri, Aug 24, 2012 at 11:22:05PM +0530, Laxman Dewangan wrote:
> I tried to reproduce the issue but could not able to do this.
> Can you please send me your board/dt files where you are porviding
> platform data for regulator?
> This will help me to reproduce the issue.

Here's a dts patch:

diff --git a/arch/arm/boot/dts/vexpress-v2m.dtsi 
b/arch/arm/boot/dts/vexpress-v2m.dtsi
index dba53fd..386eafa 100644
--- a/arch/arm/boot/dts/vexpress-v2m.dtsi
+++ b/arch/arm/boot/dts/vexpress-v2m.dtsi
@@ -207,5 +207,20 @@
regulator-max-microvolt = <330>;
regulator-always-on;
};
+
+   vbat: fixedregulator@1 {
+   compatible = "regulator-fixed";
+   regulator-name = "vbat";
+   regulator-min-microvolt = <330>;
+   regulator-max-microvolt = <330>;
+   };
+
+   fixedregulator@2 {
+   compatible = "regulator-fixed";
+   regulator-name = "vtest1";
+   regulator-min-microvolt = <330>;
+   regulator-max-microvolt = <330>;
+   vin-supply = <>;
+   regulator-boot-on;
+   };
};
 };

If you want to test it with fixed regulators, you'll need the hack below
to bypass the ops->disable check in regulator_init_complete(). 

diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index 185468c..05f3028 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -129,9 +129,16 @@ static int fixed_voltage_list_voltage(struct regulator_dev 
*dev,
return data->microvolts;
 }
 
+static int fixed_enable(struct regulator_dev *dev)
+{
+   return 0;
+}
+
 static struct regulator_ops fixed_voltage_ops = {
.get_voltage = fixed_voltage_get_voltage,
.list_voltage = fixed_voltage_list_voltage,
+   .disable = fixed_enable,
+   .enable = fixed_enable,
 };
 
 static int __devinit reg_fixed_voltage_probe(struct platform_device *pdev)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] ioat: Adding Ivy Bridge IOATDMA PCI device IDs

2012-08-24 Thread Dave Jiang

Signed-off-by: Dave Jiang 
---

 drivers/dma/ioat/pci.c |   22 ++
 1 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index 5e3a40f..c057306 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -40,6 +40,17 @@ MODULE_VERSION(IOAT_DMA_VERSION);
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Intel Corporation");
 
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB0  0x0e20
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB1  0x0e21
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB2  0x0e22
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB3  0x0e23
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB4  0x0e24
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB5  0x0e25
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB6  0x0e26
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB7  0x0e27
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB8  0x0e2e
+#define PCI_DEVICE_ID_INTEL_IOAT_IVB9  0x0e2f
+
 static struct pci_device_id ioat_pci_tbl[] = {
/* I/OAT v1 platforms */
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
@@ -83,6 +94,17 @@ static struct pci_device_id ioat_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) },
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) },
 
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) },
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) },
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) },
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) },
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) },
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) },
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) },
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) },
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) },
+   { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) },
+
{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

RE: 3.5.1 kernel: Oops + stracktrace + ext4 kernel errors!

2012-08-24 Thread Justin Piszcz



-Original Message-
From: Theodore Ts'o [mailto:ty...@mit.edu] 
Sent: Friday, August 24, 2012 6:39 PM
To: Justin Piszcz
Cc: linux-kernel@vger.kernel.org; linux-e...@vger.kernel.org; al piszcz
Subject: Re: 3.5.1 kernel: Oops + stracktrace + ext4 kernel errors!

On Fri, Aug 24, 2012 at 11:31:44AM -0400, Justin Piszcz wrote:
> Hello,
> 
> Thoughts?
> 
> Saw this when trying to copy files to array with Samba and doing file
> operations:
> 
> [28939.505792] [ cut here ]
> [29367.345433] BUG: unable to handle kernel NULL pointer dereference
> at 0028
> [29367.345455] IP: [] ext4_ext_remove_space+0x89c/0xc90

Fixed by commit 89a4e48f84 in upstream.  It is scheduled for inclusion
in a stable kernel series; I believe it should be in 3.5.3.

Regards,

- Ted


--

Thanks.. if/when I come across another box I can test with I will ensure
that patch (89a4e48f84 ) gets applied.  For PROD hosts I need stability >
16T.

Justin.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3 01/17] hashtable: introduce a small and naive hashtable

2012-08-24 Thread Tejun Heo

Hello,

On Sat, Aug 25, 2012 at 12:59:25AM +0200, Sasha Levin wrote:
> Thats the thing, the amount of things of things you can do with a given bucket
> is very limited. You can't add entries to any point besides the head (without
> walking the entire list).

Kinda my point.  We already have all the hlist*() interface to deal
with such cases.  Having something which is evidently the trivial
hlist hashtable and advertises as such in the interface can be
helpful.  I think we need that more than we need anything fancy.

Heh, this is a debate about which one is less insignificant.  I can
see your point.  I'd really like to hear what others think on this.

Guys, do we want something which is evidently trivial hlist hashtable
which can use hlist_*() API directly or do we want something better
encapsulated?

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [ 08/32] drm/i915: correctly order the ring init sequence

2012-08-24 Thread Herton Ronaldo Krzesinski

On Sun, Aug 19, 2012 at 08:57:04PM -0700, Greg Kroah-Hartman wrote:
> From: Greg KH 
> 
> 3.4-stable review patch.  If anyone has any objections, please let me know.
> 
> --
> 
> From: Daniel Vetter 
> 
> commit 0d8957c8a90bbb5d34fab9a304459448a5131e06 upstream.
> 
> We may only start to set up the new register values after having
> confirmed that the ring is truely off. Otherwise the hw might lose the
> newly written register values. This is caught later on in the init
> sequence, when we check whether the register writes have stuck.
> 
> Reviewed-by: Jani Nikula 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=50522
> Tested-by: Yang Guang 
> Signed-off-by: Daniel Vetter 
> Signed-off-by: Greg Kroah-Hartman 

I think with this commit also the following commits should be picked for
3.4 right? (as suggested for 3.0):

f01db988ef6f6c70a6cc36ee71e4a98a68901229
b7884eb45ec98c0d34c7f49005ae9d4b4b4e38f6

Just reporting that I tested this 3.4.10 proposed update with the two
commits above cherry-picked/backported applied, and worked ok.

The first cherry-picked cleanly, while
b7884eb45ec98c0d34c7f49005ae9d4b4b4e38f6 needed backporting for 3.4,
like happened with 3.0, this is a proposed backport which I
applied/tested, is similar to 3.0 and 3.2 versions:

>From a2712ae26afde5be2bc62080755d1324164f53d3 Mon Sep 17 00:00:00 2001
From: Daniel Vetter 
Date: Mon, 4 Jun 2012 11:18:15 +0200
Subject: [PATCH] drm/i915: hold forcewake around ring hw init

Empirical evidence suggests that we need to: On at least one ivb
machine when running the hangman i-g-t test, the rings don't
initialize properly - the RING_START registers seem to be stuck at
all zeros.

Holding forcewake around these register init sequences makes chip reset
reliable again. Note that this is not the first such issue:

commit f01db988ef6f6c70a6cc36ee71e4a98a68901229
Author: Sean Paul 
Date:   Fri Mar 16 12:43:22 2012 -0400

drm/i915: Add wait_for in init_ring_common

added delay loops to make RING_START and RING_CTL initialization
reliable on the blt ring at boot-up. So I guess it won't hurt if we do
this unconditionally for all force_wake needing gpus.

To avoid copy of the HAS_FORCE_WAKE check I've added a new
intel_info bit for that.

v2: Fixup missing commas in static struct and properly handling the
error case in init_ring_common, both noticed by Jani Nikula.

Cc: sta...@vger.kernel.org
Reported-and-tested-by: Yang Guang 
Reviewed-by: Eugeni Dodonov 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=50522
Signed-Off-by: Daniel Vetter 
[herton: backport to 3.4:
 - adjust for different struct intel_device_info layouts
 - drop changes to Haswell/Valleyview, not present in 3.4
 - NEEDS_FORCE_WAKE is on i915_drv.h, and doesn't have IS_VALLEYVIEW ]
Signed-off-by: Herton Ronaldo Krzesinski 
---
 drivers/gpu/drm/i915/i915_drv.c |4 
 drivers/gpu/drm/i915/i915_drv.h |7 +--
 drivers/gpu/drm/i915/intel_ringbuffer.c |   16 +---
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index ae8a64f..c654557 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -224,6 +224,7 @@ static const struct intel_device_info 
intel_sandybridge_d_info = {
.has_bsd_ring = 1,
.has_blt_ring = 1,
.has_llc = 1,
+   .has_force_wake = 1,
 };
 
 static const struct intel_device_info intel_sandybridge_m_info = {
@@ -233,6 +234,7 @@ static const struct intel_device_info 
intel_sandybridge_m_info = {
.has_bsd_ring = 1,
.has_blt_ring = 1,
.has_llc = 1,
+   .has_force_wake = 1,
 };
 
 static const struct intel_device_info intel_ivybridge_d_info = {
@@ -241,6 +243,7 @@ static const struct intel_device_info 
intel_ivybridge_d_info = {
.has_bsd_ring = 1,
.has_blt_ring = 1,
.has_llc = 1,
+   .has_force_wake = 1,
 };
 
 static const struct intel_device_info intel_ivybridge_m_info = {
@@ -250,6 +253,7 @@ static const struct intel_device_info 
intel_ivybridge_m_info = {
.has_bsd_ring = 1,
.has_blt_ring = 1,
.has_llc = 1,
+   .has_force_wake = 1,
 };
 
 static const struct pci_device_id pciidlist[] = {  /* aka */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5fabc6c..a2117b2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -255,6 +255,7 @@ struct intel_device_info {
u8 is_broadwater:1;
u8 is_crestline:1;
u8 is_ivybridge:1;
+   u8 has_force_wake:1;
u8 has_fbc:1;
u8 has_pipe_cxsr:1;
u8 has_hotplug:1;
@@ -1051,6 +1052,8 @@ struct drm_i915_file_private {
 #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT)
 #define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX)
 
+#define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake)
+
 #include "i915_trace.h"

Re: [PATCH v3 01/17] hashtable: introduce a small and naive hashtable

2012-08-24 Thread Sasha Levin

>> Why do we need hash_head/hash_for_each_head()? I haven't stumbled on a place 
>> yet
>> that needed direct access to the bucket itself.
> 
> Because whole hash table walking is much less common and we can avoid
> another full set of iterators.

I don't agree. Out of 32 places which now use a hashtable iterator of some kind,
12 of them (38%) walk the entire table.

The thing is that usually data structures are indexable by more than one key, so
usually hashtables are fully walked in cold paths to look for different keys.

Take kernel/workqueue.c for example: There are 4 places which do a key lookup
(find_worker_executing_work()) and 3 places which fully walk the entire table
(for_each_busy_worker()).

>> This basically means 11 macros/functions that would let us have full
>> encapsulation and will make it very easy for future implementations to work 
>> with
>> this API instead of making up a new one. It's also not significantly (+~2-3)
>> more than the ones you listed.
> 
> I'm not sure whether full encapsulation is a good idea for trivial
> hashtable.  For higher level stuff, sure but at this level I think
> benefits coming from known obvious implementation can be larger.
> e.g. suppose the caller knows certain entries to be way colder than
> others and wants to put them at the end of the chain.

That's the thing: the number of things you can do with a given bucket
is very limited. You can't add entries to any point besides the head (without
walking the entire list).

Basically you can do only two things with a bucket:

 - Add something to it at a very specific place.
 - Walk it

So I don't understand what the point is in exposing the internal structure of the
hashtable if there's nothing significant that can be gained from it by the user.

> 
> So, I think implmenting the minimal set of helpers which reflect the
> underlying trivial implementation explicitly could actually be better
> even when discounting the reduced number of wrappers.
> 
> Thanks.
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v7 0/4] cgroup: add xattr support

2012-08-24 Thread Tejun Heo

Hello,

On Thu, Aug 23, 2012 at 04:53:27PM -0400, a...@redhat.com wrote:
> This series are a refreshed version of a patchset submitted by Li Zefan back
> in march:
>   https://lkml.org/lkml/2012/3/1/13

Applied to cgroup/for-3.7 w/ "Original-Patch-by: Li Zefan" added for
the first three patches.

* Can you please update MTA setting so that the From: header contains
  your full name?  Importing the series to git ended up with
  "a...@redhat.com ".

* Can you please add some comments and documentation regarding this?

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: 3.5.1 kernel: Oops + stracktrace + ext4 kernel errors!

2012-08-24 Thread Theodore Ts'o

On Fri, Aug 24, 2012 at 11:31:44AM -0400, Justin Piszcz wrote:
> Hello,
> 
> Thoughts?
> 
> Saw this when trying to copy files to array with Samba and doing file
> operations:
> 
> [28939.505792] [ cut here ]
> [29367.345433] BUG: unable to handle kernel NULL pointer dereference
> at 0028
> [29367.345455] IP: [] ext4_ext_remove_space+0x89c/0xc90

Fixed by commit 89a4e48f84 in upstream.  It is scheduled for inclusion
in a stable kernel series; I believe it should be in 3.5.3.

Regards,

- Ted
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] ACPI: power: Use KERN_DEBUG when no power resources are found

2012-08-24 Thread Joe Perches

On Thu, 2012-08-23 at 15:26 +0200, Borislav Petkov wrote:
> On Fri, Aug 10, 2012 at 10:05:53AM +0800, Aaron Lu wrote:
> > commit a606dac368eed5696fb38e16b1394f1d049c09e9 adds support to link
> > devices which have _PRx, if a device does not have _PRx, a warning
> > message will be printed.
> > 
> > This commit is for ZPODD on Intel's platform, on AMD's platform, there
> > is no _PRx to support ZPODD, we use _PSx.
> > 
> > So instead of printing a useless warning message on AMD's platform,
> > changing the print level to DEBUG to suppress this message.
[]
> > diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c
[]
> > @@ -460,7 +460,7 @@ int acpi_power_resource_register_device(struct device 
> > *dev, acpi_handle handle)
> > return ret;
> >  
> >  no_power_resource:
> > -   printk(KERN_WARNING PREFIX "Invalid Power Resource to register!");
> > +   printk(KERN_DEBUG PREFIX "Invalid Power Resource to register!");

Perhaps add something like:
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
printk(etc...)
instead?


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/1] backlight: Add Backlight driver for lm3630 chip

2012-08-24 Thread Andrew Morton

On Fri, 24 Aug 2012 14:03:23 +0900
GShark Jeong  wrote:

> I've reviewed and tested you patch ( lm3630 and lm3639) on my real board
> and these are working well .
> Thank you.

Great, thanks.

> ( Do I need to  send back this patch to you again? or will the current
> status be applied for next branch? )

No, that's OK - when the time comes to send the patch upstream I shall
first fold the fixup patches into the base patch and update the changelog.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Logitech USB headset not working in 3.6-rc3

2012-08-24 Thread Josh Boyer

On Fri, Aug 24, 2012 at 11:30:12PM +0200, Daniel Mack wrote:
> On Fri, Aug 24, 2012 at 9:08 PM, Josh Boyer  wrote:
> > Hi All,
> >
> > We've had a report[1] that the Logitech USB headset 0003:046D:0A0C isn't
> > working with 3.6-rc3.  It seems the last working kernel was based on
> > commit 10c63c9, and it first stopped working with a kernel based on
> > commit 23dcfa6.  There are only a few ALSA commits between those
> > revisions, so hopefully this is something that is fairly easy to
> > identify.  The only commit to USB audio in that set is:
> >
> > commit e9ba389c5ffc4dd29dfe17e00e4887730235
> > Author: Takashi Iwai 
> > Date:   Wed Aug 15 12:32:00 2012 +0200
> >
> > ALSA: usb-audio: Fix scheduling-while-atomic bug in PCM capture stream
> >
> >
> > I've CC'd the reporter and attached the alsa-info is below.
> 
> Does it work again once you revert that commit?

Haven't built a kernel with that done yet.  Had a few other things pop
up this afternoon.  If Bruno doesn't build one himself, I'll try to get
one built later this evening for testing.

josh
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Logitech USB headset not working in 3.6-rc3

2012-08-24 Thread Daniel Mack

Hi,

On 24.08.2012 21:08, Josh Boyer wrote:
> We've had a report[1] that the Logitech USB headset 0003:046D:0A0C isn't
> working with 3.6-rc3.  It seems the last working kernel was based on
> commit 10c63c9, and it first stopped working with a kernel based on
> commit 23dcfa6.  There are only a few ALSA commits between those
> revisions, so hopefully this is something that is fairly easy to
> identify.  The only commit to USB audio in that set is:

[...]

> !!ALSA/HDA dmesg
> !!--
> 
> [   38.190306] SELinux: initialized (dev configfs, type configfs), uses 
> genfs_contexts
> [   38.229616] snd_hda_intel :00:1b.0: irq 66 for MSI/MSI-X
> [   38.270699] ALSA sound/usb/mixer.c:866 6:0: cannot get min/max values for 
> control 2 (id 6)
> [   38.274097] ALSA sound/usb/mixer.c:866 1:0: cannot get min/max values for 
> control 2 (id 1)
> [   38.276753] ALSA sound/usb/mixer.c:866 2:0: cannot get min/max values for 
> control 2 (id 2)
> [   38.279322] ALSA sound/pci/hda/hda_auto_parser.c:322 autoconfig: 
> line_outs=1 (0xe/0x0/0x0/0x0/0x0) type:line
> [   38.279326] ALSA sound/pci/hda/hda_auto_parser.c:326speaker_outs=1 
> (0x11/0x0/0x0/0x0/0x0)
> [   38.279329] ALSA sound/pci/hda/hda_auto_parser.c:330hp_outs=1 
> (0xd/0x0/0x0/0x0/0x0)
> [   38.279331] ALSA sound/pci/hda/hda_auto_parser.c:331mono: mono_out=0x0
> [   38.279333] ALSA sound/pci/hda/hda_auto_parser.c:335inputs:
> [   38.279336] ALSA sound/pci/hda/hda_auto_parser.c:339  Mic=0x10
> [   38.279342] ALSA sound/pci/hda/hda_auto_parser.c:339  Line=0xf
> [   38.285983] usbcore: registered new interface driver snd-usb-audio

Also, according to this dmesg, the device is probed just fine (which the
commit you mentioned wouldn't change anything about though).

So what does "isn't working anymore" refer to precisely? Are there any
more dmesg entries generated once the stream is started?


Daniel

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[tip:x86/fpu] x86, fpu: use non-lazy fpu restore for processors supporting xsave

2012-08-24 Thread tip-bot for Suresh Siddha

Commit-ID:  127f5403bfbc5f52cf0fbbadfa5e624a32a137ff
Gitweb: http://git.kernel.org/tip/127f5403bfbc5f52cf0fbbadfa5e624a32a137ff
Author: Suresh Siddha 
AuthorDate: Fri, 24 Aug 2012 14:13:02 -0700
Committer:  H. Peter Anvin 
CommitDate: Fri, 24 Aug 2012 14:26:54 -0700

x86, fpu: use non-lazy fpu restore for processors supporting xsave

The fundamental model of the current Linux kernel is to lazily init and
restore FPU instead of restoring the task state during context switch.
This changes that fundamental lazy model to the non-lazy model for
the processors supporting xsave feature.

Reasons driving this model change are:

i. Newer processors support optimized state save/restore using xsaveopt and
xrstor by tracking the INIT state and MODIFIED state during context-switch.
This is faster than modifying the cr0.TS bit which has serializing semantics.

ii. Newer glibc versions use SSE for some of the optimized copy/clear routines.
With certain workloads (like boot, kernel-compilation etc), application
completes its work within the first 5 task switches, thus taking up to 5 #DNA
traps with the kernel not getting a chance to apply the above mentioned
pre-load heuristic.

iii. Some xstate features (like AMD's LWP feature) don't honor the cr0.TS bit
and thus will not work correctly in the presence of lazy restore. Non-lazy
state restore is needed for enabling such features.

Some data on a two socket SNB system:
 * Saved 20K DNA exceptions during boot on a two socket SNB system.
 * Saved 50K DNA exceptions during kernel-compilation workload.
 * Improved throughput of the AVX based checksumming function inside the
   kernel by ~15% as xsave/xrstor is faster than the serializing clts/stts
   pair.

Signed-off-by: Suresh Siddha 
Link: 
http://lkml.kernel.org/r/1345842782-24175-7-git-send-email-suresh.b.sid...@intel.com
Cc: Jim Kukunas 
Cc: NeilBrown 
Cc: Avi Kivity 
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/fpu-internal.h |   96 +++
 arch/x86/include/asm/i387.h |1 +
 arch/x86/include/asm/xsave.h|1 +
 arch/x86/kernel/i387.c  |   20 ++-
 arch/x86/kernel/process.c   |   12 +++--
 arch/x86/kernel/process_32.c|4 --
 arch/x86/kernel/process_64.c|4 --
 arch/x86/kernel/traps.c |5 ++-
 arch/x86/kernel/xsave.c |   57 +
 9 files changed, 140 insertions(+), 60 deletions(-)

diff --git a/arch/x86/include/asm/fpu-internal.h 
b/arch/x86/include/asm/fpu-internal.h
index fac39e9..e31cc6e 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -291,15 +291,48 @@ static inline void __thread_set_has_fpu(struct 
task_struct *tsk)
 static inline void __thread_fpu_end(struct task_struct *tsk)
 {
__thread_clear_has_fpu(tsk);
-   stts();
+   if (!use_xsave())
+   stts();
 }
 
 static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
-   clts();
+   if (!use_xsave())
+   clts();
__thread_set_has_fpu(tsk);
 }
 
+static inline void __drop_fpu(struct task_struct *tsk)
+{
+   if (__thread_has_fpu(tsk)) {
+   /* Ignore delayed exceptions from user space */
+   asm volatile("1: fwait\n"
+"2:\n"
+_ASM_EXTABLE(1b, 2b));
+   __thread_fpu_end(tsk);
+   }
+}
+
+static inline void drop_fpu(struct task_struct *tsk)
+{
+   /*
+* Forget coprocessor state..
+*/
+   preempt_disable();
+   tsk->fpu_counter = 0;
+   __drop_fpu(tsk);
+   clear_used_math();
+   preempt_enable();
+}
+
+static inline void drop_init_fpu(struct task_struct *tsk)
+{
+   if (!use_xsave())
+   drop_fpu(tsk);
+   else
+   xrstor_state(init_xstate_buf, -1);
+}
+
 /*
  * FPU state switching for scheduling.
  *
@@ -333,7 +366,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct 
task_struct *old, struct ta
 {
fpu_switch_t fpu;
 
-   fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+   /*
+* If the task has used the math, pre-load the FPU on xsave processors
+* or if the past 5 consecutive context-switches used math.
+*/
+   fpu.preload = tsk_used_math(new) && (use_xsave() ||
+new->fpu_counter > 5);
if (__thread_has_fpu(old)) {
if (!__save_init_fpu(old))
cpu = ~0;
@@ -345,14 +383,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct 
task_struct *old, struct ta
new->fpu_counter++;
__thread_set_has_fpu(new);
prefetch(new->thread.fpu.state);
-   } else
+   } else if (!use_xsave())
stts();
} else {
old->fpu_counter = 0;
old->thread.fpu.last_cpu =

[tip:x86/fpu] lguest, x86: handle guest TS bit for lazy/ non-lazy fpu host models

2012-08-24 Thread tip-bot for Suresh Siddha

Commit-ID:  1ce83ffda9aea53e6e4b6b6a82c028a019526010
Gitweb: http://git.kernel.org/tip/1ce83ffda9aea53e6e4b6b6a82c028a019526010
Author: Suresh Siddha 
AuthorDate: Fri, 24 Aug 2012 14:13:01 -0700
Committer:  H. Peter Anvin 
CommitDate: Fri, 24 Aug 2012 14:26:52 -0700

lguest, x86: handle guest TS bit for lazy/non-lazy fpu host models

Instead of using unlazy_fpu() check if user_has_fpu() and set/clear
the host TS bits so that the lguest works fine with both the
lazy/non-lazy FPU host models with minimal changes.

Signed-off-by: Suresh Siddha 
Link: 
http://lkml.kernel.org/r/1345842782-24175-6-git-send-email-suresh.b.sid...@intel.com
Cc: Rusty Russell 
Signed-off-by: H. Peter Anvin 
---
 drivers/lguest/x86/core.c |   10 +++---
 1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 39809035..4af12e1 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -203,8 +203,8 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 * we set it now, so we can trap and pass that trap to the Guest if it
 * uses the FPU.
 */
-   if (cpu->ts)
-   unlazy_fpu(current);
+   if (cpu->ts && user_has_fpu())
+   stts();
 
/*
 * SYSENTER is an optimized way of doing system calls.  We can't allow
@@ -234,6 +234,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 if (boot_cpu_has(X86_FEATURE_SEP))
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
 
+   /* Clear the host TS bit if it was set above. */
+   if (cpu->ts && user_has_fpu())
+   clts();
+
/*
 * If the Guest page faulted, then the cr2 register will tell us the
 * bad virtual address.  We have to grab this now, because once we
@@ -249,7 +253,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 * a different CPU. So all the critical stuff should be done
 * before this.
 */
-   else if (cpu->regs->trapnum == 7)
+   else if (cpu->regs->trapnum == 7 && !user_has_fpu())
math_state_restore();
 }
 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH drm-next 3/3] drm/i915/contexts: Fixup merge with commit b6c7488df68a

2012-08-24 Thread Sedat Dilek

This is a fixup patch for the merge of drm-next into linux-next caused
by commit b6c7488df68a ("drm/i915/contexts: fix list corruption").

Reported-By: Stephen Rothwell 
Signed-off-by: Sedat Dilek 
---
 drivers/gpu/drm/i915/i915_gem.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4f6841d..e8a5cb2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2273,11 +2273,11 @@ int i915_gpu_idle(struct drm_device *dev)
 
/* Flush everything onto the inactive list. */
for_each_ring(ring, dev_priv, i) {
-   ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
+   ret = i915_ring_idle(ring);
if (ret)
return ret;
 
-   ret = i915_ring_idle(ring);
+   ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID);
if (ret)
return ret;
 
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH drm-next 2/3] drm/i915: Remove reference to drm_display_info raw_edid field

2012-08-24 Thread Sedat Dilek

Reported-By: Stephen Rothwell 
Acked-by: Jani Nikula 
Acked-by: Dave Airlie 
Signed-off-by: Sedat Dilek 
---
 drivers/gpu/drm/i915/intel_modes.c |1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/intel_modes.c 
b/drivers/gpu/drm/i915/intel_modes.c
index 29b7259..4bc1c0f 100644
--- a/drivers/gpu/drm/i915/intel_modes.c
+++ b/drivers/gpu/drm/i915/intel_modes.c
@@ -45,7 +45,6 @@ int intel_connector_update_modes(struct drm_connector 
*connector,
drm_mode_connector_update_edid_property(connector, edid);
ret = drm_add_edid_modes(connector, edid);
drm_edid_to_eld(connector, edid);
-   connector->display_info.raw_edid = NULL;
kfree(edid);
 
return ret;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH drm-next 1/3] drm/udl: usb: Fix recursive Kconfig dependency

2012-08-24 Thread Sedat Dilek

In drivers/usb/Kconfig "config USB_ARCH_HAS_HCD" is within "if USB_SUPPORT"
statement.

In drivers/gpu/drm/Kconfig "config DRM_USB" depends on USB_ARCH_HAS_HCD
but selects USB_SUPPORT which leads to the error for udl Kconfig:

$ yes "" | make oldconfig
scripts/kconfig/conf --oldconfig Kconfig
drivers/gpu/drm/udl/Kconfig:1:error: recursive dependency detected!
drivers/gpu/drm/udl/Kconfig:1:  symbol DRM_UDL depends on USB_ARCH_HAS_HCD
drivers/usb/Kconfig:76: symbol USB_ARCH_HAS_HCD depends on USB_SUPPORT
drivers/usb/Kconfig:58: symbol USB_SUPPORT is selected by DRM_USB
drivers/gpu/drm/Kconfig:22: symbol DRM_USB is selected by DRM_UDL

Fix this by changing from select to depends on USB_SUPPORT in
"config DRM_USB".

This is a follow-up fix to df0b344300724e00db9fff7eb6406eb91f450b91
in Dave's drm-next GIT branch.

[ v2: Restore old status, but change from select to depends on USB_SUPPORT ]
[ v3: Use common prefix "drm/udl" in label ]

Signed-off-by: Sedat Dilek 
---
 drivers/gpu/drm/Kconfig |3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 3a8c683..0cbdc45 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -22,9 +22,8 @@ menuconfig DRM
 config DRM_USB
tristate
depends on DRM
-   depends on USB_ARCH_HAS_HCD
+   depends on USB_SUPPORT && USB_ARCH_HAS_HCD
select USB
-   select USB_SUPPORT
 
 config DRM_KMS_HELPER
tristate
-- 
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[tip:x86/fpu] x86, fpu: always use kernel_fpu_begin/end() for in-kernel FPU usage

2012-08-24 Thread tip-bot for Suresh Siddha

Commit-ID:  964735018df03c94dd12665385d59e3b2c7c08b8
Gitweb: http://git.kernel.org/tip/964735018df03c94dd12665385d59e3b2c7c08b8
Author: Suresh Siddha 
AuthorDate: Fri, 24 Aug 2012 14:13:00 -0700
Committer:  H. Peter Anvin 
CommitDate: Fri, 24 Aug 2012 14:26:50 -0700

x86, fpu: always use kernel_fpu_begin/end() for in-kernel FPU usage

use kernel_fpu_begin/end() instead of unconditionally accessing cr0 and
saving/restoring just the few used xmm/ymm registers.

This has some advantages like:
* If the task's FPU state is already active, then kernel_fpu_begin()
  will just save the user-state and avoiding the read/write of cr0.
  In general, cr0 accesses are much slower.

* Manual save/restore of xmm/ymm registers will affect the 'modified' and
  the 'init' optimizations brought in by the xsaveopt/xrstor
  infrastructure.

* Forward compatibility with future vector register extensions will be a
  problem if the xmm/ymm registers are manually saved and restored
  (corrupting the extended state of those vector registers).

With this patch, there was no significant difference in the xor throughput
using AVX, measured during boot.

Signed-off-by: Suresh Siddha 
Link: 
http://lkml.kernel.org/r/1345842782-24175-5-git-send-email-suresh.b.sid...@intel.com
Cc: Jim Kukunas 
Cc: NeilBrown 
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/xor_32.h  |   56 +---
 arch/x86/include/asm/xor_64.h  |   61 ++--
 arch/x86/include/asm/xor_avx.h |   54 ---
 3 files changed, 29 insertions(+), 142 deletions(-)

diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 4545708..aabd585 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = {
  * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
  */
 
-#define XMMS_SAVE  \
-do {   \
-   preempt_disable();  \
-   cr0 = read_cr0();   \
-   clts(); \
-   asm volatile(   \
-   "movups %%xmm0,(%0) ;\n\t"  \
-   "movups %%xmm1,0x10(%0) ;\n\t"  \
-   "movups %%xmm2,0x20(%0) ;\n\t"  \
-   "movups %%xmm3,0x30(%0) ;\n\t"  \
-   :   \
-   : "r" (xmm_save)\
-   : "memory");\
-} while (0)
-
-#define XMMS_RESTORE   \
-do {   \
-   asm volatile(   \
-   "sfence ;\n\t"  \
-   "movups (%0),%%xmm0 ;\n\t"  \
-   "movups 0x10(%0),%%xmm1 ;\n\t"  \
-   "movups 0x20(%0),%%xmm2 ;\n\t"  \
-   "movups 0x30(%0),%%xmm3 ;\n\t"  \
-   :   \
-   : "r" (xmm_save)\
-   : "memory");\
-   write_cr0(cr0); \
-   preempt_enable();   \
-} while (0)
-
-#define ALIGN16 __attribute__((aligned(16)))
-
 #define OFFS(x)"16*("#x")"
 #define PF_OFFS(x) "256+16*("#x")"
 #definePF0(x)  "   prefetchnta "PF_OFFS(x)"(%1)
;\n"
@@ -587,10 +555,8 @@ static void
 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 {
unsigned long lines = bytes >> 8;
-   char xmm_save[16*4] ALIGN16;
-   int cr0;
 
-   XMMS_SAVE;
+   kernel_fpu_begin();
 
asm volatile(
 #undef BLOCK
@@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned 
long *p2)
:
: "memory");
 
-   XMMS_RESTORE;
+   kernel_fpu_end();
 }
 
 static void
@@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned 
long *p2,
  unsigned long *p3)
 {
unsigned long lines = bytes >> 8;
-   char xmm_save[16*4] ALIGN16;
-   int cr0;
 
-   XMMS_SAVE;
+   kernel_fpu_begin();
 
asm volatile(
 #undef BLOCK
@@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned 
long *p2,
:
: "memory" );
 
-   XMMS_RESTORE;
+   kernel_fpu_end();
 }
 
 static void
@@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned 
long *p2,
  unsigned long *p3, unsigned long *p4)
 {
unsigned long lines = bytes >> 8;
-   char xmm_save[16*4] ALIGN16;
-   int cr0;
 
-   XMMS_SAVE;
+   kernel_fpu_begin();
 
asm volatile(
 #undef BLOCK
@@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned 
long *p2,
:
: "memory" );
 
-   XMMS_RESTORE;
+   kernel_fpu_end();
 }
 
 static void
@@ -770,10

[tip:x86/fpu] x86, kvm: use kernel_fpu_begin/end() in kvm_load/ put_guest_fpu()

2012-08-24 Thread tip-bot for Suresh Siddha

Commit-ID:  98700fa647b3572f7fa55485570ab9fc53b91d23
Gitweb: http://git.kernel.org/tip/98700fa647b3572f7fa55485570ab9fc53b91d23
Author: Suresh Siddha 
AuthorDate: Fri, 24 Aug 2012 14:12:59 -0700
Committer:  H. Peter Anvin 
CommitDate: Fri, 24 Aug 2012 14:26:49 -0700

x86, kvm: use kernel_fpu_begin/end() in kvm_load/put_guest_fpu()

kvm's guest fpu save/restore should be wrapped around
kernel_fpu_begin/end(). This will avoid for example taking a DNA
in kvm_load_guest_fpu() when it tries to load the fpu immediately
after doing unlazy_fpu() on the host side.

More importantly this will prevent the host process fpu from being
corrupted.

Signed-off-by: Suresh Siddha 
Link: 
http://lkml.kernel.org/r/1345842782-24175-4-git-send-email-suresh.b.sid...@intel.com
Cc: Avi Kivity 
Signed-off-by: H. Peter Anvin 
---
 arch/x86/kvm/x86.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be6d549..b92cc39 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5954,7 +5954,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 */
kvm_put_guest_xcr0(vcpu);
vcpu->guest_fpu_loaded = 1;
-   unlazy_fpu(current);
+   kernel_fpu_begin();
fpu_restore_checking(>arch.guest_fpu);
trace_kvm_fpu(1);
 }
@@ -5968,6 +5968,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 
vcpu->guest_fpu_loaded = 0;
fpu_save_init(>arch.guest_fpu);
+   kernel_fpu_end();
++vcpu->stat.fpu_reload;
kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
trace_kvm_fpu(0);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[tip:x86/fpu] x86, fpu: remove unnecessary user_fpu_end() in save_xstate_sig()

2012-08-24 Thread tip-bot for Suresh Siddha

Commit-ID:  cc50fae05beb2db9f4587bbb1a0d6aba2af5b407
Gitweb: http://git.kernel.org/tip/cc50fae05beb2db9f4587bbb1a0d6aba2af5b407
Author: Suresh Siddha 
AuthorDate: Fri, 24 Aug 2012 14:12:58 -0700
Committer:  H. Peter Anvin 
CommitDate: Fri, 24 Aug 2012 14:26:48 -0700

x86, fpu: remove unnecessary user_fpu_end() in save_xstate_sig()

A few lines below we do drop_fpu(), which is safer. Remove the
unnecessary user_fpu_end() in save_xstate_sig(), which allows
the drop_fpu() to ignore any pending exceptions from the user-space
and drop the current fpu.

Signed-off-by: Suresh Siddha 
Link: 
http://lkml.kernel.org/r/1345842782-24175-3-git-send-email-suresh.b.sid...@intel.com
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/fpu-internal.h |   17 +++--
 arch/x86/kernel/xsave.c |1 -
 2 files changed, 3 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/fpu-internal.h 
b/arch/x86/include/asm/fpu-internal.h
index fe95ad0..fac39e9 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -412,22 +412,11 @@ static inline void __drop_fpu(struct task_struct *tsk)
 }
 
 /*
- * The actual user_fpu_begin/end() functions
- * need to be preemption-safe.
+ * Need to be preemption-safe.
  *
- * NOTE! user_fpu_end() must be used only after you
- * have saved the FP state, and user_fpu_begin() must
- * be used only immediately before restoring it.
- * These functions do not do any save/restore on
- * their own.
+ * NOTE! user_fpu_begin() must be used only immediately before restoring
+ * it. This function does not do any save/restore on their own.
  */
-static inline void user_fpu_end(void)
-{
-   preempt_disable();
-   __thread_fpu_end(current);
-   preempt_enable();
-}
-
 static inline void user_fpu_begin(void)
 {
preempt_disable();
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 6cfc7d9..f0bb844 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -254,7 +254,6 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, 
int size)
/* Update the thread's fxstate to save the fsave header. */
if (ia32_fxstate)
fpu_fxsave(>thread.fpu);
-   user_fpu_end();
} else {
sanitize_i387_state(tsk);
if (__copy_to_user(buf_fx, xsave, xstate_size))
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

perf backtraces off-by-1

2012-08-24 Thread Arun Sharma

Some of our language runtimes like to map IP addresses in perf backtrace 
to specific byte codes. The way things stand now, the addresses on the 
backtrace are return addresses, rather than the addresses of the calling instructions. I think this 
issue may be present for other unusual call/return sequences where the 
user may be more interested in the calling instruction rather than the 
instruction control flow would return to.


A simple hack such as the one below makes our JIT guys happy. But the
code is not right if there was an asynchronous transfer of control (eg:
signal handler or interrupt).

libunwind contains similar code, but has the additional info in the 
unwind information to recognize async control transfer.


Wondering if this has been discussed before. One option is to support 
this for user mode only, with code to detect signal frames. Any other ideas?


   -Arun

--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -296,6 +296,7 @@ int machine__resolve_callchain(struct machine *self, 
struct perf_evsel *evsel,

u8 cpumode = PERF_RECORD_MISC_USER;
unsigned int i;
int err;
+   int async;

callchain_cursor_reset(>hists.callchain_cursor);

@@ -322,6 +323,11 @@ int machine__resolve_callchain(struct machine 
*self, struct perf_evsel *evsel,

continue;
}

+   /* XXX: check if this was an async control transfer */
+   async = 0;
+if (!async) {
+   ip--;
+   }
al.filtered = false;
thread__find_addr_location(thread, self, cpumode,
   MAP__FUNCTION, ip, , NULL);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[tip:x86/fpu] x86, fpu: drop_fpu() before restoring new state from sigframe

2012-08-24 Thread tip-bot for Suresh Siddha

Commit-ID:  739390035c5fba2132fa424309786ff7bdd2cc1e
Gitweb: http://git.kernel.org/tip/739390035c5fba2132fa424309786ff7bdd2cc1e
Author: Suresh Siddha 
AuthorDate: Fri, 24 Aug 2012 14:12:57 -0700
Committer:  H. Peter Anvin 
CommitDate: Fri, 24 Aug 2012 14:26:47 -0700

x86, fpu: drop_fpu() before restoring new state from sigframe

No need to save the state with unlazy_fpu(), that is about to get overwritten
by the state from the signal frame. Instead use drop_fpu() and continue
to restore the new state.

Also fold the stop_fpu_preload() into drop_fpu().

Signed-off-by: Suresh Siddha 
Link: 
http://lkml.kernel.org/r/1345842782-24175-2-git-send-email-suresh.b.sid...@intel.com
Signed-off-by: H. Peter Anvin 
---
 arch/x86/include/asm/fpu-internal.h |7 +--
 arch/x86/kernel/xsave.c |8 +++-
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/fpu-internal.h 
b/arch/x86/include/asm/fpu-internal.h
index ba83a08..fe95ad0 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -448,17 +448,12 @@ static inline void save_init_fpu(struct task_struct *tsk)
preempt_enable();
 }
 
-static inline void stop_fpu_preload(struct task_struct *tsk)
-{
-   tsk->fpu_counter = 0;
-}
-
 static inline void drop_fpu(struct task_struct *tsk)
 {
/*
 * Forget coprocessor state..
 */
-   stop_fpu_preload(tsk);
+   tsk->fpu_counter = 0;
preempt_disable();
__drop_fpu(tsk);
preempt_enable();
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a23d100..6cfc7d9 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -381,16 +381,14 @@ int __restore_xstate_sig(void __user *buf, void __user 
*buf_fx, int size)
struct xsave_struct *xsave = >thread.fpu.state->xsave;
struct user_i387_ia32_struct env;
 
-   stop_fpu_preload(tsk);
-   unlazy_fpu(tsk);
+   drop_fpu(tsk);
 
if (__copy_from_user(xsave, buf_fx, state_size) ||
-   __copy_from_user(, buf, sizeof(env))) {
-   drop_fpu(tsk);
+   __copy_from_user(, buf, sizeof(env)))
return -1;
-   }
 
sanitize_restored_xstate(tsk, , xstate_bv, fx_only);
+   set_used_math();
} else {
/*
 * For 64-bit frames and 32-bit fsave frames, restore the user
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2] fork: fix oops after fork failure

2012-08-24 Thread Andrew Morton

On Thu, 23 Aug 2012 19:36:08 +0400
Glauber Costa  wrote:

> When we want to duplicate a new process, dup_task_struct() will undergo
> a series of allocations. If alloc_thread_info_node() fails, we call
> free_task_struct() and return.
> 
> This seems right, but it is not. free_task_struct() will not only free
> the task struct from the kmem_cache, but will also call
> arch_release_task_struct(). The problem is that this function is
> supposed to undo whatever arch-specific work done by
> arch_dup_task_struct(), that is not yet called at this point.  The
> particular problem I ran accross was that in x86, we will arrive at
> fpu_free() without having ever allocated it.

I think this was already fixed by f19b9f74b7ea3b ("fork: fix error
handling in dup_task()").  As you would have noticed if you were
preparing patches against up-to-date kernel versions!

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v2] mm: hugetlb: add arch hook for clearing page flags before entering pool

2012-08-24 Thread Andrew Morton

On Thu, 23 Aug 2012 18:36:02 +0100
Will Deacon  wrote:

> On Thu, Aug 23, 2012 at 06:11:56PM +0100, Michal Hocko wrote:
> > On Thu 23-08-12 17:37:13, Will Deacon wrote:
> > > The core page allocator ensures that page flags are zeroed when freeing
> > > pages via free_pages_check. A number of architectures (ARM, PPC, MIPS)
> > > rely on this property to treat new pages as dirty with respect to the
> > > data cache and perform the appropriate flushing before mapping the pages
> > > into userspace.
> > > 
> > > This can lead to cache synchronisation problems when using hugepages,
> > > since the allocator keeps its own pool of pages above the usual page
> > > allocator and does not reset the page flags when freeing a page into
> > > the pool.
> > > 
> > > This patch adds a new architecture hook, arch_clear_hugepage_flags, so
> > > that architectures which rely on the page flags being in a particular
> > > state for fresh allocations can adjust the flags accordingly when a
> > > page is freed into the pool.

You could have used __weak here quite neatly, but whatever.

> Next step: start posting the ARM code!

I suggest you keep this patch in whichever tree holds that arm code.  If
I see this patch turn up in linux-next then I'll just drop my copy,
expecting that this patch will be merged alongside the ARM changes.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH can-next v6] can: add tx/rx LED trigger support

2012-08-24 Thread Fabio Baltieri

Hello Kurt,

On Fri, Aug 24, 2012 at 02:42:48PM +0200, Kurt Van Dijck wrote:
> On Fri, Aug 24, 2012 at 01:28:16PM +0200, Marc Kleine-Budde wrote:
> > On 08/24/2012 07:10 AM, Kurt Van Dijck wrote:
> > > Hello,
> > > 
> > > I find the CAN led triggers an interesting thing.
> > > 
> > > And then, this scenario crossed my mind:
> > > Imagine I do:
> > > [insert CAN device: can0]
> > > $ ip link set can0 name helga
> > > [insert another CAN device: again 'can0']
> > > 
> > > Registering 'can0-tx' led trigger will fail for the second CAN device,
> > > since that led trigger name is already reserved for CAN device 'helga'.
> > Good point.

Yep, thanks for pointing that out!

Interface renaming was something I considered when I first wrote the
code and I had the mac80211-led driver in mind, as that driver uses the
phy name and not the netdev one for its triggers.

The reason why I did not care that much in the end is that on SoC based
systems trigger-led association is made at probe time, based on data
either from platform_data or devicetree, so I imagined that once the
kernel is ported to the board and default triggers are set correctly at
boot time, the userspace is free to rename CAN interfaces and nobody
should notice... :^)

The thing I did not consider are hot-plug interfaces mixed with
renaming, such as in the case you pointed out - it's probably not really
common but still possible.

> > > I'm not sure how to fix such.
> > > If 'rx' & 'tx' may be combined, reusing the netdev name may be possible?
> > > Just wild thinking ...
> > 
> > I think the device's name (not netdev) is unique in the system and
> > cannot be changed.
>
> but may contain several netdev's ...

Ouch.

> 
> > 
> > On my device tree enabled mx28 I'm talking about the "80032000.can" in:
> 
> Your idea triggered another thought: since control is put in device drivers,
> why putting the name in the generic can_dev struct?

Why not?  That makes the API easy.

> A more flexible approach to assign names is the key to success here.
> The correct 'works in all conditions' approach is not yet in my sight :-(

Agreed.

What about using a combination of device name + an optional port index
specified in devm_can_led_init()? (something similar to platform_device names)
Of course that would require changing the API for libraries like
register_sja1000dev(), to add a port index.

Fabio
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 3/3] HWPOISON: prevent inode cache removal to keep AS_HWPOISON sticky

2012-08-24 Thread Naoya Horiguchi

Hello,

On Thu, Aug 23, 2012 at 04:31:43PM -0400, Naoya Horiguchi wrote:
> On Thu, Aug 23, 2012 at 05:11:25PM +0800, Fengguang Wu wrote:
> > On Wed, Aug 22, 2012 at 11:17:35AM -0400, Naoya Horiguchi wrote:
...
> > > diff --git v3.6-rc1.orig/fs/inode.c v3.6-rc1/fs/inode.c
> > > index ac8d904..8742397 100644
> > > --- v3.6-rc1.orig/fs/inode.c
> > > +++ v3.6-rc1/fs/inode.c
> > > @@ -717,6 +717,15 @@ void prune_icache_sb(struct super_block *sb, int 
> > > nr_to_scan)
> > >   }
> > >  
> > >   /*
> > > +  * Keep inode caches on memory for user processes to certainly
> > > +  * be aware of memory errors.
> > > +  */
> > > + if (unlikely(mapping_hwpoison(inode->i_mapping))) {
> > > + spin_unlock(>i_lock);
> > > + continue;
> > > + }
> > 
> > That chunk prevents reclaiming all the cached pages. However the intention
> > is only to keep the struct inode together with the hwpoison bit?
> 
> Yes, we can not reclaim pagecaches from shrink_slab(), but we can do from
> shrink_zone(). So it shouldn't happen that cached pages on hwpoisoned file
> remain for long under high memory pressure.

I might have missed your point. Are you suggesting that this chunk should come
after the if (inode_has_buffers(inode) || inode->i_data.nrpages) { ... } block?
I think that's right, so I'll try and test it this weekend.

> > > + /*
> > >* Referenced or dirty inodes are still in use. Give them
> > >* another pass through the LRU as we canot reclaim them now.
> > >*/
> > > @@ -1405,6 +1414,9 @@ static void iput_final(struct inode *inode)
> > >   inode->i_state &= ~I_WILL_FREE;
> > >   }
> > >  
> > > + if (unlikely(mapping_hwpoison(inode->i_mapping) && drop))
> > > + mapping_clear_hwpoison(inode->i_mapping);
> > 
> > Is that clear necessary? Because the bit will be gone with the inode
> > struct: it's going to be de-allocated anyway.
> 
> With the chunk in prune_icache_sb() we keep the inode struct with
> AS_HWPOISON set on memory, so in order to remove it, we need explicitly
> clear the bit.
> Without this clear, the inode remains until system reboot.

And again, you are right here. Without this clear, this inode will be
cleared in destroy_inode().

Thanks,
Naoya
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/2] mm/mmu_notifier: init notifier if necessary

2012-08-24 Thread Andrew Morton

On Fri, 24 Aug 2012 22:37:55 +0800
Wanpeng Li  wrote:

> From: Gavin Shan 
> 
> While registering MMU notifier, new instance of MMU notifier_mm will
> be allocated and later free'd if current mm_struct's MMU notifier_mm
> has been initialized. That causes some overhead. The patch tries to
> eliminate that.
> 
> Signed-off-by: Gavin Shan 
> Signed-off-by: Wanpeng Li 
> ---
>  mm/mmu_notifier.c |   22 +++---
>  1 files changed, 11 insertions(+), 11 deletions(-)
> 
> diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
> index 862b608..fb4067f 100644
> --- a/mm/mmu_notifier.c
> +++ b/mm/mmu_notifier.c
> @@ -192,22 +192,23 @@ static int do_mmu_notifier_register(struct mmu_notifier 
> *mn,
>  
>   BUG_ON(atomic_read(>mm_users) <= 0);
>  
> - ret = -ENOMEM;
> - mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
> - if (unlikely(!mmu_notifier_mm))
> - goto out;
> -
>   if (take_mmap_sem)
>   down_write(>mmap_sem);
>   ret = mm_take_all_locks(mm);
>   if (unlikely(ret))
> - goto out_cleanup;
> + goto out;
>  
>   if (!mm_has_notifiers(mm)) {
> + mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm),
> + GFP_ATOMIC);

Why was the code switched to the far weaker GFP_ATOMIC?  We can still
perform sleeping allocations inside mmap_sem.

> + if (unlikely(!mmu_notifier_mm)) {
> + ret = -ENOMEM;
> + goto out_of_mem;
> + }
>   INIT_HLIST_HEAD(_notifier_mm->list);
>   spin_lock_init(_notifier_mm->lock);
> +
>   mm->mmu_notifier_mm = mmu_notifier_mm;
> - mmu_notifier_mm = NULL;
>   }
>   atomic_inc(>mm_count);
>  

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH V4] mfd: add MAX8907 core driver

2012-08-24 Thread Stephen Warren

On 08/15/2012 10:28 AM, Stephen Warren wrote:
> From: Gyungoh Yoo 
> 
> The MAX8907 is an I2C-based power-management IC containing voltage
> regulators, a reset controller, a real-time clock, and a touch-screen
> controller.

Samuel,

Does this look OK now? (although you're probably traveling to a
conference right now...)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] fs/proc: Move kfree outside pde_unload_lock

2012-08-24 Thread Nathan Zimmer

On Fri, Aug 24, 2012 at 11:45:45AM -0500, Nathan Zimmer wrote:
> On 08/24/2012 09:58 AM, Eric Dumazet wrote:
>> Le vendredi 24 août 2012 à 09:48 -0500, Nathan Zimmer a écrit :
>>> On Wed, Aug 22, 2012 at 11:42:58PM +0200, Eric Dumazet wrote:
 On Wed, 2012-08-22 at 20:28 +0200, Eric Dumazet wrote:

> Thats interesting, but if you really want this to fly, one RCU
> conversion would be much better ;)
>
> pde_users would be an atomic_t and you would avoid the spinlock
> contention.
 Here is what I had in mind, I would be interested to know how it helps a 
 512 core machine ;)

>>> Here are the results and they look great.
>>>
>>> cpuinfo baseline    moved kfree Rcu
>>> tasks   read-sec    read-sec    read-sec
>>> 1   0.0141  0.0141  0.0141
>>> 2   0.0140  0.0140  0.0142
>>> 4   0.0140  0.0141  0.0141
>>> 8   0.0145  0.0145  0.0140
>>> 16  0.0553  0.0548  0.0168
>>> 32  0.1688  0.1622  0.0549
>>> 64  0.5017  0.3856  0.1690
>>> 128 1.7005  0.9710  0.5038
>>> 256 5.2513  2.6519  2.0804
>>> 512 8.0529  6.2976  3.0162
>>>
>>>
>>>
>> Indeed...
>>
>> Could you explain the test you are actually doing?
>>
>> Thanks
>>
>>
>
>
> It is a dead simple test.
> The test starts by forking off X number of tasks
> assigning each their own cpu.
> Each task then allocs a bit of memory.
> All tasks wait on a memory cell for the go order.
> We measure the read time starting here.
> Once the go order is given they all read a chunk of the selected proc file.
> I was using /proc/cpuinfo to test.
> Once everyone has finished we take the end read time.
>

Here is the text for those who are curious.


/**/
/*
 * Help text, one string per output line, terminated by a null (0) entry.
 * do_help() walks this array until it reaches the terminator.
 */
char *helpstr[] = {
	"This test program is a generic template.",
	0
};

#include 
#include 
#include 
#include 
#include 
#include 

#include 
#include 
#include 
#include 
#include 
#include 

//#include "setup.h"

#define MAXCPUS 4096
#define perrorx(s)  do { perror(s); exit(1);} while(0)
#define mb()asm volatile("mfence":::"memory")
#define barrier()   asm volatile("": : :"memory")
#define cpu_relax() asm volatile ("rep;nop":::"memory");


/* getopt() state provided by the C library. */
extern int optind, opterr;
extern char *optarg;

/* Command-line settings; main() fills these in from the option letters shown. */
static int verbose = 0;			/* -v: incremented per occurrence */
static int header = 0;			/* -h: incremented per occurrence */
static char *file = "/proc/stat";	/* -f: file each task reads */
static int numtasks = 1;		/* -n: number of tasks */
static int repeat = 1;			/* -r: timed reads of the file per task */
static int bufsize = 1024;		/* -b: size of each task's read buffer */

/*
 * Rendezvous block used by slave().
 * NOTE(review): where cntl is allocated is not visible in this part of the
 * file; presumably it lives in memory shared by all tasks - confirm.
 */
struct control_s {
	int ready;	/* atomically incremented by each slave after its first read */
	int done;	/* atomically incremented by each slave after its repeat loop */
	int go;		/* slaves spin until this becomes non-zero */
	int exit;	/* slaves spin until this becomes non-zero, then exit(0) */
} *cntl;


/* Affinity mask captured by runon_init() via sched_getaffinity(). */
static cpu_set_t *defmask;
/* Byte size of a MAXCPUS-wide CPU set, as computed by CPU_ALLOC_SIZE(). */
static int cpu_set_size;

/*
 * One-time setup for runon(): compute the byte size of a MAXCPUS-wide CPU
 * set and record the calling process's current affinity mask in defmask.
 * Later calls return immediately once defmask has been set.
 * Exits through perrorx() if the affinity mask cannot be read.
 */
static void runon_init(void)
{
	/* Already initialised by an earlier call. */
	if (defmask)
		return;

	cpu_set_size = CPU_ALLOC_SIZE(MAXCPUS);
	defmask = CPU_ALLOC(MAXCPUS);

	if (sched_getaffinity(0, cpu_set_size, defmask) < 0)
		perrorx("unexpected failure in runon_init");
}


/*
 * Convert a duration given in microseconds to seconds.
 *
 * time_in_microseconds: duration in microseconds (may be negative).
 * Returns the same duration in seconds as a double.
 *
 * The divisor is one million (microseconds per second). The listing divided
 * by 100, which does not yield seconds.
 */
static double timeInSeconds(long time_in_microseconds)
{
	return (double)time_in_microseconds / 1000000.0;
}

/*
 * Pin the calling task to a single CPU.
 *
 * cpu: index of the CPU to run on; must satisfy 0 <= cpu < MAXCPUS.
 * Returns 0 on success, or -1 if cpu is out of range, the temporary CPU set
 * cannot be allocated, or sched_setaffinity() fails.
 *
 * The range check happens before the mask is allocated, and the mask is
 * released on every path, so error returns no longer leak it.
 */
static int runon(int cpu)
{
	cpu_set_t *mask;
	int rc;

	runon_init();

	if (cpu < 0 || cpu >= MAXCPUS)
		return -1;

	mask = CPU_ALLOC(MAXCPUS);
	if (!mask)
		return -1;

	CPU_ZERO_S(cpu_set_size, mask);
	CPU_SET_S(cpu, cpu_set_size, mask);
	rc = sched_setaffinity(0, cpu_set_size, mask) < 0 ? -1 : 0;

	CPU_FREE(mask);
	return rc;
}

/*
 * Return the current wall-clock time in microseconds.
 *
 * The value is tv_sec * 1000000 + tv_usec as reported by gettimeofday().
 * The mb() calls on either side keep the sample from being reordered with
 * the surrounding memory accesses.
 *
 * Two defects in the listing are repaired: gettimeofday() had lost its
 * "&tp" argument, and seconds were scaled by 100 instead of 1000000.
 *
 * NOTE(review): assumes long is 64 bits wide; on a 32-bit long the
 * multiplication would overflow.
 */
static long getCurrentTime(void)
{
	struct timeval tp;
	long usec;

	mb();
	gettimeofday(&tp, NULL);
	usec = tp.tv_sec * 1000000L + tp.tv_usec;
	mb();
	return usec;
}


/*
 * Print every entry of helpstr on its own line, stopping at the null
 * terminator, then exit with status 0. Does not return.
 */
static void do_help(void)
{
	int i;

	for (i = 0; helpstr[i]; i++)
		printf("%s\n", helpstr[i]);

	exit(0);
}

/*
 * Open the target file, read it line by line into buf until EOF, and close
 * it. Exits through perrorx() if the file cannot be opened.
 */
static void slave_read_file(char *buf)
{
	FILE *f = fopen(file, "r");

	/* fopen() signals failure with NULL; comparing it against 0 with '<' never fires. */
	if (f == NULL)
		perrorx("open failed");
	while (fgets(buf, bufsize, f) != NULL)
		;
	fclose(f);
}

/*
 * Body of one worker task. Does not return.
 *
 * id: CPU index handed to runon() for pinning. The result of runon() is
 *     ignored, as in the original, so a failed pin does not stop the task.
 *
 * Sequence:
 *   1. allocate and zero a bufsize-byte buffer;
 *   2. read the file once before signalling readiness;
 *   3. increment cntl->ready and spin until cntl->go is set;
 *   4. read the file 'repeat' times;
 *   5. increment cntl->done, spin until cntl->exit is set, then exit(0).
 */
static void slave(int id)
{
	int i;
	char *buf;

	runon(id);

	buf = malloc(bufsize);
	if (buf == NULL)
		perrorx("malloc failed");
	memset(buf, 0, bufsize);

	/* Untimed first pass, done before this task reports itself ready. */
	slave_read_file(buf);

	(void)__sync_fetch_and_add(&cntl->ready, 1);
	while (!cntl->go)
		cpu_relax();

	for (i = 0; i < repeat; i++) {
		slave_read_file(buf);
		barrier();
	}

	(void)__sync_fetch_and_add(&cntl->done, 1);
	while (!cntl->exit)
		cpu_relax();
	exit(0);
}

int main(int argc, char **argv)
{
	int i, c, stat, er = 0;
	static char optstr[] = "b:f:hn:r:v";
	unsigned long t, tfork, tready, tread, texit;

	opterr = 1;
	while ((c = getopt(argc, argv, optstr)) != EOF)
		switch (c) {
		case 'b':
			bufsize = atoi(optarg);
			break;
		case 'f':
			file = optarg;
			break;
		case 'h':
			header++;
			break;
		case 'n':
			numtasks = atoi(optarg);
			break;
		case 'r':
			repeat = atoi(optarg);
			break;
		case 'v':
			verbose++;
			break;
		case '?':

Re: [PATCH 0/6] x86, fpu: cleanups, introduce non-lazy FPU restore for xsave

2012-08-24 Thread H. Peter Anvin

I have applied this to tip:x86/fpu, but I have also asked Suresh to
prepare a follow-on patch to decouple eager save from the existence of
the XSAVE instruction.  It seems pretty clear that eager save is a net
benefit in the presence of the XSAVEOPT, but it isn't as clear for only
having XSAVE, as far as I can tell.  Either way it would seem to be a
policy decision that is somewhat separate from the exact instruction.

-hpa
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Logitech USB headset not working in 3.6-rc3

2012-08-24 Thread Daniel Mack

On Fri, Aug 24, 2012 at 9:08 PM, Josh Boyer  wrote:
> Hi All,
>
> We've had a report[1] that the Logitech USB headset 0003:046D:0A0C isn't
> working with 3.6-rc3.  It seems the last working kernel was based on
> commit 10c63c9, and it first stopped working with a kernel based on
> commit 23dcfa6.  There are only a few ALSA commits between those
> revisions, so hopefully this is something that is fairly easy to
> identify.  The only commit to USB audio in that set is:
>
> commit e9ba389c5ffc4dd29dfe17e00e4887730235
> Author: Takashi Iwai 
> Date:   Wed Aug 15 12:32:00 2012 +0200
>
> ALSA: usb-audio: Fix scheduling-while-atomic bug in PCM capture stream
>
>
> I've CC'd the reporter and attached the alsa-info is below.

Does it work again once you revert that commit?


Daniel




> [1] https://bugzilla.redhat.com/show_bug.cgi?id=851619
>
> upload=true=true=
> !!
> !!ALSA Information Script v 0.4.60
> !!
>
> !!Script ran on: Fri Aug 24 18:35:42 UTC 2012
>
>
> !!Linux Distribution
> !!--
>
> Fedora release 18 (Rawhide) Fedora release 18 (Rawhide) NAME=Fedora ID=fedora 
> PRETTY_NAME="Fedora 18 (Rawhide)" CPE_NAME="cpe:/o:fedoraproject:fedora:18" 
> Fedora release 18 (Rawhide) Fedora release 18 (Rawhide)
>
>
> !!DMI Information
> !!---
>
> Manufacturer:  Dell Inc.
> Product Name:  Precision WorkStation 690
> Product Version:
>
>
> !!Kernel Information
> !!--
>
> Kernel release:3.6.0-0.rc3.git0.1.fc18.x86_64
> Operating System:  GNU/Linux
> Architecture:  x86_64
> Processor: x86_64
> SMP Enabled:   Yes
>
>
> !!ALSA Version
> !!
>
> Driver version: 1.0.25
> Library version:1.0.25
> Utilities version:  1.0.25
>
>
> !!Loaded ALSA modules
> !!---
>
> snd_hda_intel
> snd_usb_audio
>
>
> !!Sound Servers on this system
> !!
>
> Pulseaudio:
>   Installed - Yes (/usr/bin/pulseaudio)
>   Running - Yes
>
> aRts:
>   Installed - Yes (/usr/bin/artsd)
>   Running - No
>
> Jack:
>   Installed - Yes (/usr/bin/jackd)
>   Running - No
>
>
> !!Soundcards recognised by ALSA
> !!-
>
>  0 [Intel  ]: HDA-Intel - HDA Intel
>   HDA Intel at 0xfcffc000 irq 66
>  1 [Headset]: USB-Audio - Logitech USB Headset
>   Logitech Logitech USB Headset at usb-0000:00:1d.7-6.4, 
> full speed
>
>
> !!PCI Soundcards installed in the system
> !!--
>
> 00:1b.0 Audio device: Intel Corporation 631xESB/632xESB High Definition Audio 
> Controller (rev 09)
>
>
> !!Advanced information - PCI Vendor/Device/Subsystem ID's
> !!
>
> 00:1b.0 0403: 8086:269a (rev 09)
> Subsystem: 1028:01c0
>
>
> !!Loaded sound module options
> !!--
>
> !!Module: snd_hda_intel
> align_buffer_size : -1
> bdl_pos_adj : 
> 1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
> beep_mode : 
> N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
> enable : 
> Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y
> enable_msi : -1
> id : 
> (null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null)
> index : 
> -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
> model : 
> (null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null)
> patch : 
> (null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null),(null)
> position_fix : 
> 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
> power_save : 0
> power_save_controller : Y
> probe_mask : 
> -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
> probe_only : 
> 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
> single_cmd : N
> snoop : Y
>
> !!Module: snd_usb_audio
> async_unlink : Y
> device_setup : 
> 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
> enable : 
> Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y,Y
> id : 
>

Re: [PATCH 00/11] rcu: Add missing RCU idle APIs on idle loop v2

2012-08-24 Thread Paul E. McKenney

On Thu, Aug 23, 2012 at 04:58:24PM +0200, Frederic Weisbecker wrote:
> Hi,
> 
> Changes since v1:
> 
> - Fixed preempt handling in alpha idle loop
> - added ack from Geert
> - fixed stable email address, sorry :-/
> 
> This time I built tested everywhere but: h8300 (compiler internal error),
> and mn10300, parisc, score (cross compilers not available in
> ftp://ftp.kernel.org/pub/tools/crosstool/files/bin/x86_64/4.6.3/)
> 
> For testing, you can pull from:
> 
> git://github.com/fweisbec/linux-dynticks.git
>   rcu/idle-fix-v2 
> 
> Thanks.

I have queued these on -rcu branch rcu/idle:

git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu.git

This problem has been in place since 3.3, so it is hard to argue that
it is a regression for this merge window.  I have therefore queued it
for 3.7.

Thanx, Paul

> Frederic Weisbecker (11):
>   alpha: Fix preemption handling in idle loop
>   alpha: Add missing RCU idle APIs on idle loop
>   cris: Add missing RCU idle APIs on idle loop
>   frv: Add missing RCU idle APIs on idle loop
>   h8300: Add missing RCU idle APIs on idle loop
>   m32r: Add missing RCU idle APIs on idle loop
>   m68k: Add missing RCU idle APIs on idle loop
>   mn10300: Add missing RCU idle APIs on idle loop
>   parisc: Add missing RCU idle APIs on idle loop
>   score: Add missing RCU idle APIs on idle loop
>   xtensa: Add missing RCU idle APIs on idle loop
> 
>  arch/alpha/kernel/process.c   |6 +-
>  arch/alpha/kernel/smp.c   |1 +
>  arch/cris/kernel/process.c|3 +++
>  arch/frv/kernel/process.c |3 +++
>  arch/h8300/kernel/process.c   |3 +++
>  arch/m32r/kernel/process.c|3 +++
>  arch/m68k/kernel/process.c|3 +++
>  arch/mn10300/kernel/process.c |3 +++
>  arch/parisc/kernel/process.c  |3 +++
>  arch/score/kernel/process.c   |4 +++-
>  arch/xtensa/kernel/process.c  |3 +++
>  11 files changed, 33 insertions(+), 2 deletions(-)
> 
> -- 
> 1.7.5.4
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3 03/23] serial: omap: don't access the platform_device

2012-08-24 Thread Tony Lindgren

* Felipe Balbi  [120823 03:37]:
> The driver doesn't need to know about its platform_device.
> 
> Everything the driver needs can be done through the
> struct device pointer. In case we need to use the
> OMAP-specific PM function pointers, those can make
> sure to find the device's platform_device pointer
> so they can find the struct omap_device through
> pdev->archdata field.
> 
> Tested-by: Shubhrajyoti D 
> Acked-by: Santosh Shilimkar 
> Signed-off-by: Felipe Balbi 

Acked-by: Tony Lindgren 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3 01/17] hashtable: introduce a small and naive hashtable

2012-08-24 Thread Tejun Heo

Hello,

On Fri, Aug 24, 2012 at 10:53:45PM +0200, Sasha Levin wrote:
> Yup, but we could be using the same API for dynamic non-resizable and static 
> if
> we go with the DECLARE/hash_init. We could switch between them (and other
> implementations) without having to change the code.

I think it's better to stick with the usual conventions.

> > * DECLARE/DEFINE
> > * hash_head()
> > * hash_for_each_head()
> > * hash_add*()
> > * hash_for_each_possible*()
>  * hash_for_each*() ?
> 
> Why do we need hash_head/hash_for_each_head()? I haven't stumbled on a place 
> yet
> that needed direct access to the bucket itself.

Because whole hash table walking is much less common and we can avoid
another full set of iterators.

> This basically means 11 macros/functions that would let us have full
> encapsulation and will make it very easy for future implementations to work 
> with
> this API instead of making up a new one. It's also not significantly (+~2-3)
> more than the ones you listed.

I'm not sure whether full encapsulation is a good idea for trivial
hashtable.  For higher level stuff, sure but at this level I think
benefits coming from known obvious implementation can be larger.
e.g. suppose the caller knows certain entries to be way colder than
others and wants to put them at the end of the chain.

So, I think implementing the minimal set of helpers which reflect the
underlying trivial implementation explicitly could actually be better
even when discounting the reduced number of wrappers.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 02/14] aoe: kernel thread handles I/O completions for simple locking

2012-08-24 Thread Andrew Morton

On Fri, 17 Aug 2012 21:24:08 -0400
Ed Cashin  wrote:

> This patch makes the frames the aoe driver uses to track the
> relationship between bios and packets more flexible and detached, so
> that they can be passed to an "aoe_ktio" thread for completion of I/O.
> 
> The frames are handled much like skbs, with a capped amount of
> preallocation so that real-world use cases are likely to run smoothly
> and degenerate gracefully even under memory pressure.
> 
> Decoupling I/O completion from the receive path and serializing it in
> a process makes it easier to think about the correctness of the
> locking in the driver, especially in the case of a remote MAC address
> becoming unusable.
> 
> ...
>
> +static int
> +kthread(void *vp)
> +{
> + struct ktstate *k;
> + DECLARE_WAITQUEUE(wait, current);
> + sigset_t blocked;
> + int more;
> +
> + k = vp;
> +#ifdef PF_NOFREEZE

PF_NOFREEZE can never be undefined.

> + current->flags |= PF_NOFREEZE;
> +#endif
> + set_user_nice(current, -10);
> + sigfillset();
> + sigprocmask(SIG_BLOCK, , NULL);
> + flush_signals(current);

This is a kernel thread - it shouldn't need to fiddle with signals.

> + complete(>rendez);

That's odd.  Why do a complete() before we even start?  A code comment
is needed if this is indeed correct.

> + do {
> + __set_current_state(TASK_UNINTERRUPTIBLE);

I think this statement is simply unneeded.

> + spin_lock_irq(k->lock);
> + more = k->fn();
> + if (!more) {
> + add_wait_queue(k->waitq, );
> + __set_current_state(TASK_INTERRUPTIBLE);
> + }
> + spin_unlock_irq(k->lock);
> + if (!more) {
> + schedule();
> + remove_wait_queue(k->waitq, );
> + } else
> + cond_resched();

Here we can do a cond_resched() when in state TASK_INTERRUPTIBLE.  Such
a schedule() will never return unless some other thread flips this task
into state TASK_RUNNING.  But if another thread does that, we should
have been on that waitqueue!

It seems all confused and racy.

> + } while (!kthread_should_stop());
> + __set_current_state(TASK_RUNNING);

I don't think there's any path by which we can get here in any state
other than TASK_RUNNING.

> + complete(>rendez);
> + return 0;
> +}

This function might be a bit neater if it were to use
prepare_to_wait()/finish_wait().

> +static void
> +aoe_ktstop(struct ktstate *k)
> +{
> + kthread_stop(k->task);
> + wait_for_completion(>rendez);
> +}
> +
> +static int
> +aoe_ktstart(struct ktstate *k)
> +{
> + struct task_struct *task;
> +
> + init_completion(>rendez);
> + task = kthread_run(kthread, k, k->name);
> + if (task == NULL || IS_ERR(task))
> + return -EFAULT;

EFAULT makes no sense?

> + k->task = task;
> + wait_for_completion(>rendez);
> + init_completion(>rendez);/* for exit */
> + return 0;
> +}
>
> ...
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/6] x86, fpu: drop_fpu() before restoring new state from sigframe

2012-08-24 Thread Suresh Siddha

No need to save the state with unlazy_fpu(), that is about to get overwritten
by the state from the signal frame. Instead use drop_fpu() and continue
to restore the new state.

Also fold the stop_fpu_preload() into drop_fpu().

Signed-off-by: Suresh Siddha 
---
 arch/x86/include/asm/fpu-internal.h |7 +--
 arch/x86/kernel/xsave.c |8 +++-
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/fpu-internal.h 
b/arch/x86/include/asm/fpu-internal.h
index ba83a08..fe95ad0 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -448,17 +448,12 @@ static inline void save_init_fpu(struct task_struct *tsk)
preempt_enable();
 }
 
-static inline void stop_fpu_preload(struct task_struct *tsk)
-{
-   tsk->fpu_counter = 0;
-}
-
 static inline void drop_fpu(struct task_struct *tsk)
 {
/*
 * Forget coprocessor state..
 */
-   stop_fpu_preload(tsk);
+   tsk->fpu_counter = 0;
preempt_disable();
__drop_fpu(tsk);
preempt_enable();
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index a23d100..6cfc7d9 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -381,16 +381,14 @@ int __restore_xstate_sig(void __user *buf, void __user 
*buf_fx, int size)
struct xsave_struct *xsave = >thread.fpu.state->xsave;
struct user_i387_ia32_struct env;
 
-   stop_fpu_preload(tsk);
-   unlazy_fpu(tsk);
+   drop_fpu(tsk);
 
if (__copy_from_user(xsave, buf_fx, state_size) ||
-   __copy_from_user(, buf, sizeof(env))) {
-   drop_fpu(tsk);
+   __copy_from_user(, buf, sizeof(env)))
return -1;
-   }
 
sanitize_restored_xstate(tsk, , xstate_bv, fx_only);
+   set_used_math();
} else {
/*
 * For 64-bit frames and 32-bit fsave frames, restore the user
-- 
1.7.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 3/6] x86, kvm: use kernel_fpu_begin/end() in kvm_load/put_guest_fpu()

2012-08-24 Thread Suresh Siddha

kvm's guest fpu save/restore should be wrapped around
kernel_fpu_begin/end(). This will avoid for example taking a DNA
in kvm_load_guest_fpu() when it tries to load the fpu immediately
after doing unlazy_fpu() on the host side.

More importantly this will prevent the host process fpu from being
corrupted.

Signed-off-by: Suresh Siddha 
Cc: Avi Kivity 
---
 arch/x86/kvm/x86.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 42bce48..67e773c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5969,7 +5969,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 */
kvm_put_guest_xcr0(vcpu);
vcpu->guest_fpu_loaded = 1;
-   unlazy_fpu(current);
+   kernel_fpu_begin();
fpu_restore_checking(>arch.guest_fpu);
trace_kvm_fpu(1);
 }
@@ -5983,6 +5983,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 
vcpu->guest_fpu_loaded = 0;
fpu_save_init(>arch.guest_fpu);
+   kernel_fpu_end();
++vcpu->stat.fpu_reload;
kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
trace_kvm_fpu(0);
-- 
1.7.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 4/6] x86, fpu: always use kernel_fpu_begin/end() for in-kernel FPU usage

2012-08-24 Thread Suresh Siddha

use kernel_fpu_begin/end() instead of unconditionally accessing cr0 and
saving/restoring just the few used xmm/ymm registers.

This has some advantages like:
* If the task's FPU state is already active, then kernel_fpu_begin()
  will just save the user-state, avoiding the read/write of cr0.
  In general, cr0 accesses are much slower.

* Manual save/restore of xmm/ymm registers will affect the 'modified' and
  the 'init' optimizations brought in by the xsaveopt/xrstor
  infrastructure.

* Forward compatibility with future vector register extensions will be a
  problem if the xmm/ymm registers are manually saved and restored
  (corrupting the extended state of those vector registers).

With this patch, there was no significant difference in the xor throughput
using AVX, measured during boot.

Signed-off-by: Suresh Siddha 
Cc: Jim Kukunas 
Cc: NeilBrown 
---
 arch/x86/include/asm/xor_32.h  |   56 +---
 arch/x86/include/asm/xor_64.h  |   61 ++--
 arch/x86/include/asm/xor_avx.h |   54 ---
 3 files changed, 29 insertions(+), 142 deletions(-)

diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index 4545708..aabd585 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -534,38 +534,6 @@ static struct xor_block_template xor_block_p5_mmx = {
  * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
  */
 
-#define XMMS_SAVE  \
-do {   \
-   preempt_disable();  \
-   cr0 = read_cr0();   \
-   clts(); \
-   asm volatile(   \
-   "movups %%xmm0,(%0) ;\n\t"  \
-   "movups %%xmm1,0x10(%0) ;\n\t"  \
-   "movups %%xmm2,0x20(%0) ;\n\t"  \
-   "movups %%xmm3,0x30(%0) ;\n\t"  \
-   :   \
-   : "r" (xmm_save)\
-   : "memory");\
-} while (0)
-
-#define XMMS_RESTORE   \
-do {   \
-   asm volatile(   \
-   "sfence ;\n\t"  \
-   "movups (%0),%%xmm0 ;\n\t"  \
-   "movups 0x10(%0),%%xmm1 ;\n\t"  \
-   "movups 0x20(%0),%%xmm2 ;\n\t"  \
-   "movups 0x30(%0),%%xmm3 ;\n\t"  \
-   :   \
-   : "r" (xmm_save)\
-   : "memory");\
-   write_cr0(cr0); \
-   preempt_enable();   \
-} while (0)
-
-#define ALIGN16 __attribute__((aligned(16)))
-
 #define OFFS(x)"16*("#x")"
 #define PF_OFFS(x) "256+16*("#x")"
 #definePF0(x)  "   prefetchnta "PF_OFFS(x)"(%1)
;\n"
@@ -587,10 +555,8 @@ static void
 xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 {
unsigned long lines = bytes >> 8;
-   char xmm_save[16*4] ALIGN16;
-   int cr0;
 
-   XMMS_SAVE;
+   kernel_fpu_begin();
 
asm volatile(
 #undef BLOCK
@@ -633,7 +599,7 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned 
long *p2)
:
: "memory");
 
-   XMMS_RESTORE;
+   kernel_fpu_end();
 }
 
 static void
@@ -641,10 +607,8 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned 
long *p2,
  unsigned long *p3)
 {
unsigned long lines = bytes >> 8;
-   char xmm_save[16*4] ALIGN16;
-   int cr0;
 
-   XMMS_SAVE;
+   kernel_fpu_begin();
 
asm volatile(
 #undef BLOCK
@@ -694,7 +658,7 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned 
long *p2,
:
: "memory" );
 
-   XMMS_RESTORE;
+   kernel_fpu_end();
 }
 
 static void
@@ -702,10 +666,8 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned 
long *p2,
  unsigned long *p3, unsigned long *p4)
 {
unsigned long lines = bytes >> 8;
-   char xmm_save[16*4] ALIGN16;
-   int cr0;
 
-   XMMS_SAVE;
+   kernel_fpu_begin();
 
asm volatile(
 #undef BLOCK
@@ -762,7 +724,7 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned 
long *p2,
:
: "memory" );
 
-   XMMS_RESTORE;
+   kernel_fpu_end();
 }
 
 static void
@@ -770,10 +732,8 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned 
long *p2,
  unsigned long *p3, unsigned long *p4, unsigned long *p5)
 {
unsigned long lines = bytes >> 8;
-   char xmm_save[16*4] ALIGN16;
-   int cr0;
 
-   XMMS_SAVE;
+   kernel_fpu_begin();
 
/* Make sure GCC forgets anything it knows about p4 or p5,
   such that it won't pass to the asm volatile below a
@@ -850,7 +810,7 @@ xor_sse_5(unsigned long

[PATCH 6/6] x86, fpu: use non-lazy fpu restore for processors supporting xsave

2012-08-24 Thread Suresh Siddha

Fundamental model of the current Linux kernel is to lazily init and
restore FPU instead of restoring the task state during context switch.
This changes that fundamental lazy model to the non-lazy model for
the processors supporting xsave feature.

Reasons driving this model change are:

i. Newer processors support optimized state save/restore using xsaveopt and
xrstor by tracking the INIT state and MODIFIED state during context-switch.
This is faster than modifying the cr0.TS bit which has serializing semantics.

ii. Newer glibc versions use SSE for some of the optimized copy/clear routines.
With certain workloads (like boot, kernel-compilation etc), the application
completes its work within the first 5 task switches, thus taking up to 5 #DNA
traps with the kernel not getting a chance to apply the above mentioned
pre-load heuristic.

iii. Some xstate features (like AMD's LWP feature) don't honor the cr0.TS bit
and thus will not work correctly in the presence of lazy restore. Non-lazy
state restore is needed for enabling such features.

Some data on a two socket SNB system:
 * Saved 20K DNA exceptions during boot on a two socket SNB system.
 * Saved 50K DNA exceptions during kernel-compilation workload.
 * Improved throughput of the AVX based checksumming function inside the
   kernel by ~15% as xsave/xrstor is faster than the serializing clts/stts
   pair.

Signed-off-by: Suresh Siddha 
Cc: Jim Kukunas 
Cc: NeilBrown 
Cc: Avi Kivity 
---
 arch/x86/include/asm/fpu-internal.h |   96 +++
 arch/x86/include/asm/i387.h |1 +
 arch/x86/include/asm/xsave.h|1 +
 arch/x86/kernel/i387.c  |   20 ++-
 arch/x86/kernel/process.c   |   12 +++--
 arch/x86/kernel/process_32.c|4 --
 arch/x86/kernel/process_64.c|4 --
 arch/x86/kernel/traps.c |5 ++-
 arch/x86/kernel/xsave.c |   57 +
 9 files changed, 140 insertions(+), 60 deletions(-)

diff --git a/arch/x86/include/asm/fpu-internal.h 
b/arch/x86/include/asm/fpu-internal.h
index fac39e9..e31cc6e 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -291,15 +291,48 @@ static inline void __thread_set_has_fpu(struct 
task_struct *tsk)
 static inline void __thread_fpu_end(struct task_struct *tsk)
 {
__thread_clear_has_fpu(tsk);
-   stts();
+   if (!use_xsave())
+   stts();
 }
 
 static inline void __thread_fpu_begin(struct task_struct *tsk)
 {
-   clts();
+   if (!use_xsave())
+   clts();
__thread_set_has_fpu(tsk);
 }
 
+static inline void __drop_fpu(struct task_struct *tsk)
+{
+   if (__thread_has_fpu(tsk)) {
+   /* Ignore delayed exceptions from user space */
+   asm volatile("1: fwait\n"
+"2:\n"
+_ASM_EXTABLE(1b, 2b));
+   __thread_fpu_end(tsk);
+   }
+}
+
+static inline void drop_fpu(struct task_struct *tsk)
+{
+   /*
+* Forget coprocessor state..
+*/
+   preempt_disable();
+   tsk->fpu_counter = 0;
+   __drop_fpu(tsk);
+   clear_used_math();
+   preempt_enable();
+}
+
+static inline void drop_init_fpu(struct task_struct *tsk)
+{
+   if (!use_xsave())
+   drop_fpu(tsk);
+   else
+   xrstor_state(init_xstate_buf, -1);
+}
+
 /*
  * FPU state switching for scheduling.
  *
@@ -333,7 +366,12 @@ static inline fpu_switch_t switch_fpu_prepare(struct 
task_struct *old, struct ta
 {
fpu_switch_t fpu;
 
-   fpu.preload = tsk_used_math(new) && new->fpu_counter > 5;
+   /*
+* If the task has used the math, pre-load the FPU on xsave processors
+* or if the past 5 consecutive context-switches used math.
+*/
+   fpu.preload = tsk_used_math(new) && (use_xsave() ||
+new->fpu_counter > 5);
if (__thread_has_fpu(old)) {
if (!__save_init_fpu(old))
cpu = ~0;
@@ -345,14 +383,14 @@ static inline fpu_switch_t switch_fpu_prepare(struct 
task_struct *old, struct ta
new->fpu_counter++;
__thread_set_has_fpu(new);
prefetch(new->thread.fpu.state);
-   } else
+   } else if (!use_xsave())
stts();
} else {
old->fpu_counter = 0;
old->thread.fpu.last_cpu = ~0;
if (fpu.preload) {
new->fpu_counter++;
-   if (fpu_lazy_restore(new, cpu))
+   if (!use_xsave() && fpu_lazy_restore(new, cpu))
fpu.preload = 0;
else
prefetch(new->thread.fpu.state);
@@ -372,7 +410,7 @@ static inline void switch_fpu_finish(struct task_struct 
*new, fpu_switch_t fpu)
 {
if

[PATCH 5/6] lguest, x86: handle guest TS bit for lazy/non-lazy fpu host models

2012-08-24 Thread Suresh Siddha

Instead of using unlazy_fpu() check if user_has_fpu() and set/clear
the host TS bits so that the lguest works fine with both the
lazy/non-lazy FPU host models with minimal changes.

Signed-off-by: Suresh Siddha 
Cc: Rusty Russell 
---
 drivers/lguest/x86/core.c |   10 +++---
 1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 39809035..4af12e1 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -203,8 +203,8 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 * we set it now, so we can trap and pass that trap to the Guest if it
 * uses the FPU.
 */
-   if (cpu->ts)
-   unlazy_fpu(current);
+   if (cpu->ts && user_has_fpu())
+   stts();
 
/*
 * SYSENTER is an optimized way of doing system calls.  We can't allow
@@ -234,6 +234,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 if (boot_cpu_has(X86_FEATURE_SEP))
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
 
+   /* Clear the host TS bit if it was set above. */
+   if (cpu->ts && user_has_fpu())
+   clts();
+
/*
 * If the Guest page faulted, then the cr2 register will tell us the
 * bad virtual address.  We have to grab this now, because once we
@@ -249,7 +253,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 * a different CPU. So all the critical stuff should be done
 * before this.
 */
-   else if (cpu->regs->trapnum == 7)
+   else if (cpu->regs->trapnum == 7 && !user_has_fpu())
math_state_restore();
 }
 
-- 
1.7.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 980 matches

Mail list logo