from:"Chen Yucong"

[PATCH] cpu/hotplug: print warning message in proper places for online or offline cpu

2016-05-04 Thread Chen Yucong

When we online or offline a CPU, unexpected errors may occur for
a number of reasons. So if a function called within _cpu_down() or
_cpu_up() returns an error code indicating a failed operation, the
warning message should be printed rather than follow the failed
__cpu_notify().

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 kernel/cpu.c |   10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3e3f6e4..b8a2d2a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -317,8 +317,8 @@ static int notify_prepare(unsigned int cpu)
ret = __cpu_notify(CPU_UP_PREPARE, cpu, -1, _calls);
if (ret) {
nr_calls--;
-   printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
-   __func__, cpu);
+   pr_warn("%s: attempt to bring up CPU %u failed\n",
+   __func__, cpu);
__cpu_notify(CPU_UP_CANCELED, cpu, nr_calls, NULL);
}
return ret;
@@ -353,6 +353,8 @@ static int bringup_cpu(unsigned int cpu)
ret = __cpu_up(cpu, idle);
if (ret) {
cpu_notify(CPU_UP_CANCELED, cpu);
+   pr_warn("%s: attempt to bring up CPU %u failed\n",
+   __func__, cpu);
return ret;
}
ret = bringup_wait_for_ap(cpu);
@@ -662,7 +664,7 @@ static int notify_down_prepare(unsigned int cpu)
nr_calls--;
__cpu_notify(CPU_DOWN_FAILED, cpu, nr_calls, NULL);
pr_warn("%s: attempt to take down CPU %u failed\n",
-   __func__, cpu);
+   __func__, cpu);
}
return err;
 }
@@ -737,6 +739,8 @@ static int takedown_cpu(unsigned int cpu)
irq_unlock_sparse();
/* Unpark the hotplug thread so we can rollback there */
kthread_unpark(per_cpu_ptr(_state, cpu)->thread);
+   pr_warn("%s: attempt to take down CPU %u failed\n",
+   __func__, cpu);
return err;
}
BUG_ON(cpu_online(cpu));
-- 
1.7.10.4

[PATCH] cpu/hotplug: print warning message in proper places for online or offline cpu

2016-05-04 Thread Chen Yucong

When we online or offline a CPU, unexpected errors may occur for
a number of reasons. So if a function called within _cpu_down() or
_cpu_up() returns an error code indicating a failed operation, the
warning message should be printed rather than follow the failed
__cpu_notify().

Signed-off-by: Chen Yucong 
---
 kernel/cpu.c |   10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3e3f6e4..b8a2d2a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -317,8 +317,8 @@ static int notify_prepare(unsigned int cpu)
ret = __cpu_notify(CPU_UP_PREPARE, cpu, -1, _calls);
if (ret) {
nr_calls--;
-   printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
-   __func__, cpu);
+   pr_warn("%s: attempt to bring up CPU %u failed\n",
+   __func__, cpu);
__cpu_notify(CPU_UP_CANCELED, cpu, nr_calls, NULL);
}
return ret;
@@ -353,6 +353,8 @@ static int bringup_cpu(unsigned int cpu)
ret = __cpu_up(cpu, idle);
if (ret) {
cpu_notify(CPU_UP_CANCELED, cpu);
+   pr_warn("%s: attempt to bring up CPU %u failed\n",
+   __func__, cpu);
return ret;
}
ret = bringup_wait_for_ap(cpu);
@@ -662,7 +664,7 @@ static int notify_down_prepare(unsigned int cpu)
nr_calls--;
__cpu_notify(CPU_DOWN_FAILED, cpu, nr_calls, NULL);
pr_warn("%s: attempt to take down CPU %u failed\n",
-   __func__, cpu);
+   __func__, cpu);
}
return err;
 }
@@ -737,6 +739,8 @@ static int takedown_cpu(unsigned int cpu)
irq_unlock_sparse();
/* Unpark the hotplug thread so we can rollback there */
kthread_unpark(per_cpu_ptr(_state, cpu)->thread);
+   pr_warn("%s: attempt to take down CPU %u failed\n",
+   __func__, cpu);
return err;
}
BUG_ON(cpu_online(cpu));
-- 
1.7.10.4

[PATCH] mm/memory_failure: unify the output-prefix for printk()

2016-04-27 Thread Chen Yucong

This patch aims to replace 'MCE' that was introduced by
'commit c2200538d89d ("mm/memory-failure: fix race with
compound page split/merge")' with 'Memory failure'.[1]

[1] https://lkml.org/lkml/2016/4/18/894

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 mm/memory-failure.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 839aa53..2fcca6b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -894,7 +894,8 @@ int get_hwpoison_page(struct page *page)
if (head == compound_head(page))
return 1;
 
-   pr_info("MCE: %#lx cannot catch tail\n", page_to_pfn(page));
+   pr_info("Memory failure: %#lx cannot catch tail\n",
+   page_to_pfn(page));
put_page(head);
}
 
-- 
1.8.3.1

[PATCH] mm/memory_failure: unify the output-prefix for printk()

2016-04-27 Thread Chen Yucong

This patch aims to replace 'MCE' that was introduced by
'commit c2200538d89d ("mm/memory-failure: fix race with
compound page split/merge")' with 'Memory failure'.[1]

[1] https://lkml.org/lkml/2016/4/18/894

Signed-off-by: Chen Yucong 
---
 mm/memory-failure.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 839aa53..2fcca6b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -894,7 +894,8 @@ int get_hwpoison_page(struct page *page)
if (head == compound_head(page))
return 1;
 
-   pr_info("MCE: %#lx cannot catch tail\n", page_to_pfn(page));
+   pr_info("Memory failure: %#lx cannot catch tail\n",
+   page_to_pfn(page));
put_page(head);
}
 
-- 
1.8.3.1

[PATCH] memory failure: replace 'MCE' with 'Memory failure'

2016-04-08 Thread Chen Yucong

HWPoison was specific to some particular x86 platforms,
and it is often seen as a high-level machine check handler.
Therefore, 'MCE' is used as the format prefix of
printk(). However, 'PowerNV' has also used HWPoison for
handling memory errors[1], so 'MCE' is no longer suitable
for memory-failure.c.

Additionally, 'MCE' and 'Memory failure' have different
context. The former belongs to exception context and the
latter belongs to process context. Furthermore, HWPoison
can also be used for off-lining those sub-health pages
that do not trigger any machine check exception.

This patch aims to replace 'MCE' with a more appropriate prefix.

[1] commit 75eb3d9b60c2 ("powerpc/powernv: Get FSP memory errors
and plumb into memory poison infrastructure.")

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 mm/memory-failure.c |   69 ---
 1 file changed, 38 insertions(+), 31 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 78f5f26..e9752f4 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -184,8 +184,8 @@ static int kill_proc(struct task_struct *t, unsigned long 
addr, int trapno,
struct siginfo si;
int ret;
 
-   pr_err("MCE %#lx: Killing %s:%d due to hardware memory corruption\n",
-  pfn, t->comm, t->pid);
+   pr_err("Memory failure: %#lx: Killing %s:%d due to hardware memory 
corruption\n",
+   pfn, t->comm, t->pid);
si.si_signo = SIGBUS;
si.si_errno = 0;
si.si_addr = (void *)addr;
@@ -208,7 +208,7 @@ static int kill_proc(struct task_struct *t, unsigned long 
addr, int trapno,
ret = send_sig_info(SIGBUS, , t);  /* synchronous? */
}
if (ret < 0)
-   pr_info("MCE: Error sending signal to %s:%d: %d\n",
+   pr_info("Memory failure: Error sending signal to %s:%d: %d\n",
t->comm, t->pid, ret);
return ret;
 }
@@ -289,7 +289,7 @@ static void add_to_kill(struct task_struct *tsk, struct 
page *p,
} else {
tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
if (!tk) {
-   pr_err("MCE: Out of memory while machine check 
handling\n");
+   pr_err("Memory failure: Out of memory while machine 
check handling\n");
return;
}
}
@@ -303,7 +303,7 @@ static void add_to_kill(struct task_struct *tsk, struct 
page *p,
 * a SIGKILL because the error is not contained anymore.
 */
if (tk->addr == -EFAULT) {
-   pr_info("MCE: Unable to find user space address %lx in %s\n",
+   pr_info("Memory failure: Unable to find user space address %lx 
in %s\n",
page_to_pfn(p), tsk->comm);
tk->addr_valid = 0;
}
@@ -334,7 +334,7 @@ static void kill_procs(struct list_head *to_kill, int 
forcekill, int trapno,
 * signal and then access the memory. Just kill it.
 */
if (fail || tk->addr_valid == 0) {
-   pr_err("MCE %#lx: forcibly killing %s:%d 
because of failure to unmap corrupted page\n",
+   pr_err("Memory failure: %#lx: forcibly killing 
%s:%d because of failure to unmap corrupted page\n",
   pfn, tk->tsk->comm, tk->tsk->pid);
force_sig(SIGKILL, tk->tsk);
}
@@ -347,7 +347,7 @@ static void kill_procs(struct list_head *to_kill, int 
forcekill, int trapno,
 */
else if (kill_proc(tk->tsk, tk->addr, trapno,
  pfn, page, flags) < 0)
-   pr_err("MCE %#lx: Cannot send advisory machine 
check signal to %s:%d\n",
+   pr_err("Memory failure: %#lx: Cannot send 
advisory machine check signal to %s:%d\n",
   pfn, tk->tsk->comm, tk->tsk->pid);
}
put_task_struct(tk->tsk);
@@ -559,7 +559,7 @@ static int me_kernel(struct page *p, unsigned long pfn)
  */
 static int me_unknown(struct page *p, unsigned long pfn)
 {
-   pr_err("MCE %#lx: Unknown page state\n", pfn);
+   pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
return MF_FAILED;
 }
 
@@ -604,11 +604,12 @@ static int me_pagecache_clean(struct page *p, unsigned 
long pfn)
if (mapping->a_ops->error_remove_page) {
err = mapping->a_ops->error_remove_page(mapping, p);
if (err != 0) {
-

[PATCH] memory failure: replace 'MCE' with 'Memory failure'

2016-04-08 Thread Chen Yucong

HWPoison was specific to some particular x86 platforms,
and it is often seen as a high-level machine check handler.
Therefore, 'MCE' is used as the format prefix of
printk(). However, 'PowerNV' has also used HWPoison for
handling memory errors[1], so 'MCE' is no longer suitable
for memory-failure.c.

Additionally, 'MCE' and 'Memory failure' have different
context. The former belongs to exception context and the
latter belongs to process context. Furthermore, HWPoison
can also be used for off-lining those sub-health pages
that do not trigger any machine check exception.

This patch aims to replace 'MCE' with a more appropriate prefix.

[1] commit 75eb3d9b60c2 ("powerpc/powernv: Get FSP memory errors
and plumb into memory poison infrastructure.")

Signed-off-by: Chen Yucong 
---
 mm/memory-failure.c |   69 ---
 1 file changed, 38 insertions(+), 31 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 78f5f26..e9752f4 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -184,8 +184,8 @@ static int kill_proc(struct task_struct *t, unsigned long 
addr, int trapno,
struct siginfo si;
int ret;
 
-   pr_err("MCE %#lx: Killing %s:%d due to hardware memory corruption\n",
-  pfn, t->comm, t->pid);
+   pr_err("Memory failure: %#lx: Killing %s:%d due to hardware memory 
corruption\n",
+   pfn, t->comm, t->pid);
si.si_signo = SIGBUS;
si.si_errno = 0;
si.si_addr = (void *)addr;
@@ -208,7 +208,7 @@ static int kill_proc(struct task_struct *t, unsigned long 
addr, int trapno,
ret = send_sig_info(SIGBUS, , t);  /* synchronous? */
}
if (ret < 0)
-   pr_info("MCE: Error sending signal to %s:%d: %d\n",
+   pr_info("Memory failure: Error sending signal to %s:%d: %d\n",
t->comm, t->pid, ret);
return ret;
 }
@@ -289,7 +289,7 @@ static void add_to_kill(struct task_struct *tsk, struct 
page *p,
} else {
tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC);
if (!tk) {
-   pr_err("MCE: Out of memory while machine check 
handling\n");
+   pr_err("Memory failure: Out of memory while machine 
check handling\n");
return;
}
}
@@ -303,7 +303,7 @@ static void add_to_kill(struct task_struct *tsk, struct 
page *p,
 * a SIGKILL because the error is not contained anymore.
 */
if (tk->addr == -EFAULT) {
-   pr_info("MCE: Unable to find user space address %lx in %s\n",
+   pr_info("Memory failure: Unable to find user space address %lx 
in %s\n",
page_to_pfn(p), tsk->comm);
tk->addr_valid = 0;
}
@@ -334,7 +334,7 @@ static void kill_procs(struct list_head *to_kill, int 
forcekill, int trapno,
 * signal and then access the memory. Just kill it.
 */
if (fail || tk->addr_valid == 0) {
-   pr_err("MCE %#lx: forcibly killing %s:%d 
because of failure to unmap corrupted page\n",
+   pr_err("Memory failure: %#lx: forcibly killing 
%s:%d because of failure to unmap corrupted page\n",
   pfn, tk->tsk->comm, tk->tsk->pid);
force_sig(SIGKILL, tk->tsk);
}
@@ -347,7 +347,7 @@ static void kill_procs(struct list_head *to_kill, int 
forcekill, int trapno,
 */
else if (kill_proc(tk->tsk, tk->addr, trapno,
  pfn, page, flags) < 0)
-   pr_err("MCE %#lx: Cannot send advisory machine 
check signal to %s:%d\n",
+   pr_err("Memory failure: %#lx: Cannot send 
advisory machine check signal to %s:%d\n",
   pfn, tk->tsk->comm, tk->tsk->pid);
}
put_task_struct(tk->tsk);
@@ -559,7 +559,7 @@ static int me_kernel(struct page *p, unsigned long pfn)
  */
 static int me_unknown(struct page *p, unsigned long pfn)
 {
-   pr_err("MCE %#lx: Unknown page state\n", pfn);
+   pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
return MF_FAILED;
 }
 
@@ -604,11 +604,12 @@ static int me_pagecache_clean(struct page *p, unsigned 
long pfn)
if (mapping->a_ops->error_remove_page) {
err = mapping->a_ops->error_remove_page(mapping, p);
if (err != 0) {
-   pr_info("MCE %#lx: Failed to punch page:

Re: [PATCH] arch/x86: convert all printk(KERN_INFO ) to pr_info()

2016-02-25 Thread Chen Yucong

On Thu, 2016-02-25 at 09:22 +0100, Ingo Molnar wrote:
> * Chen Yucong <sla...@gmail.com> wrote:
> 
> > arch/x86/* use a mixture of printk(KERN_INFO ) and pr_info().
> > This patch converts all of printk(KERN_INFO ) to pr_info() for
> > arch/x86 directory.
> > 
> > Signed-off-by: Chen Yucong <sla...@gmail.com>
> 
> >  85 files changed, 332 insertions(+), 394 deletions(-)
> 
> So what this changelog is missing is how it was ensured that the printk 
> output did 
> not change.
> 
 The previously committed patch was applied to a particular directory.
This involved all printk(KERN_), including `ugly` pr_debug().

However, this patch is only related to printk(KERN_INFO ...); pr_info()
is less dangerous than pr_debug(). The macro body of pr_info() is
simple and definite, without any conditional options.

One thing to note: this patch also refers to only a few pr_err(), and
if it is feasible, the next patch will convert all printk(KERN_ERR ) to
pr_err(). This might be a less risky way for conversion.

thx!
   cyc

Re: [PATCH] arch/x86: convert all printk(KERN_INFO ) to pr_info()

2016-02-25 Thread Chen Yucong

On Thu, 2016-02-25 at 09:22 +0100, Ingo Molnar wrote:
> * Chen Yucong  wrote:
> 
> > arch/x86/* use a mixture of printk(KERN_INFO ) and pr_info().
> > This patch converts all of printk(KERN_INFO ) to pr_info() for
> > arch/x86 directory.
> > 
> > Signed-off-by: Chen Yucong 
> 
> >  85 files changed, 332 insertions(+), 394 deletions(-)
> 
> So what this changelog is missing is how it was ensured that the printk 
> output did 
> not change.
> 
 The previously committed patch was applied to a particular directory.
This involved all printk(KERN_), including `ugly` pr_debug().

However, this patch is only related to printk(KERN_INFO ...); pr_info()
is less dangerous than pr_debug(). The macro body of pr_info() is
simple and definite, without any conditional options.

One thing to note: this patch also refers to only a few pr_err(), and
if it is feasible, the next patch will convert all printk(KERN_ERR ) to
pr_err(). This might be a less risky way for conversion.

thx!
   cyc

[PATCH] mm, memory hotplug: print debug message in the proper way for online_pages

2016-02-24 Thread Chen Yucong

online_pages() simply returns an error value if
memory_notify(MEM_GOING_ONLINE, &arg) returns a value that is not
what we want for successfully onlining target pages. This patch
aims to print more failure information, like offline_pages() does, in
online_pages().

This patch also converts printk(KERN_<LEVEL>) to pr_<level>(),
and changes __offline_pages() to not print failure information with
KERN_INFO according to David Rientjes's suggestion[1].

[1] https://lkml.org/lkml/2016/2/24/1094

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 mm/memory_hotplug.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c832ef3..4d15c20 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1059,10 +1059,9 @@ int __ref online_pages(unsigned long pfn, unsigned long 
nr_pages, int online_typ
 
ret = memory_notify(MEM_GOING_ONLINE, );
ret = notifier_to_errno(ret);
-   if (ret) {
-   memory_notify(MEM_CANCEL_ONLINE, );
-   return ret;
-   }
+   if (ret)
+   goto failed_addition;
+
/*
 * If this zone is not populated, then it is not in zonelist.
 * This means the page allocator ignores this zone.
@@ -1080,12 +1079,7 @@ int __ref online_pages(unsigned long pfn, unsigned long 
nr_pages, int online_typ
if (need_zonelists_rebuild)
zone_pcp_reset(zone);
mutex_unlock(_mutex);
-   printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] 
failed\n",
-  (unsigned long long) pfn << PAGE_SHIFT,
-  (((unsigned long long) pfn + nr_pages)
-   << PAGE_SHIFT) - 1);
-   memory_notify(MEM_CANCEL_ONLINE, );
-   return ret;
+   goto failed_addition;
}
 
zone->present_pages += onlined_pages;
@@ -1118,6 +1112,13 @@ int __ref online_pages(unsigned long pfn, unsigned long 
nr_pages, int online_typ
if (onlined_pages)
memory_notify(MEM_ONLINE, );
return 0;
+
+failed_addition:
+   pr_debug("online_pages [mem %#010llx-%#010llx] failed\n",
+(unsigned long long) pfn << PAGE_SHIFT,
+(((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
+   memory_notify(MEM_CANCEL_ONLINE, );
+   return ret;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
@@ -1529,8 +1530,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long 
end_pfn)
 
} else {
 #ifdef CONFIG_DEBUG_VM
-   printk(KERN_ALERT "removing pfn %lx from LRU failed\n",
-  pfn);
+   pr_alert("removing pfn %lx from LRU failed\n", pfn);
dump_page(page, "failed to remove from LRU");
 #endif
put_page(page);
@@ -1858,7 +1858,7 @@ repeat:
ret = -EBUSY;
goto failed_removal;
}
-   printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages);
+   pr_info("Offlined Pages %ld\n", offlined_pages);
/* Ok, all of our target is isolated.
   We cannot do rollback at this point. */
offline_isolated_pages(start_pfn, end_pfn);
@@ -1895,9 +1895,9 @@ repeat:
return 0;
 
 failed_removal:
-   printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] failed\n",
-  (unsigned long long) start_pfn << PAGE_SHIFT,
-  ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
+   pr_debug("memory offlining [mem %#010llx-%#010llx] failed\n",
+(unsigned long long) start_pfn << PAGE_SHIFT,
+((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_OFFLINE, );
/* pushback to free area */
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
-- 
1.8.3.1

[PATCH] mm, memory hotplug: print debug message in the proper way for online_pages

2016-02-24 Thread Chen Yucong

online_pages() simply returns an error value if
memory_notify(MEM_GOING_ONLINE, &arg) returns a value that is not
what we want for successfully onlining target pages. This patch
aims to print more failure information, like offline_pages() does, in
online_pages().

This patch also converts printk(KERN_<LEVEL>) to pr_<level>(),
and changes __offline_pages() to not print failure information with
KERN_INFO according to David Rientjes's suggestion[1].

[1] https://lkml.org/lkml/2016/2/24/1094

Signed-off-by: Chen Yucong 
---
 mm/memory_hotplug.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c832ef3..4d15c20 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1059,10 +1059,9 @@ int __ref online_pages(unsigned long pfn, unsigned long 
nr_pages, int online_typ
 
ret = memory_notify(MEM_GOING_ONLINE, );
ret = notifier_to_errno(ret);
-   if (ret) {
-   memory_notify(MEM_CANCEL_ONLINE, );
-   return ret;
-   }
+   if (ret)
+   goto failed_addition;
+
/*
 * If this zone is not populated, then it is not in zonelist.
 * This means the page allocator ignores this zone.
@@ -1080,12 +1079,7 @@ int __ref online_pages(unsigned long pfn, unsigned long 
nr_pages, int online_typ
if (need_zonelists_rebuild)
zone_pcp_reset(zone);
mutex_unlock(_mutex);
-   printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] 
failed\n",
-  (unsigned long long) pfn << PAGE_SHIFT,
-  (((unsigned long long) pfn + nr_pages)
-   << PAGE_SHIFT) - 1);
-   memory_notify(MEM_CANCEL_ONLINE, );
-   return ret;
+   goto failed_addition;
}
 
zone->present_pages += onlined_pages;
@@ -1118,6 +1112,13 @@ int __ref online_pages(unsigned long pfn, unsigned long 
nr_pages, int online_typ
if (onlined_pages)
memory_notify(MEM_ONLINE, );
return 0;
+
+failed_addition:
+   pr_debug("online_pages [mem %#010llx-%#010llx] failed\n",
+(unsigned long long) pfn << PAGE_SHIFT,
+(((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
+   memory_notify(MEM_CANCEL_ONLINE, );
+   return ret;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
@@ -1529,8 +1530,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long 
end_pfn)
 
} else {
 #ifdef CONFIG_DEBUG_VM
-   printk(KERN_ALERT "removing pfn %lx from LRU failed\n",
-  pfn);
+   pr_alert("removing pfn %lx from LRU failed\n", pfn);
dump_page(page, "failed to remove from LRU");
 #endif
put_page(page);
@@ -1858,7 +1858,7 @@ repeat:
ret = -EBUSY;
goto failed_removal;
}
-   printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages);
+   pr_info("Offlined Pages %ld\n", offlined_pages);
/* Ok, all of our target is isolated.
   We cannot do rollback at this point. */
offline_isolated_pages(start_pfn, end_pfn);
@@ -1895,9 +1895,9 @@ repeat:
return 0;
 
 failed_removal:
-   printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] failed\n",
-  (unsigned long long) start_pfn << PAGE_SHIFT,
-  ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
+   pr_debug("memory offlining [mem %#010llx-%#010llx] failed\n",
+(unsigned long long) start_pfn << PAGE_SHIFT,
+((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_OFFLINE, );
/* pushback to free area */
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
-- 
1.8.3.1

Re: [PATCH] mm, memory hotplug: print more failure information for online_pages

2016-02-24 Thread Chen Yucong

On Wed, 2016-02-24 at 13:33 -0800, David Rientjes wrote:
> On Wed, 24 Feb 2016, Chen Yucong wrote:
> 
> > diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> > index c832ef3..e4b6dec3 100644
> > --- a/mm/memory_hotplug.c
> > +++ b/mm/memory_hotplug.c
> > @@ -1059,10 +1059,9 @@ int __ref online_pages(unsigned long pfn, unsigned 
> > long nr_pages, int online_typ
> >  
> > ret = memory_notify(MEM_GOING_ONLINE, );
> > ret = notifier_to_errno(ret);
> > -   if (ret) {
> > -   memory_notify(MEM_CANCEL_ONLINE, );
> > -   return ret;
> > -   }
> > +   if (ret)
> > +   goto failed_addition;
> > +
> > /*
> >  * If this zone is not populated, then it is not in zonelist.
> >  * This means the page allocator ignores this zone.
> > @@ -1080,12 +1079,7 @@ int __ref online_pages(unsigned long pfn, unsigned 
> > long nr_pages, int online_typ
> > if (need_zonelists_rebuild)
> > zone_pcp_reset(zone);
> > mutex_unlock(_mutex);
> > -   printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] 
> > failed\n",
> > -  (unsigned long long) pfn << PAGE_SHIFT,
> > -  (((unsigned long long) pfn + nr_pages)
> > -   << PAGE_SHIFT) - 1);
> > -   memory_notify(MEM_CANCEL_ONLINE, );
> > -   return ret;
> > +   goto failed_addition;
> > }
> >  
> > zone->present_pages += onlined_pages;
> > @@ -1118,6 +1112,13 @@ int __ref online_pages(unsigned long pfn, unsigned 
> > long nr_pages, int online_typ
> > if (onlined_pages)
> > memory_notify(MEM_ONLINE, );
> > return 0;
> > +
> > +failed_addition:
> > +   pr_info("online_pages [mem %#010llx-%#010llx] failed\n",
> > +   (unsigned long long) pfn << PAGE_SHIFT,
> > +   (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
> > +   memory_notify(MEM_CANCEL_ONLINE, );
> > +   return ret;
> >  }
> >  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
> >  
> 
> Please explain how the conversion from KERN_DEBUG to KERN_INFO level is 
> better?

Like __offline_pages(), printk() in online_pages() is used for reporting
a failed addition rather than debug information.
Another reason is that pr_debug() is not an exact equivalent of 
printk(KERN_DEBUG ...)

/* If you are writing a driver, please use dev_dbg instead */
#if defined(CONFIG_DYNAMIC_DEBUG)
/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here
*/
#define pr_debug(fmt, ...) \
dynamic_pr_debug(fmt, ##__VA_ARGS__)
#elif defined(DEBUG)
#define pr_debug(fmt, ...) \
printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#else
#define pr_debug(fmt, ...) \
no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#endif
 

> If the onlining returns an error value, which it will, why do we need to 
> leave an artifact behind in the kernel log that it failed?

In __offline_pages(), we can find the following snippet:

...
ret = memory_notify(MEM_GOING_OFFLINE, );
ret = notifier_to_errno(ret);
if (ret)
goto failed_removal;
...
offlined_pages = check_pages_isolated(start_pfn, end_pfn);
if (offlined_pages < 0) {
ret = -EBUSY;
goto failed_removal;
}
...
failed_removal:
printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] 
...

Similarly, there's no single cause for a failed online_pages operation.
So if memory_notify(MEM_GOING_ONLINE, &arg) returns an error
value, the result of online_pages is also
"online_pages [mem %#010llx-%#010llx] failed\n".

thx!
cyc

Re: [PATCH] mm, memory hotplug: print more failure information for online_pages

2016-02-24 Thread Chen Yucong

On Wed, 2016-02-24 at 13:33 -0800, David Rientjes wrote:
> On Wed, 24 Feb 2016, Chen Yucong wrote:
> 
> > diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> > index c832ef3..e4b6dec3 100644
> > --- a/mm/memory_hotplug.c
> > +++ b/mm/memory_hotplug.c
> > @@ -1059,10 +1059,9 @@ int __ref online_pages(unsigned long pfn, unsigned 
> > long nr_pages, int online_typ
> >  
> > ret = memory_notify(MEM_GOING_ONLINE, );
> > ret = notifier_to_errno(ret);
> > -   if (ret) {
> > -   memory_notify(MEM_CANCEL_ONLINE, );
> > -   return ret;
> > -   }
> > +   if (ret)
> > +   goto failed_addition;
> > +
> > /*
> >  * If this zone is not populated, then it is not in zonelist.
> >  * This means the page allocator ignores this zone.
> > @@ -1080,12 +1079,7 @@ int __ref online_pages(unsigned long pfn, unsigned 
> > long nr_pages, int online_typ
> > if (need_zonelists_rebuild)
> > zone_pcp_reset(zone);
> > mutex_unlock(_mutex);
> > -   printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] 
> > failed\n",
> > -  (unsigned long long) pfn << PAGE_SHIFT,
> > -  (((unsigned long long) pfn + nr_pages)
> > -   << PAGE_SHIFT) - 1);
> > -   memory_notify(MEM_CANCEL_ONLINE, );
> > -   return ret;
> > +   goto failed_addition;
> > }
> >  
> > zone->present_pages += onlined_pages;
> > @@ -1118,6 +1112,13 @@ int __ref online_pages(unsigned long pfn, unsigned 
> > long nr_pages, int online_typ
> > if (onlined_pages)
> > memory_notify(MEM_ONLINE, );
> > return 0;
> > +
> > +failed_addition:
> > +   pr_info("online_pages [mem %#010llx-%#010llx] failed\n",
> > +   (unsigned long long) pfn << PAGE_SHIFT,
> > +   (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
> > +   memory_notify(MEM_CANCEL_ONLINE, );
> > +   return ret;
> >  }
> >  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
> >  
> 
> Please explain how the conversion from KERN_DEBUG to KERN_INFO level is 
> better?

Like __offline_pages(), printk() in online_pages() is used for reporting
a failed addition rather than debug information.
Another reason is that pr_debug() is not an exact equivalent of 
printk(KERN_DEBUG ...)

/* If you are writing a driver, please use dev_dbg instead */
#if defined(CONFIG_DYNAMIC_DEBUG)
/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here
*/
#define pr_debug(fmt, ...) \
dynamic_pr_debug(fmt, ##__VA_ARGS__)
#elif defined(DEBUG)
#define pr_debug(fmt, ...) \
printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#else
#define pr_debug(fmt, ...) \
no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
#endif
 

> If the onlining returns an error value, which it will, why do we need to 
> leave an artifact behind in the kernel log that it failed?

In __offline_pages(), we can find the following snippet:

...
ret = memory_notify(MEM_GOING_OFFLINE, );
ret = notifier_to_errno(ret);
if (ret)
goto failed_removal;
...
offlined_pages = check_pages_isolated(start_pfn, end_pfn);
if (offlined_pages < 0) {
ret = -EBUSY;
goto failed_removal;
}
...
failed_removal:
printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] 
...

Similarly, there's no single cause for a failed online_pages operation.
So if memory_notify(MEM_GOING_ONLINE, &arg) returns an error
value, the result of online_pages is also
"online_pages [mem %#010llx-%#010llx] failed\n".

thx!
cyc

[PATCH] mm, memory hotplug: print more failure information for online_pages

2016-02-24 Thread Chen Yucong

online_pages() simply returns an error value if
memory_notify(MEM_GOING_ONLINE, &arg) returns a value that is not
what we want for successfully onlining target pages. This patch
aims to print more failure information, like offline_pages() does, in
online_pages(). This patch also converts printk(KERN_<LEVEL>)
to pr_<level>().

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 mm/memory_hotplug.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c832ef3..e4b6dec3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1059,10 +1059,9 @@ int __ref online_pages(unsigned long pfn, unsigned long 
nr_pages, int online_typ
 
ret = memory_notify(MEM_GOING_ONLINE, );
ret = notifier_to_errno(ret);
-   if (ret) {
-   memory_notify(MEM_CANCEL_ONLINE, );
-   return ret;
-   }
+   if (ret)
+   goto failed_addition;
+
/*
 * If this zone is not populated, then it is not in zonelist.
 * This means the page allocator ignores this zone.
@@ -1080,12 +1079,7 @@ int __ref online_pages(unsigned long pfn, unsigned long 
nr_pages, int online_typ
if (need_zonelists_rebuild)
zone_pcp_reset(zone);
mutex_unlock(_mutex);
-   printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] 
failed\n",
-  (unsigned long long) pfn << PAGE_SHIFT,
-  (((unsigned long long) pfn + nr_pages)
-   << PAGE_SHIFT) - 1);
-   memory_notify(MEM_CANCEL_ONLINE, );
-   return ret;
+   goto failed_addition;
}
 
zone->present_pages += onlined_pages;
@@ -1118,6 +1112,13 @@ int __ref online_pages(unsigned long pfn, unsigned long 
nr_pages, int online_typ
if (onlined_pages)
memory_notify(MEM_ONLINE, );
return 0;
+
+failed_addition:
+   pr_info("online_pages [mem %#010llx-%#010llx] failed\n",
+   (unsigned long long) pfn << PAGE_SHIFT,
+   (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
+   memory_notify(MEM_CANCEL_ONLINE, );
+   return ret;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
@@ -1529,8 +1530,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long 
end_pfn)
 
} else {
 #ifdef CONFIG_DEBUG_VM
-   printk(KERN_ALERT "removing pfn %lx from LRU failed\n",
-  pfn);
+   pr_alert("removing pfn %lx from LRU failed\n", pfn);
dump_page(page, "failed to remove from LRU");
 #endif
put_page(page);
@@ -1858,7 +1858,7 @@ repeat:
ret = -EBUSY;
goto failed_removal;
}
-   printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages);
+   pr_info("Offlined Pages %ld\n", offlined_pages);
/* Ok, all of our target is isolated.
   We cannot do rollback at this point. */
offline_isolated_pages(start_pfn, end_pfn);
@@ -1895,9 +1895,9 @@ repeat:
return 0;
 
 failed_removal:
-   printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] failed\n",
-  (unsigned long long) start_pfn << PAGE_SHIFT,
-  ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
+   pr_info("memory offlining [mem %#010llx-%#010llx] failed\n",
+   (unsigned long long) start_pfn << PAGE_SHIFT,
+   ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_OFFLINE, );
/* pushback to free area */
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
-- 
1.8.3.1

[PATCH] mm, memory hotplug: print more failure information for online_pages

2016-02-24 Thread Chen Yucong

online_pages() simply returns an error value if
memory_notify(MEM_GOING_ONLINE, &arg) returns a value that is not
what we want for successfully onlining target pages. This patch
aims to print more failure information in online_pages(), as
offline_pages() already does. This patch also converts
printk(KERN_*) to pr_*().

Signed-off-by: Chen Yucong 
---
 mm/memory_hotplug.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c832ef3..e4b6dec3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1059,10 +1059,9 @@ int __ref online_pages(unsigned long pfn, unsigned long 
nr_pages, int online_typ
 
ret = memory_notify(MEM_GOING_ONLINE, );
ret = notifier_to_errno(ret);
-   if (ret) {
-   memory_notify(MEM_CANCEL_ONLINE, );
-   return ret;
-   }
+   if (ret)
+   goto failed_addition;
+
/*
 * If this zone is not populated, then it is not in zonelist.
 * This means the page allocator ignores this zone.
@@ -1080,12 +1079,7 @@ int __ref online_pages(unsigned long pfn, unsigned long 
nr_pages, int online_typ
if (need_zonelists_rebuild)
zone_pcp_reset(zone);
mutex_unlock(_mutex);
-   printk(KERN_DEBUG "online_pages [mem %#010llx-%#010llx] 
failed\n",
-  (unsigned long long) pfn << PAGE_SHIFT,
-  (((unsigned long long) pfn + nr_pages)
-   << PAGE_SHIFT) - 1);
-   memory_notify(MEM_CANCEL_ONLINE, );
-   return ret;
+   goto failed_addition;
}
 
zone->present_pages += onlined_pages;
@@ -1118,6 +1112,13 @@ int __ref online_pages(unsigned long pfn, unsigned long 
nr_pages, int online_typ
if (onlined_pages)
memory_notify(MEM_ONLINE, );
return 0;
+
+failed_addition:
+   pr_info("online_pages [mem %#010llx-%#010llx] failed\n",
+   (unsigned long long) pfn << PAGE_SHIFT,
+   (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
+   memory_notify(MEM_CANCEL_ONLINE, );
+   return ret;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
@@ -1529,8 +1530,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long 
end_pfn)
 
} else {
 #ifdef CONFIG_DEBUG_VM
-   printk(KERN_ALERT "removing pfn %lx from LRU failed\n",
-  pfn);
+   pr_alert("removing pfn %lx from LRU failed\n", pfn);
dump_page(page, "failed to remove from LRU");
 #endif
put_page(page);
@@ -1858,7 +1858,7 @@ repeat:
ret = -EBUSY;
goto failed_removal;
}
-   printk(KERN_INFO "Offlined Pages %ld\n", offlined_pages);
+   pr_info("Offlined Pages %ld\n", offlined_pages);
/* Ok, all of our target is isolated.
   We cannot do rollback at this point. */
offline_isolated_pages(start_pfn, end_pfn);
@@ -1895,9 +1895,9 @@ repeat:
return 0;
 
 failed_removal:
-   printk(KERN_INFO "memory offlining [mem %#010llx-%#010llx] failed\n",
-  (unsigned long long) start_pfn << PAGE_SHIFT,
-  ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
+   pr_info("memory offlining [mem %#010llx-%#010llx] failed\n",
+   (unsigned long long) start_pfn << PAGE_SHIFT,
+   ((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_OFFLINE, );
/* pushback to free area */
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
-- 
1.8.3.1

[PATCH] arch/x86: convert all printk(KERN_INFO ) to pr_info()

2016-02-22 Thread Chen Yucong

arch/x86/* use a mixture of printk(KERN_INFO ...) and pr_info().
This patch converts all printk(KERN_INFO ...) calls to pr_info() in the
arch/x86 directory.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/crypto/blowfish_glue.c  |  5 +---
 arch/x86/crypto/camellia_glue.c  |  5 +---
 arch/x86/crypto/serpent_sse2_glue.c  |  2 +-
 arch/x86/crypto/twofish_glue_3way.c  |  5 +---
 arch/x86/include/asm/gart.h  |  8 ++---
 arch/x86/kernel/acpi/boot.c  | 22 +++---
 arch/x86/kernel/acpi/sleep.c |  2 +-
 arch/x86/kernel/apb_timer.c  | 11 ---
 arch/x86/kernel/apic/apic.c  | 12 
 arch/x86/kernel/apic/bigsmp_32.c |  5 ++--
 arch/x86/kernel/apic/io_apic.c   | 34 ++---
 arch/x86/kernel/apic/probe_32.c  |  8 ++---
 arch/x86/kernel/apic/x2apic_uv_x.c   |  4 +--
 arch/x86/kernel/apm_32.c | 57 
 arch/x86/kernel/bootflag.c   |  2 +-
 arch/x86/kernel/check.c  |  7 +++--
 arch/x86/kernel/cpuid.c  |  4 +--
 arch/x86/kernel/e820.c   | 27 -
 arch/x86/kernel/early-quirks.c   | 38 +---
 arch/x86/kernel/fpu/init.c   |  3 +-
 arch/x86/kernel/fpu/xstate.c |  3 +-
 arch/x86/kernel/hpet.c   | 23 +++
 arch/x86/kernel/i8259.c  |  2 +-
 arch/x86/kernel/kprobes/core.c   | 12 
 arch/x86/kernel/kvm.c| 10 +++
 arch/x86/kernel/kvmclock.c   | 10 +++
 arch/x86/kernel/mmconf-fam10h_64.c   |  2 +-
 arch/x86/kernel/paravirt.c   |  3 +-
 arch/x86/kernel/pci-calgary_64.c | 23 +++
 arch/x86/kernel/pci-swiotlb.c|  3 +-
 arch/x86/kernel/quirks.c |  3 +-
 arch/x86/kernel/setup.c  | 15 +-
 arch/x86/kernel/smpboot.c| 10 +++
 arch/x86/kernel/sysfb_efi.c  |  7 ++---
 arch/x86/kernel/test_nx.c|  2 +-
 arch/x86/kernel/vsmp_64.c|  4 +--
 arch/x86/kvm/assigned-dev.c  | 21 +++--
 arch/x86/kvm/cpuid.c |  2 +-
 arch/x86/kvm/irq_comm.c  |  2 +-
 arch/x86/kvm/lapic.c |  6 ++--
 arch/x86/kvm/svm.c   |  8 ++---
 arch/x86/kvm/vmx.c   |  8 ++---
 arch/x86/kvm/x86.c   |  4 +--
 arch/x86/lguest/boot.c   |  2 +-
 arch/x86/mm/highmem_32.c |  4 +--
 arch/x86/mm/init.c   |  2 +-
 arch/x86/mm/init_32.c| 23 +++
 arch/x86/mm/init_64.c|  8 ++---
 arch/x86/mm/kmemcheck/kmemcheck.c|  7 ++---
 arch/x86/mm/numa.c   | 14 -
 arch/x86/mm/numa_32.c|  3 +-
 arch/x86/mm/numa_emulation.c |  4 +--
 arch/x86/mm/pageattr-test.c  |  7 ++---
 arch/x86/mm/pgtable.c|  4 +--
 arch/x86/mm/setup_nx.c   |  6 ++--
 arch/x86/mm/srat.c   | 17 ++-
 arch/x86/oprofile/nmi_int.c  | 10 +++
 arch/x86/oprofile/op_model_amd.c |  2 +-
 arch/x86/pci/acpi.c  | 13 
 arch/x86/pci/amd_bus.c   |  4 +--
 arch/x86/pci/broadcom_bus.c  |  4 +--
 arch/x86/pci/common.c| 14 -
 arch/x86/pci/direct.c|  6 ++--
 arch/x86/pci/early.c |  3 +-
 arch/x86/pci/irq.c   |  8 ++---
 arch/x86/pci/legacy.c|  2 +-
 arch/x86/pci/mmconfig_32.c   |  2 +-
 arch/x86/pci/olpc.c  |  2 +-
 arch/x86/pci/pcbios.c|  6 ++--
 arch/x86/pci/xen.c   |  4 +--
 arch/x86/platform/geode/alix.c   |  9 +++---
 arch/x86/platform/geode/geos.c   |  4 +--
 arch/x86/platform/geode/net5501.c|  4 +--
 arch/x86/platform/iris/iris.c|  4 +--
 arch/x86/platform/olpc/olpc-xo1-pm.c |  4 +--
 arch/x86/platform/olpc/olpc_ofw.c|  2 +-
 arch/x86/platform/uv/bios_uv.c   |  2 +-
 arch/x86/platform/uv/tlb_uv.c|  2 +-
 arch/x86/platform/uv/uv_time.c   |  8 ++---
 arch/x86/um/sysrq_64.c   | 30 +--
 arch/x86/um/tls_32.c |  4 +--
 arch/x86/xen/enlighten.c |  9 +++---
 arch/x86/xen/platform-pci-unplug.c   | 12 
 arch/x86/xen/setup.c |  2 +-
 arch/x86/xen/time.c  |  5 ++--
 85 files changed, 332 insertions(+), 394 deletions(-)

diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 17c0553..f8769cc 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -458,10 +458,7 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU 
blacklist");
 static int __init init(void)
 {
if (!force && is_blacklisted_cpu()) {
-   printk(KERN_INFO
-   "blowfish-x86_64: performance on this CPU "
-

[PATCH] arch/x86: convert all printk(KERN_INFO ) to pr_info()

2016-02-22 Thread Chen Yucong

arch/x86/* use a mixture of printk(KERN_INFO ...) and pr_info().
This patch converts all printk(KERN_INFO ...) calls to pr_info() in the
arch/x86 directory.

Signed-off-by: Chen Yucong 
---
 arch/x86/crypto/blowfish_glue.c  |  5 +---
 arch/x86/crypto/camellia_glue.c  |  5 +---
 arch/x86/crypto/serpent_sse2_glue.c  |  2 +-
 arch/x86/crypto/twofish_glue_3way.c  |  5 +---
 arch/x86/include/asm/gart.h  |  8 ++---
 arch/x86/kernel/acpi/boot.c  | 22 +++---
 arch/x86/kernel/acpi/sleep.c |  2 +-
 arch/x86/kernel/apb_timer.c  | 11 ---
 arch/x86/kernel/apic/apic.c  | 12 
 arch/x86/kernel/apic/bigsmp_32.c |  5 ++--
 arch/x86/kernel/apic/io_apic.c   | 34 ++---
 arch/x86/kernel/apic/probe_32.c  |  8 ++---
 arch/x86/kernel/apic/x2apic_uv_x.c   |  4 +--
 arch/x86/kernel/apm_32.c | 57 
 arch/x86/kernel/bootflag.c   |  2 +-
 arch/x86/kernel/check.c  |  7 +++--
 arch/x86/kernel/cpuid.c  |  4 +--
 arch/x86/kernel/e820.c   | 27 -
 arch/x86/kernel/early-quirks.c   | 38 +---
 arch/x86/kernel/fpu/init.c   |  3 +-
 arch/x86/kernel/fpu/xstate.c |  3 +-
 arch/x86/kernel/hpet.c   | 23 +++
 arch/x86/kernel/i8259.c  |  2 +-
 arch/x86/kernel/kprobes/core.c   | 12 
 arch/x86/kernel/kvm.c| 10 +++
 arch/x86/kernel/kvmclock.c   | 10 +++
 arch/x86/kernel/mmconf-fam10h_64.c   |  2 +-
 arch/x86/kernel/paravirt.c   |  3 +-
 arch/x86/kernel/pci-calgary_64.c | 23 +++
 arch/x86/kernel/pci-swiotlb.c|  3 +-
 arch/x86/kernel/quirks.c |  3 +-
 arch/x86/kernel/setup.c  | 15 +-
 arch/x86/kernel/smpboot.c| 10 +++
 arch/x86/kernel/sysfb_efi.c  |  7 ++---
 arch/x86/kernel/test_nx.c|  2 +-
 arch/x86/kernel/vsmp_64.c|  4 +--
 arch/x86/kvm/assigned-dev.c  | 21 +++--
 arch/x86/kvm/cpuid.c |  2 +-
 arch/x86/kvm/irq_comm.c  |  2 +-
 arch/x86/kvm/lapic.c |  6 ++--
 arch/x86/kvm/svm.c   |  8 ++---
 arch/x86/kvm/vmx.c   |  8 ++---
 arch/x86/kvm/x86.c   |  4 +--
 arch/x86/lguest/boot.c   |  2 +-
 arch/x86/mm/highmem_32.c |  4 +--
 arch/x86/mm/init.c   |  2 +-
 arch/x86/mm/init_32.c| 23 +++
 arch/x86/mm/init_64.c|  8 ++---
 arch/x86/mm/kmemcheck/kmemcheck.c|  7 ++---
 arch/x86/mm/numa.c   | 14 -
 arch/x86/mm/numa_32.c|  3 +-
 arch/x86/mm/numa_emulation.c |  4 +--
 arch/x86/mm/pageattr-test.c  |  7 ++---
 arch/x86/mm/pgtable.c|  4 +--
 arch/x86/mm/setup_nx.c   |  6 ++--
 arch/x86/mm/srat.c   | 17 ++-
 arch/x86/oprofile/nmi_int.c  | 10 +++
 arch/x86/oprofile/op_model_amd.c |  2 +-
 arch/x86/pci/acpi.c  | 13 
 arch/x86/pci/amd_bus.c   |  4 +--
 arch/x86/pci/broadcom_bus.c  |  4 +--
 arch/x86/pci/common.c| 14 -
 arch/x86/pci/direct.c|  6 ++--
 arch/x86/pci/early.c |  3 +-
 arch/x86/pci/irq.c   |  8 ++---
 arch/x86/pci/legacy.c|  2 +-
 arch/x86/pci/mmconfig_32.c   |  2 +-
 arch/x86/pci/olpc.c  |  2 +-
 arch/x86/pci/pcbios.c|  6 ++--
 arch/x86/pci/xen.c   |  4 +--
 arch/x86/platform/geode/alix.c   |  9 +++---
 arch/x86/platform/geode/geos.c   |  4 +--
 arch/x86/platform/geode/net5501.c|  4 +--
 arch/x86/platform/iris/iris.c|  4 +--
 arch/x86/platform/olpc/olpc-xo1-pm.c |  4 +--
 arch/x86/platform/olpc/olpc_ofw.c|  2 +-
 arch/x86/platform/uv/bios_uv.c   |  2 +-
 arch/x86/platform/uv/tlb_uv.c|  2 +-
 arch/x86/platform/uv/uv_time.c   |  8 ++---
 arch/x86/um/sysrq_64.c   | 30 +--
 arch/x86/um/tls_32.c |  4 +--
 arch/x86/xen/enlighten.c |  9 +++---
 arch/x86/xen/platform-pci-unplug.c   | 12 
 arch/x86/xen/setup.c |  2 +-
 arch/x86/xen/time.c  |  5 ++--
 85 files changed, 332 insertions(+), 394 deletions(-)

diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 17c0553..f8769cc 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -458,10 +458,7 @@ MODULE_PARM_DESC(force, "Force module load, ignore CPU 
blacklist");
 static int __init init(void)
 {
if (!force && is_blacklisted_cpu()) {
-   printk(KERN_INFO
-   "blowfish-x86_64: performance on this CPU "
-   &qu

[PATCH v2] x86/kernel: use pr_*() and dev_*()

2016-02-13 Thread Chen Yucong

arch/x86/kernel/* use a mixture of printk(KERN_* ...) and pr_*().
This patch converts the bulk of the printk(KERN_* ...) calls to pr_*() and
uses dev_dbg() instead of dev_printk(KERN_DEBUG, ...). All pr_warning()
calls have been replaced with pr_warn().

Not sure what to do about the printk(KERN_DEFAULT) and printk() without a
log level.

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/acpi/boot.c | 110 
 arch/x86/kernel/acpi/cstate.c   |   5 +-
 arch/x86/kernel/acpi/sleep.c|   2 +-
 arch/x86/kernel/alternative.c   |   8 +--
 arch/x86/kernel/amd_gart_64.c   |  13 ++---
 arch/x86/kernel/apb_timer.c |  23 
 arch/x86/kernel/apic/apic.c |  43 +++---
 arch/x86/kernel/apic/apic_flat_64.c |   4 +-
 arch/x86/kernel/apic/apic_noop.c|   2 +-
 arch/x86/kernel/apic/bigsmp_32.c|   5 +-
 arch/x86/kernel/apic/io_apic.c  |  95 +++
 arch/x86/kernel/apic/probe_32.c |   9 ++-
 arch/x86/kernel/apic/x2apic_phys.c  |   2 +-
 arch/x86/kernel/apic/x2apic_uv_x.c  |   3 +-
 arch/x86/kernel/apm_32.c|  80 --
 arch/x86/kernel/bootflag.c  |   4 +-
 arch/x86/kernel/check.c |  11 ++--
 arch/x86/kernel/cpuid.c |   2 +-
 arch/x86/kernel/crash_dump_32.c |   6 +-
 arch/x86/kernel/devicetree.c|   6 +-
 arch/x86/kernel/doublefault.c   |  17 +++---
 arch/x86/kernel/dumpstack.c |   4 +-
 arch/x86/kernel/e820.c  |  66 +++---
 arch/x86/kernel/early-quirks.c  |  38 +
 arch/x86/kernel/early_printk.c  |   4 +-
 arch/x86/kernel/fpu/init.c  |   3 +-
 arch/x86/kernel/fpu/xstate.c|   3 +-
 arch/x86/kernel/hpet.c  |  33 +--
 arch/x86/kernel/i8259.c |   5 +-
 arch/x86/kernel/irq_32.c|   6 +-
 arch/x86/kernel/jump_label.c|   4 +-
 arch/x86/kernel/kgdb.c  |   8 +--
 arch/x86/kernel/kprobes/core.c  |  11 ++--
 arch/x86/kernel/kvm.c   |  12 ++--
 arch/x86/kernel/kvmclock.c  |  10 ++--
 arch/x86/kernel/mmconf-fam10h_64.c  |   2 +-
 arch/x86/kernel/module.c|   4 +-
 arch/x86/kernel/nmi_selftest.c  |  12 ++--
 arch/x86/kernel/paravirt.c  |   2 +-
 arch/x86/kernel/pci-calgary_64.c|  87 ++--
 arch/x86/kernel/pci-iommu_table.c   |   4 +-
 arch/x86/kernel/pci-nommu.c |   3 +-
 arch/x86/kernel/pci-swiotlb.c   |   3 +-
 arch/x86/kernel/quirks.c|  48 
 arch/x86/kernel/rtc.c   |   7 +--
 arch/x86/kernel/setup.c |  22 
 arch/x86/kernel/setup_percpu.c  |   2 +-
 arch/x86/kernel/smpboot.c   |   6 +-
 arch/x86/kernel/sysfb_efi.c |   7 +--
 arch/x86/kernel/sysfb_simplefb.c|   2 +-
 arch/x86/kernel/tboot.c |  16 +++---
 arch/x86/kernel/tce_64.c|   5 +-
 arch/x86/kernel/test_nx.c   |  16 +++---
 arch/x86/kernel/test_rodata.c   |  10 ++--
 arch/x86/kernel/tsc_sync.c  |   6 +-
 arch/x86/kernel/vsmp_64.c   |   2 +-
 arch/x86/pci/mmconfig-shared.c  |  32 +--
 arch/x86/pci/mmconfig_64.c  |   4 +-
 58 files changed, 448 insertions(+), 511 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e759076..cb3afc7 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -55,7 +55,8 @@ EXPORT_SYMBOL(acpi_disabled);
 # include 
 #endif /* X86 */
 
-#define PREFIX "ACPI: "
+#undef pr_fmt
+#define pr_fmt(fmt) "ACPI: " fmt
 
 int acpi_noirq;/* skip ACPI IRQ initialization 
*/
 int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */
@@ -141,14 +142,14 @@ static int __init acpi_parse_madt(struct 
acpi_table_header *table)
 
madt = (struct acpi_table_madt *)table;
if (!madt) {
-   printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+   pr_warn("Unable to map MADT\n");
return -ENODEV;
}
 
if (madt->address) {
acpi_lapic_addr = (u64) madt->address;
 
-   printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
+   pr_debug("Local APIC address 0x%08x\n",
   madt->address);
}
 
@@ -170,7 +171,7 @@ static int acpi_register_lapic(int id, u8 enabled)
unsigned int ver = 0;
 
if (id >= MAX_LOCAL_APIC) {
-   printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
+   pr_info("skipped apicid that is too big\n");
return -EINVAL;
}
 
@@ -210,11 +211,11 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, 
const unsigned long end)
 * when we use CPU hotplug.
 */

[PATCH v2] x86/kernel: use pr_*() and dev_*()

2016-02-13 Thread Chen Yucong

arch/x86/kernel/* use a mixture of printk(KERN_* ...) and pr_*().
This patch converts the bulk of the printk(KERN_* ...) calls to pr_*() and
uses dev_dbg() instead of dev_printk(KERN_DEBUG, ...). All pr_warning()
calls have been replaced with pr_warn().

Not sure what to do about the printk(KERN_DEFAULT) and printk() without a
log level.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/kernel/acpi/boot.c | 110 
 arch/x86/kernel/acpi/cstate.c   |   5 +-
 arch/x86/kernel/acpi/sleep.c|   2 +-
 arch/x86/kernel/alternative.c   |   8 +--
 arch/x86/kernel/amd_gart_64.c   |  13 ++---
 arch/x86/kernel/apb_timer.c |  23 
 arch/x86/kernel/apic/apic.c |  43 +++---
 arch/x86/kernel/apic/apic_flat_64.c |   4 +-
 arch/x86/kernel/apic/apic_noop.c|   2 +-
 arch/x86/kernel/apic/bigsmp_32.c|   5 +-
 arch/x86/kernel/apic/io_apic.c  |  95 +++
 arch/x86/kernel/apic/probe_32.c |   9 ++-
 arch/x86/kernel/apic/x2apic_phys.c  |   2 +-
 arch/x86/kernel/apic/x2apic_uv_x.c  |   3 +-
 arch/x86/kernel/apm_32.c|  80 --
 arch/x86/kernel/bootflag.c  |   4 +-
 arch/x86/kernel/check.c |  11 ++--
 arch/x86/kernel/cpuid.c |   2 +-
 arch/x86/kernel/crash_dump_32.c |   6 +-
 arch/x86/kernel/devicetree.c|   6 +-
 arch/x86/kernel/doublefault.c   |  17 +++---
 arch/x86/kernel/dumpstack.c |   4 +-
 arch/x86/kernel/e820.c  |  66 +++---
 arch/x86/kernel/early-quirks.c  |  38 +
 arch/x86/kernel/early_printk.c  |   4 +-
 arch/x86/kernel/fpu/init.c  |   3 +-
 arch/x86/kernel/fpu/xstate.c|   3 +-
 arch/x86/kernel/hpet.c  |  33 +--
 arch/x86/kernel/i8259.c |   5 +-
 arch/x86/kernel/irq_32.c|   6 +-
 arch/x86/kernel/jump_label.c|   4 +-
 arch/x86/kernel/kgdb.c  |   8 +--
 arch/x86/kernel/kprobes/core.c  |  11 ++--
 arch/x86/kernel/kvm.c   |  12 ++--
 arch/x86/kernel/kvmclock.c  |  10 ++--
 arch/x86/kernel/mmconf-fam10h_64.c  |   2 +-
 arch/x86/kernel/module.c|   4 +-
 arch/x86/kernel/nmi_selftest.c  |  12 ++--
 arch/x86/kernel/paravirt.c  |   2 +-
 arch/x86/kernel/pci-calgary_64.c|  87 ++--
 arch/x86/kernel/pci-iommu_table.c   |   4 +-
 arch/x86/kernel/pci-nommu.c |   3 +-
 arch/x86/kernel/pci-swiotlb.c   |   3 +-
 arch/x86/kernel/quirks.c|  48 
 arch/x86/kernel/rtc.c   |   7 +--
 arch/x86/kernel/setup.c |  22 
 arch/x86/kernel/setup_percpu.c  |   2 +-
 arch/x86/kernel/smpboot.c   |   6 +-
 arch/x86/kernel/sysfb_efi.c |   7 +--
 arch/x86/kernel/sysfb_simplefb.c|   2 +-
 arch/x86/kernel/tboot.c |  16 +++---
 arch/x86/kernel/tce_64.c|   5 +-
 arch/x86/kernel/test_nx.c   |  16 +++---
 arch/x86/kernel/test_rodata.c   |  10 ++--
 arch/x86/kernel/tsc_sync.c  |   6 +-
 arch/x86/kernel/vsmp_64.c   |   2 +-
 arch/x86/pci/mmconfig-shared.c  |  32 +--
 arch/x86/pci/mmconfig_64.c  |   4 +-
 58 files changed, 448 insertions(+), 511 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e759076..cb3afc7 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -55,7 +55,8 @@ EXPORT_SYMBOL(acpi_disabled);
 # include 
 #endif /* X86 */
 
-#define PREFIX "ACPI: "
+#undef pr_fmt
+#define pr_fmt(fmt) "ACPI: " fmt
 
 int acpi_noirq;/* skip ACPI IRQ initialization 
*/
 int acpi_pci_disabled; /* skip ACPI PCI scan and IRQ initialization */
@@ -141,14 +142,14 @@ static int __init acpi_parse_madt(struct 
acpi_table_header *table)
 
madt = (struct acpi_table_madt *)table;
if (!madt) {
-   printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+   pr_warn("Unable to map MADT\n");
return -ENODEV;
}
 
if (madt->address) {
acpi_lapic_addr = (u64) madt->address;
 
-   printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
+   pr_debug("Local APIC address 0x%08x\n",
   madt->address);
}
 
@@ -170,7 +171,7 @@ static int acpi_register_lapic(int id, u8 enabled)
unsigned int ver = 0;
 
if (id >= MAX_LOCAL_APIC) {
-   printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
+   pr_info("skipped apicid that is too big\n");
return -EINVAL;
}
 
@@ -210,11 +211,11 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, 
const unsigned long end)
 * when we use CPU

[PATCH 2/2] x86: use pr_default() macro

2016-02-05 Thread Chen Yucong

 - convert printk(KERN_DEFAULT ...) to pr_default(...)

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/dumpstack.c|  3 +--
 arch/x86/kernel/dumpstack_64.c |  4 ++--
 arch/x86/kernel/process_32.c   | 29 ++---
 arch/x86/kernel/process_64.c   | 41 -
 arch/x86/mm/fault.c|  2 +-
 5 files changed, 38 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 993706a..1ae9a6d 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -257,8 +257,7 @@ int __die(const char *str, struct pt_regs *regs, long err)
unsigned short ss;
unsigned long sp;
 #endif
-   printk(KERN_DEFAULT
-  "%s: %04lx [#%d] ", str, err & 0x, ++die_counter);
+   pr_default("%s: %04lx [#%d] ", str, err & 0x, ++die_counter);
 #ifdef CONFIG_PREEMPT
printk("PREEMPT ");
 #endif
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 5f1c626..8e6c739 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -316,11 +316,11 @@ void show_regs(struct pt_regs *regs)
unsigned char c;
u8 *ip;
 
-   printk(KERN_DEFAULT "Stack:\n");
+   pr_default("Stack:\n");
show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
   0, KERN_DEFAULT);
 
-   printk(KERN_DEFAULT "Code: ");
+   pr_default("Code: ");
 
ip = (u8 *)regs->ip - code_prologue;
if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9f95091..4039da4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -83,17 +83,17 @@ void __show_regs(struct pt_regs *regs, int all)
savesegment(gs, gs);
}
 
-   printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
-   (u16)regs->cs, regs->ip, regs->flags,
-   smp_processor_id());
+   pr_default("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
+  (u16)regs->cs, regs->ip, regs->flags,
+  smp_processor_id());
print_symbol("EIP is at %s\n", regs->ip);
 
-   printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
-   regs->ax, regs->bx, regs->cx, regs->dx);
-   printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
-   regs->si, regs->di, regs->bp, sp);
-   printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
-  (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
+   pr_default("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+  regs->ax, regs->bx, regs->cx, regs->dx);
+   pr_default("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
+  regs->si, regs->di, regs->bp, sp);
+   pr_default(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
+  (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
 
if (!all)
return;
@@ -102,8 +102,8 @@ void __show_regs(struct pt_regs *regs, int all)
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = __read_cr4_safe();
-   printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
-   cr0, cr2, cr3, cr4);
+   pr_default("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
+  cr0, cr2, cr3, cr4);
 
get_debugreg(d0, 0);
get_debugreg(d1, 1);
@@ -117,10 +117,9 @@ void __show_regs(struct pt_regs *regs, int all)
(d6 == DR6_RESERVED) && (d7 == 0x400))
return;
 
-   printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
-   d0, d1, d2, d3);
-   printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n",
-   d6, d7);
+   pr_default("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
+  d0, d1, d2, d3);
+   pr_default("DR6: %08lx DR7: %08lx\n", d6, d7);
 }
 
 void release_thread(struct task_struct *dead_task)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71a18a2..37ff6be 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -61,20 +61,20 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned int fsindex, gsindex;
unsigned int ds, cs, es;
 
-   printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0x, 
regs->ip);
+   pr_def

[PATCH 1/2] printk: introduce pr_default() macro

2016-02-05 Thread Chen Yucong

Until now, the pr_*() macros have covered every log level except
KERN_DEFAULT. Add pr_default() for convenience.

Signed-off-by: Chen Yucong 
---
 include/linux/printk.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 51dd6b8..9808130 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -274,6 +274,8 @@ extern asmlinkage void dump_stack(void) __cold;
  */
 #define pr_cont(fmt, ...) \
printk(KERN_CONT fmt, ##__VA_ARGS__)
+#define pr_default(fmt, ...) \
+   printk(KERN_DEFAULT pr_fmt(fmt), ##__VA_ARGS__)
 
 /* pr_devel() should produce zero code unless DEBUG is defined */
 #ifdef DEBUG
@@ -345,6 +347,8 @@ extern asmlinkage void dump_stack(void) __cold;
printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_cont_once(fmt, ...) \
printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_default_once(fmt, ...)  \
+   printk_once(KERN_DEFAULT pr_fmt(fmt), ##__VA_ARGS__)
 
 #if defined(DEBUG)
 #define pr_devel_once(fmt, ...)\
@@ -396,6 +400,8 @@ extern asmlinkage void dump_stack(void) __cold;
printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_info_ratelimited(fmt, ...)  \
printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_default_ratelimited(fmt, ...)   \
+   printk_ratelimited(KERN_DEFAULT pr_fmt(fmt), ##__VA_ARGS__)
 /* no pr_cont_ratelimited, don't do that... */
 
 #if defined(DEBUG)
-- 
1.8.3.1

[PATCH] x86/kernel: use pr_*() and dev_*()

2016-02-05 Thread Chen Yucong

arch/x86/kernel/* use a mixture of printk(KERN_* ...) and pr_*().
This patch converts the bulk of the printk(KERN_* ...) calls to pr_*() and
uses dev_dbg() instead of dev_printk(KERN_DEBUG, ...). All pr_warning()
calls have been replaced with pr_warn().

Not sure what to do about the printk(KERN_DEFAULT) and printk() without a
log level.

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/acpi/boot.c | 106 
 arch/x86/kernel/acpi/cstate.c   |   5 +-
 arch/x86/kernel/acpi/sleep.c|   2 +-
 arch/x86/kernel/alternative.c   |   8 +--
 arch/x86/kernel/amd_gart_64.c   |  13 ++---
 arch/x86/kernel/apb_timer.c |  23 
 arch/x86/kernel/apic/apic.c |  44 +++
 arch/x86/kernel/apic/apic_flat_64.c |   4 +-
 arch/x86/kernel/apic/apic_noop.c|   2 +-
 arch/x86/kernel/apic/bigsmp_32.c|   5 +-
 arch/x86/kernel/apic/io_apic.c  |  95 
 arch/x86/kernel/apic/probe_32.c |   9 ++-
 arch/x86/kernel/apic/x2apic_phys.c  |   2 +-
 arch/x86/kernel/apic/x2apic_uv_x.c  |   3 +-
 arch/x86/kernel/apm_32.c|  79 +--
 arch/x86/kernel/bootflag.c  |   4 +-
 arch/x86/kernel/check.c |  11 ++--
 arch/x86/kernel/cpuid.c |   2 +-
 arch/x86/kernel/crash_dump_32.c |   6 +-
 arch/x86/kernel/devicetree.c|   6 +-
 arch/x86/kernel/doublefault.c   |  16 +++---
 arch/x86/kernel/dumpstack.c |   4 +-
 arch/x86/kernel/e820.c  |  66 +++---
 arch/x86/kernel/early-quirks.c  |  44 +++
 arch/x86/kernel/early_printk.c  |   4 +-
 arch/x86/kernel/fpu/init.c  |   3 +-
 arch/x86/kernel/fpu/xstate.c|   3 +-
 arch/x86/kernel/hpet.c  |  32 +--
 arch/x86/kernel/i8259.c |   5 +-
 arch/x86/kernel/irq_32.c|   6 +-
 arch/x86/kernel/jump_label.c|   4 +-
 arch/x86/kernel/kgdb.c  |   8 +--
 arch/x86/kernel/kprobes/core.c  |  11 ++--
 arch/x86/kernel/kvm.c   |  12 ++--
 arch/x86/kernel/kvmclock.c  |  10 ++--
 arch/x86/kernel/mmconf-fam10h_64.c  |   2 +-
 arch/x86/kernel/module.c|   4 +-
 arch/x86/kernel/nmi_selftest.c  |  12 ++--
 arch/x86/kernel/paravirt.c  |   2 +-
 arch/x86/kernel/pci-calgary_64.c|  84 ++--
 arch/x86/kernel/pci-iommu_table.c   |   4 +-
 arch/x86/kernel/pci-nommu.c |   3 +-
 arch/x86/kernel/pci-swiotlb.c   |   4 +-
 arch/x86/kernel/quirks.c|  48 
 arch/x86/kernel/rtc.c   |   7 +--
 arch/x86/kernel/setup.c |  22 
 arch/x86/kernel/setup_percpu.c  |   2 +-
 arch/x86/kernel/smpboot.c   |   6 +-
 arch/x86/kernel/sysfb_efi.c |   8 +--
 arch/x86/kernel/sysfb_simplefb.c|   2 +-
 arch/x86/kernel/tboot.c |  16 +++---
 arch/x86/kernel/tce_64.c|   6 +-
 arch/x86/kernel/test_nx.c   |  16 +++---
 arch/x86/kernel/test_rodata.c   |  10 ++--
 arch/x86/kernel/tsc_sync.c  |   6 +-
 arch/x86/kernel/vsmp_64.c   |   2 +-
 56 files changed, 444 insertions(+), 479 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e759076..1ba8328 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -141,14 +141,14 @@ static int __init acpi_parse_madt(struct 
acpi_table_header *table)
 
madt = (struct acpi_table_madt *)table;
if (!madt) {
-   printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+   pr_warn(PREFIX "Unable to map MADT\n");
return -ENODEV;
}
 
if (madt->address) {
acpi_lapic_addr = (u64) madt->address;
 
-   printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
+   pr_debug(PREFIX "Local APIC address 0x%08x\n",
   madt->address);
}
 
@@ -170,7 +170,7 @@ static int acpi_register_lapic(int id, u8 enabled)
unsigned int ver = 0;
 
if (id >= MAX_LOCAL_APIC) {
-   printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
+   pr_info(PREFIX "skipped apicid that is too big\n");
return -EINVAL;
}
 
@@ -210,11 +210,11 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, 
const unsigned long end)
 * when we use CPU hotplug.
 */
if (!apic->apic_id_valid(apic_id) && enabled)
-   printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
+   pr_warn(PREFIX "x2apic entry ignored\n");
else
acpi_register_lapic(apic_id, enabled);
 #else
-   printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
+   pr_warn(PREFIX "x2apic entry ignored\n");
 #endif
 
return 0

[PATCH] x86/kernel: use pr_*() and dev_*()

2016-02-05 Thread Chen Yucong

arch/x86/kernel/* use a mixture of printk(KERN_* ...) and pr_*().
This patch converts the bulk of the printk(KERN_* ...) calls to pr_*() and
uses dev_dbg() instead of dev_printk(KERN_DEBUG, ...). All pr_warning()
calls have been replaced with pr_warn().

Not sure what to do about the printk(KERN_DEFAULT) and printk() without a
log level.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/kernel/acpi/boot.c | 106 
 arch/x86/kernel/acpi/cstate.c   |   5 +-
 arch/x86/kernel/acpi/sleep.c|   2 +-
 arch/x86/kernel/alternative.c   |   8 +--
 arch/x86/kernel/amd_gart_64.c   |  13 ++---
 arch/x86/kernel/apb_timer.c |  23 
 arch/x86/kernel/apic/apic.c |  44 +++
 arch/x86/kernel/apic/apic_flat_64.c |   4 +-
 arch/x86/kernel/apic/apic_noop.c|   2 +-
 arch/x86/kernel/apic/bigsmp_32.c|   5 +-
 arch/x86/kernel/apic/io_apic.c  |  95 
 arch/x86/kernel/apic/probe_32.c |   9 ++-
 arch/x86/kernel/apic/x2apic_phys.c  |   2 +-
 arch/x86/kernel/apic/x2apic_uv_x.c  |   3 +-
 arch/x86/kernel/apm_32.c|  79 +--
 arch/x86/kernel/bootflag.c  |   4 +-
 arch/x86/kernel/check.c |  11 ++--
 arch/x86/kernel/cpuid.c |   2 +-
 arch/x86/kernel/crash_dump_32.c |   6 +-
 arch/x86/kernel/devicetree.c|   6 +-
 arch/x86/kernel/doublefault.c   |  16 +++---
 arch/x86/kernel/dumpstack.c |   4 +-
 arch/x86/kernel/e820.c  |  66 +++---
 arch/x86/kernel/early-quirks.c  |  44 +++
 arch/x86/kernel/early_printk.c  |   4 +-
 arch/x86/kernel/fpu/init.c  |   3 +-
 arch/x86/kernel/fpu/xstate.c|   3 +-
 arch/x86/kernel/hpet.c  |  32 +--
 arch/x86/kernel/i8259.c |   5 +-
 arch/x86/kernel/irq_32.c|   6 +-
 arch/x86/kernel/jump_label.c|   4 +-
 arch/x86/kernel/kgdb.c  |   8 +--
 arch/x86/kernel/kprobes/core.c  |  11 ++--
 arch/x86/kernel/kvm.c   |  12 ++--
 arch/x86/kernel/kvmclock.c  |  10 ++--
 arch/x86/kernel/mmconf-fam10h_64.c  |   2 +-
 arch/x86/kernel/module.c|   4 +-
 arch/x86/kernel/nmi_selftest.c  |  12 ++--
 arch/x86/kernel/paravirt.c  |   2 +-
 arch/x86/kernel/pci-calgary_64.c|  84 ++--
 arch/x86/kernel/pci-iommu_table.c   |   4 +-
 arch/x86/kernel/pci-nommu.c |   3 +-
 arch/x86/kernel/pci-swiotlb.c   |   4 +-
 arch/x86/kernel/quirks.c|  48 
 arch/x86/kernel/rtc.c   |   7 +--
 arch/x86/kernel/setup.c |  22 
 arch/x86/kernel/setup_percpu.c  |   2 +-
 arch/x86/kernel/smpboot.c   |   6 +-
 arch/x86/kernel/sysfb_efi.c |   8 +--
 arch/x86/kernel/sysfb_simplefb.c|   2 +-
 arch/x86/kernel/tboot.c |  16 +++---
 arch/x86/kernel/tce_64.c|   6 +-
 arch/x86/kernel/test_nx.c   |  16 +++---
 arch/x86/kernel/test_rodata.c   |  10 ++--
 arch/x86/kernel/tsc_sync.c  |   6 +-
 arch/x86/kernel/vsmp_64.c   |   2 +-
 56 files changed, 444 insertions(+), 479 deletions(-)

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e759076..1ba8328 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -141,14 +141,14 @@ static int __init acpi_parse_madt(struct 
acpi_table_header *table)
 
madt = (struct acpi_table_madt *)table;
if (!madt) {
-   printk(KERN_WARNING PREFIX "Unable to map MADT\n");
+   pr_warn(PREFIX "Unable to map MADT\n");
return -ENODEV;
}
 
if (madt->address) {
acpi_lapic_addr = (u64) madt->address;
 
-   printk(KERN_DEBUG PREFIX "Local APIC address 0x%08x\n",
+   pr_debug(PREFIX "Local APIC address 0x%08x\n",
   madt->address);
}
 
@@ -170,7 +170,7 @@ static int acpi_register_lapic(int id, u8 enabled)
unsigned int ver = 0;
 
if (id >= MAX_LOCAL_APIC) {
-   printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
+   pr_info(PREFIX "skipped apicid that is too big\n");
return -EINVAL;
}
 
@@ -210,11 +210,11 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, 
const unsigned long end)
 * when we use CPU hotplug.
 */
if (!apic->apic_id_valid(apic_id) && enabled)
-   printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
+   pr_warn(PREFIX "x2apic entry ignored\n");
else
acpi_register_lapic(apic_id, enabled);
 #else
-   printk(KERN_WARNING PREFIX "x2apic entry ignored\n");
+   pr_warn(PREFIX "x2apic entry ignored\n");
 #endif
 
re

[PATCH 1/2] printk: introduce pr_default() macro

2016-02-05 Thread Chen Yucong

Until now, the pr_<level>() macros have covered every log level except
KERN_DEFAULT. Add pr_default() for convenience.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 include/linux/printk.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 51dd6b8..9808130 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -274,6 +274,8 @@ extern asmlinkage void dump_stack(void) __cold;
  */
 #define pr_cont(fmt, ...) \
printk(KERN_CONT fmt, ##__VA_ARGS__)
+#define pr_default(fmt, ...) \
+   printk(KERN_DEFAULT pr_fmt(fmt), ##__VA_ARGS__)
 
 /* pr_devel() should produce zero code unless DEBUG is defined */
 #ifdef DEBUG
@@ -345,6 +347,8 @@ extern asmlinkage void dump_stack(void) __cold;
printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_cont_once(fmt, ...) \
printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_default_once(fmt, ...)  \
+   printk_once(KERN_DEFAULT pr_fmt(fmt), ##__VA_ARGS__)
 
 #if defined(DEBUG)
 #define pr_devel_once(fmt, ...)\
@@ -396,6 +400,8 @@ extern asmlinkage void dump_stack(void) __cold;
printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_info_ratelimited(fmt, ...)  \
printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_default_ratelimited(fmt, ...)   \
+   printk_ratelimited(KERN_DEFAULT pr_fmt(fmt), ##__VA_ARGS__)
 /* no pr_cont_ratelimited, don't do that... */
 
 #if defined(DEBUG)
-- 
1.8.3.1

[PATCH 2/2] x86: use pr_default() macro

2016-02-05 Thread Chen Yucong

 - convert printk(KERN_DEFAULT ...) to pr_default(...)

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/kernel/dumpstack.c|  3 +--
 arch/x86/kernel/dumpstack_64.c |  4 ++--
 arch/x86/kernel/process_32.c   | 29 ++---
 arch/x86/kernel/process_64.c   | 41 -
 arch/x86/mm/fault.c|  2 +-
 5 files changed, 38 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 993706a..1ae9a6d 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -257,8 +257,7 @@ int __die(const char *str, struct pt_regs *regs, long err)
unsigned short ss;
unsigned long sp;
 #endif
-   printk(KERN_DEFAULT
-  "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+   pr_default("%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
 #ifdef CONFIG_PREEMPT
printk("PREEMPT ");
 #endif
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 5f1c626..8e6c739 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -316,11 +316,11 @@ void show_regs(struct pt_regs *regs)
unsigned char c;
u8 *ip;
 
-   printk(KERN_DEFAULT "Stack:\n");
+   pr_default("Stack:\n");
show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
   0, KERN_DEFAULT);
 
-   printk(KERN_DEFAULT "Code: ");
+   pr_default("Code: ");
 
ip = (u8 *)regs->ip - code_prologue;
if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9f95091..4039da4 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -83,17 +83,17 @@ void __show_regs(struct pt_regs *regs, int all)
savesegment(gs, gs);
}
 
-   printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
-   (u16)regs->cs, regs->ip, regs->flags,
-   smp_processor_id());
+   pr_default("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
+  (u16)regs->cs, regs->ip, regs->flags,
+  smp_processor_id());
print_symbol("EIP is at %s\n", regs->ip);
 
-   printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
-   regs->ax, regs->bx, regs->cx, regs->dx);
-   printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
-   regs->si, regs->di, regs->bp, sp);
-   printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
-  (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
+   pr_default("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+  regs->ax, regs->bx, regs->cx, regs->dx);
+   pr_default("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
+  regs->si, regs->di, regs->bp, sp);
+   pr_default(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
+  (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
 
if (!all)
return;
@@ -102,8 +102,8 @@ void __show_regs(struct pt_regs *regs, int all)
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = __read_cr4_safe();
-   printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
-   cr0, cr2, cr3, cr4);
+   pr_default("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
+  cr0, cr2, cr3, cr4);
 
get_debugreg(d0, 0);
get_debugreg(d1, 1);
@@ -117,10 +117,9 @@ void __show_regs(struct pt_regs *regs, int all)
(d6 == DR6_RESERVED) && (d7 == 0x400))
return;
 
-   printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
-   d0, d1, d2, d3);
-   printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n",
-   d6, d7);
+   pr_default("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
+  d0, d1, d2, d3);
+   pr_default("DR6: %08lx DR7: %08lx\n", d6, d7);
 }
 
 void release_thread(struct task_struct *dead_task)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 71a18a2..37ff6be 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -61,20 +61,20 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned int fsindex, gsindex;
unsigned int ds, cs, es;
 
-   printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0

[tip:x86/cpu] x86/cpu: Convert printk(KERN_<LEVEL> ...) to pr_<level>(...)

2016-02-03 Thread tip-bot for Chen Yucong

Commit-ID:  1b74dde7c47c19a73ea3e9fac95ac27b5d3d50c5
Gitweb: http://git.kernel.org/tip/1b74dde7c47c19a73ea3e9fac95ac27b5d3d50c5
Author: Chen Yucong 
AuthorDate: Tue, 2 Feb 2016 11:45:02 +0800
Committer:  Ingo Molnar 
CommitDate: Wed, 3 Feb 2016 10:30:03 +0100

x86/cpu: Convert printk(KERN_<LEVEL> ...) to pr_<level>(...)

 - Use the more current logging style pr_<level>(...) instead of the old
   printk(KERN_<LEVEL> ...).

 - Convert pr_warning() to pr_warn().

Signed-off-by: Chen Yucong 
Cc: Borislav Petkov 
Cc: H. Peter Anvin 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: 
http://lkml.kernel.org/r/1454384702-21707-1-git-send-email-sla...@gmail.com
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/cpu/amd.c   | 23 +++
 arch/x86/kernel/cpu/bugs_64.c   |  2 +-
 arch/x86/kernel/cpu/centaur.c   | 10 +++
 arch/x86/kernel/cpu/common.c| 42 +--
 arch/x86/kernel/cpu/cyrix.c | 10 +++
 arch/x86/kernel/cpu/hypervisor.c|  2 +-
 arch/x86/kernel/cpu/intel.c | 10 +++
 arch/x86/kernel/cpu/intel_cacheinfo.c   |  2 +-
 arch/x86/kernel/cpu/mcheck/mce-inject.c | 15 +-
 arch/x86/kernel/cpu/mcheck/p5.c | 18 +---
 arch/x86/kernel/cpu/mcheck/therm_throt.c| 15 +-
 arch/x86/kernel/cpu/mcheck/threshold.c  |  4 +--
 arch/x86/kernel/cpu/mcheck/winchip.c|  5 ++--
 arch/x86/kernel/cpu/microcode/amd.c |  2 +-
 arch/x86/kernel/cpu/mshyperv.c  |  8 +++---
 arch/x86/kernel/cpu/mtrr/centaur.c  |  2 +-
 arch/x86/kernel/cpu/mtrr/cleanup.c  | 44 ++---
 arch/x86/kernel/cpu/mtrr/generic.c  | 23 ---
 arch/x86/kernel/cpu/mtrr/main.c | 20 ++---
 arch/x86/kernel/cpu/perf_event.c|  9 +++---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c| 10 +++
 arch/x86/kernel/cpu/perf_event_amd_uncore.c |  4 +--
 arch/x86/kernel/cpu/perf_event_intel_ds.c   |  6 ++--
 arch/x86/kernel/cpu/rdrand.c|  2 +-
 arch/x86/kernel/cpu/topology.c  |  4 +--
 arch/x86/kernel/cpu/transmeta.c |  8 +++---
 arch/x86/kernel/cpu/vmware.c|  5 ++--
 27 files changed, 146 insertions(+), 159 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a07956a..97c59fd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -117,7 +117,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
void (*f_vide)(void);
u64 d, d2;
 
-   printk(KERN_INFO "AMD K6 stepping B detected - ");
+   pr_info("AMD K6 stepping B detected - ");
 
/*
 * It looks like AMD fixed the 2.6.2 bug and improved indirect
@@ -133,10 +133,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
d = d2-d;
 
if (d > 20*K6_BUG_LOOP)
-   printk(KERN_CONT
-   "system stability may be impaired when more 
than 32 MB are used.\n");
+   pr_cont("system stability may be impaired when more 
than 32 MB are used.\n");
else
-   printk(KERN_CONT "probably OK (after B9730).\n");
+   pr_cont("probably OK (after B9730).\n");
}
 
/* K6 with old style WHCR */
@@ -154,7 +153,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
-   printk(KERN_INFO "Enabling old style K6 write 
allocation for %d Mb\n",
+   pr_info("Enabling old style K6 write allocation for %d 
Mb\n",
mbytes);
}
return;
@@ -175,7 +174,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
-   printk(KERN_INFO "Enabling new style K6 write 
allocation for %d Mb\n",
+   pr_info("Enabling new style K6 write allocation for %d 
Mb\n",
mbytes);
}
 
@@ -202,7 +201,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
 */
if (c->x86_model >= 6 && c->x86_model <= 10) {
if (!cpu_has(c, X86_FEATURE_XMM)) {
-   printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+   pr_info("Enabling disabled K7/SSE Support.\n");
msr_clear_bit(MSR_K7_HWCR, 15);
set_cpu_cap(c, X86_FEATURE_XMM);

[tip:x86/cpu] x86/cpu: Convert printk(KERN_<LEVEL> ...) to pr_<level>(...)

2016-02-03 Thread tip-bot for Chen Yucong

Commit-ID:  1b74dde7c47c19a73ea3e9fac95ac27b5d3d50c5
Gitweb: http://git.kernel.org/tip/1b74dde7c47c19a73ea3e9fac95ac27b5d3d50c5
Author: Chen Yucong <sla...@gmail.com>
AuthorDate: Tue, 2 Feb 2016 11:45:02 +0800
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Wed, 3 Feb 2016 10:30:03 +0100

x86/cpu: Convert printk(KERN_<LEVEL> ...) to pr_<level>(...)

 - Use the more current logging style pr_<level>(...) instead of the old
   printk(KERN_<LEVEL> ...).

 - Convert pr_warning() to pr_warn().

Signed-off-by: Chen Yucong <sla...@gmail.com>
Cc: Borislav Petkov <b...@alien8.de>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Thomas Gleixner <t...@linutronix.de>
Link: 
http://lkml.kernel.org/r/1454384702-21707-1-git-send-email-sla...@gmail.com
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 arch/x86/kernel/cpu/amd.c   | 23 +++
 arch/x86/kernel/cpu/bugs_64.c   |  2 +-
 arch/x86/kernel/cpu/centaur.c   | 10 +++
 arch/x86/kernel/cpu/common.c| 42 +--
 arch/x86/kernel/cpu/cyrix.c | 10 +++
 arch/x86/kernel/cpu/hypervisor.c|  2 +-
 arch/x86/kernel/cpu/intel.c | 10 +++
 arch/x86/kernel/cpu/intel_cacheinfo.c   |  2 +-
 arch/x86/kernel/cpu/mcheck/mce-inject.c | 15 +-
 arch/x86/kernel/cpu/mcheck/p5.c | 18 +---
 arch/x86/kernel/cpu/mcheck/therm_throt.c| 15 +-
 arch/x86/kernel/cpu/mcheck/threshold.c  |  4 +--
 arch/x86/kernel/cpu/mcheck/winchip.c|  5 ++--
 arch/x86/kernel/cpu/microcode/amd.c |  2 +-
 arch/x86/kernel/cpu/mshyperv.c  |  8 +++---
 arch/x86/kernel/cpu/mtrr/centaur.c  |  2 +-
 arch/x86/kernel/cpu/mtrr/cleanup.c  | 44 ++---
 arch/x86/kernel/cpu/mtrr/generic.c  | 23 ---
 arch/x86/kernel/cpu/mtrr/main.c | 20 ++---
 arch/x86/kernel/cpu/perf_event.c|  9 +++---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c| 10 +++
 arch/x86/kernel/cpu/perf_event_amd_uncore.c |  4 +--
 arch/x86/kernel/cpu/perf_event_intel_ds.c   |  6 ++--
 arch/x86/kernel/cpu/rdrand.c|  2 +-
 arch/x86/kernel/cpu/topology.c  |  4 +--
 arch/x86/kernel/cpu/transmeta.c |  8 +++---
 arch/x86/kernel/cpu/vmware.c|  5 ++--
 27 files changed, 146 insertions(+), 159 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a07956a..97c59fd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -117,7 +117,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
void (*f_vide)(void);
u64 d, d2;
 
-   printk(KERN_INFO "AMD K6 stepping B detected - ");
+   pr_info("AMD K6 stepping B detected - ");
 
/*
 * It looks like AMD fixed the 2.6.2 bug and improved indirect
@@ -133,10 +133,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
d = d2-d;
 
if (d > 20*K6_BUG_LOOP)
-   printk(KERN_CONT
-   "system stability may be impaired when more 
than 32 MB are used.\n");
+   pr_cont("system stability may be impaired when more 
than 32 MB are used.\n");
else
-   printk(KERN_CONT "probably OK (after B9730).\n");
+   pr_cont("probably OK (after B9730).\n");
}
 
/* K6 with old style WHCR */
@@ -154,7 +153,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
-   printk(KERN_INFO "Enabling old style K6 write 
allocation for %d Mb\n",
+   pr_info("Enabling old style K6 write allocation for %d 
Mb\n",
mbytes);
}
return;
@@ -175,7 +174,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
-   printk(KERN_INFO "Enabling new style K6 write 
allocation for %d Mb\n",
+   pr_info("Enabling new style K6 write allocation for %d 
Mb\n",
mbytes);
}
 
@@ -202,7 +201,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
 */
if (c->x86_model >= 6 && c->x86_model <= 10) {
if (!cpu_has(c, X86_FEATURE_XMM)) {
-   printk(KERN_INFO "Enabling disabled K7/SSE Support.\n&quo

[PATCH] arch/x86/kernel/cpu: Convert printk(KERN_<LEVEL> ...) to pr_<level>(...)

2016-02-01 Thread Chen Yucong

 - Use the more current logging style pr_<level>(...) instead of the old
   printk(KERN_<LEVEL> ...).
 - Convert pr_warning() to pr_warn().

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/cpu/amd.c   | 23 +++
 arch/x86/kernel/cpu/bugs_64.c   |  2 +-
 arch/x86/kernel/cpu/centaur.c   | 10 +++
 arch/x86/kernel/cpu/common.c| 42 +--
 arch/x86/kernel/cpu/cyrix.c | 10 +++
 arch/x86/kernel/cpu/hypervisor.c|  2 +-
 arch/x86/kernel/cpu/intel.c | 10 +++
 arch/x86/kernel/cpu/intel_cacheinfo.c   |  2 +-
 arch/x86/kernel/cpu/mcheck/mce-inject.c | 15 +-
 arch/x86/kernel/cpu/mcheck/p5.c | 18 +---
 arch/x86/kernel/cpu/mcheck/therm_throt.c| 15 +-
 arch/x86/kernel/cpu/mcheck/threshold.c  |  4 +--
 arch/x86/kernel/cpu/mcheck/winchip.c|  5 ++--
 arch/x86/kernel/cpu/microcode/amd.c |  2 +-
 arch/x86/kernel/cpu/mshyperv.c  |  8 +++---
 arch/x86/kernel/cpu/mtrr/centaur.c  |  2 +-
 arch/x86/kernel/cpu/mtrr/cleanup.c  | 44 ++---
 arch/x86/kernel/cpu/mtrr/generic.c  | 23 ---
 arch/x86/kernel/cpu/mtrr/main.c | 20 ++---
 arch/x86/kernel/cpu/perf_event.c|  9 +++---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c| 10 +++
 arch/x86/kernel/cpu/perf_event_amd_uncore.c |  4 +--
 arch/x86/kernel/cpu/perf_event_intel_ds.c   |  6 ++--
 arch/x86/kernel/cpu/rdrand.c|  2 +-
 arch/x86/kernel/cpu/topology.c  |  4 +--
 arch/x86/kernel/cpu/transmeta.c |  8 +++---
 arch/x86/kernel/cpu/vmware.c|  5 ++--
 27 files changed, 146 insertions(+), 159 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a07956a..97c59fd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -117,7 +117,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
void (*f_vide)(void);
u64 d, d2;
 
-   printk(KERN_INFO "AMD K6 stepping B detected - ");
+   pr_info("AMD K6 stepping B detected - ");
 
/*
 * It looks like AMD fixed the 2.6.2 bug and improved indirect
@@ -133,10 +133,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
d = d2-d;
 
if (d > 20*K6_BUG_LOOP)
-   printk(KERN_CONT
-   "system stability may be impaired when more 
than 32 MB are used.\n");
+   pr_cont("system stability may be impaired when more 
than 32 MB are used.\n");
else
-   printk(KERN_CONT "probably OK (after B9730).\n");
+   pr_cont("probably OK (after B9730).\n");
}
 
/* K6 with old style WHCR */
@@ -154,7 +153,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
-   printk(KERN_INFO "Enabling old style K6 write 
allocation for %d Mb\n",
+   pr_info("Enabling old style K6 write allocation for %d 
Mb\n",
mbytes);
}
return;
@@ -175,7 +174,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
-   printk(KERN_INFO "Enabling new style K6 write 
allocation for %d Mb\n",
+   pr_info("Enabling new style K6 write allocation for %d 
Mb\n",
mbytes);
}
 
@@ -202,7 +201,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
 */
if (c->x86_model >= 6 && c->x86_model <= 10) {
if (!cpu_has(c, X86_FEATURE_XMM)) {
-   printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+   pr_info("Enabling disabled K7/SSE Support.\n");
msr_clear_bit(MSR_K7_HWCR, 15);
set_cpu_cap(c, X86_FEATURE_XMM);
}
@@ -216,9 +215,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff0) != 0x2000) {
-   printk(KERN_INFO
-   "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
-   l, ((l & 0x000f)|0x2000));
+   pr_info("CPU:

[PATCH] arch/x86/kernel/cpu: Convert printk(KERN_<LEVEL> ...) to pr_<level>(...)

2016-02-01 Thread Chen Yucong

 - Use the more current logging style pr_<level>(...) instead of the old
   printk(KERN_<LEVEL> ...).
 - Convert pr_warning() to pr_warn().

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/kernel/cpu/amd.c   | 23 +++
 arch/x86/kernel/cpu/bugs_64.c   |  2 +-
 arch/x86/kernel/cpu/centaur.c   | 10 +++
 arch/x86/kernel/cpu/common.c| 42 +--
 arch/x86/kernel/cpu/cyrix.c | 10 +++
 arch/x86/kernel/cpu/hypervisor.c|  2 +-
 arch/x86/kernel/cpu/intel.c | 10 +++
 arch/x86/kernel/cpu/intel_cacheinfo.c   |  2 +-
 arch/x86/kernel/cpu/mcheck/mce-inject.c | 15 +-
 arch/x86/kernel/cpu/mcheck/p5.c | 18 +---
 arch/x86/kernel/cpu/mcheck/therm_throt.c| 15 +-
 arch/x86/kernel/cpu/mcheck/threshold.c  |  4 +--
 arch/x86/kernel/cpu/mcheck/winchip.c|  5 ++--
 arch/x86/kernel/cpu/microcode/amd.c |  2 +-
 arch/x86/kernel/cpu/mshyperv.c  |  8 +++---
 arch/x86/kernel/cpu/mtrr/centaur.c  |  2 +-
 arch/x86/kernel/cpu/mtrr/cleanup.c  | 44 ++---
 arch/x86/kernel/cpu/mtrr/generic.c  | 23 ---
 arch/x86/kernel/cpu/mtrr/main.c | 20 ++---
 arch/x86/kernel/cpu/perf_event.c|  9 +++---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c| 10 +++
 arch/x86/kernel/cpu/perf_event_amd_uncore.c |  4 +--
 arch/x86/kernel/cpu/perf_event_intel_ds.c   |  6 ++--
 arch/x86/kernel/cpu/rdrand.c|  2 +-
 arch/x86/kernel/cpu/topology.c  |  4 +--
 arch/x86/kernel/cpu/transmeta.c |  8 +++---
 arch/x86/kernel/cpu/vmware.c|  5 ++--
 27 files changed, 146 insertions(+), 159 deletions(-)

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a07956a..97c59fd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -117,7 +117,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
void (*f_vide)(void);
u64 d, d2;
 
-   printk(KERN_INFO "AMD K6 stepping B detected - ");
+   pr_info("AMD K6 stepping B detected - ");
 
/*
 * It looks like AMD fixed the 2.6.2 bug and improved indirect
@@ -133,10 +133,9 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
d = d2-d;
 
if (d > 20*K6_BUG_LOOP)
-   printk(KERN_CONT
-   "system stability may be impaired when more 
than 32 MB are used.\n");
+   pr_cont("system stability may be impaired when more 
than 32 MB are used.\n");
else
-   printk(KERN_CONT "probably OK (after B9730).\n");
+   pr_cont("probably OK (after B9730).\n");
}
 
/* K6 with old style WHCR */
@@ -154,7 +153,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
-   printk(KERN_INFO "Enabling old style K6 write 
allocation for %d Mb\n",
+   pr_info("Enabling old style K6 write allocation for %d 
Mb\n",
mbytes);
}
return;
@@ -175,7 +174,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
local_irq_restore(flags);
-   printk(KERN_INFO "Enabling new style K6 write 
allocation for %d Mb\n",
+   pr_info("Enabling new style K6 write allocation for %d 
Mb\n",
mbytes);
}
 
@@ -202,7 +201,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
 */
if (c->x86_model >= 6 && c->x86_model <= 10) {
if (!cpu_has(c, X86_FEATURE_XMM)) {
-   printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
+   pr_info("Enabling disabled K7/SSE Support.\n");
msr_clear_bit(MSR_K7_HWCR, 15);
set_cpu_cap(c, X86_FEATURE_XMM);
}
@@ -216,9 +215,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff0) != 0x2000) {
-   printk(KERN_INFO
-   "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
-   l, ((l & 0x000f)|0x2000));
+   pr_info(&quo

[PATCH] mcheck/threshold: use pr_err rather than printk(KERN_ERR ...)

2016-01-31 Thread Chen Yucong

Use the current logging styles. And convert printk(KERN_ERR ...)
to pr_err(...) for threshold.c.

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/cpu/mcheck/threshold.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c 
b/arch/x86/kernel/cpu/mcheck/threshold.c
index 7245980..fcf9ae9 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -12,8 +12,8 @@
 
 static void default_threshold_interrupt(void)
 {
-   printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
-THRESHOLD_APIC_VECTOR);
+   pr_err("Unexpected threshold interrupt at vector %x\n",
+   THRESHOLD_APIC_VECTOR);
 }
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
-- 
1.8.3.1

[PATCH] mcheck/threshold: use pr_err rather than printk(KERN_ERR ...)

2016-01-31 Thread Chen Yucong

Use the current logging styles. And convert printk(KERN_ERR ...)
to pr_err(...) for threshold.c.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/kernel/cpu/mcheck/threshold.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c 
b/arch/x86/kernel/cpu/mcheck/threshold.c
index 7245980..fcf9ae9 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -12,8 +12,8 @@
 
 static void default_threshold_interrupt(void)
 {
-   printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n",
-THRESHOLD_APIC_VECTOR);
+   pr_err("Unexpected threshold interrupt at vector %x\n",
+   THRESHOLD_APIC_VECTOR);
 }
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
-- 
1.8.3.1

[PATCH] mce-inject: use pr_err() instead of printk(KERN_ERR, ...)

2016-01-30 Thread Chen Yucong

Signed-off-by: Chen Yucong 

Convert printk(KERN_ERR ...) to pr_err(...).
The pr_<level>() form is more compact and enables use of the pr_fmt() macro.
---
 arch/x86/kernel/cpu/mcheck/mce-inject.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c 
b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index c0b0d77..517619e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -183,8 +183,7 @@ static void raise_mce(struct mce *m)
start = jiffies;
while (!cpumask_empty(mce_inject_cpumask)) {
if (!time_before(jiffies, start + 2*HZ)) {
-   printk(KERN_ERR
-   "Timeout waiting for mce inject %lx\n",
+   pr_err("Timeout waiting for mce inject %lx\n",
*cpumask_bits(mce_inject_cpumask));
break;
}
-- 
1.8.3.1

[PATCH] mce-inject: use pr_err() instead of printk(KERN_ERR, ...)

2016-01-30 Thread Chen Yucong

Signed-off-by: Chen Yucong <sla...@gmail.com>

Convert printk(KERN_ERR ...) to pr_err(...).
The pr_<level>() form is more compact and enables use of the pr_fmt() macro.
---
 arch/x86/kernel/cpu/mcheck/mce-inject.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c 
b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index c0b0d77..517619e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -183,8 +183,7 @@ static void raise_mce(struct mce *m)
start = jiffies;
while (!cpumask_empty(mce_inject_cpumask)) {
if (!time_before(jiffies, start + 2*HZ)) {
-   printk(KERN_ERR
-   "Timeout waiting for mce inject %lx\n",
+   pr_err("Timeout waiting for mce inject %lx\n",
*cpumask_bits(mce_inject_cpumask));
break;
}
-- 
1.8.3.1

[PATCH] x86/hw-breakpoints: eliminate a compiler warning for hw_breakpoint.c

2014-12-28 Thread Chen Yucong

There is a warning message when we compile the linux-next tree.

arch/x86/kernel/hw_breakpoint.c: In function ‘arch_validate_hwbkpt_settings’:
arch/x86/kernel/hw_breakpoint.c:329:20: warning: ‘align’ may be used 
uninitialized in this function [-Wuninitialized]

This patch aims to eliminate the above compiler warning.

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/hw_breakpoint.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 7114ba2..302eab3 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -293,7 +293,7 @@ static int arch_build_bp_info(struct perf_event *bp)
 int arch_validate_hwbkpt_settings(struct perf_event *bp)
 {
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-   unsigned int align;
+   unsigned int align = ~0x0U;
int ret;
 
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] x86, mce: use mce_usable_address() for UCNA memory error recovery

2014-12-28 Thread Chen Yucong

A machine-check address register (MCi_ADDR) that the processor uses
to report the address or location associated with the logged error.
The address field can hold a virtual (linear) address, a physical
address, or a value indicating an internal physical location, depending
on the type of error. For further information, see the documentation
for particular implementations of the architecture.
   -- AMD64 APM Volume 2

The IA32_MCi_ADDR MSR contains the address of the code or data memory
location that produced the machine-check error. The IA32_MCi_ADDR
register is either not implemented or contains no address if the ADDRV
flag in the IA32_MCi_STATUS register is clear. The address returned is
an offset into a segment, linear address, physical address, or memory
address. This depends on the error encountered.
   -- Intel SDM Volume 3B

As the comment on mce_usable_address() suggests, we should check whether the
address reported by the CPU is in a format we can parse. This patch uses
mce_usable_address() for UCNA/Deferred memory error recovery. On Intel
x86_64 platforms mce_usable_address() works fine; on AMD platforms the
check is not implemented yet and the function simply returns 0.

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/cpu/mcheck/mce.c |   48 --
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 800d423..c777626 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -607,6 +607,35 @@ static bool memory_error(struct mce *m)
return false;
 }
 
+/*
+ * Check if the address reported by the CPU is in a format we can parse.
+ * It would be possible to add code for most other cases, but all would
+ * be somewhat complicated (e.g. segment offset would require an instruction
+ * parser). So only support physical addresses up to page granuality for now.
+ */
+static int mce_usable_address(struct mce *m)
+{
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   if (c->x86_vendor == X86_VENDOR_INTEL) {
+   if (!(m->status & MCI_STATUS_MISCV) ||
+   !(m->status & MCI_STATUS_ADDRV))
+   return 0;
+   if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
+   return 0;
+   if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
+   return 0;
+   return 1;
+   } else if (c->x86_vendor == X86_VENDOR_AMD) {
+   /*
+* coming soon
+*/
+   return 0;
+   }
+
+   return 0;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -671,7 +700,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 * do not add it into the ring buffer.
 */
if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
-   if (m.status & MCI_STATUS_ADDRV) {
+   if (mce_usable_address(&m)) {
mce_ring_add(m.addr >> PAGE_SHIFT);
mce_schedule_work();
}
@@ -976,23 +1005,6 @@ reset:
return ret;
 }
 
-/*
- * Check if the address reported by the CPU is in a format we can parse.
- * It would be possible to add code for most other cases, but all would
- * be somewhat complicated (e.g. segment offset would require an instruction
- * parser). So only support physical addresses up to page granuality for now.
- */
-static int mce_usable_address(struct mce *m)
-{
-   if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
-   return 0;
-   if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
-   return 0;
-   if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
-   return 0;
-   return 1;
-}
-
 static void mce_clear_state(unsigned long *toclear)
 {
int i;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] x86, mce: use mce_usable_address() for UCNA memory error recovery

2014-12-28 Thread Chen Yucong

A machine-check address register (MCi_ADDR) that the processor uses
to report the address or location associated with the logged error.
The address field can hold a virtual (linear) address, a physical
address, or a value indicating an internal physical location, depending
on the type of error. For further information, see the documentation
for particular implementations of the architecture.
   -- AMD64 APM Volume 2

The IA32_MCi_ADDR MSR contains the address of the code or data memory
location that produced the machine-check error. The IA32_MCi_ADDR
register is either not implemented or contains no address if the ADDRV
flag in the IA32_MCi_STATUS register is clear. The address returned is
an offset into a segment, linear address, physical address, or memory
address. This depends on the error encountered.
   -- Intel SDM Volume 3B

As the comment on mce_usable_address() suggests, we should check whether the
address reported by the CPU is in a format we can parse. This patch uses
mce_usable_address() for UCNA/Deferred memory error recovery. On Intel
x86_64 platforms mce_usable_address() works fine; on AMD platforms the
check is not implemented yet and the function simply returns 0.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c |   48 --
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 800d423..c777626 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -607,6 +607,35 @@ static bool memory_error(struct mce *m)
return false;
 }
 
+/*
+ * Check if the address reported by the CPU is in a format we can parse.
+ * It would be possible to add code for most other cases, but all would
+ * be somewhat complicated (e.g. segment offset would require an instruction
+ * parser). So only support physical addresses up to page granuality for now.
+ */
+static int mce_usable_address(struct mce *m)
+{
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   if (c->x86_vendor == X86_VENDOR_INTEL) {
+   if (!(m->status & MCI_STATUS_MISCV) ||
+   !(m->status & MCI_STATUS_ADDRV))
+   return 0;
+   if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
+   return 0;
+   if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
+   return 0;
+   return 1;
+   } else if (c->x86_vendor == X86_VENDOR_AMD) {
+   /*
+* coming soon
+*/
+   return 0;
+   }
+
+   return 0;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -671,7 +700,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 * do not add it into the ring buffer.
 */
if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
-   if (m.status & MCI_STATUS_ADDRV) {
+   if (mce_usable_address(&m)) {
mce_ring_add(m.addr >> PAGE_SHIFT);
mce_schedule_work();
}
@@ -976,23 +1005,6 @@ reset:
return ret;
 }
 
-/*
- * Check if the address reported by the CPU is in a format we can parse.
- * It would be possible to add code for most other cases, but all would
- * be somewhat complicated (e.g. segment offset would require an instruction
- * parser). So only support physical addresses up to page granuality for now.
- */
-static int mce_usable_address(struct mce *m)
-{
-   if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV))
-   return 0;
-   if (MCI_MISC_ADDR_LSB(m->misc) > PAGE_SHIFT)
-   return 0;
-   if (MCI_MISC_ADDR_MODE(m->misc) != MCI_MISC_ADDR_PHYS)
-   return 0;
-   return 1;
-}
-
 static void mce_clear_state(unsigned long *toclear)
 {
int i;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] x86/hw-breakpoints: eliminate a compiler warning for hw_breakpoint.c

2014-12-28 Thread Chen Yucong

There is a warning message when we compile the linux-next tree.

arch/x86/kernel/hw_breakpoint.c: In function ‘arch_validate_hwbkpt_settings’:
arch/x86/kernel/hw_breakpoint.c:329:20: warning: ‘align’ may be used 
uninitialized in this function [-Wuninitialized]

This patch aims to eliminate the above compiler warning.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/kernel/hw_breakpoint.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 7114ba2..302eab3 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -293,7 +293,7 @@ static int arch_build_bp_info(struct perf_event *bp)
 int arch_validate_hwbkpt_settings(struct perf_event *bp)
 {
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-   unsigned int align;
+   unsigned int align = ~0x0U;
int ret;
 
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-11-17 Thread Chen Yucong

Uncorrected no action required (UCNA) - is a uncorrected recoverable
machine check error that is not signaled via a machine check exception
and, instead, is reported to system software as a corrected machine
check error. UCNA errors indicate that some data in the system is
corrupted, but the data has not been consumed and the processor state
is valid and you may continue execution on this processor. UCNA errors
require no action from system software to continue execution. Note that
UCNA errors are supported by the processor only when IA32_MCG_CAP[24]
(MCG_SER_P) is set.
   -- Intel SDM Volume 3B

Deferred errors are errors that cannot be corrected by hardware, but
do not cause an immediate interruption in program flow, loss of data
integrity, or corruption of processor state. These errors indicate
that data has been corrupted but not consumed. Hardware writes information
to the status and address registers in the corresponding bank that
identifies the source of the error if deferred errors are enabled for
logging. Deferred errors are not reported via machine check exceptions;
they can be seen by polling the MCi_STATUS registers.
-- AMD64 APM Volume 2

Above two items, both UCNA and Deferred errors belong to detected
errors, but they can't be corrected by hardware, and this is very
similar to Software Recoverable Action Optional (SRAO) errors.
Therefore, we can take some actions that have been used for handling
SRAO errors to handle UCNA and Deferred errors.

Acked-by: Borislav Petkov <b...@suse.de>
Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c |   46 ++
 1 file changed, 46 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 453e9bf..cfb16f6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,37 @@ static void mce_read_aux(struct mce *m, int i)
}
 }
 
+static bool memory_error(struct mce *m)
+{
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   if (c->x86_vendor == X86_VENDOR_AMD) {
+   /*
+* coming soon
+*/
+   return false;
+   } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+   /*
+* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
+*
+* Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+* indicating a memory error. Bit 8 is used for indicating a
+* cache hierarchy error. The combination of bit 2 and bit 3
+* is used for indicating a `generic' cache hierarchy error
+* But we can't just blindly check the above bits, because if
+* bit 11 is set, then it is a bus/interconnect error - and
+* either way the above bits just gives more detail on what
+* bus/interconnect error happened. Note that bit 12 can be
+* ignored, as it's the "filter" bit.
+*/
+   return (m->status & 0xef80) == BIT(7) ||
+  (m->status & 0xef00) == BIT(8) ||
+  (m->status & 0xeffc) == 0xc;
+   }
+
+   return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -595,6 +626,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
struct mce m;
+   int severity;
int i;
 
this_cpu_inc(mce_poll_count);
@@ -630,6 +662,20 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
+
+   severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
+
+   /*
+* In the cases where we don't have a valid address after all,
+* do not add it into the ring buffer.
+*/
+   if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
+   if (m.status & MCI_STATUS_ADDRV) {
+   mce_ring_add(m.addr >> PAGE_SHIFT);
+   mce_schedule_work();
+   }
+   }
+
/*
 * Don't get the IP here because it's unlikely to
 * have anything to do with the actual error location.
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 0/2]RAS: add the support for handling UCNA/DEFERRED error

2014-11-17 Thread Chen Yucong

Hi all,

At the suggestion of Boris, the first patch extends the mce_severity
mechanism for handling UCNA/DEFERRED error.
  Link: https://lkml.org/lkml/2014/10/23/190

v2:
The first patch have also eliminated a big hack to make mce_severity() 
work when called from non-exception context on the advice of Tony and 
Boris.
  Link: https://lkml.org/lkml/2014/10/27/1017

And on the basis of the first patch, the second patch adds the support
for identifying and handling UCNA/DEFERRED error in machine_check_poll.

V3:
According to Boris, the second patch have also split `memory_error' 
from mem_deferred_error so that the memory_error() function can be used
in other code paths separately. 
  Link: https://lkml.org/lkml/2014/11/6/452

Boris also reported the warning about "MCI_STATUS_DEFERRED" and
"MCI_STATUS_POISON" being redefined.

V4:
Like MCIP/RIPV/EIPV bits, MCI_STATUS_EN is specific to "machine check 
exception".
As Tony suggested, the severity table entry for the "EN" check should have been
skipped when calling from the CMCI/Poll handler.
  Link: https://lkml.org/lkml/2014/11/11/765
  AMD APM Volume 2: 9.3.2 Error-Reporting Register Banks - MCi_STATUS

memory_error() is incomplete for AMD platform. Boris will try to have a
fix.
  Link: https://lkml.org/lkml/2014/11/10/720

thx!
cyc 

[PATCH v4 1/2] x86, mce, severity: extend the the mce_severity
[PATCH v4 2/2] x86, mce: support memory error recovery for both UCNA

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 1/2] x86, mce, severity: extend the the mce_severity mechanism to handle UCNA/DEFERRED error

2014-11-17 Thread Chen Yucong

Until now, the mce_severity mechanism can only identify the severity
of UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it is not able to filter
out DEFERRED error for AMD platform.

This patch aims to extend the mce_severity mechanism for handling
UCNA/DEFERRED error. In order to do this, the patch introduces a new
severity level - MCE_UCNA/DEFERRED_SEVERITY.

In addition, mce_severity is specific to machine check exception,
and it will check MCIP/EIPV/RIPV bits. In order to use mce_severity
mechanism in non-exception context, the patch also introduces a new
argument (is_excp) for mce_severity. `is_excp' is used to explicitly
specify the calling context of mce_severity.

Reviewed-by: Aravind Gopalakrishnan <aravind.gopalakrish...@amd.com>
Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/include/asm/mce.h|4 
 arch/x86/kernel/cpu/mcheck/mce-internal.h |4 +++-
 arch/x86/kernel/cpu/mcheck/mce-severity.c |   23 +--
 arch/x86/kernel/cpu/mcheck/mce.c  |   14 --
 drivers/edac/mce_amd.h|3 ---
 5 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 276392f..51b26e89 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
 #define MCI_STATUS_S    (1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
 
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED (1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON  (1ULL<<43)  /* access poisonous data */
+
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
  * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h 
b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 09edd0b..10b4690 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -3,6 +3,8 @@
 
 enum severity_level {
MCE_NO_SEVERITY,
+   MCE_DEFERRED_SEVERITY,
+   MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
MCE_KEEP_SEVERITY,
MCE_SOME_SEVERITY,
MCE_AO_SEVERITY,
@@ -21,7 +23,7 @@ struct mce_bank {
char attrname[ATTR_LEN]; /* attribute name */
 };
 
-int mce_severity(struct mce *a, int tolerant, char **msg);
+int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern struct mce_bank *mce_banks;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c 
b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c370e1c..8bb4330 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -31,6 +31,7 @@
 
 enum context { IN_KERNEL = 1, IN_USER = 2 };
 enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
 
 static struct severity {
u64 mask;
@@ -40,6 +41,7 @@ static struct severity {
unsigned char mcgres;
unsigned char ser;
unsigned char context;
+   unsigned char excp;
unsigned char covered;
char *msg;
 } severities[] = {
@@ -48,6 +50,8 @@ static struct severity {
 #define  USER  .context = IN_USER
 #define  SER   .ser = SER_REQUIRED
 #define  NOSER .ser = NO_SER
+#define  EXCP  .excp = EXCP_CONTEXT
+#define  NOEXCP    .excp = NO_EXCP
 #define  BITCLR(x) .mask = x, .result = 0
 #define  BITSET(x) .mask = x, .result = x
 #define  MCGMASK(x, y) .mcgmask = x, .mcgres = y
@@ -62,7 +66,7 @@ static struct severity {
),
MCESEV(
NO, "Not enabled",
-   BITCLR(MCI_STATUS_EN)
+   EXCP, BITCLR(MCI_STATUS_EN)
),
MCESEV(
PANIC, "Processor context corrupt",
@@ -71,16 +75,20 @@ static struct severity {
/* When MCIP is not set something is very confused */
MCESEV(
PANIC, "MCIP not set in MCA handler",
-   MCGMASK(MCG_STATUS_MCIP, 0)
+   EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
),
/* Neither return not error IP -- no chance to recover -> PANIC */
MCESEV(
PANIC, "Neither restart nor error IP",
-   MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
+   EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
),
MCESEV(
PANIC, "In kernel and no restart IP",
-   KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+   EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+   ),
+   MCESEV(
+   DEFERRED, "Deferred error",
+   NOSER, 
MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
),
MCESEV(
KEEP, "Corrected error",
@@ -89,7 +97,7 @@ s

[PATCH v4 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-11-17 Thread Chen Yucong

Uncorrected no action required (UCNA) - is a uncorrected recoverable
machine check error that is not signaled via a machine check exception
and, instead, is reported to system software as a corrected machine
check error. UCNA errors indicate that some data in the system is
corrupted, but the data has not been consumed and the processor state
is valid and you may continue execution on this processor. UCNA errors
require no action from system software to continue execution. Note that
UCNA errors are supported by the processor only when IA32_MCG_CAP[24]
(MCG_SER_P) is set.
   -- Intel SDM Volume 3B

Deferred errors are errors that cannot be corrected by hardware, but
do not cause an immediate interruption in program flow, loss of data
integrity, or corruption of processor state. These errors indicate
that data has been corrupted but not consumed. Hardware writes information
to the status and address registers in the corresponding bank that
identifies the source of the error if deferred errors are enabled for
logging. Deferred errors are not reported via machine check exceptions;
they can be seen by polling the MCi_STATUS registers.
-- AMD64 APM Volume 2

Above two items, both UCNA and Deferred errors belong to detected
errors, but they can't be corrected by hardware, and this is very
similar to Software Recoverable Action Optional (SRAO) errors.
Therefore, we can take some actions that have been used for handling
SRAO errors to handle UCNA and Deferred errors.

Acked-by: Borislav Petkov <b...@suse.de>
Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c |   46 ++
 1 file changed, 46 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 453e9bf..cfb16f6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,37 @@ static void mce_read_aux(struct mce *m, int i)
}
 }
 
+static bool memory_error(struct mce *m)
+{
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   if (c->x86_vendor == X86_VENDOR_AMD) {
+   /*
+* coming soon
+*/
+   return false;
+   } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+   /*
+* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
+*
+* Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+* indicating a memory error. Bit 8 is used for indicating a
+* cache hierarchy error. The combination of bit 2 and bit 3
+* is used for indicating a `generic' cache hierarchy error
+* But we can't just blindly check the above bits, because if
+* bit 11 is set, then it is a bus/interconnect error - and
+* either way the above bits just gives more detail on what
+* bus/interconnect error happened. Note that bit 12 can be
+* ignored, as it's the "filter" bit.
+*/
+   return (m->status & 0xef80) == BIT(7) ||
+  (m->status & 0xef00) == BIT(8) ||
+  (m->status & 0xeffc) == 0xc;
+   }
+
+   return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -595,6 +626,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
struct mce m;
+   int severity;
int i;
 
this_cpu_inc(mce_poll_count);
@@ -630,6 +662,20 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
+
+   severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
+
+   /*
+* In the cases where we don't have a valid address after all,
+* do not add it into the ring buffer.
+*/
+   if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
+   if (m.status & MCI_STATUS_ADDRV) {
+   mce_ring_add(m.addr >> PAGE_SHIFT);
+   mce_schedule_work();
+   }
+   }
+
/*
 * Don't get the IP here because it's unlikely to
 * have anything to do with the actual error location.
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 0/2]RAS: add the support for handling UCNA/DEFERRED error

2014-11-17 Thread Chen Yucong

Hi all,

At the suggestion of Boris, the first patch extends the mce_severity
mechanism for handling UCNA/DEFERRED error.
  Link: https://lkml.org/lkml/2014/10/23/190

v2:
The first patch have also eliminated a big hack to make mce_severity() 
work when called from non-exception context on the advice of Tony and 
Boris.
  Link: https://lkml.org/lkml/2014/10/27/1017

And on the basis of the first patch, the second patch adds the support
for identifying and handling UCNA/DEFERRED error in machine_check_poll.

V3:
According to Boris, the second patch have also split `memory_error' 
from mem_deferred_error so that the memory_error() function can be used
in other code paths separately. 
  Link: https://lkml.org/lkml/2014/11/6/452

Boris also reported the warning about "MCI_STATUS_DEFERRED" and
"MCI_STATUS_POISON" being redefined.

V4:
Like MCIP/RIPV/EIPV bits, MCI_STATUS_EN is specific to "machine check 
exception".
As Tony suggested, the severity table entry for the "EN" check should have been
skipped when calling from the CMCI/Poll handler.
  Link: https://lkml.org/lkml/2014/11/11/765
  AMD APM Volume 2: 9.3.2 Error-Reporting Register Banks - MCi_STATUS

memory_error() is incomplete for AMD platform. Boris will try to have a
fix.
  Link: https://lkml.org/lkml/2014/11/10/720

thx!
cyc 

[PATCH v4 1/2] x86, mce, severity: extend the the mce_severity
[PATCH v4 2/2] x86, mce: support memory error recovery for both UCNA

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v4 1/2] x86, mce, severity: extend the the mce_severity mechanism to handle UCNA/DEFERRED error

2014-11-17 Thread Chen Yucong

Until now, the mce_severity mechanism can only identify the severity
of UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it is not able to filter
out DEFERRED error for AMD platform.

This patch aims to extend the mce_severity mechanism for handling
UCNA/DEFERRED error. In order to do this, the patch introduces a new
severity level - MCE_UCNA/DEFERRED_SEVERITY.

In addition, mce_severity is specific to machine check exception,
and it will check MCIP/EIPV/RIPV bits. In order to use mce_severity
mechanism in non-exception context, the patch also introduces a new
argument (is_excp) for mce_severity. `is_excp' is used to explicitly
specify the calling context of mce_severity.

Reviewed-by: Aravind Gopalakrishnan <aravind.gopalakrish...@amd.com>
Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/include/asm/mce.h|4 
 arch/x86/kernel/cpu/mcheck/mce-internal.h |4 +++-
 arch/x86/kernel/cpu/mcheck/mce-severity.c |   23 +--
 arch/x86/kernel/cpu/mcheck/mce.c  |   14 --
 drivers/edac/mce_amd.h|3 ---
 5 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 276392f..51b26e89 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
 #define MCI_STATUS_S    (1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
 
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED (1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON  (1ULL<<43)  /* access poisonous data */
+
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
  * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h 
b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 09edd0b..10b4690 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -3,6 +3,8 @@
 
 enum severity_level {
MCE_NO_SEVERITY,
+   MCE_DEFERRED_SEVERITY,
+   MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
MCE_KEEP_SEVERITY,
MCE_SOME_SEVERITY,
MCE_AO_SEVERITY,
@@ -21,7 +23,7 @@ struct mce_bank {
char attrname[ATTR_LEN]; /* attribute name */
 };
 
-int mce_severity(struct mce *a, int tolerant, char **msg);
+int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern struct mce_bank *mce_banks;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c 
b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c370e1c..8bb4330 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -31,6 +31,7 @@
 
 enum context { IN_KERNEL = 1, IN_USER = 2 };
 enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
 
 static struct severity {
u64 mask;
@@ -40,6 +41,7 @@ static struct severity {
unsigned char mcgres;
unsigned char ser;
unsigned char context;
+   unsigned char excp;
unsigned char covered;
char *msg;
 } severities[] = {
@@ -48,6 +50,8 @@ static struct severity {
 #define  USER  .context = IN_USER
 #define  SER   .ser = SER_REQUIRED
 #define  NOSER .ser = NO_SER
+#define  EXCP  .excp = EXCP_CONTEXT
+#define  NOEXCP    .excp = NO_EXCP
 #define  BITCLR(x) .mask = x, .result = 0
 #define  BITSET(x) .mask = x, .result = x
 #define  MCGMASK(x, y) .mcgmask = x, .mcgres = y
@@ -62,7 +66,7 @@ static struct severity {
),
MCESEV(
NO, "Not enabled",
-   BITCLR(MCI_STATUS_EN)
+   EXCP, BITCLR(MCI_STATUS_EN)
),
MCESEV(
PANIC, "Processor context corrupt",
@@ -71,16 +75,20 @@ static struct severity {
/* When MCIP is not set something is very confused */
MCESEV(
PANIC, "MCIP not set in MCA handler",
-   MCGMASK(MCG_STATUS_MCIP, 0)
+   EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
),
/* Neither return not error IP -- no chance to recover -> PANIC */
MCESEV(
PANIC, "Neither restart nor error IP",
-   MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
+   EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
),
MCESEV(
PANIC, "In kernel and no restart IP",
-   KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+   EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+   ),
+   MCESEV(
+   DEFERRED, "Deferred error",
+   NOSER, 
MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
),
MCESEV(
KEEP, "Corrected error",
@@ -89,7 +97,7 @@ static struct severity {
 
/* ignore OVER for UCNA */
MCESEV

Re: [PATCH v3 1/2] x86, mce, severity: extend the the mce_severity mechanism to handle UCNA/DEFERRED error

2014-11-11 Thread Chen Yucong

On Tue, 2014-11-11 at 18:44 +, Luck, Tony wrote:
> >> The bank 7 error reported as severity 0 because EN=0 ... so we took no 
> >> action for it.
> >
> > How come EN is 0? Bank7 error reporting is not enabled? Why? Or the
> > error injection thing doesn't do it?
> 
> The "EN" bit is poorly named, and not well documented.  Here's a clip from 
> the SDM:
> 
> One of bullets in 15.10.4.1 Machine-Check Exception Handler for Error Recovery
> 
>  When the EN flag is zero but the VAL and UC flags are one in the
>  IA32_MCi_STATUS register, the reported uncorrected error in this bank
>  is not enabled. As uncorrected errors with the EN flag = 0 are not the
>  source of machine check exceptions, the MCE handler should log and clear
>  non-enabled errors when the S bit is set and should continue searching
>  for enabled errors from the other IA32_MCi_STATUS registers. Note that
>  when IA32_MCG_CAP [24] is 0, any uncorrected error condition (VAL =1
>  and UC=1) including the one with the EN flag cleared are fatal and the
>  handler must signal the operating system to reset the system. For the
>  errors that do not generate machine check exceptions, the EN flag has
>  no meaning. See Chapter 19: Table 19-15 to find the errors that do not
>  generate machine check exceptions.
> 
> Unfortunately the reference to chapter 19 is stale (that is now all about
> performance monitoring - I'll log a bug with the SDM editor to find the
> right reference and fix this).
> 
> What this is trying to say is that the "EN" bit is to enable signaling
> of machine checks - so it only has meaning when checking banks from the
> machine check handler.  Errors that are logged, but not signaled, or signaled
> as CMCI will have MCi_STATUS.EN=0
> 
> 
> >> The bank 3 error got past that hurdle, then through the next BIT(8) set 
> >> indicates a
> >> cache error. Fell at the last check because ADDRV=0.
> >
> > I guess you could tweak the injection path to write in a default address
> > so that that check gets bypassed...
> 
> I don't think this is an injection artifact. I think on this processor the 
> mid-level-cache
> just isn't providing an address in this case.  It doesn't help to make one up 
> - our whole
> game plan is to offline a page with a UC error - and we must have an address 
> to know
> which page to offline.
> 
> Perhaps the severity table entries for UCNA and DEFERRED errors should look 
> to see
> if ADDRV is set - if not, don't report this as UCNA/DEFERRED?
> 
We can also find the following snippet from AMD APM Volume 2:

9.3.2 Error-Reporting Register Banks - MCi_STATUS

EN—Bit 60. When set to 1, this bit indicates that the error condition is
enabled in the corresponding error-reporting control register (MCi_CTL).
Errors disabled by MCi_CTL do not cause a `machine-check exception'.

Just as what you said, the severity table entry for the "EN" check
should have been skipped when calling from the CMCI/Poll handler.
As shown below:

MCESEV(
NO, "Not enabled",
EXCP, BITCLR(MCI_STATUS_EN)
),

thx!
cyc

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH v3 1/2] x86, mce, severity: extend the the mce_severity mechanism to handle UCNA/DEFERRED error

2014-11-11 Thread Chen Yucong

On Tue, 2014-11-11 at 18:44 +, Luck, Tony wrote:
> >> The bank 7 error reported as severity 0 because EN=0 ... so we took no 
> >> action for it.
> >
> > How come EN is 0? Bank7 error reporting is not enabled? Why? Or the
> > error injection thing doesn't do it?
> 
> The "EN" bit is poorly named, and not well documented.  Here's a clip from 
> the SDM:
> 
> One of bullets in 15.10.4.1 Machine-Check Exception Handler for Error Recovery
> 
>  When the EN flag is zero but the VAL and UC flags are one in the
>  IA32_MCi_STATUS register, the reported uncorrected error in this bank
>  is not enabled. As uncorrected errors with the EN flag = 0 are not the
>  source of machine check exceptions, the MCE handler should log and clear
>  non-enabled errors when the S bit is set and should continue searching
>  for enabled errors from the other IA32_MCi_STATUS registers. Note that
>  when IA32_MCG_CAP [24] is 0, any uncorrected error condition (VAL =1
>  and UC=1) including the one with the EN flag cleared are fatal and the
>  handler must signal the operating system to reset the system. For the
>  errors that do not generate machine check exceptions, the EN flag has
>  no meaning. See Chapter 19: Table 19-15 to find the errors that do not
>  generate machine check exceptions.
> 
> Unfortunately the reference to chapter 19 is stale (that is now all about
> performance monitoring - I'll log a bug with the SDM editor to find the
> right reference and fix this).
> 
> What this is trying to say is that the "EN" bit is to enable signaling
> of machine checks - so it only has meaning when checking banks from the
> machine check handler.  Errors that are logged, but not signaled, or signaled
> as CMCI will have MCi_STATUS.EN=0
> 
> 
> >> The bank 3 error got past that hurdle, then through the next BIT(8) set 
> >> indicates a
> >> cache error. Fell at the last check because ADDRV=0.
> >
> > I guess you could tweak the injection path to write in a default address
> > so that that check gets bypassed...
> 
> I don't think this is an injection artifact. I think on this processor the 
> mid-level-cache
> just isn't providing an address in this case.  It doesn't help to make one up 
> - our whole
> game plan is to offline a page with a UC error - and we must have an address 
> to know
> which page to offline.
> 
> Perhaps the severity table entries for UCNA and DEFERRED errors should look 
> to see
> if ADDRV is set - if not, don't report this as UCNA/DEFERRED?
> 
We can also find the following snippet from AMD APM Volume 2:

9.3.2 Error-Reporting Register Banks - MCi_STATUS

EN—Bit 60. When set to 1, this bit indicates that the error condition is
enabled in the corresponding error-reporting control register (MCi_CTL).
Errors disabled by MCi_CTL do not cause a `machine-check exception'.

Just as what you said, the severity table entry for the "EN" check
should have been skipped when calling from the CMCI/Poll handler.
As shown below:

MCESEV(
NO, "Not enabled",
EXCP, BITCLR(MCI_STATUS_EN)
),

thx!
cyc

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-11-07 Thread Chen Yucong

Uncorrected no action required (UCNA) - is a uncorrected recoverable
machine check error that is not signaled via a machine check exception
and, instead, is reported to system software as a corrected machine
check error. UCNA errors indicate that some data in the system is
corrupted, but the data has not been consumed and the processor state
is valid and you may continue execution on this processor. UCNA errors
require no action from system software to continue execution. Note that
UCNA errors are supported by the processor only when IA32_MCG_CAP[24]
(MCG_SER_P) is set.
   -- Intel SDM Volume 3B

Deferred errors are errors that cannot be corrected by hardware, but
do not cause an immediate interruption in program flow, loss of data
integrity, or corruption of processor state. These errors indicate
that data has been corrupted but not consumed. Hardware writes information
to the status and address registers in the corresponding bank that
identifies the source of the error if deferred errors are enabled for
logging. Deferred errors are not reported via machine check exceptions;
they can be seen by polling the MCi_STATUS registers.
-- AMD64 APM Volume 2

Above two items, both UCNA and Deferred errors belong to detected
errors, but they can't be corrected by hardware, and this is very
similar to Software Recoverable Action Optional (SRAO) errors.
Therefore, we can take some actions that have been used for handling
SRAO errors to handle UCNA and Deferred errors.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c |   50 ++
 1 file changed, 50 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 453e9bf..4b6e4cdf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,41 @@ static void mce_read_aux(struct mce *m, int i)
}
 }
 
+static bool memory_error(struct mce *m)
+{
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   if (c->x86_vendor == X86_VENDOR_AMD) {
+   /*
+* AMD BKDGs - Machine Check Error Codes
+*
+* Bit 8 of ErrCode[15:0] of MCi_STATUS is used for indicating
+* a memory-specific error. Note that this field encodes info-
+* rmation about memory-hierarchy level involved in the error.
+*/
+   return (m->status & 0xff00) == BIT(8);
+   } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+   /*
+* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
+*
+* Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+* indicating a memory error. Bit 8 is used for indicating a
+* cache hierarchy error. The combination of bit 2 and bit 3
+* is used for indicating a `generic' cache hierarchy error
+* But we can't just blindly check the above bits, because if
+* bit 11 is set, then it is a bus/interconnect error - and
+* either way the above bits just gives more detail on what
+* bus/interconnect error happened. Note that bit 12 can be
+* ignored, as it's the "filter" bit.
+*/
+   return (m->status & 0xef80) == BIT(7) ||
+  (m->status & 0xef00) == BIT(8) ||
+  (m->status & 0xeffc) == 0xc;
+   }
+
+   return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -595,6 +630,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
struct mce m;
+   int severity;
int i;
 
this_cpu_inc(mce_poll_count);
@@ -630,6 +666,20 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
+
+   severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
+
+   /*
+* In the cases where we don't have a valid address after all,
+* do not add it into the ring buffer.
+*/
+   if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
+   if (m.status & MCI_STATUS_ADDRV) {
+   mce_ring_add(m.addr >> PAGE_SHIFT);
+   mce_schedule_work();
+   }
+   }
+
/*
 * Don't get the IP here because it's unlikely to
 * have anything to do with the actual error location.
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vg

[PATCH v3 1/2] x86, mce, severity: extend the mce_severity mechanism to handle UCNA/DEFERRED error

2014-11-07 Thread Chen Yucong

Until now, the mce_severity mechanism can only identify the severity
of UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it is not able to filter
out DEFERRED error for AMD platform.

This patch aims to extend the mce_severity mechanism for handling
UCNA/DEFERRED error. In order to do this, the patch introduces a new
severity level - MCE_UCNA/DEFERRED_SEVERITY.

In addition, mce_severity is specific to machine check exception,
and it will check MCIP/EIPV/RIPV bits. In order to use mce_severity
mechanism in non-exception context, the patch also introduces a new
argument (is_excp) for mce_severity. `is_excp' is used to explicitly
specify the calling context of mce_severity.

Signed-off-by: Chen Yucong 
---
 arch/x86/include/asm/mce.h|4 
 arch/x86/kernel/cpu/mcheck/mce-internal.h |4 +++-
 arch/x86/kernel/cpu/mcheck/mce-severity.c |   21 -
 arch/x86/kernel/cpu/mcheck/mce.c  |   14 --
 drivers/edac/mce_amd.h|3 ---
 5 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 276392f..51b26e89 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
 #define MCI_STATUS_S(1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
 
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED(1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON  (1ULL<<43)  /* access poisonous data */
+
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
  * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h 
b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 09edd0b..10b4690 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -3,6 +3,8 @@
 
 enum severity_level {
MCE_NO_SEVERITY,
+   MCE_DEFERRED_SEVERITY,
+   MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
MCE_KEEP_SEVERITY,
MCE_SOME_SEVERITY,
MCE_AO_SEVERITY,
@@ -21,7 +23,7 @@ struct mce_bank {
charattrname[ATTR_LEN]; /* attribute name */
 };
 
-int mce_severity(struct mce *a, int tolerant, char **msg);
+int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern struct mce_bank *mce_banks;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c 
b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c370e1c..c61feb3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -31,6 +31,7 @@
 
 enum context { IN_KERNEL = 1, IN_USER = 2 };
 enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
 
 static struct severity {
u64 mask;
@@ -40,6 +41,7 @@ static struct severity {
unsigned char mcgres;
unsigned char ser;
unsigned char context;
+   unsigned char excp;
unsigned char covered;
char *msg;
 } severities[] = {
@@ -48,6 +50,8 @@ static struct severity {
 #define  USER  .context = IN_USER
 #define  SER   .ser = SER_REQUIRED
 #define  NOSER .ser = NO_SER
+#define  EXCP  .excp = EXCP_CONTEXT
+#define  NOEXCP.excp = NO_EXCP
 #define  BITCLR(x) .mask = x, .result = 0
 #define  BITSET(x) .mask = x, .result = x
 #define  MCGMASK(x, y) .mcgmask = x, .mcgres = y
@@ -71,16 +75,20 @@ static struct severity {
/* When MCIP is not set something is very confused */
MCESEV(
PANIC, "MCIP not set in MCA handler",
-   MCGMASK(MCG_STATUS_MCIP, 0)
+   EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
),
/* Neither return not error IP -- no chance to recover -> PANIC */
MCESEV(
PANIC, "Neither restart nor error IP",
-   MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
+   EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
),
MCESEV(
PANIC, "In kernel and no restart IP",
-   KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+   EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+   ),
+   MCESEV(
+   DEFERRED, "Deferred error",
+   NOSER, 
MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
),
MCESEV(
KEEP, "Corrected error",
@@ -89,7 +97,7 @@ static struct severity {
 
/* ignore OVER for UCNA */
MCESEV(
-   KEEP, "Uncorrected no action required",
+   UCNA, "Uncorrected no action required",
SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
),
MCESEV(
@@ -178,8 +186,9 @@ static int error_conte

[PATCH v3 0/2]RAS: add the support for handling UCNA/DEFERRED error

2014-11-07 Thread Chen Yucong

Hi all,

At the suggestion of Boris, the first patch extends the mce_severity
mechanism for handling UCNA/DEFERRED error.
  Link: https://lkml.org/lkml/2014/10/23/190

v2:
The first patch has also eliminated a big hack to make mce_severity()
work when called from non-exception context on the advice of Tony and 
Boris.
  Link: https://lkml.org/lkml/2014/10/27/1017

And on the basis of the first patch, the second patch adds the support
for identifying and handling UCNA/DEFERRED error in machine_check_poll.

V3:
According to Boris, the second patch has also split `memory_error'
from mem_deferred_error so that the memory_error() function can be used
in other code paths separately. 
  Link: https://lkml.org/lkml/2014/11/6/452

Boris also reported the warning about "MCI_STATUS_DEFERRED" and
"MCI_STATUS_POISON" being redefined.

thx!
cyc

[PATCH v3 1/2] x86, mce, severity: extend the mce_severity
[PATCH v3 2/2] x86, mce: support memory error recovery for both UCNA
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 0/2]RAS: add the support for handling UCNA/DEFERRED error

2014-11-07 Thread Chen Yucong

Hi all,

At the suggestion of Boris, the first patch extends the mce_severity
mechanism for handling UCNA/DEFERRED error.
  Link: https://lkml.org/lkml/2014/10/23/190

v2:
The first patch has also eliminated a big hack to make mce_severity()
work when called from non-exception context on the advice of Tony and 
Boris.
  Link: https://lkml.org/lkml/2014/10/27/1017

And on the basis of the first patch, the second patch adds the support
for identifying and handling UCNA/DEFERRED error in machine_check_poll.

V3:
According to Boris, the second patch has also split `memory_error'
from mem_deferred_error so that the memory_error() function can be used
in other code paths separately. 
  Link: https://lkml.org/lkml/2014/11/6/452

Boris also reported the warning about "MCI_STATUS_DEFERRED" and
"MCI_STATUS_POISON" being redefined.

thx!
cyc

[PATCH v3 1/2] x86, mce, severity: extend the mce_severity
[PATCH v3 2/2] x86, mce: support memory error recovery for both UCNA
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH v3 1/2] x86, mce, severity: extend the mce_severity mechanism to handle UCNA/DEFERRED error

2014-11-07 Thread Chen Yucong

Until now, the mce_severity mechanism can only identify the severity
of UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it is not able to filter
out DEFERRED error for AMD platform.

This patch aims to extend the mce_severity mechanism for handling
UCNA/DEFERRED error. In order to do this, the patch introduces a new
severity level - MCE_UCNA/DEFERRED_SEVERITY.

In addition, mce_severity is specific to machine check exception,
and it will check MCIP/EIPV/RIPV bits. In order to use mce_severity
mechanism in non-exception context, the patch also introduces a new
argument (is_excp) for mce_severity. `is_excp' is used to explicitly
specify the calling context of mce_severity.

Signed-off-by: Chen Yucong sla...@gmail.com
---
 arch/x86/include/asm/mce.h|4 
 arch/x86/kernel/cpu/mcheck/mce-internal.h |4 +++-
 arch/x86/kernel/cpu/mcheck/mce-severity.c |   21 -
 arch/x86/kernel/cpu/mcheck/mce.c  |   14 --
 drivers/edac/mce_amd.h|3 ---
 5 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 276392f..51b26e89 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
 #define MCI_STATUS_S(1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
 
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED(1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON  (1ULL<<43)  /* access poisonous data */
+
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
  * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h 
b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 09edd0b..10b4690 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -3,6 +3,8 @@
 
 enum severity_level {
MCE_NO_SEVERITY,
+   MCE_DEFERRED_SEVERITY,
+   MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
MCE_KEEP_SEVERITY,
MCE_SOME_SEVERITY,
MCE_AO_SEVERITY,
@@ -21,7 +23,7 @@ struct mce_bank {
charattrname[ATTR_LEN]; /* attribute name */
 };
 
-int mce_severity(struct mce *a, int tolerant, char **msg);
+int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern struct mce_bank *mce_banks;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c 
b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c370e1c..c61feb3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -31,6 +31,7 @@
 
 enum context { IN_KERNEL = 1, IN_USER = 2 };
 enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
 
 static struct severity {
u64 mask;
@@ -40,6 +41,7 @@ static struct severity {
unsigned char mcgres;
unsigned char ser;
unsigned char context;
+   unsigned char excp;
unsigned char covered;
char *msg;
 } severities[] = {
@@ -48,6 +50,8 @@ static struct severity {
 #define  USER  .context = IN_USER
 #define  SER   .ser = SER_REQUIRED
 #define  NOSER .ser = NO_SER
+#define  EXCP  .excp = EXCP_CONTEXT
+#define  NOEXCP.excp = NO_EXCP
 #define  BITCLR(x) .mask = x, .result = 0
 #define  BITSET(x) .mask = x, .result = x
 #define  MCGMASK(x, y) .mcgmask = x, .mcgres = y
@@ -71,16 +75,20 @@ static struct severity {
/* When MCIP is not set something is very confused */
MCESEV(
PANIC, MCIP not set in MCA handler,
-   MCGMASK(MCG_STATUS_MCIP, 0)
+   EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
),
/* Neither return not error IP -- no chance to recover - PANIC */
MCESEV(
PANIC, Neither restart nor error IP,
-   MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
+   EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
),
MCESEV(
PANIC, In kernel and no restart IP,
-   KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+   EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+   ),
+   MCESEV(
+   DEFERRED, Deferred error,
+   NOSER, 
MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
),
MCESEV(
KEEP, Corrected error,
@@ -89,7 +97,7 @@ static struct severity {
 
/* ignore OVER for UCNA */
MCESEV(
-   KEEP, Uncorrected no action required,
+   UCNA, Uncorrected no action required,
SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
),
MCESEV(
@@ -178,8 +186,9 @@ static int error_context(struct mce *m)
return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
 }
 
-int mce_severity(struct mce *m

[PATCH v3 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-11-07 Thread Chen Yucong

Uncorrected no action required (UCNA) - is an uncorrected recoverable
machine check error that is not signaled via a machine check exception
and, instead, is reported to system software as a corrected machine
check error. UCNA errors indicate that some data in the system is
corrupted, but the data has not been consumed and the processor state
is valid and you may continue execution on this processor. UCNA errors
require no action from system software to continue execution. Note that
UCNA errors are supported by the processor only when IA32_MCG_CAP[24]
(MCG_SER_P) is set.
   -- Intel SDM Volume 3B

Deferred errors are errors that cannot be corrected by hardware, but
do not cause an immediate interruption in program flow, loss of data
integrity, or corruption of processor state. These errors indicate
that data has been corrupted but not consumed. Hardware writes information
to the status and address registers in the corresponding bank that
identifies the source of the error if deferred errors are enabled for
logging. Deferred errors are not reported via machine check exceptions;
they can be seen by polling the MCi_STATUS registers.
-- AMD64 APM Volume 2

As the two excerpts above show, both UCNA and Deferred errors are
detected errors that cannot be corrected by hardware, which makes them
very similar to Software Recoverable Action Optional (SRAO) errors.
Therefore, we can reuse some of the actions already used for handling
SRAO errors to handle UCNA and Deferred errors.

Signed-off-by: Chen Yucong sla...@gmail.com
---
 arch/x86/kernel/cpu/mcheck/mce.c |   50 ++
 1 file changed, 50 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 453e9bf..4b6e4cdf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,41 @@ static void mce_read_aux(struct mce *m, int i)
}
 }
 
+static bool memory_error(struct mce *m)
+{
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   if (c->x86_vendor == X86_VENDOR_AMD) {
+   /*
+* AMD BKDGs - Machine Check Error Codes
+*
+* Bit 8 of ErrCode[15:0] of MCi_STATUS is used for indicating
+* a memory-specific error. Note that this field encodes info-
+* rmation about memory-hierarchy level involved in the error.
+*/
+   return (m->status & 0xff00) == BIT(8);
+   } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+   /*
+* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
+*
+* Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+* indicating a memory error. Bit 8 is used for indicating a
+* cache hierarchy error. The combination of bit 2 and bit 3
+* is used for indicating a `generic' cache hierarchy error
+* But we can't just blindly check the above bits, because if
+* bit 11 is set, then it is a bus/interconnect error - and
+* either way the above bits just gives more detail on what
+* bus/interconnect error happened. Note that bit 12 can be
+* ignored, as it's the "filter" bit.
+*/
+   return (m->status & 0xef80) == BIT(7) ||
+  (m->status & 0xef00) == BIT(8) ||
+  (m->status & 0xeffc) == 0xc;
+   }
+
+   return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -595,6 +630,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
struct mce m;
+   int severity;
int i;
 
this_cpu_inc(mce_poll_count);
@@ -630,6 +666,20 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
+
+   severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
+
+   /*
+* In the cases where we don't have a valid address after all,
+* do not add it into the ring buffer.
+*/
+   if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
+   if (m.status & MCI_STATUS_ADDRV) {
+   mce_ring_add(m.addr >> PAGE_SHIFT);
+   mce_schedule_work();
+   }
+   }
+
/*
 * Don't get the IP here because it's unlikely to
 * have anything to do with the actual error location.
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org

Re: [PATCH 1/2 v2] x86, mce, severity: extend the mce_severity

2014-11-05 Thread Chen Yucong

On Wed, 2014-11-05 at 10:27 -0800, Tony Luck wrote:
> > +#define  ONEXCP.excp = NO_EXCP
> 
I'm sorry, this is a typing error. Thanks!

> Shouldn't this be named "NOEXCP" and used in the initializations
> for the deferred and UCNA table entries?
> 
In fact, "NOEXCP" can be used in the initialization for the deferred
and UCNA table entries. But it may affect the following snippet in
do_machine_check().

 /*
  * When machine check was for corrected/deferred handler don't
  * touch, unless we're panicing.
  */
 if ((severity == MCE_KEEP_SEVERITY ||
  severity == MCE_UCNA_SEVERITY) && !no_way_out)
  continue;

If `no_way_out' equals 1, we may need to dump/decode corrected/deferred 
error information. So if we use "NOEXCP" to initialize the deferred and
UCNA table entries, do_machine_check will skip checking deferred/UCNA
entry when `no_way_out' is set to 1.

thx!
cyc

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 1/2 v2] x86, mce, severity: extend the mce_severity

2014-11-05 Thread Chen Yucong

On Wed, 2014-11-05 at 10:27 -0800, Tony Luck wrote:
  +#define  ONEXCP.excp = NO_EXCP
 
I'm sorry, this is a typing error. Thanks!

 Shouldn't this be named NOEXCP and used in the initializations
 for the deferred and UCNA table entries?
 
In fact, NOEXCP can be used in the initialization for the deferred
and UCNA table entries. But it may affect the following snippet in
do_machine_check().

 /*
  * When machine check was for corrected/deferred handler don't
  * touch, unless we're panicing.
  */
 if ((severity == MCE_KEEP_SEVERITY ||
  severity == MCE_UCNA_SEVERITY) && !no_way_out)
  continue;

If `no_way_out' equals 1, we may need to dump/decode corrected/deferred 
error information. So if we use NOEXCP to initialize the deferred and
UCNA table entries, do_machine_check will skip checking deferred/UCNA
entry when `no_way_out' is set to 1.

thx!
cyc

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/2 v2] x86, mce, severity: extend the mce_severity

2014-11-04 Thread Chen Yucong

Until now, the mce_severity mechanism can only identify the severity
of UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it is not able to filter
out DEFERRED error for AMD platform.

This patch aims to extend the mce_severity mechanism for handling
UCNA/DEFERRED error. In order to do this, the patch introduces a new
severity level - MCE_UCNA/DEFERRED_SEVERITY.

In addition, mce_severity is specific to machine check exception,
and it will check MCIP/EIPV/RIPV bits. In order to use mce_severity
mechanism in non-exception context, the patch also introduces a new
argument (is_excp) for mce_severity. `is_excp' is used to explicitly
specify the calling context of mce_severity.

Signed-off-by: Chen Yucong 
---
 arch/x86/include/asm/mce.h|4 
 arch/x86/kernel/cpu/mcheck/mce-internal.h |4 +++-
 arch/x86/kernel/cpu/mcheck/mce-severity.c |   21 -
 arch/x86/kernel/cpu/mcheck/mce.c  |   14 --
 4 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 958b90f..40b35a5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
 #define MCI_STATUS_S(1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
 
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED(1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON  (1ULL<<43)  /* access poisonous data */
+
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
  * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h 
b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 09edd0b..10b4690 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -3,6 +3,8 @@
 
 enum severity_level {
MCE_NO_SEVERITY,
+   MCE_DEFERRED_SEVERITY,
+   MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
MCE_KEEP_SEVERITY,
MCE_SOME_SEVERITY,
MCE_AO_SEVERITY,
@@ -21,7 +23,7 @@ struct mce_bank {
charattrname[ATTR_LEN]; /* attribute name */
 };
 
-int mce_severity(struct mce *a, int tolerant, char **msg);
+int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern struct mce_bank *mce_banks;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c 
b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c370e1c..d31618d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -31,6 +31,7 @@
 
 enum context { IN_KERNEL = 1, IN_USER = 2 };
 enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
 
 static struct severity {
u64 mask;
@@ -40,6 +41,7 @@ static struct severity {
unsigned char mcgres;
unsigned char ser;
unsigned char context;
+   unsigned char excp;
unsigned char covered;
char *msg;
 } severities[] = {
@@ -48,6 +50,8 @@ static struct severity {
 #define  USER  .context = IN_USER
 #define  SER   .ser = SER_REQUIRED
 #define  NOSER .ser = NO_SER
+#define  EXCP  .excp = EXCP_CONTEXT
+#define  ONEXCP.excp = NO_EXCP
 #define  BITCLR(x) .mask = x, .result = 0
 #define  BITSET(x) .mask = x, .result = x
 #define  MCGMASK(x, y) .mcgmask = x, .mcgres = y
@@ -71,16 +75,20 @@ static struct severity {
/* When MCIP is not set something is very confused */
MCESEV(
PANIC, "MCIP not set in MCA handler",
-   MCGMASK(MCG_STATUS_MCIP, 0)
+   EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
),
/* Neither return not error IP -- no chance to recover -> PANIC */
MCESEV(
PANIC, "Neither restart nor error IP",
-   MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
+   EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
),
MCESEV(
PANIC, "In kernel and no restart IP",
-   KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+   EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+   ),
+   MCESEV(
+   DEFERRED, "Deferred error",
+   NOSER, 
MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
),
MCESEV(
KEEP, "Corrected error",
@@ -89,7 +97,7 @@ static struct severity {
 
/* ignore OVER for UCNA */
MCESEV(
-   KEEP, "Uncorrected no action required",
+   UCNA, "Uncorrected no action required",
SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
),
MCESEV(
@@ -178,8 +186,9 @@ static int error_context(struct mce *m)
return ((m->cs & 3) == 3) ? IN_US

[PATCH 2/2 v2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-11-04 Thread Chen Yucong

Uncorrected no action required (UCNA) - is a UCR error that is not
signaled via a machine check exception and, instead, is reported to
system software as a corrected machine check error. UCNA errors indicate
that some data in the system is corrupted, but the data has not been
consumed and the processor state is valid and you may continue execution
on this processor. UCNA errors require no action from system software
to continue execution. Note that UCNA errors are supported by the
processor only when IA32_MCG_CAP[24] (MCG_SER_P) is set.
   -- Intel SDM Volume 3B

Deferred errors are errors that cannot be corrected by hardware, but
do not cause an immediate interruption in program flow, loss of data
integrity, or corruption of processor state. These errors indicate
that data has been corrupted but not consumed. Hardware writes information
to the status and address registers in the corresponding bank that
identifies the source of the error if deferred errors are enabled for
logging. Deferred errors are not reported via machine check exceptions;
they can be seen by polling the MCi_STATUS registers.
-- AMD64 APM Volume 2

As the two excerpts above show, both UCNA and Deferred errors are
detected errors that cannot be corrected by hardware, which makes them
very similar to Software Recoverable Action Optional (SRAO) errors.
Therefore, we can reuse some of the actions already used for handling
SRAO errors to handle UCNA and Deferred errors.

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/cpu/mcheck/mce.c |   50 ++
 1 file changed, 50 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 453e9bf..37f7649 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,46 @@ static void mce_read_aux(struct mce *m, int i)
}
 }
 
+static bool mem_deferred_error(struct mce *m)
+{
+   int severity;
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   severity = mce_severity(m, mca_cfg.tolerant, NULL, false);
+
+   if (c->x86_vendor == X86_VENDOR_AMD) {
+   /*
+* AMD BKDGs - Machine Check Error Codes
+*
+* Bit 8 of ErrCode[15:0] of MCi_STATUS is used for indicating
+* a memory-specific error. Note that this field encodes info-
+* rmation about memory-hierarchy level involved in the error.
+*/
+   if (severity == MCE_DEFERRED_SEVERITY)
+   return  (m->status & 0xff00) == BIT(8);
+   } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+   /*
+* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
+*
+* Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+* indicating a memory error. Bit 8 is used for indicating a
+* cache hierarchy error. The combination of bit 2 and bit 3
+* is used for indicating a `generic' cache hierarchy error
+* But we can't just blindly check the above bits, because if
+* bit 11 is set, then it is a bus/interconnect error - and
+* either way the above bits just gives more detail on what
+* bus/interconnect error happened. Note that bit 12 can be
+* ignored, as it's the "filter" bit.
+*/
+   if (severity == MCE_UCNA_SEVERITY)
+   return (m->status & 0xef80) == BIT(7) ||
+  (m->status & 0xef00) == BIT(8) ||
+  (m->status & 0xeffc) == 0xc;
+   }
+
+   return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -630,6 +670,16 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
+
+   /*
+* In the cases where we don't have a valid address after all,
+* do not add it into the ring buffer.
+*/
+   if (mem_deferred_error(&m) && (m.status & MCI_STATUS_ADDRV)) {
+   mce_ring_add(m.addr >> PAGE_SHIFT);
+   mce_schedule_work();
+   }
+
/*
 * Don't get the IP here because it's unlikely to
 * have anything to do with the actual error location.
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/2 v2] RAS: add the support for handling UCNA/DEFERRED error

2014-11-04 Thread Chen Yucong

Hi all,

At the suggestion of Boris, the first patch extends the mce_severity
mechanism for handling UCNA/DEFERRED error.
  Link: https://lkml.org/lkml/2014/10/23/190

The first patch has also eliminated a big hack to make mce_severity() work
when called from non-exception context on the advice of Tony and Boris.
  Link: https://lkml.org/lkml/2014/10/27/1017

And on the basis of the first patch, the second patch adds the support
for identifying and handling UCNA/DEFERRED error in machine_check_poll.
 
[PATCH 1/2] x86, mce, severity: extend the mce_severity
[PATCH 2/2] x86, mce: support memory error recovery for both UCNA

thx!
cyc
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/2 v2] RAS: add the support for handling UCNA/DEFERRED error

2014-11-04 Thread Chen Yucong

Hi all,

At the suggestion of Boris, the first patch extends the mce_severity
mechanism for handling UCNA/DEFERRED error.
  Link: https://lkml.org/lkml/2014/10/23/190

The first patch has also eliminated a big hack to make mce_severity() work
when called from non-exception context on the advice of Tony and Boris.
  Link: https://lkml.org/lkml/2014/10/27/1017

And on the basis of the first patch, the second patch adds the support
for identifying and handling UCNA/DEFERRED error in machine_check_poll.
 
[PATCH 1/2] x86, mce, severity: extend the mce_severity
[PATCH 2/2] x86, mce: support memory error recovery for both UCNA

thx!
cyc
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/2 v2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-11-04 Thread Chen Yucong

Uncorrected no action required (UCNA) - is a UCR error that is not
signaled via a machine check exception and, instead, is reported to
system software as a corrected machine check error. UCNA errors indicate
that some data in the system is corrupted, but the data has not been
consumed and the processor state is valid and you may continue execution
on this processor. UCNA errors require no action from system software
to continue execution. Note that UCNA errors are supported by the
processor only when IA32_MCG_CAP[24] (MCG_SER_P) is set.
   -- Intel SDM Volume 3B

Deferred errors are errors that cannot be corrected by hardware, but
do not cause an immediate interruption in program flow, loss of data
integrity, or corruption of processor state. These errors indicate
that data has been corrupted but not consumed. Hardware writes information
to the status and address registers in the corresponding bank that
identifies the source of the error if deferred errors are enabled for
logging. Deferred errors are not reported via machine check exceptions;
they can be seen by polling the MCi_STATUS registers.
-- AMD64 APM Volume 2

As the two excerpts above show, both UCNA and Deferred errors are
detected errors that cannot be corrected by hardware, which makes them
very similar to Software Recoverable Action Optional (SRAO) errors.
Therefore, we can reuse some of the actions already used for handling
SRAO errors to handle UCNA and Deferred errors.

Signed-off-by: Chen Yucong sla...@gmail.com
---
 arch/x86/kernel/cpu/mcheck/mce.c |   50 ++
 1 file changed, 50 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 453e9bf..37f7649 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,46 @@ static void mce_read_aux(struct mce *m, int i)
}
 }
 
+static bool mem_deferred_error(struct mce *m)
+{
+   int severity;
+   struct cpuinfo_x86 *c = boot_cpu_data;
+
+   severity = mce_severity(m, mca_cfg.tolerant, NULL, false);
+
+   if (c-x86_vendor == X86_VENDOR_AMD) {
+   /*
+* AMD BKDGs - Machine Check Error Codes
+*
+* Bit 8 of ErrCode[15:0] of MCi_STATUS is used for indicating
+* a memory-specific error. Note that this field encodes info-
+* rmation about memory-hierarchy level involved in the error.
+*/
+   if (severity == MCE_DEFERRED_SEVERITY)
+   return  (m-status  0xff00) == BIT(8);
+   } else if (c-x86_vendor == X86_VENDOR_INTEL) {
+   /*
+* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
+*
+* Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+* indicating a memory error. Bit 8 is used for indicating a
+* cache hierarchy error. The combination of bit 2 and bit 3
+* is used for indicating a `generic' cache hierarchy error
+* But we can't just blindly check the above bits, because if
+* bit 11 is set, then it is a bus/interconnect error - and
+* either way the above bits just gives more detail on what
+* bus/interconnect error happened. Note that bit 12 can be
+* ignored, as it's the filter bit.
+*/
+   if (severity == MCE_UCNA_SEVERITY)
+   return (m-status  0xef80) == BIT(7) ||
+  (m-status  0xef00) == BIT(8) ||
+  (m-status  0xeffc) == 0xc;
+   }
+
+   return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -630,6 +670,16 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 
if (!(flags  MCP_TIMESTAMP))
m.tsc = 0;
+
+   /*
+* In the cases where we don't have a valid address after all,
+* do not add it into the ring buffer.
+*/
+   if (mem_deferred_error(m)  (m.status  MCI_STATUS_ADDRV)) {
+   mce_ring_add(m.addr  PAGE_SHIFT);
+   mce_schedule_work();
+   }
+
/*
 * Don't get the IP here because it's unlikely to
 * have anything to do with the actual error location.
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/2 v2] x86, mce, severity: extend the mce_severity

2014-11-04 Thread Chen Yucong

Until now, the mce_severity mechanism can only identify the severity
of a UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it is not able to filter
out DEFERRED errors on the AMD platform.

This patch aims to extend the mce_severity mechanism for handling
UCNA/DEFERRED error. In order to do this, the patch introduces a new
severity level - MCE_UCNA/DEFERRED_SEVERITY.

In addition, mce_severity is specific to machine check exception,
and it will check MCIP/EIPV/RIPV bits. In order to use mce_severity
mechanism in non-exception context, the patch also introduces a new
argument (is_excp) for mce_severity. `is_excp' is used to explicitly
specify the calling context of mce_severity.

Signed-off-by: Chen Yucong sla...@gmail.com
---
 arch/x86/include/asm/mce.h|4 
 arch/x86/kernel/cpu/mcheck/mce-internal.h |4 +++-
 arch/x86/kernel/cpu/mcheck/mce-severity.c |   21 -
 arch/x86/kernel/cpu/mcheck/mce.c  |   14 --
 4 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 958b90f..40b35a5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
 #define MCI_STATUS_S(1ULL56)  /* Signaled machine check */
 #define MCI_STATUS_AR   (1ULL55)  /* Action required */
 
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED(1ULL44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON  (1ULL43)  /* access poisonous data */
+
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
  * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h 
b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 09edd0b..10b4690 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -3,6 +3,8 @@
 
 enum severity_level {
MCE_NO_SEVERITY,
+   MCE_DEFERRED_SEVERITY,
+   MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
MCE_KEEP_SEVERITY,
MCE_SOME_SEVERITY,
MCE_AO_SEVERITY,
@@ -21,7 +23,7 @@ struct mce_bank {
charattrname[ATTR_LEN]; /* attribute name */
 };
 
-int mce_severity(struct mce *a, int tolerant, char **msg);
+int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern struct mce_bank *mce_banks;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c 
b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c370e1c..d31618d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -31,6 +31,7 @@
 
 enum context { IN_KERNEL = 1, IN_USER = 2 };
 enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
 
 static struct severity {
u64 mask;
@@ -40,6 +41,7 @@ static struct severity {
unsigned char mcgres;
unsigned char ser;
unsigned char context;
+   unsigned char excp;
unsigned char covered;
char *msg;
 } severities[] = {
@@ -48,6 +50,8 @@ static struct severity {
 #define  USER  .context = IN_USER
 #define  SER   .ser = SER_REQUIRED
 #define  NOSER .ser = NO_SER
+#define  EXCP  .excp = EXCP_CONTEXT
+#define  ONEXCP.excp = NO_EXCP
 #define  BITCLR(x) .mask = x, .result = 0
 #define  BITSET(x) .mask = x, .result = x
 #define  MCGMASK(x, y) .mcgmask = x, .mcgres = y
@@ -71,16 +75,20 @@ static struct severity {
/* When MCIP is not set something is very confused */
MCESEV(
PANIC, MCIP not set in MCA handler,
-   MCGMASK(MCG_STATUS_MCIP, 0)
+   EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
),
/* Neither return not error IP -- no chance to recover - PANIC */
MCESEV(
PANIC, Neither restart nor error IP,
-   MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
+   EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
),
MCESEV(
PANIC, In kernel and no restart IP,
-   KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+   EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+   ),
+   MCESEV(
+   DEFERRED, Deferred error,
+   NOSER, 
MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
),
MCESEV(
KEEP, Corrected error,
@@ -89,7 +97,7 @@ static struct severity {
 
/* ignore OVER for UCNA */
MCESEV(
-   KEEP, Uncorrected no action required,
+   UCNA, Uncorrected no action required,
SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
),
MCESEV(
@@ -178,8 +186,9 @@ static int error_context(struct mce *m)
return ((m-cs  3) == 3) ? IN_USER : IN_KERNEL;
 }
 
-int mce_severity(struct mce *m, int tolerant, char **msg)
+int mce_severity(struct mce *m, int

Re: [PATCH 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-11-03 Thread Chen Yucong

On Wed, 2014-10-29 at 10:16 +0800, Chen Yucong wrote:
> On Mon, 2014-10-27 at 23:10 +, Luck, Tony wrote:
> > +   m->mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
> > +   severity = mce_severity(m, mca_cfg.tolerant, NULL);
> > 
> > This seems a big hack to make mce_severity() work when called from
> > CMCI context (when MCG_STATUS register is not set).  It would also
> > be confusing as the subsequent logged entries would show MCIP and RIPV
> > bits set in the mcg_status.
> > 
> > If someone can think of a less hacky way to do this, that would be good. 
> > Otherwise
> > the code needs a comment, and should reset m->mcg_status to avoid making 
> > logs
> > that have incorrect data.
> > 
> Hi all,
> 
> At the suggestion of Tony, this patch add a comment, and restore m->mcgstatus 
> to avoid
> making logs that have incorrect data.
> 

Hi Tony,

Do you have any more comments for the two patches?

thx!
cyc
> 
> From: Chen Yucong 
> 
> Signed-off-by: Chen Yucong 
> ---
>  arch/x86/kernel/cpu/mcheck/mce.c |   64 
> --
>  1 file changed, 62 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c 
> b/arch/x86/kernel/cpu/mcheck/mce.c
> index fdc422e..d285d26 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -575,6 +575,56 @@ static void mce_read_aux(struct mce *m, int i)
>   }
>  }
>  
> +static bool mem_deferred_error(struct mce *m)
> +{
> + int severity;
> + u8 mcgs = m->mcgstatus & 0xff;
> + struct cpuinfo_x86 *c = _cpu_data;
> +
> + /*
> +  * mce_severity is specific to machine check exception, and it will
> +  * check MCIP/EIPV/RIPV bits. In order to get pass the check, we need
> +  * to set MCIP and RIPV.
> +  */
> + m->mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
> + severity = mce_severity(m, mca_cfg.tolerant, NULL);
> +
> + /* restore the original value of m->mcgstatus */
> + m->mcgstatus = (m->mcgstatus & ~0xff) | mcgs;
> +
> + if (c->x86_vendor == X86_VENDOR_AMD) {
> + /*
> +  * AMD BKDGs - Machine Check Error Codes
> +  *
> +  * Bit 8 of ErrCode[15:0] of MCi_STATUS is used for indicating
> +  * a memory-specific error. Note that this field encodes info-
> +  * rmation about memory-hierarchy level involved in the error.
> +  */
> + if (severity == MCE_DEFERRED_SEVERITY)
> + return  (m->status & 0xff00) == BIT(8);
> + } else if (c->x86_vendor == X86_VENDOR_INTEL) {
> + /*
> +  * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
> +  *
> +  * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
> +  * indicating a memory error. Bit 8 is used for indicating a
> +  * cache hierarchy error. The combination of bit 2 and bit 3
> +  * is used for indicating a `generic' cache hierarchy error
> +  * But we can't just blindly check the above bits, because if
> +  * bit 11 is set, then it is a bus/interconnect error - and
> +  * either way the above bits just gives more detail on what
> +  * bus/interconnect error happened. Note that bit 12 can be
> +  * ignored, as it's the "filter" bit.
> +  */
> + if (severity == MCE_UCNA_SEVERITY)
> + return (m->status & 0xef80) == BIT(7) ||
> +(m->status & 0xef00) == BIT(8) ||
> +(m->status & 0xeffc) == 0xc;
> + }
> +
> + return false;
> +}
> +
>  DEFINE_PER_CPU(unsigned, mce_poll_count);
>  
>  /*
> @@ -630,6 +680,16 @@ void machine_check_poll(enum mcp_flags flags, 
> mce_banks_t *b)
>  
>   if (!(flags & MCP_TIMESTAMP))
>   m.tsc = 0;
> +
> + /*
> +  * In the cases where we don't have a valid address after all,
> +  * do not add it into the ring buffer.
> +  */
> + if (mem_deferred_error() && (m.status & MCI_STATUS_ADDRV)) {
> + mce_ring_add(m.addr >> PAGE_SHIFT);
> + mce_schedule_work();
> + }
> +
>   /*
>* Don't get the IP here because it's unlikely to
>* have anything to do with the actual error location.
> @@ -1098,8 +1158,8 @@ void do_machine_check(struct pt_regs *regs, long

Re: [PATCH 2/2] x86, mce, amd: extend mce-inject for support threshold interrupt event injection on AMD platform

2014-11-03 Thread Chen Yucong

On Mon, 2014-11-03 at 19:00 +0100, Borislav Petkov wrote:
> On Mon, Nov 03, 2014 at 11:51:47AM -0600, Aravind Gopalakrishnan wrote:
> > On 11/3/2014 11:05 AM, Aravind Gopalakrishnan wrote:
> > >
> > >There are three ways that have been used to report machine check event.
> > >And they are MCE, CMCI/Threshold Interrupt, and POLL. On the Intel
> > >platform, CMCI/Threshold Interrupt and POLL share the same event handler
> > >- machine_check_poll(). However, on the AMD platform, they have a
> > >separate event handler. amd_threshold_interrupt() is used for handling
> > >Threshold Interrupt event. And machine_check_poll() has been used for
> > >polling other events.
> > >
> > >This patch introduces a new flag MCJ_INTERRUPT that will be used to
> > >separate CMCI/Threshold Interrupt and POLL handler in mce-inject.
> > >
> > >Signed-off-by: Chen Yucong mailto:sla...@gmail.com>>
> > >---
> > > arch/x86/include/asm/mce.h  |5 +++--
> > > arch/x86/kernel/cpu/mcheck/mce-inject.c |   16 
> > > arch/x86/kernel/cpu/mcheck/threshold.c  |1 +
> > > 3 files changed, 20 insertions(+), 2 deletions(-)
> > >
> > 
> > 
> > We currently test decoding logic on AMD by performing mce injections using
> > edac/mce_amd_inj.c,
> > So instead of modifying mce-inject just for testing
> > amd_threshold_interrupt(),
> > Why not put it under mce_amd_inj? (It's AMD specific code anyway)
> 
> Right, I think this is supposed to be vendor-agnostic as it is calling
> mce_threshold_vector() directly.
> 
I'm not sure I understand your point. But mce_threshold_vector is shared
by AMD and Intel. 

> Btw, I wouldn't mind if someone would sit down and unify those injection
> methods and come up with a saner interface which can actually be used by
> humans, not those yucky files you feed mce-inject with...
> 
Anyway, I think it can work well for testing EDAC/rasdaemon/mcelog
decoding logic. So I suggest you try it, and you can add it to
your list of test tools.

thx!
cyc

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] x86, mce, amd: extend mce-inject for support threshold interrupt event injection on AMD platform

2014-11-03 Thread Chen Yucong

On Mon, 2014-11-03 at 11:51 -0600, Aravind Gopalakrishnan wrote:
> On 11/3/2014 11:05 AM, Aravind Gopalakrishnan wrote:
> >
> > There are three ways that have been used to report machine check event.
> > And they are MCE, CMCI/Threshold Interrupt, and POLL. On the Intel
> > platform, CMCI/Threshold Interrupt and POLL share the same event handler
> > - machine_check_poll(). However, on the AMD platform, they have a
> > separate event handler. amd_threshold_interrupt() is used for handling
> > Threshold Interrupt event. And machine_check_poll() has been used for
> > polling other events.
> >
> > This patch introduces a new flag MCJ_INTERRUPT that will be used to
> > separate CMCI/Threshold Interrupt and POLL handler in mce-inject.
> >
> > Signed-off-by: Chen Yucong mailto:sla...@gmail.com>>
> > ---
> >  arch/x86/include/asm/mce.h  |5 +++--
> >  arch/x86/kernel/cpu/mcheck/mce-inject.c |   16 
> >  arch/x86/kernel/cpu/mcheck/threshold.c  |1 +
> >  3 files changed, 20 insertions(+), 2 deletions(-)
> >
> 
> 
> We currently test decoding logic on AMD by performing mce injections 
> using edac/mce_amd_inj.c,
> So instead of modifying mce-inject just for testing 
> amd_threshold_interrupt(),
> Why not put it under mce_amd_inj? (It's AMD specific code anyway)
> 
Until now, edac/mce_amd_inj.c has only been used for testing EDAC decoding
logic on AMD. But there are other tools that can also be used to decode
machine check error information, such as `rasdaemon' and `mcelog'. If
we want to use mce_amd_inj.c for error injection, we may need to move
it.

In addition, EDAC decoding logic does not need to access machine check 
specific `MSRs', so edac/mce_amd_inj.c can work well for error
injection.

Finally, amd_threshold_interrupt is AMD specific code,
intel_threshold_interrupt is also Intel specific code.

thx!
cyc


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] x86, mce, amd: extend mce-inject for support threshold interrupt event injection on AMD platform

2014-11-03 Thread Chen Yucong

On Mon, 2014-11-03 at 11:51 -0600, Aravind Gopalakrishnan wrote:
 On 11/3/2014 11:05 AM, Aravind Gopalakrishnan wrote:
 
  There are three ways that have been used to report machine check event.
  And they are MCE, CMCI/Threshold Interrupt, and POLL. On the Intel
  platform, CMCI/Threshold Interrupt and POLL share the same event handler
  - machine_check_poll(). However, on the AMD platform, they have a
  separate event handler. amd_threshold_interrupt() is used for handling
  Threshold Interrupt event. And machine_check_poll() has been used for
  polling other events.
 
  This patch introduces a new flag MCJ_INTERRUPT that will be used to
  separate CMCI/Threshold Interrupt and POLL handler in mce-inject.
 
  Signed-off-by: Chen Yucong sla...@gmail.com mailto:sla...@gmail.com
  ---
   arch/x86/include/asm/mce.h  |5 +++--
   arch/x86/kernel/cpu/mcheck/mce-inject.c |   16 
   arch/x86/kernel/cpu/mcheck/threshold.c  |1 +
   3 files changed, 20 insertions(+), 2 deletions(-)
 
 
 
 We currently test decoding logic on AMD by performing mce injections 
 using edac/mce_amd_inj.c,
 So instead of modifying mce-inject just for testing 
 amd_threshold_interrupt(),
 Why not put it under mce_amd_inj? (It's AMD specific code anyway)
 
Until now, edac/mce_amd_inj.c has only been used for testing EDAC decoding
logic on AMD. But there are other tools that can also be used to decode
machine check error information, such as `rasdaemon' and `mcelog'. If
we want to use mce_amd_inj.c for error injection, we may need to move
it.

In addition, EDAC decoding logic does not need to access machine check 
specific `MSRs', so edac/mce_amd_inj.c can work well for error
injection.

Finally, amd_threshold_interrupt is AMD specific code,
intel_threshold_interrupt is also Intel specific code.

thx!
cyc


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] x86, mce, amd: extend mce-inject for support threshold interrupt event injection on AMD platform

2014-11-03 Thread Chen Yucong

On Mon, 2014-11-03 at 19:00 +0100, Borislav Petkov wrote:
 On Mon, Nov 03, 2014 at 11:51:47AM -0600, Aravind Gopalakrishnan wrote:
  On 11/3/2014 11:05 AM, Aravind Gopalakrishnan wrote:
  
  There are three ways that have been used to report machine check event.
  And they are MCE, CMCI/Threshold Interrupt, and POLL. On the Intel
  platform, CMCI/Threshold Interrupt and POLL share the same event handler
  - machine_check_poll(). However, on the AMD platform, they have a
  separate event handler. amd_threshold_interrupt() is used for handling
  Threshold Interrupt event. And machine_check_poll() has been used for
  polling other events.
  
  This patch introduces a new flag MCJ_INTERRUPT that will be used to
  separate CMCI/Threshold Interrupt and POLL handler in mce-inject.
  
  Signed-off-by: Chen Yucong sla...@gmail.com mailto:sla...@gmail.com
  ---
   arch/x86/include/asm/mce.h  |5 +++--
   arch/x86/kernel/cpu/mcheck/mce-inject.c |   16 
   arch/x86/kernel/cpu/mcheck/threshold.c  |1 +
   3 files changed, 20 insertions(+), 2 deletions(-)
  
  
  
  We currently test decoding logic on AMD by performing mce injections using
  edac/mce_amd_inj.c,
  So instead of modifying mce-inject just for testing
  amd_threshold_interrupt(),
  Why not put it under mce_amd_inj? (It's AMD specific code anyway)
 
 Right, I think this is supposed to be vendor-agnostic as it is calling
 mce_threshold_vector() directly.
 
I'm not sure I understand your point. But mce_threshold_vector is shared
by AMD and Intel. 

 Btw, I wouldn't mind if someone would sit down and unify those injection
 methods and come up with a saner interface which can actually be used by
 humans, not those yucky files you feed mce-inject with...
 
Anyway, I think it can work well for testing EDAC/rasdaemon/mcelog
decoding logic. So I suggest you try it, and you can add it to
your list of test tools.

thx!
cyc

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-11-03 Thread Chen Yucong

On Wed, 2014-10-29 at 10:16 +0800, Chen Yucong wrote:
 On Mon, 2014-10-27 at 23:10 +, Luck, Tony wrote:
  +   m-mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
  +   severity = mce_severity(m, mca_cfg.tolerant, NULL);
  
  This seems a big hack to make mce_severity() work when called from
  CMCI context (when MCG_STATUS register is not set).  It would also
  be confusing as the subsequent logged entries would show MCIP and RIPV
  bits set in the mcg_status.
  
  If someone can think of a less hacky way to do this, that would be good. 
  Otherwise
  the code needs a comment, and should reset m-mcg_status to avoid making 
  logs
  that have incorrect data.
  
 Hi all,
 
 At the suggestion of Tony, this patch add a comment, and restore m-mcgstatus 
 to avoid
 making logs that have incorrect data.
 

Hi Tony,

Do you have any more comments for the two patches?

thx!
cyc
 
 From: Chen Yucong sla...@gmail.com
 
 Signed-off-by: Chen Yucong sla...@gmail.com
 ---
  arch/x86/kernel/cpu/mcheck/mce.c |   64 
 --
  1 file changed, 62 insertions(+), 2 deletions(-)
 
 diff --git a/arch/x86/kernel/cpu/mcheck/mce.c 
 b/arch/x86/kernel/cpu/mcheck/mce.c
 index fdc422e..d285d26 100644
 --- a/arch/x86/kernel/cpu/mcheck/mce.c
 +++ b/arch/x86/kernel/cpu/mcheck/mce.c
 @@ -575,6 +575,56 @@ static void mce_read_aux(struct mce *m, int i)
   }
  }
  
 +static bool mem_deferred_error(struct mce *m)
 +{
 + int severity;
 + u8 mcgs = m-mcgstatus  0xff;
 + struct cpuinfo_x86 *c = boot_cpu_data;
 +
 + /*
 +  * mce_severity is specific to machine check exception, and it will
 +  * check MCIP/EIPV/RIPV bits. In order to get pass the check, we need
 +  * to set MCIP and RIPV.
 +  */
 + m-mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
 + severity = mce_severity(m, mca_cfg.tolerant, NULL);
 +
 + /* restore the original value of m-mcgstatus */
 + m-mcgstatus = (m-mcgstatus  ~0xff) | mcgs;
 +
 + if (c-x86_vendor == X86_VENDOR_AMD) {
 + /*
 +  * AMD BKDGs - Machine Check Error Codes
 +  *
 +  * Bit 8 of ErrCode[15:0] of MCi_STATUS is used for indicating
 +  * a memory-specific error. Note that this field encodes info-
 +  * rmation about memory-hierarchy level involved in the error.
 +  */
 + if (severity == MCE_DEFERRED_SEVERITY)
 + return  (m-status  0xff00) == BIT(8);
 + } else if (c-x86_vendor == X86_VENDOR_INTEL) {
 + /*
 +  * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
 +  *
 +  * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
 +  * indicating a memory error. Bit 8 is used for indicating a
 +  * cache hierarchy error. The combination of bit 2 and bit 3
 +  * is used for indicating a `generic' cache hierarchy error
 +  * But we can't just blindly check the above bits, because if
 +  * bit 11 is set, then it is a bus/interconnect error - and
 +  * either way the above bits just gives more detail on what
 +  * bus/interconnect error happened. Note that bit 12 can be
 +  * ignored, as it's the filter bit.
 +  */
 + if (severity == MCE_UCNA_SEVERITY)
 + return (m-status  0xef80) == BIT(7) ||
 +(m-status  0xef00) == BIT(8) ||
 +(m-status  0xeffc) == 0xc;
 + }
 +
 + return false;
 +}
 +
  DEFINE_PER_CPU(unsigned, mce_poll_count);
  
  /*
 @@ -630,6 +680,16 @@ void machine_check_poll(enum mcp_flags flags, 
 mce_banks_t *b)
  
   if (!(flags  MCP_TIMESTAMP))
   m.tsc = 0;
 +
 + /*
 +  * In the cases where we don't have a valid address after all,
 +  * do not add it into the ring buffer.
 +  */
 + if (mem_deferred_error(m)  (m.status  MCI_STATUS_ADDRV)) {
 + mce_ring_add(m.addr  PAGE_SHIFT);
 + mce_schedule_work();
 + }
 +
   /*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
 @@ -1098,8 +1158,8 @@ void do_machine_check(struct pt_regs *regs, long 
 error_code)
   severity = mce_severity(m, cfg-tolerant, NULL);
  
   /*
 -  * When machine check was for corrected handler don't touch,
 -  * unless we're panicing.
 +  * When machine check was for corrected/deferred handler don't
 +  * touch, unless we're panicing.
*/
   if ((severity == MCE_KEEP_SEVERITY ||
severity == MCE_UCNA_SEVERITY)  !no_way_out)


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord

[PATCH] separate CMCI/Threshold Interrupt and POLL in mce-inject

2014-10-30 Thread Chen Yucong

This patch introduces a new flag MCJ_INTERRUPT that will be used
to separate CMCI/Threshold Interrupt and POLL in mce-inject.

Signed-off-by: Chen Yucong 
---
 mce.h   |5 +++--
 mce.lex |1 +
 mce.y   |6 +-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/mce.h b/mce.h
index c0668ad..d0bd39a 100644
--- a/mce.h
+++ b/mce.h
@@ -38,8 +38,9 @@
 #define MCJ_CTX_PROCESS1/* inject context: process */
 #define MCJ_CTX_IRQ2/* inject context: IRQ */
 #define MCJ_NMI_BROADCAST  4/* do NMI broadcasting */
-#define MCJ_EXCEPTION  8/* raise as exception */
-#define MCJ_IRQ_BRAODCAST  0x10 /* do IRQ broadcasting */
+#define MCJ_IRQ_BRAODCAST  8/* do IRQ broadcasting */
+#define MCJ_EXCEPTION  0x10 /* raise as exception */
+#define MCJ_INTERRUPT  0x20 /* raise as interrupt */
 
 #define MCJ_CTX_SET(flags, ctx)\
do {\
diff --git a/mce.lex b/mce.lex
index ce8a9ae..ce4ea69 100644
--- a/mce.lex
+++ b/mce.lex
@@ -83,6 +83,7 @@ static struct key {
KEY(IN_IRQ),
KEY(IN_PROC),
KEY(POLL),
+   KEY(INTERRUPT),
KEY(EXCP),
KEYVAL(CORRECTED, MCI_STATUS_VAL|MCI_STATUS_EN),// checkme
KEYVAL(UNCORRECTED, MCI_STATUS_VAL|MCI_STATUS_UC|MCI_STATUS_EN),
diff --git a/mce.y b/mce.y
index a9421ee..84095a1 100644
--- a/mce.y
+++ b/mce.y
@@ -43,7 +43,7 @@ static void init(void);
 %token STATUS RIP TSC ADDR MISC CPU BANK MCGSTATUS HOLD
 %token NOBROADCAST IRQBROADCAST NMIBROADCAST 
 %token IN_IRQ IN_PROC PROCESSOR TIME SOCKETID APICID MCGCAP
-%token POLL EXCP
+%token POLL INTERRUPT EXCP
 %token CORRECTED UNCORRECTED FATAL MCE
 %token NUMBER
 %token SYMBOL
@@ -94,7 +94,11 @@ mce_term:   STATUS status_list  { m.status = $2; }
  | IN_IRQ { MCJ_CTX_SET(m.inject_flags, MCJ_CTX_IRQ); }
  | IN_PROC{ MCJ_CTX_SET(m.inject_flags, MCJ_CTX_PROCESS); }
  | POLL   { mce_flags |= MCE_RAISE_MODE;
+m.inject_flags &= ~MCJ_INTERRUPT;
 m.inject_flags &= ~MCJ_EXCEPTION; }
+ | INTERRUPT  { mce_flags |= MCE_RAISE_MODE;
+m.inject_flags &= ~MCJ_EXCEPTION;
+m.inject_flags |= MCJ_INTERRUPT; }
  | EXCP   { mce_flags |= MCE_RAISE_MODE;
 m.inject_flags |= MCJ_EXCEPTION; }
  ;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/2] x86, mce, amd: extend mce-inject for support threshold interrupt event injection on AMD platform

2014-10-30 Thread Chen Yucong

There are three ways that have been used to report machine check event.
And they are MCE, CMCI/Threshold Interrupt, and POLL. On the Intel
platform, CMCI/Threshold Interrupt and POLL share the same event handler
- machine_check_poll(). However, on the AMD platform, they have a
separate event handler. amd_threshold_interrupt() is used for handling
Threshold Interrupt event. And machine_check_poll() has been used for
polling other events.

This patch introduces a new flag MCJ_INTERRUPT that will be used to
separate CMCI/Threshold Interrupt and POLL handler in mce-inject.

Signed-off-by: Chen Yucong 
---
 arch/x86/include/asm/mce.h  |5 +++--
 arch/x86/kernel/cpu/mcheck/mce-inject.c |   16 
 arch/x86/kernel/cpu/mcheck/threshold.c  |1 +
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 3a430ad..cf25839 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -70,8 +70,9 @@
 #define MCJ_CTX_PROCESS0x1  /* inject context: process */
 #define MCJ_CTX_IRQ0x2  /* inject context: IRQ */
 #define MCJ_NMI_BROADCAST  0x4  /* do NMI broadcasting */
-#define MCJ_EXCEPTION  0x8  /* raise as exception */
-#define MCJ_IRQ_BROADCAST  0x10 /* do IRQ broadcasting */
+#define MCJ_IRQ_BROADCAST  0x8  /* do IRQ broadcasting */
+#define MCJ_EXCEPTION  0x10  /* raise as exception */
+#define MCJ_INTERRUPT  0x20  /* raise as interruption */
 
 #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c 
b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 4cfba43..8428746 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -59,6 +59,16 @@ static void raise_poll(struct mce *m)
m->finished = 0;
 }
 
+static void raise_interrupt(struct mce *m)
+{
+   unsigned long flags;
+
+   local_irq_save(flags);
+   mce_threshold_vector();
+   local_irq_restore(flags);
+   m->finished = 0;
+}
+
 static void raise_exception(struct mce *m, struct pt_regs *pregs)
 {
struct pt_regs regs;
@@ -89,6 +99,8 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs 
*regs)
cpumask_clear_cpu(cpu, mce_inject_cpumask);
if (m->inject_flags & MCJ_EXCEPTION)
raise_exception(m, regs);
+   else if (m->inject_flags & MCJ_INTERRUPT)
+   raise_interrupt(m);
else if (m->status)
raise_poll(m);
return NMI_HANDLED;
@@ -132,6 +144,10 @@ static int raise_local(void)
ret = -EINVAL;
}
printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+   } else if (m->inject_flags & MCJ_INTERRUPT) {
+   printk(KERN_INFO "Raising threshold interrupt on CPU %d\n", 
cpu);
+   raise_interrupt(m);
+   printk(KERN_INFO "Threshold interrupt done on CPU %d\n", cpu);
} else if (m->status) {
printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
raise_poll(m);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c 
b/arch/x86/kernel/cpu/mcheck/threshold.c
index 7245980..e324bf9 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -17,6 +17,7 @@ static void default_threshold_interrupt(void)
 }
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
+EXPORT_SYMBOL_GPL(mce_threshold_vector);
 
 static inline void __smp_threshold_interrupt(void)
 {
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/2] mce-inject: extend mce-inject for support threshold interrupt event injection on AMD platform

2014-10-30 Thread Chen Yucong



The work based on Boris's ras-for-3.19 branch.
https://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git -b ras-for-3.19

Until now, `mce-inject' is unable to inject threshold interrupt event 
on AMD platform. That's because both Threshold Interrupt and POLL have a
separate event handler. amd_threshold_interrupt() is used for handling
Threshold Interrupt event. And machine_check_poll() has been used for
polling other events, such as `deferred' error. The main items of this
work include:

  * apply MCE MSR wrappers to AMD-specific threshold interrupt handler 
for supporting mce-inject
  * introduces a new flag MCJ_INTERRUPT that is used to separate 
CMCI/Threshold Interrupt and POLL in mce-inject.

Note that Linux machine check injector tool - mce-inject should be also
updated for accommodating the above changes in kernel-space.
  * [PATCH] separate CMCI/Threshold Interrupt and POLL in mce-inject
  * https://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/2] x86, mce: apply MCE MSR wrappers to AMD platform for testing threshold interrupt handler

2014-10-30 Thread Chen Yucong

Until now, the `mce-inject' mechanism does not support error injection
for threshold interrupt events on the AMD platform.

This patch aims to apply MCE MSR wrappers to AMD-specific threshold
interrupt handler for supporting mce-inject.

Signed-off-by: Chen Yucong 
---
 arch/x86/include/asm/mce.h   |4 
 arch/x86/kernel/cpu/mcheck/mce.c |   25 +++--
 arch/x86/kernel/cpu/mcheck/mce_amd.c |6 +++---
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 276392f..3a430ad 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -185,6 +185,10 @@ enum mcp_flags {
 };
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
+u64 mce_rdmsrl(u32 msr);
+void mce_wrmsrl(u32 msr, u64 v);
+int mce_rdmsr_safe(u32 msr, u32 *low, u32 *high);
+
 int mce_notify_irq(void);
 void mce_notify_process(void);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 61a9668ce..b8fe5ae 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -391,7 +391,7 @@ static int msr_to_offset(u32 msr)
 }
 
 /* MSR access wrappers used for error injection */
-static u64 mce_rdmsrl(u32 msr)
+u64 mce_rdmsrl(u32 msr)
 {
u64 v;
 
@@ -416,7 +416,7 @@ static u64 mce_rdmsrl(u32 msr)
return v;
 }
 
-static void mce_wrmsrl(u32 msr, u64 v)
+void mce_wrmsrl(u32 msr, u64 v)
 {
if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
@@ -428,6 +428,27 @@ static void mce_wrmsrl(u32 msr, u64 v)
wrmsrl(msr, v);
 }
 
+int mce_rdmsr_safe(u32 msr, u32 *low, u32 *high) 
+{
+   int err = -1;
+   u64 val;
+
+   if (__this_cpu_read(injectm.finished)) {
+   int offset = msr_to_offset(msr);
+
+   if (offset < 0)
+   val = 0;
+   val = *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
+   err = 0;
+   } else
+   err = rdmsrl_safe(msr, &val);
+
+   (*low) = (u32)val;
+   (*high) = (u32)(val >> 32);
+
+   return err;
+}
+
 /*
  * Collect all global (w.r.t. this processor) status about this machine
  * check into our "mce" struct so that we can use it later to assess
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 6606523..926e8a3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -292,7 +292,7 @@ static void amd_threshold_interrupt(void)
++address;
}
 
-   if (rdmsr_safe(address, &low, &high))
+   if (mce_rdmsr_safe(address, &low, &high))
break;
 
if (!(high & MASK_VALID_HI)) {
@@ -318,12 +318,12 @@ static void amd_threshold_interrupt(void)
 
 log:
mce_setup(&m);
-   rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
+   m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(bank));
m.misc = ((u64)high << 32) | low;
m.bank = bank;
mce_log(&m);
 
-   wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+   mce_wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
 }
 
 /*
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/2] mce-inject: extend mce-inject for support threshold interrupt event injection on AMD platform

2014-10-30 Thread Chen Yucong



The work based on Boris's ras-for-3.19 branch.
https://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git -b ras-for-3.19

Until now, `mce-inject' has been unable to inject threshold interrupt events
on AMD platforms. That is because Threshold Interrupt and POLL each have a
separate event handler: amd_threshold_interrupt() handles Threshold
Interrupt events, while machine_check_poll() is used for polling other
events, such as `deferred' errors. The main items of this work include:

  * apply MCE MSR wrappers to the AMD-specific threshold interrupt handler
    to support mce-inject
  * introduce a new flag, MCJ_INTERRUPT, that is used to separate
    CMCI/Threshold Interrupt and POLL in mce-inject.

Note that the Linux machine check injector tool, mce-inject, should also be
updated to accommodate the above changes in kernel space.
  * [PATCH] separate CMCI/Threshold Interrupt and POLL in mce-inject
  * https://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/2] x86, mce: apply MCE MSR wrappers to AMD platform for testing threshold interrupt handler

2014-10-30 Thread Chen Yucong

Until now, the `mce-inject' mechanism has not supported error injection
for threshold interrupt events on AMD platforms.

This patch applies the MCE MSR wrappers to the AMD-specific threshold
interrupt handler in order to support mce-inject.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/include/asm/mce.h   |4 
 arch/x86/kernel/cpu/mcheck/mce.c |   25 +++--
 arch/x86/kernel/cpu/mcheck/mce_amd.c |6 +++---
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 276392f..3a430ad 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -185,6 +185,10 @@ enum mcp_flags {
 };
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
+u64 mce_rdmsrl(u32 msr);
+void mce_wrmsrl(u32 msr, u64 v);
+int mce_rdmsr_safe(u32 msr, u32 *low, u32 *high);
+
 int mce_notify_irq(void);
 void mce_notify_process(void);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 61a9668ce..b8fe5ae 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -391,7 +391,7 @@ static int msr_to_offset(u32 msr)
 }
 
 /* MSR access wrappers used for error injection */
-static u64 mce_rdmsrl(u32 msr)
+u64 mce_rdmsrl(u32 msr)
 {
u64 v;
 
@@ -416,7 +416,7 @@ static u64 mce_rdmsrl(u32 msr)
return v;
 }
 
-static void mce_wrmsrl(u32 msr, u64 v)
+void mce_wrmsrl(u32 msr, u64 v)
 {
if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
@@ -428,6 +428,27 @@ static void mce_wrmsrl(u32 msr, u64 v)
wrmsrl(msr, v);
 }
 
+int mce_rdmsr_safe(u32 msr, u32 *low, u32 *high) 
+{
+   int err = -1;
+   u64 val;
+
+   if (__this_cpu_read(injectm.finished)) {
+   int offset = msr_to_offset(msr);
+
+   if (offset < 0)
+   val = 0;
+   val = *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
+   err = 0;
+   } else
+   err = rdmsrl_safe(msr, &val);
+
+   (*low) = (u32)val;
+   (*high) = (u32)(val >> 32);
+
+   return err;
+}
+
 /*
  * Collect all global (w.r.t. this processor) status about this machine
  * check into our mce struct so that we can use it later to assess
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 6606523..926e8a3 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -292,7 +292,7 @@ static void amd_threshold_interrupt(void)
++address;
}
 
-   if (rdmsr_safe(address, &low, &high))
+   if (mce_rdmsr_safe(address, &low, &high))
break;
 
if (!(high & MASK_VALID_HI)) {
@@ -318,12 +318,12 @@ static void amd_threshold_interrupt(void)
 
 log:
mce_setup(&m);
-   rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
+   m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(bank));
m.misc = ((u64)high << 32) | low;
m.bank = bank;
mce_log(&m);
 
-   wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+   mce_wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
 }
 
 /*
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/2] x86, mce, amd: extend mce-inject for support threshold interrupt event injection on AMD platform

2014-10-30 Thread Chen Yucong

Three mechanisms are used to report machine check events: MCE,
CMCI/Threshold Interrupt, and POLL. On the Intel platform,
CMCI/Threshold Interrupt and POLL share the same event handler,
machine_check_poll(). On the AMD platform, however, they have separate
event handlers: amd_threshold_interrupt() handles Threshold Interrupt
events, while machine_check_poll() is used for polling other events.

This patch introduces a new flag MCJ_INTERRUPT that will be used to
separate CMCI/Threshold Interrupt and POLL handler in mce-inject.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/include/asm/mce.h  |5 +++--
 arch/x86/kernel/cpu/mcheck/mce-inject.c |   16 
 arch/x86/kernel/cpu/mcheck/threshold.c  |1 +
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 3a430ad..cf25839 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -70,8 +70,9 @@
 #define MCJ_CTX_PROCESS0x1  /* inject context: process */
 #define MCJ_CTX_IRQ0x2  /* inject context: IRQ */
 #define MCJ_NMI_BROADCAST  0x4  /* do NMI broadcasting */
-#define MCJ_EXCEPTION  0x8  /* raise as exception */
-#define MCJ_IRQ_BROADCAST  0x10 /* do IRQ broadcasting */
+#define MCJ_IRQ_BROADCAST  0x8  /* do IRQ broadcasting */
+#define MCJ_EXCEPTION  0x10  /* raise as exception */
+#define MCJ_INTERRUPT  0x20  /* raise as interruption */
 
 #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c 
b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 4cfba43..8428746 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -59,6 +59,16 @@ static void raise_poll(struct mce *m)
m->finished = 0;
 }
 
+static void raise_interrupt(struct mce *m)
+{
+   unsigned long flags;
+
+   local_irq_save(flags);
+   mce_threshold_vector();
+   local_irq_restore(flags);
+   m->finished = 0;
+}
+
 static void raise_exception(struct mce *m, struct pt_regs *pregs)
 {
struct pt_regs regs;
@@ -89,6 +99,8 @@ static int mce_raise_notify(unsigned int cmd, struct pt_regs 
*regs)
cpumask_clear_cpu(cpu, mce_inject_cpumask);
if (m->inject_flags & MCJ_EXCEPTION)
raise_exception(m, regs);
+   else if (m->inject_flags & MCJ_INTERRUPT)
+   raise_interrupt(m);
else if (m->status)
raise_poll(m);
return NMI_HANDLED;
@@ -132,6 +144,10 @@ static int raise_local(void)
ret = -EINVAL;
}
printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
+   } else if (m->inject_flags & MCJ_INTERRUPT) {
+   printk(KERN_INFO "Raising threshold interrupt on CPU %d\n", 
cpu);
+   raise_interrupt(m);
+   printk(KERN_INFO "Threshold interrupt done on CPU %d\n", cpu);
} else if (m->status) {
printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
raise_poll(m);
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c 
b/arch/x86/kernel/cpu/mcheck/threshold.c
index 7245980..e324bf9 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -17,6 +17,7 @@ static void default_threshold_interrupt(void)
 }
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
+EXPORT_SYMBOL_GPL(mce_threshold_vector);
 
 static inline void __smp_threshold_interrupt(void)
 {
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] separate CMCI/Threshold Interrupt and POLL in mce-inject

2014-10-30 Thread Chen Yucong

This patch introduces a new flag MCJ_INTERRUPT that will be used
to separate CMCI/Threshold Interrupt and POLL in mce-inject.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 mce.h   |5 +++--
 mce.lex |1 +
 mce.y   |6 +-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/mce.h b/mce.h
index c0668ad..d0bd39a 100644
--- a/mce.h
+++ b/mce.h
@@ -38,8 +38,9 @@
 #define MCJ_CTX_PROCESS1/* inject context: process */
 #define MCJ_CTX_IRQ2/* inject context: IRQ */
 #define MCJ_NMI_BROADCAST  4/* do NMI broadcasting */
-#define MCJ_EXCEPTION  8/* raise as exception */
-#define MCJ_IRQ_BRAODCAST  0x10 /* do IRQ broadcasting */
+#define MCJ_IRQ_BRAODCAST  8/* do IRQ broadcasting */
+#define MCJ_EXCEPTION  0x10 /* raise as exception */
+#define MCJ_INTERRUPT  0x20 /* raise as interrupt */
 
 #define MCJ_CTX_SET(flags, ctx)\
do {\
diff --git a/mce.lex b/mce.lex
index ce8a9ae..ce4ea69 100644
--- a/mce.lex
+++ b/mce.lex
@@ -83,6 +83,7 @@ static struct key {
KEY(IN_IRQ),
KEY(IN_PROC),
KEY(POLL),
+   KEY(INTERRUPT),
KEY(EXCP),
KEYVAL(CORRECTED, MCI_STATUS_VAL|MCI_STATUS_EN),// checkme
KEYVAL(UNCORRECTED, MCI_STATUS_VAL|MCI_STATUS_UC|MCI_STATUS_EN),
diff --git a/mce.y b/mce.y
index a9421ee..84095a1 100644
--- a/mce.y
+++ b/mce.y
@@ -43,7 +43,7 @@ static void init(void);
 %token STATUS RIP TSC ADDR MISC CPU BANK MCGSTATUS HOLD
 %token NOBROADCAST IRQBROADCAST NMIBROADCAST 
 %token IN_IRQ IN_PROC PROCESSOR TIME SOCKETID APICID MCGCAP
-%token POLL EXCP
+%token POLL INTERRUPT EXCP
 %token CORRECTED UNCORRECTED FATAL MCE
 %token NUMBER
 %token SYMBOL
@@ -94,7 +94,11 @@ mce_term:   STATUS status_list  { m.status = $2; }
  | IN_IRQ { MCJ_CTX_SET(m.inject_flags, MCJ_CTX_IRQ); }
  | IN_PROC{ MCJ_CTX_SET(m.inject_flags, MCJ_CTX_PROCESS); }
  | POLL   { mce_flags |= MCE_RAISE_MODE;
+m.inject_flags &= ~MCJ_INTERRUPT;
 m.inject_flags &= ~MCJ_EXCEPTION; }
+ | INTERRUPT  { mce_flags |= MCE_RAISE_MODE;
+m.inject_flags &= ~MCJ_EXCEPTION;
+m.inject_flags |= MCJ_INTERRUPT; }
  | EXCP   { mce_flags |= MCE_RAISE_MODE;
 m.inject_flags |= MCJ_EXCEPTION; }
  ;
-- 
1.7.10.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-10-28 Thread Chen Yucong

On Mon, 2014-10-27 at 23:10 +, Luck, Tony wrote:
> + m->mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
> + severity = mce_severity(m, mca_cfg.tolerant, NULL);
> 
> This seems a big hack to make mce_severity() work when called from
> CMCI context (when MCG_STATUS register is not set).  It would also
> be confusing as the subsequent logged entries would show MCIP and RIPV
> bits set in the mcg_status.
> 
> If someone can think of a less hacky way to do this, that would be good. 
> Otherwise
> the code needs a comment, and should reset m->mcg_status to avoid making logs
> that have incorrect data.
> 
Hi all,

At Tony's suggestion, this patch adds a comment and restores m->mcgstatus
to avoid making logs that have incorrect data.

thx!
cyc

From: Chen Yucong 

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/cpu/mcheck/mce.c |   64 --
 1 file changed, 62 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index fdc422e..d285d26 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,56 @@ static void mce_read_aux(struct mce *m, int i)
}
 }
 
+static bool mem_deferred_error(struct mce *m)
+{
+   int severity;
+   u8 mcgs = m->mcgstatus & 0xff;
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   /*
+* mce_severity is specific to machine check exception, and it will
+* check MCIP/EIPV/RIPV bits. In order to get pass the check, we need
+* to set MCIP and RIPV.
+*/
+   m->mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
+   severity = mce_severity(m, mca_cfg.tolerant, NULL);
+
+   /* restore the original value of m->mcgstatus */
+   m->mcgstatus = (m->mcgstatus & ~0xff) | mcgs;
+
+   if (c->x86_vendor == X86_VENDOR_AMD) {
+   /*
+* AMD BKDGs - Machine Check Error Codes
+*
+* Bit 8 of ErrCode[15:0] of MCi_STATUS is used for indicating
+* a memory-specific error. Note that this field encodes info-
+* rmation about memory-hierarchy level involved in the error.
+*/
+   if (severity == MCE_DEFERRED_SEVERITY)
+   return  (m->status & 0xff00) == BIT(8);
+   } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+   /*
+* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
+*
+* Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+* indicating a memory error. Bit 8 is used for indicating a
+* cache hierarchy error. The combination of bit 2 and bit 3
+* is used for indicating a `generic' cache hierarchy error
+* But we can't just blindly check the above bits, because if
+* bit 11 is set, then it is a bus/interconnect error - and
+* either way the above bits just gives more detail on what
+* bus/interconnect error happened. Note that bit 12 can be
+* ignored, as it's the "filter" bit.
+*/
+   if (severity == MCE_UCNA_SEVERITY)
+   return (m->status & 0xef80) == BIT(7) ||
+  (m->status & 0xef00) == BIT(8) ||
+  (m->status & 0xeffc) == 0xc;
+   }
+
+   return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -630,6 +680,16 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
+
+   /*
+* In the cases where we don't have a valid address after all,
+* do not add it into the ring buffer.
+*/
+   if (mem_deferred_error(&m) && (m.status & MCI_STATUS_ADDRV)) {
+   mce_ring_add(m.addr >> PAGE_SHIFT);
+   mce_schedule_work();
+   }
+
/*
 * Don't get the IP here because it's unlikely to
 * have anything to do with the actual error location.
@@ -1098,8 +1158,8 @@ void do_machine_check(struct pt_regs *regs, long 
error_code)
severity = mce_severity(&m, cfg->tolerant, NULL);
 
/*
-* When machine check was for corrected handler don't touch,
-* unless we're panicing.
+* When machine check was for corrected/deferred handler don't
+* touch, unless we're panicing.
 */
if ((severity == MCE_KEEP_SEVERITY ||
 severity == MCE_UCNA_SEVERITY) && !no_way_out)
-- 
1.7.10.4



--
To unsubscribe from this list: send the line "unsub

Re: [PATCH 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-10-28 Thread Chen Yucong

On Mon, 2014-10-27 at 23:10 +, Luck, Tony wrote:
> + m->mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
> + severity = mce_severity(m, mca_cfg.tolerant, NULL);
> 
> This seems a big hack to make mce_severity() work when called from
> CMCI context (when MCG_STATUS register is not set).  It would also
> be confusing as the subsequent logged entries would show MCIP and RIPV
> bits set in the mcg_status.
> 
> If someone can think of a less hacky way to do this, that would be good. 
> Otherwise
> the code needs a comment, and should reset m->mcg_status to avoid making logs
> that have incorrect data.
> 
Hi all,

At Tony's suggestion, this patch adds a comment and restores m->mcgstatus
to avoid making logs that have incorrect data.

thx!
cyc

From: Chen Yucong sla...@gmail.com

Signed-off-by: Chen Yucong sla...@gmail.com
---
 arch/x86/kernel/cpu/mcheck/mce.c |   64 --
 1 file changed, 62 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index fdc422e..d285d26 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,56 @@ static void mce_read_aux(struct mce *m, int i)
}
 }
 
+static bool mem_deferred_error(struct mce *m)
+{
+   int severity;
+   u8 mcgs = m->mcgstatus & 0xff;
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   /*
+* mce_severity is specific to machine check exception, and it will
+* check MCIP/EIPV/RIPV bits. In order to get pass the check, we need
+* to set MCIP and RIPV.
+*/
+   m->mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
+   severity = mce_severity(m, mca_cfg.tolerant, NULL);
+
+   /* restore the original value of m->mcgstatus */
+   m->mcgstatus = (m->mcgstatus & ~0xff) | mcgs;
+
+   if (c->x86_vendor == X86_VENDOR_AMD) {
+   /*
+* AMD BKDGs - Machine Check Error Codes
+*
+* Bit 8 of ErrCode[15:0] of MCi_STATUS is used for indicating
+* a memory-specific error. Note that this field encodes info-
+* rmation about memory-hierarchy level involved in the error.
+*/
+   if (severity == MCE_DEFERRED_SEVERITY)
+   return  (m->status & 0xff00) == BIT(8);
+   } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+   /*
+* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
+*
+* Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+* indicating a memory error. Bit 8 is used for indicating a
+* cache hierarchy error. The combination of bit 2 and bit 3
+* is used for indicating a `generic' cache hierarchy error
+* But we can't just blindly check the above bits, because if
+* bit 11 is set, then it is a bus/interconnect error - and
+* either way the above bits just gives more detail on what
+* bus/interconnect error happened. Note that bit 12 can be
+* ignored, as it's the filter bit.
+*/
+   if (severity == MCE_UCNA_SEVERITY)
+   return (m->status & 0xef80) == BIT(7) ||
+  (m->status & 0xef00) == BIT(8) ||
+  (m->status & 0xeffc) == 0xc;
+   }
+
+   return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -630,6 +680,16 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
+
+   /*
+* In the cases where we don't have a valid address after all,
+* do not add it into the ring buffer.
+*/
+   if (mem_deferred_error(&m) && (m.status & MCI_STATUS_ADDRV)) {
+   mce_ring_add(m.addr >> PAGE_SHIFT);
+   mce_schedule_work();
+   }
+
/*
 * Don't get the IP here because it's unlikely to
 * have anything to do with the actual error location.
@@ -1098,8 +1158,8 @@ void do_machine_check(struct pt_regs *regs, long 
error_code)
severity = mce_severity(&m, cfg->tolerant, NULL);
 
/*
-* When machine check was for corrected handler don't touch,
-* unless we're panicing.
+* When machine check was for corrected/deferred handler don't
+* touch, unless we're panicing.
 */
if ((severity == MCE_KEEP_SEVERITY ||
 severity == MCE_UCNA_SEVERITY) && !no_way_out)
-- 
1.7.10.4



--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ

Re: [PATCH 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-10-27 Thread Chen Yucong

On Mon, 2014-10-27 at 23:10 +, Luck, Tony wrote:
> + m->mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
> + severity = mce_severity(m, mca_cfg.tolerant, NULL);
> 
> This seems a big hack to make mce_severity() work when called from
> CMCI context (when MCG_STATUS register is not set).  It would also
> be confusing as the subsequent logged entries would show MCIP and RIPV
> bits set in the mcg_status.
> 
In fact, I noticed this issue from the start. But the Intel SDM
documents that MCIP/RIPV/EIPV are specific to machine check
exceptions, and I don't know whether the above flag bits will be checked
in CMCI context by the error log/decode handlers.

> If someone can think of a less hacky way to do this, that would be good. 
> Otherwise
> the code needs a comment, and should reset m->mcg_status to avoid making logs
> that have incorrect data.
> 
Yes! The above code snippet should be commented. Another method
that can be used to restore m->mcgstatus is shown below.

+u8 mcgs = m->mcgstatus & 0xff;
+
+m->mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
+severity = mce_severity(m, mca_cfg.tolerant, NULL);
+m->mcgstatus = (m->mcgstatus & ~0xff) | mcgs;

thx!
cyc

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/2] RAS: add the support for handling UCNA/DEFERRED error

2014-10-27 Thread Chen Yucong

On Mon, 2014-10-27 at 08:56 +0800, Chen Yucong wrote:

Hi all,

At the suggestion of Boris, the first patch extends the mce_severity
mechanism for handling UCNA/DEFERRED error.
  Link: https://lkml.org/lkml/2014/10/23/190

And on the basis of the first patch, the second patch adds the support
for identifying and handling UCNA/DEFERRED error in machine_check_poll.
 
[PATCH 1/2] x86, mce, severity: extend the mce_severity
[PATCH 2/2] x86, mce: support memory error recovery for both UCNA

thx!
cyc

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 0/2] RAS: add the support for handling UCNA/DEFERRED error

2014-10-27 Thread Chen Yucong

On Mon, 2014-10-27 at 08:56 +0800, Chen Yucong wrote:

Hi all,

At the suggestion of Boris, the first patch extends the mce_severity
mechanism for handling UCNA/DEFERRED error.
  Link: https://lkml.org/lkml/2014/10/23/190

And on the basis of the first patch, the second patch adds the support
for identifying and handling UCNA/DEFERRED error in machine_check_poll.
 
[PATCH 1/2] x86, mce, severity: extend the mce_severity
[PATCH 2/2] x86, mce: support memory error recovery for both UCNA

thx!
cyc

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-10-27 Thread Chen Yucong

On Mon, 2014-10-27 at 23:10 +, Luck, Tony wrote:
> + m->mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
> + severity = mce_severity(m, mca_cfg.tolerant, NULL);
> 
> This seems a big hack to make mce_severity() work when called from
> CMCI context (when MCG_STATUS register is not set).  It would also
> be confusing as the subsequent logged entries would show MCIP and RIPV
> bits set in the mcg_status.
> 
In fact, I noticed this issue from the start. But the Intel SDM
documents that MCIP/RIPV/EIPV are specific to machine check
exceptions, and I don't know whether the above flag bits will be checked
in CMCI context by the error log/decode handlers.

> If someone can think of a less hacky way to do this, that would be good. 
> Otherwise
> the code needs a comment, and should reset m->mcg_status to avoid making logs
> that have incorrect data.
> 
Yes! The above code snippet should be commented. Another method
that can be used to restore m->mcgstatus is shown below.

+u8 mcgs = m->mcgstatus & 0xff;
+
+m->mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
+severity = mce_severity(m, mca_cfg.tolerant, NULL);
+m->mcgstatus = (m->mcgstatus & ~0xff) | mcgs;

thx!
cyc

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-10-26 Thread Chen Yucong

Uncorrected no action required (UCNA) - is a UCR error that is not
signaled via a machine check exception and, instead, is reported to
system software as a corrected machine check error. UCNA errors indicate
that some data in the system is corrupted, but the data has not been
consumed and the processor state is valid and you may continue execution
on this processor. UCNA errors require no action from system software
to continue execution. Note that UCNA errors are supported by the
processor only when IA32_MCG_CAP[24] (MCG_SER_P) is set.
   -- Intel SDM Volume 3B

Deferred errors are errors that cannot be corrected by hardware, but
do not cause an immediate interruption in program flow, loss of data
integrity, or corruption of processor state. These errors indicate
that data has been corrupted but not consumed. Hardware writes information
to the status and address registers in the corresponding bank that
identifies the source of the error if deferred errors are enabled for
logging. Deferred errors are not reported via machine check exceptions;
they can be seen by polling the MCi_STATUS registers.
-- AMD64 APM Volume 2

As the two excerpts above show, both UCNA and Deferred errors are
detected errors that cannot be corrected by hardware, which makes them
very similar to Software Recoverable Action Optional (SRAO) errors.
Therefore, some of the actions already used for handling SRAO errors
can also be used to handle UCNA and Deferred errors.

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/cpu/mcheck/mce.c |   55 --
 1 file changed, 53 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index fdc422e..7439077 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,47 @@ static void mce_read_aux(struct mce *m, int i)
}
 }
 
+static bool mem_deferred_error(struct mce *m)
+{
+   int severity;
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   m->mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
+   severity = mce_severity(m, mca_cfg.tolerant, NULL);
+
+   if (c->x86_vendor == X86_VENDOR_AMD) {
+   /*
+* AMD BKDGs - Machine Check Error Codes
+*
+* Bit 8 of ErrCode[15:0] of MCi_STATUS is used for indicating
+* a memory-specific error. Note that this field encodes info-
+* rmation about memory-hierarchy level involved in the error.
+*/
+   if (severity == MCE_DEFERRED_SEVERITY)
+   return  (m->status & 0xff00) == BIT(8);
+   } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+   /*
+* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
+*
+* Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+* indicating a memory error. Bit 8 is used for indicating a
+* cache hierarchy error. The combination of bit 2 and bit 3
+* is used for indicating a `generic' cache hierarchy error
+* But we can't just blindly check the above bits, because if
+* bit 11 is set, then it is a bus/interconnect error - and
+* either way the above bits just gives more detail on what
+* bus/interconnect error happened. Note that bit 12 can be
+* ignored, as it's the "filter" bit.
+*/
+   if (severity == MCE_UCNA_SEVERITY)
+   return (m->status & 0xef80) == BIT(7) ||
+  (m->status & 0xef00) == BIT(8) ||
+  (m->status & 0xeffc) == 0xc;
+   }
+
+   return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -630,6 +671,16 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
+
+   /*
+* In the cases where we don't have a valid address after all,
+* do not add it into the ring buffer.
+*/
+   if (mem_deferred_error(&m) && (m.status & MCI_STATUS_ADDRV)) {
+   mce_ring_add(m.addr >> PAGE_SHIFT);
+   mce_schedule_work();
+   }
+
/*
 * Don't get the IP here because it's unlikely to
 * have anything to do with the actual error location.
@@ -1098,8 +1149,8 @@ void do_machine_check(struct pt_regs *regs, long 
error_code)
severity = mce_severity(&m, cfg->tolerant, NULL);
 
/*
-* When machine check was for corrected handler don't touch,
-* unless we're panicing.
+* When machine

[PATCH 1/2] x86, mce, severity: extend the mce_severity mechanism to handle UCNA/DEFERRED error

2014-10-26 Thread Chen Yucong

Until now, the mce_severity mechanism could only identify the
severity of a UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it
was not able to filter out DEFERRED errors on the AMD platform.

This patch extends the mce_severity mechanism to handle
UCNA/DEFERRED errors. In order to do this, the patch
introduces a new severity level - MCE_UCNA/DEFERRED_SEVERITY.

Signed-off-by: Chen Yucong 
---
 arch/x86/include/asm/mce.h|4 
 arch/x86/kernel/cpu/mcheck/mce-internal.h |2 ++
 arch/x86/kernel/cpu/mcheck/mce-severity.c |6 +-
 arch/x86/kernel/cpu/mcheck/mce.c  |3 ++-
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 958b90f..40b35a5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
 #define MCI_STATUS_S(1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
 
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED(1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON  (1ULL<<43)  /* access poisonous data */
+
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
  * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h 
b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 09edd0b..d32fcbb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -3,6 +3,8 @@
 
 enum severity_level {
MCE_NO_SEVERITY,
+   MCE_DEFERRED_SEVERITY,
+   MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
MCE_KEEP_SEVERITY,
MCE_SOME_SEVERITY,
MCE_AO_SEVERITY,
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c 
b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c370e1c..c12e0a7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -83,13 +83,17 @@ static struct severity {
KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
),
MCESEV(
+   DEFERRED, "Deferred error",
+   NOSER, 
MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
+   ),
+   MCESEV(
KEEP, "Corrected error",
NOSER, BITCLR(MCI_STATUS_UC)
),
 
/* ignore OVER for UCNA */
MCESEV(
-   KEEP, "Uncorrected no action required",
+   UCNA, "Uncorrected no action required",
SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
),
MCESEV(
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 61a9668ce..fdc422e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1101,7 +1101,8 @@ void do_machine_check(struct pt_regs *regs, long 
error_code)
 * When machine check was for corrected handler don't touch,
 * unless we're panicing.
 */
-   if (severity == MCE_KEEP_SEVERITY && !no_way_out)
+   if ((severity == MCE_KEEP_SEVERITY ||
+severity == MCE_UCNA_SEVERITY) && !no_way_out)
continue;
__set_bit(i, toclear);
if (severity == MCE_NO_SEVERITY) {
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/2] RAS: add the support for handling UCNA/DEFERRED error

2014-10-26 Thread Chen Yucong

And on the basis of the first patch, the second patch adds the support
for identifying and handling UCNA/DEFERRED error in machine_check_poll.

[PATCH 1/2] x86, mce, severity: extend the mce_severity
[PATCH 2/2] x86, mce: support memory error recovery for both UCNA
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 0/2] RAS: add the support for handling UCNA/DEFERRED error

2014-10-26 Thread Chen Yucong

And on the basis of the first patch, the second patch adds the support
for identifying and handling UCNA/DEFERRED error in machine_check_poll.

[PATCH 1/2] x86, mce, severity: extend the mce_severity
[PATCH 2/2] x86, mce: support memory error recovery for both UCNA
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 1/2] x86, mce, severity: extend the mce_severity mechanism to handle UCNA/DEFERRED error

2014-10-26 Thread Chen Yucong

Until now, the mce_severity mechanism can only identify the
severity of UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it
is not able to filter out DEFERRED error for AMD platform.

This patch aims to extend the mce_severity mechanism for
handling UCNA/DEFERRED error. In order to do this, the patch
introduces a new severity level - MCE_UCNA/DEFERRED_SEVERITY.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/include/asm/mce.h|4 
 arch/x86/kernel/cpu/mcheck/mce-internal.h |2 ++
 arch/x86/kernel/cpu/mcheck/mce-severity.c |6 +-
 arch/x86/kernel/cpu/mcheck/mce.c  |3 ++-
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 958b90f..40b35a5 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
 #define MCI_STATUS_S    (1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
 
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED (1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON   (1ULL<<43)  /* access poisonous data */
+
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
  * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h 
b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 09edd0b..d32fcbb 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -3,6 +3,8 @@
 
 enum severity_level {
MCE_NO_SEVERITY,
+   MCE_DEFERRED_SEVERITY,
+   MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
MCE_KEEP_SEVERITY,
MCE_SOME_SEVERITY,
MCE_AO_SEVERITY,
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c 
b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c370e1c..c12e0a7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -83,13 +83,17 @@ static struct severity {
KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
),
MCESEV(
+   DEFERRED, "Deferred error",
+   NOSER, 
MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
+   ),
+   MCESEV(
KEEP, "Corrected error",
NOSER, BITCLR(MCI_STATUS_UC)
),
 
/* ignore OVER for UCNA */
MCESEV(
-   KEEP, "Uncorrected no action required",
+   UCNA, "Uncorrected no action required",
SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
),
MCESEV(
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 61a9668ce..fdc422e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1101,7 +1101,8 @@ void do_machine_check(struct pt_regs *regs, long 
error_code)
 * When machine check was for corrected handler don't touch,
 * unless we're panicing.
 */
-   if (severity == MCE_KEEP_SEVERITY && !no_way_out)
+   if ((severity == MCE_KEEP_SEVERITY ||
+severity == MCE_UCNA_SEVERITY) && !no_way_out)
continue;
__set_bit(i, toclear);
if (severity == MCE_NO_SEVERITY) {
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-10-26 Thread Chen Yucong

Uncorrected no action required (UCNA) - is a UCR error that is not
signaled via a machine check exception and, instead, is reported to
system software as a corrected machine check error. UCNA errors indicate
that some data in the system is corrupted, but the data has not been
consumed and the processor state is valid and you may continue execution
on this processor. UCNA errors require no action from system software
to continue execution. Note that UCNA errors are supported by the
processor only when IA32_MCG_CAP[24] (MCG_SER_P) is set.
   -- Intel SDM Volume 3B

Deferred errors are errors that cannot be corrected by hardware, but
do not cause an immediate interruption in program flow, loss of data
integrity, or corruption of processor state. These errors indicate
that data has been corrupted but not consumed. Hardware writes information
to the status and address registers in the corresponding bank that
identifies the source of the error if deferred errors are enabled for
logging. Deferred errors are not reported via machine check exceptions;
they can be seen by polling the MCi_STATUS registers.
-- AMD64 APM Volume 2

Above two items, both UCNA and Deferred errors belong to detected
errors, but they can't be corrected by hardware, and this is very
similar to Software Recoverable Action Optional (SRAO) errors.
Therefore, we can take some actions that have been used for handling
SRAO errors to handle UCNA and Deferred errors.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c |   55 --
 1 file changed, 53 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index fdc422e..7439077 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,47 @@ static void mce_read_aux(struct mce *m, int i)
}
 }
 
+static bool mem_deferred_error(struct mce *m)
+{
+   int severity;
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   m->mcgstatus |= (MCG_STATUS_MCIP|MCG_STATUS_RIPV);
+   severity = mce_severity(m, mca_cfg.tolerant, NULL);
+
+   if (c->x86_vendor == X86_VENDOR_AMD) {
+   /*
+* AMD BKDGs - Machine Check Error Codes
+*
+* Bit 8 of ErrCode[15:0] of MCi_STATUS is used for indicating
+* a memory-specific error. Note that this field encodes info-
+* rmation about memory-hierarchy level involved in the error.
+*/
+   if (severity == MCE_DEFERRED_SEVERITY)
+   return  (m->status & 0xff00) == BIT(8);
+   } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+   /*
+* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
+*
+* Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+* indicating a memory error. Bit 8 is used for indicating a
+* cache hierarchy error. The combination of bit 2 and bit 3
+* is used for indicating a `generic' cache hierarchy error
+* But we can't just blindly check the above bits, because if
+* bit 11 is set, then it is a bus/interconnect error - and
+* either way the above bits just gives more detail on what
+* bus/interconnect error happened. Note that bit 12 can be
+* ignored, as it's the filter bit.
+*/
+   if (severity == MCE_UCNA_SEVERITY)
+   return (m->status & 0xef80) == BIT(7) ||
+  (m->status & 0xef00) == BIT(8) ||
+  (m->status & 0xeffc) == 0xc;
+   }
+
+   return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -630,6 +671,16 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
+
+   /*
+* In the cases where we don't have a valid address after all,
+* do not add it into the ring buffer.
+*/
+   if (mem_deferred_error(&m) && (m.status & MCI_STATUS_ADDRV)) {
+   mce_ring_add(m.addr >> PAGE_SHIFT);
+   mce_schedule_work();
+   }
+
/*
 * Don't get the IP here because it's unlikely to
 * have anything to do with the actual error location.
@@ -1098,8 +1149,8 @@ void do_machine_check(struct pt_regs *regs, long 
error_code)
severity = mce_severity(&m, cfg->tolerant, NULL);
 
/*
-* When machine check was for corrected handler don't touch,
-* unless we're panicing.
+* When machine check was for corrected/deferred handler don't
+* touch

Re: Fwd: [PATCH] x86, MCE, AMD: save IA32_MCi_STATUS before machine_check_poll() resets it

2014-10-22 Thread Chen Yucong

On Wed, 2014-10-22 at 10:16 +0200, Borislav Petkov wrote:
> On Wed, Oct 22, 2014 at 09:51:18AM +0800, Chen Yucong wrote:
> > Can you check the following link? The link contains my reply about
> > "x86, MCE, AMD: Move invariant code out from loop body". The reply was
> > sent to you on October 7, but until now, there aren't any comments
> > from you!
> 
> https://git.kernel.org/cgit/linux/kernel/git/bp/bp.git/commit/?h=ras-for-3.19&id=69b957583580bf40624553c64d802fefb54199cb

I have checked this link! I mean that there is another reply that you
may not have noticed.

thx!
cyc


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] x86, MCE: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-10-22 Thread Chen Yucong

On Fri, 2014-10-10 at 14:03 +0800, Chen Yucong wrote:
> From: Chen Yucong 
> 
> dram_ce_error() stems from Boris's patch set. Thanks!
> Link: http://lkml.org/lkml/2014/7/1/545
> 
> Uncorrected no action required (UCNA) - is a UCR error that is not
> signaled via a machine check exception and, instead, is reported to
> system software as a corrected machine check error. UCNA errors indicate
> that some data in the system is corrupted, but the data has not been
> consumed and the processor state is valid and you may continue execution
> on this processor. UCNA errors require no action from system software
> to continue execution. Note that UCNA errors are supported by the
> processor only when IA32_MCG_CAP[24] (MCG_SER_P) is set.
>-- Intel SDM Volume 3B
> 
> Deferred errors are errors that cannot be corrected by hardware, but
> do not cause an immediate interruption in program flow, loss of data
> integrity, or corruption of processor state. These errors indicate
> that data has been corrupted but not consumed. Hardware writes information
> to the status and address registers in the corresponding bank that
> identifies the source of the error if deferred errors are enabled for
> logging. Deferred errors are not reported via machine check exceptions;
> they can be seen by polling the MCi_STATUS registers.
> -- AMD64 APM Volume 2
> 
> Above two items, both UCNA and Deferred errors belong to detected
> errors, but they can't be corrected by hardware, and this is very
> similar to Software Recoverable Action Optional (SRAO) errors.
> Therefore, we can take some actions that have been used for handling
> SRAO errors to handle UCNA and Deferred errors.
> 
> Signed-off-by: Chen Yucong 
> ---
>  arch/x86/include/asm/mce.h   |4 
>  arch/x86/kernel/cpu/mcheck/mce.c |   39 
> ++
>  2 files changed, 43 insertions(+)
> 
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 958b90f..c9ac7df4 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -34,6 +34,10 @@
>  #define MCI_STATUS_S  (1ULL<<56)  /* Signaled machine check */
>  #define MCI_STATUS_AR (1ULL<<55)  /* Action required */
>  
> +/* AMD-specific bits */
> +#define MCI_STATUS_DEFERRED (1ULL<<44)  /* declare an uncorrected error 
> */
> +#define MCI_STATUS_POISON   (1ULL<<43)  /* access poisonous data */
> +
>  /*
>   * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
>   * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c 
> b/arch/x86/kernel/cpu/mcheck/mce.c
> index 61a9668ce..4030c77 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -575,6 +575,35 @@ static void mce_read_aux(struct mce *m, int i)
>   }
>  }
>  
> +static bool dram_ce_error(struct mce *m)
> +{
> + struct cpuinfo_x86 *c = &boot_cpu_data;
> +
> + if (c->x86_vendor == X86_VENDOR_AMD) {
> + /* ErrCodeExt[20:16] */
> + u8 xec = (m->status >> 16) & 0x1f;
> +
> + if (m->status & MCI_STATUS_DEFERRED)
> + return (xec == 0x0 || xec == 0x8);
> + } else if (c->x86_vendor == X86_VENDOR_INTEL) {
> + /*
> +  * SDM Volume 3B - 15.9.2 Compound Error Codes (Table 15-9)
> +  *
> +  * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
> +  * indicating a memory error. But we can't just blindly check
> +  * bit 7 because if bit 8 is set, then this is a cache error,
> +  * and if bit 11 is set, then it is a bus/ interconnect error
> +  * - and either way bit 7 just gives more detail on what
> +  * cache/bus/interconnect error happened. Note that we can
> +  * ignore bit 12, as it's the "filter" bit.
> +  */
> + if ((m->mcgcap & MCG_SER_P) && (m->status & MCI_STATUS_UC))
> + return (m->status & 0xef80) == BIT(7);
> + }
> +
> + return false;
> +}
> +
>  DEFINE_PER_CPU(unsigned, mce_poll_count);
>  
>  /*
> @@ -630,6 +659,16 @@ void machine_check_poll(enum mcp_flags flags, 
> mce_banks_t *b)
>  
>   if (!(flags & MCP_TIMESTAMP))
>   m.tsc = 0;
> +
> + /*
> +  * In the cases where we don't have a valid address after all,
> +  * do not add it into the ring buffer.
> +  */
> +

Re: [PATCH] x86, MCE: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-10-22 Thread Chen Yucong

On Fri, 2014-10-10 at 14:03 +0800, Chen Yucong wrote:
 From: Chen Yucong sla...@gmail.com
 
 dram_ce_error() stems from Boris's patch set. Thanks!
 Link: http://lkml.org/lkml/2014/7/1/545
 
 Uncorrected no action required (UCNA) - is a UCR error that is not
 signaled via a machine check exception and, instead, is reported to
 system software as a corrected machine check error. UCNA errors indicate
 that some data in the system is corrupted, but the data has not been
 consumed and the processor state is valid and you may continue execution
 on this processor. UCNA errors require no action from system software
 to continue execution. Note that UCNA errors are supported by the
 processor only when IA32_MCG_CAP[24] (MCG_SER_P) is set.
-- Intel SDM Volume 3B
 
 Deferred errors are errors that cannot be corrected by hardware, but
 do not cause an immediate interruption in program flow, loss of data
 integrity, or corruption of processor state. These errors indicate
 that data has been corrupted but not consumed. Hardware writes information
 to the status and address registers in the corresponding bank that
 identifies the source of the error if deferred errors are enabled for
 logging. Deferred errors are not reported via machine check exceptions;
 they can be seen by polling the MCi_STATUS registers.
 -- AMD64 APM Volume 2
 
 Above two items, both UCNA and Deferred errors belong to detected
 errors, but they can't be corrected by hardware, and this is very
 similar to Software Recoverable Action Optional (SRAO) errors.
 Therefore, we can take some actions that have been used for handling
 SRAO errors to handle UCNA and Deferred errors.
 
 Signed-off-by: Chen Yucong sla...@gmail.com
 ---
  arch/x86/include/asm/mce.h   |4 
  arch/x86/kernel/cpu/mcheck/mce.c |   39 
 ++
  2 files changed, 43 insertions(+)
 
 diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
 index 958b90f..c9ac7df4 100644
 --- a/arch/x86/include/asm/mce.h
 +++ b/arch/x86/include/asm/mce.h
 @@ -34,6 +34,10 @@
  #define MCI_STATUS_S  (1ULL56)  /* Signaled machine check */
  #define MCI_STATUS_AR (1ULL55)  /* Action required */
  
 +/* AMD-specific bits */
 +#define MCI_STATUS_DEFERRED (1ULL44)  /* declare an uncorrected error 
 */
 +#define MCI_STATUS_POISON   (1ULL43)  /* access poisonous data */
 +
  /*
   * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
   * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
 diff --git a/arch/x86/kernel/cpu/mcheck/mce.c 
 b/arch/x86/kernel/cpu/mcheck/mce.c
 index 61a9668ce..4030c77 100644
 --- a/arch/x86/kernel/cpu/mcheck/mce.c
 +++ b/arch/x86/kernel/cpu/mcheck/mce.c
 @@ -575,6 +575,35 @@ static void mce_read_aux(struct mce *m, int i)
   }
  }
  
 +static bool dram_ce_error(struct mce *m)
 +{
 + struct cpuinfo_x86 *c = boot_cpu_data;
 +
 + if (c-x86_vendor == X86_VENDOR_AMD) {
 + /* ErrCodeExt[20:16] */
 + u8 xec = (m-status  16)  0x1f;
 +
 + if (m-status  MCI_STATUS_DEFERRED)
 + return (xec == 0x0 || xec == 0x8);
 + } else if (c-x86_vendor == X86_VENDOR_INTEL) {
 + /*
 +  * SDM Volume 3B - 15.9.2 Compound Error Codes (Table 15-9)
 +  *
 +  * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
 +  * indicating a memory error. But we can't just blindly check
 +  * bit 7 because if bit 8 is set, then this is a cache error,
 +  * and if bit 11 is set, then it is a bus/ interconnect error
 +  * - and either way bit 7 just gives more detail on what
 +  * cache/bus/interconnect error happened. Note that we can
 +  * ignore bit 12, as it's the filter bit.
 +  */
 + if ((m-mcgcap  MCG_SER_P)  (m-status  MCI_STATUS_UC))
 + return (m-status  0xef80) == BIT(7);
 + }
 +
 + return false;
 +}
 +
  DEFINE_PER_CPU(unsigned, mce_poll_count);
  
  /*
 @@ -630,6 +659,16 @@ void machine_check_poll(enum mcp_flags flags, 
 mce_banks_t *b)
  
   if (!(flags  MCP_TIMESTAMP))
   m.tsc = 0;
 +
 + /*
 +  * In the cases where we don't have a valid address after all,
 +  * do not add it into the ring buffer.
 +  */
 + if (dram_ce_error(m)  (m.status  MCI_STATUS_ADDRV)) {
 + mce_ring_add(m.addr  PAGE_SHIFT);
 + mce_schedule_work();
 + }
 +
   /*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.

Hi Boris,

Do you have any comments on this patch?

thx!
cyc

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord

Re: Fwd: [PATCH] x86, MCE, AMD: save IA32_MCi_STATUS before machine_check_poll() resets it

2014-10-22 Thread Chen Yucong

On Wed, 2014-10-22 at 10:16 +0200, Borislav Petkov wrote:
 On Wed, Oct 22, 2014 at 09:51:18AM +0800, Chen Yucong wrote:
  Can you check the following link? The link contains my reply about
  x86, MCE, AMD: Move invariant code out from loop body. The reply was
  sent to you on October 7, but until now, there aren't any comments
  from you!
 
 https://git.kernel.org/cgit/linux/kernel/git/bp/bp.git/commit/?h=ras-for-3.19&id=69b957583580bf40624553c64d802fefb54199cb

I have checked this link! I mean that there is another reply that you
may not have noticed.

thx!
cyc


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Fwd: [PATCH] x86, MCE, AMD: save IA32_MCi_STATUS before machine_check_poll() resets it

2014-10-21 Thread Chen Yucong

On Tue, 2014-10-21 at 22:28 +0200, Borislav Petkov wrote:
> On Thu, Oct 09, 2014 at 02:01:06PM -0500, Aravind Gopalakrishnan wrote:
> > I actually agree with this approach. So no argument:)
> 
> Ok, thanks, here's a patch.
> 
> Btw, I'm pushing the whole queue to a ras-for-3.19 branch at
> https://git.kernel.org/cgit/linux/kernel/git/bp/bp.git if you'd like to
> take a look and see whether we haven't forgotten anything before I send
> it to tip guys.
> 
Hi Boris,

Can you check the following link? The link contains my reply about 
"x86, MCE, AMD: Move invariant code out from loop body". The reply
was sent to you on October 7, but until now, there aren't any comments
from you!

https://lkml.org/lkml/2014/10/7/84

Thanks!
cyc

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: Fwd: [PATCH] x86, MCE, AMD: save IA32_MCi_STATUS before machine_check_poll() resets it

2014-10-21 Thread Chen Yucong

On Tue, 2014-10-21 at 22:28 +0200, Borislav Petkov wrote:
 On Thu, Oct 09, 2014 at 02:01:06PM -0500, Aravind Gopalakrishnan wrote:
  I actually agree with this approach. So no argument:)
 
 Ok, thanks, here's a patch.
 
 Btw, I'm pushing the whole queue to a ras-for-3.19 branch at
 https://git.kernel.org/cgit/linux/kernel/git/bp/bp.git if you'd like to
 take a look and see whether we haven't forgotten anything before I send
 it to tip guys.
 
Hi Boris,

Can you check the following link? The link contains my reply about 
"x86, MCE, AMD: Move invariant code out from loop body". The reply
was sent to you on October 7, but until now, there aren't any comments
from you!

https://lkml.org/lkml/2014/10/7/84

Thanks!
cyc

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] x86, MCE: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-10-10 Thread Chen Yucong

From: Chen Yucong 

dram_ce_error() stems from Boris's patch set. Thanks!
Link: http://lkml.org/lkml/2014/7/1/545

Uncorrected no action required (UCNA) - is a UCR error that is not
signaled via a machine check exception and, instead, is reported to
system software as a corrected machine check error. UCNA errors indicate
that some data in the system is corrupted, but the data has not been
consumed and the processor state is valid and you may continue execution
on this processor. UCNA errors require no action from system software
to continue execution. Note that UCNA errors are supported by the
processor only when IA32_MCG_CAP[24] (MCG_SER_P) is set.
   -- Intel SDM Volume 3B

Deferred errors are errors that cannot be corrected by hardware, but
do not cause an immediate interruption in program flow, loss of data
integrity, or corruption of processor state. These errors indicate
that data has been corrupted but not consumed. Hardware writes information
to the status and address registers in the corresponding bank that
identifies the source of the error if deferred errors are enabled for
logging. Deferred errors are not reported via machine check exceptions;
they can be seen by polling the MCi_STATUS registers.
-- AMD64 APM Volume 2

Above two items, both UCNA and Deferred errors belong to detected
errors, but they can't be corrected by hardware, and this is very
similar to Software Recoverable Action Optional (SRAO) errors.
Therefore, we can take some actions that have been used for handling
SRAO errors to handle UCNA and Deferred errors.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/include/asm/mce.h   |4 
 arch/x86/kernel/cpu/mcheck/mce.c |   39 ++
 2 files changed, 43 insertions(+)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 958b90f..c9ac7df4 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
 #define MCI_STATUS_S    (1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
 
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED (1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON   (1ULL<<43)  /* access poisonous data */
+
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
  * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 61a9668ce..4030c77 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,35 @@ static void mce_read_aux(struct mce *m, int i)
}
 }
 
+static bool dram_ce_error(struct mce *m)
+{
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   if (c->x86_vendor == X86_VENDOR_AMD) {
+   /* ErrCodeExt[20:16] */
+   u8 xec = (m->status >> 16) & 0x1f;
+
+   if (m->status & MCI_STATUS_DEFERRED)
+   return (xec == 0x0 || xec == 0x8);
+   } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+   /*
+* SDM Volume 3B - 15.9.2 Compound Error Codes (Table 15-9)
+*
+* Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+* indicating a memory error. But we can't just blindly check
+* bit 7 because if bit 8 is set, then this is a cache error,
+* and if bit 11 is set, then it is a bus/ interconnect error
+* - and either way bit 7 just gives more detail on what
+* cache/bus/interconnect error happened. Note that we can
+* ignore bit 12, as it's the "filter" bit.
+*/
+   if ((m->mcgcap & MCG_SER_P) && (m->status & MCI_STATUS_UC))
+   return (m->status & 0xef80) == BIT(7);
+   }
+
+   return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -630,6 +659,16 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
+
+   /*
+* In the cases where we don't have a valid address after all,
+* do not add it into the ring buffer.
+*/
+   if (dram_ce_error(&m) && (m.status & MCI_STATUS_ADDRV)) {
+   mce_ring_add(m.addr >> PAGE_SHIFT);
+   mce_schedule_work();
+   }
+
/*
 * Don't get the IP here because it's unlikely to
 * have anything to do with the actual error location.
-- 
1.7.10.4




--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] x86, MCE: support memory error recovery for both UCNA and Deferred error in machine_check_poll

2014-10-10 Thread Chen Yucong

From: Chen Yucong <sla...@gmail.com>

dram_ce_error() stems from Boris's patch set. Thanks!
Link: http://lkml.org/lkml/2014/7/1/545

Uncorrected no action required (UCNA) - is a UCR error that is not
signaled via a machine check exception and, instead, is reported to
system software as a corrected machine check error. UCNA errors indicate
that some data in the system is corrupted, but the data has not been
consumed and the processor state is valid and you may continue execution
on this processor. UCNA errors require no action from system software
to continue execution. Note that UCNA errors are supported by the
processor only when IA32_MCG_CAP[24] (MCG_SER_P) is set.
   -- Intel SDM Volume 3B

Deferred errors are errors that cannot be corrected by hardware, but
do not cause an immediate interruption in program flow, loss of data
integrity, or corruption of processor state. These errors indicate
that data has been corrupted but not consumed. Hardware writes information
to the status and address registers in the corresponding bank that
identifies the source of the error if deferred errors are enabled for
logging. Deferred errors are not reported via machine check exceptions;
they can be seen by polling the MCi_STATUS registers.
-- AMD64 APM Volume 2

Above two items, both UCNA and Deferred errors belong to detected
errors, but they can't be corrected by hardware, and this is very
similar to Software Recoverable Action Optional (SRAO) errors.
Therefore, we can take some actions that have been used for handling
SRAO errors to handle UCNA and Deferred errors.

Signed-off-by: Chen Yucong <sla...@gmail.com>
---
 arch/x86/include/asm/mce.h   |4 
 arch/x86/kernel/cpu/mcheck/mce.c |   39 ++
 2 files changed, 43 insertions(+)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 958b90f..c9ac7df4 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
 #define MCI_STATUS_S    (1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR   (1ULL<<55)  /* Action required */
 
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED (1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON   (1ULL<<43)  /* access poisonous data */
+
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
  * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 61a9668ce..4030c77 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,35 @@ static void mce_read_aux(struct mce *m, int i)
}
 }
 
+static bool dram_ce_error(struct mce *m)
+{
+   struct cpuinfo_x86 *c = &boot_cpu_data;
+
+   if (c->x86_vendor == X86_VENDOR_AMD) {
+   /* ErrCodeExt[20:16] */
+   u8 xec = (m->status >> 16) & 0x1f;
+
+   if (m->status & MCI_STATUS_DEFERRED)
+   return (xec == 0x0 || xec == 0x8);
+   } else if (c->x86_vendor == X86_VENDOR_INTEL) {
+   /*
+* SDM Volume 3B - 15.9.2 Compound Error Codes (Table 15-9)
+*
+* Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+* indicating a memory error. But we can't just blindly check
+* bit 7 because if bit 8 is set, then this is a cache error,
+* and if bit 11 is set, then it is a bus/ interconnect error
+* - and either way bit 7 just gives more detail on what
+* cache/bus/interconnect error happened. Note that we can
+* ignore bit 12, as it's the "filter" bit.
+*/
+   if ((m->mcgcap & MCG_SER_P) && (m->status & MCI_STATUS_UC))
+   return (m->status & 0xef80) == BIT(7);
+   }
+
+   return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -630,6 +659,16 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
 
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
+
+   /*
+* In the cases where we don't have a valid address after all,
+* do not add it into the ring buffer.
+*/
+   if (dram_ce_error(&m) && (m.status & MCI_STATUS_ADDRV)) {
+   mce_ring_add(m.addr >> PAGE_SHIFT);
+   mce_schedule_work();
+   }
+
/*
 * Don't get the IP here because it's unlikely to
 * have anything to do with the actual error location.
-- 
1.7.10.4




--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] x86, MCE, AMD: move invariant code out from loop body

2014-10-07 Thread Chen Yucong

On Mon, 2014-10-06 at 23:27 +0200, Borislav Petkov wrote:
> On Thu, Oct 02, 2014 at 11:20:12PM +0800, Chen Yucong wrote:
> > From: Chen Yucong 
> > Subject: [PATCH] x86, MCE, AMD: move invariant code out from loop body
> > 
> > "mce_threshold_vector = amd_threshold_interrupt;" is loop invariant code
> > in mce_amd_feature_init(). So it should be moved out from loop body.
> > 
> > Signed-off-by: Chen Yucong 
> > ---
> >  arch/x86/kernel/cpu/mcheck/mce_amd.c |3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
> > b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > index 5d4999f..f727701 100644
> > --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> > @@ -253,9 +253,10 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
> > }
> >  
> > mce_threshold_block_init(&b, offset);
> > -   mce_threshold_vector = amd_threshold_interrupt;
> > }
> > }
> > +
> > +   mce_threshold_vector = amd_threshold_interrupt;
> 
> Looking at this more, it is theoretically possible that we break out
> of the both loops without *any* thresholding registers detected and to
> still assign a thresholding interrupt vector which would be clearly
> wrong.
Yes! In this case, mce_threshold_vector should be `default_threshold_interrupt' 
rather than
amd_threshold_interrupt.
 
> Thus I think something like below should be much safer (I tried it with
> a label and goto already but it is uglier):
> 
> diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
> b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> index 9ce6499d..9af7bd74828b 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
> @@ -253,7 +253,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
>   }
>  
>   mce_threshold_block_init(&b, offset);
> - mce_threshold_vector = amd_threshold_interrupt;
> +
> + if (mce_threshold_vector != amd_threshold_interrupt)
> + mce_threshold_vector = amd_threshold_interrupt;
Perhaps the above assignment operation should be put into 

if (b.interrupt_capable) {
... ...

if (mce_threshold_vector != amd_threshold_interrupt)
mce_threshold_vector = amd_threshold_interrupt;
}

If IntP (Thresholding Interrupt Supported) bit is zero, this indicates that the 
reporting
of threshold overflow via interrupt isn't supported. So there's no need to 
execute the
above assignment operation. 

>   }
>   }
>  }
> 
> Looking at the asm, we still go and fetch those addresses so not really
> a win:
> 
>   cmpq$amd_threshold_interrupt, mce_threshold_vector(%rip)#, 
> mce_threshold_vector
>   je  .L235   #,
>   incl%r13d   # block
>   movq$amd_threshold_interrupt, mce_threshold_vector(%rip)#, 
> mce_threshold_vector
>   cmpl    $9, %r13d   #, block
> 
> but this way the code is relatively clean. Unless you can come up with
> a nicer, cleaner version to handle the breaking out in the success and
> failure case...
Seems like I don't have any better idea than this.

thx!
cyc


From: Chen Yucong 
Subject: [PATCH] x86, MCE, AMD: avoid inappropriate assignment operation in
 mce_amd_feature_init

Before executing "mce_threshold_vector = amd_threshold_interrupt;", a few
conditions should be checked for avoiding inappropriate assignment operations,
for example, IntP (Thresholding Interrupt Supported) bit of MCx_MISCi.

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5d4999f..31bf792 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -250,10 +250,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
if (b.interrupt_capable) {
int new = (high & MASK_LVTOFF_HI) >> 20;
offset  = setup_APIC_mce(offset, new);
+
+   if (offset == new &&
+mce_threshold_vector != 
amd_threshold_interrupt)
+   mce_threshold_vector = 
amd_threshold_interrupt;
}
 
mce_threshold_block_init(&b, offset);
-   mce_threshold_vector = amd_threshold_interrupt;
}
}
 }
-- 
1.7.10.4



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] x86, MCE, AMD: move invariant code out from loop body

2014-10-07 Thread Chen Yucong

On Mon, 2014-10-06 at 23:27 +0200, Borislav Petkov wrote:
 On Thu, Oct 02, 2014 at 11:20:12PM +0800, Chen Yucong wrote:
  From: Chen Yucong sla...@gmail.com
  Subject: [PATCH] x86, MCE, AMD: move invariant code out from loop body
  
  mce_threshold_vector = amd_threshold_interrupt; is loop invariant code
  in mce_amd_feature_init(). So it should be moved out from loop body.
  
  Signed-off-by: Chen Yucong sla...@gmail.com
  ---
   arch/x86/kernel/cpu/mcheck/mce_amd.c |3 ++-
   1 file changed, 2 insertions(+), 1 deletion(-)
  
  diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
  b/arch/x86/kernel/cpu/mcheck/mce_amd.c
  index 5d4999f..f727701 100644
  --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
  +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
  @@ -253,9 +253,10 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
  }
   
  mce_threshold_block_init(b, offset);
  -   mce_threshold_vector = amd_threshold_interrupt;
  }
  }
  +
  +   mce_threshold_vector = amd_threshold_interrupt;
 
 Looking at this more, it is theoretically possible that we break out
 of the both loops without *any* thresholding registers detected and to
 still assign a thresholding interrupt vector which would be clearly
 wrong.
Yes! In this case, mce_threshold_vector should be `default_threshold_interrupt' 
rather than
amd_threshold_interrupt.
 
 Thus I think something like below should be much safer (I tried it with
 a label and goto already but it is uglier):
 
 diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
 b/arch/x86/kernel/cpu/mcheck/mce_amd.c
 index 9ce6499d..9af7bd74828b 100644
 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
 +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
 @@ -253,7 +253,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
   }
  
   mce_threshold_block_init(b, offset);
 - mce_threshold_vector = amd_threshold_interrupt;
 +
 + if (mce_threshold_vector != amd_threshold_interrupt)
 + mce_threshold_vector = amd_threshold_interrupt;
Perhaps the above assignment operation should be put into 

if (b.interrupt_capable) {
... ...

if (mce_threshold_vector != amd_threshold_interrupt)
mce_threshold_vector = amd_threshold_interrupt;
}

If IntP (Thresholding Interrupt Supported) bit is zero, this indicates that the 
reporting
of threshold overflow via interrupt isn't supported. So there's no need to 
execute the
above assignment operation. 

   }
   }
  }
 
 Looking at the asm, we still go and fetch those addresses so not really
 a win:
 
   cmpq$amd_threshold_interrupt, mce_threshold_vector(%rip)#, 
 mce_threshold_vector
   je  .L235   #,
   incl%r13d   # block
   movq$amd_threshold_interrupt, mce_threshold_vector(%rip)#, 
 mce_threshold_vector
   cmpl$9, %r13d   #, block
 
 but this way the code is relatively clean. Unless you can come up with
 a nicer, cleaner version to handle the breaking out in the success and
 failure case...
Seems like I don't have any better idea than this.

thx!
cyc


From: Chen Yucong sla...@gmail.com
Subject: [PATCH] x86, MCE, AMD: avoid inappropriate assignment operation in
 mce_amd_feature_init

Before executing mce_threshold_vector = amd_threshold_interrupt;, a few
conditions should be checked for avoiding inappropriate assignment operations,
for example, IntP (Thresholding Interrupt Supported) bit of MCx_MISCi.

Signed-off-by: Chen Yucong sla...@gmail.com
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5d4999f..31bf792 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -250,10 +250,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
if (b.interrupt_capable) {
int new = (high & MASK_LVTOFF_HI) >> 20;
offset  = setup_APIC_mce(offset, new);
+
+   if (offset == new &&
+mce_threshold_vector != 
amd_threshold_interrupt)
+   mce_threshold_vector = 
amd_threshold_interrupt;
}
 
mce_threshold_block_init(b, offset);
-   mce_threshold_vector = amd_threshold_interrupt;
}
}
 }
-- 
1.7.10.4



--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] x86, MCE, AMD: move invariant code out from loop body

2014-10-02 Thread Chen Yucong

On Thu, 2014-10-02 at 16:38 +0200, Borislav Petkov wrote:
> 
> On Mon, Sep 22, 2014 at 09:11:00PM +0200, Borislav Petkov wrote:
> > On Mon, Sep 22, 2014 at 05:23:32PM +0800, Chen Yucong wrote:
> > >  Hi Boris,
> > > 
> > > I have found the following code snippet in mce_amd.c. 
> > > 
> > > /* cpu init entry point, called from mce.c with preempt off */
> > > void mce_amd_feature_init(struct cpuinfo_x86 *c)
> > > {
> > > ... ...
> > > for (bank = 0; bank < mca_cfg.banks; ++bank) {
> > > for (block = 0; block < NR_BLOCKS; ++block) {
> > > ... ...
> > > mce_threshold_block_init(&b, offset);
> > > mce_threshold_vector = amd_threshold_interrupt;
> > > }
> > > }
> > > }
> > > 
> > > Why should "mce_threshold_vector = amd_threshold_interrupt" be placed in
> > > the inner loop body?
> > 
> > Yeah, it was added sloppily with b276268631af3, I'm not surprised. Feel
> > free to send a fix.
> 
> do you still want to send a fix or should I fix it up quickly?
> 

From: Chen Yucong 
Subject: [PATCH] x86, MCE, AMD: move invariant code out from loop body

"mce_threshold_vector = amd_threshold_interrupt;" is loop invariant code
in mce_amd_feature_init(). So it should be moved out from loop body.

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5d4999f..f727701 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -253,9 +253,10 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
}
 
mce_threshold_block_init(&b, offset);
-   mce_threshold_vector = amd_threshold_interrupt;
}
}
+
+   mce_threshold_vector = amd_threshold_interrupt;
 }
 
 /*
-- 
1.7.10.4




--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] x86, MCE, AMD: save IA32_MCi_STATUS before machine_check_poll() resets it

2014-10-02 Thread Chen Yucong

On Thu, 2014-10-02 at 15:12 +0200, Borislav Petkov wrote:
> 
> Ok, this return is still bugging me - we're logging the error which
> caused the counter overflow but we go and explicitly clear _STATUS so
> that machine_check_poll doesn't pick up the same error again.
> 
> Even though, machine_check_poll is intended to log the thresholding
> error.
> 
> Which actually makes me think that that machine_check_poll is actually
> completely useless there. IOW, how about that instead: 

amd_threshold_interrupt() is just used for logging threshold events. And
any 'valid' threshold events can be checked/logged in loop body.
Moreover, machine_check_poll() is unable to check additional MCx_MISCi.
So I agree with you on this change.

Thanks!
cyc 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] x86, MCE, AMD: save IA32_MCi_STATUS before machine_check_poll() resets it

2014-10-02 Thread Chen Yucong

On Thu, 2014-10-02 at 15:12 +0200, Borislav Petkov wrote:
 
 Ok, this return is still bugging me - we're logging the error which
 caused the counter overflow but we go and explicitly clear _STATUS so
 that machine_check_poll doesn't pick up the same error again.
 
 Even though, machine_check_poll is intended to log the thresholding
 error.
 
 Which actually makes me think that that machine_check_poll is actually
 completely useless there. IOW, how about that instead: 

amd_threshold_interrupt() is just used for logging threshold events. And
any 'valid' threshold events can be checked/logged in loop body.
Moreover, machine_check_poll() is unable to check additional MCx_MISCi.
So I agree with you on this change.

Thanks!
cyc 

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

[PATCH] x86, MCE, AMD: move invariant code out from loop body

2014-10-02 Thread Chen Yucong

On Thu, 2014-10-02 at 16:38 +0200, Borislav Petkov wrote:
 
 On Mon, Sep 22, 2014 at 09:11:00PM +0200, Borislav Petkov wrote:
  On Mon, Sep 22, 2014 at 05:23:32PM +0800, Chen Yucong wrote:
Hi Boris,
   
   I have found the following code snippet in mce_amd.c. 
   
   /* cpu init entry point, called from mce.c with preempt off */
   void mce_amd_feature_init(struct cpuinfo_x86 *c)
   {
   ... ...
   for (bank = 0; bank < mca_cfg.banks; ++bank) {
   for (block = 0; block < NR_BLOCKS; ++block) {
   ... ...
   mce_threshold_block_init(b, offset);
   mce_threshold_vector = amd_threshold_interrupt;
   }
   }
   }
   
   Why should mce_threshold_vector = amd_threshold_interrupt be placed in
   the inner loop body?
  
  Yeah, it was added sloppily with b276268631af3, I'm not surprised. Feel
  free to send a fix.
 
 do you still want to send a fix or should I fix it up quickly?
 

From: Chen Yucong sla...@gmail.com
Subject: [PATCH] x86, MCE, AMD: move invariant code out from loop body

mce_threshold_vector = amd_threshold_interrupt; is loop invariant code
in mce_amd_feature_init(). So it should be moved out from loop body.

Signed-off-by: Chen Yucong sla...@gmail.com
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5d4999f..f727701 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -253,9 +253,10 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
}
 
mce_threshold_block_init(b, offset);
-   mce_threshold_vector = amd_threshold_interrupt;
}
}
+
+   mce_threshold_vector = amd_threshold_interrupt;
 }
 
 /*
-- 
1.7.10.4




--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] x86, MCE, AMD: save IA32_MCi_STATUS before machine_check_poll() resets it

2014-09-30 Thread Chen Yucong

kernel/cpu/mcheck/mce-inject.c
--- amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce-inject.c	2014-09-18 01:22:16.0 +0800
+++ linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce-inject.c	2014-09-30 22:38:30.138557839 +0800
@@ -54,7 +54,10 @@
 
 	memset(&b, 0xff, sizeof(mce_banks_t));
 	local_irq_save(flags);
-	machine_check_poll(0, &b);
+	if (!amd_inject)
+		machine_check_poll(0, &b);
+	else 
+		mce_threshold_vector();
 	local_irq_restore(flags);
 	m->finished = 0;
 }
diff -uNr amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/threshold.c linux-3.16.3/arch/x86/kernel/cpu/mcheck/threshold.c
--- amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/threshold.c	2014-09-18 01:22:16.0 +0800
+++ linux-3.16.3/arch/x86/kernel/cpu/mcheck/threshold.c	2014-10-01 08:49:06.140738192 +0800
@@ -17,6 +17,7 @@
 }
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
+EXPORT_SYMBOL_GPL(mce_threshold_vector);
 
 static inline void __smp_threshold_interrupt(void)
 {
/*
 * Copyright Chen Yucong 2014 
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include 
#include 
#include 
#include 
#include 
#include 
#include 

#define MASK_OVERFLOW  0x0001

/*
 * Update fake mce registers on current CPU.
 *
 * Copies the record *m into the per-CPU injectm slot selected by
 * m->extcpu. The slot's finished flag is cleared before anything else is
 * written and set to 1 only after the whole record has been copied, with a
 * full barrier (mb()) between each step, so the flag brackets the update.
 *
 * Side effect on the caller's record: m->finished is reset to 0 so that the
 * memcpy() below does not mark the slot finished prematurely.
 */
static void inject_mce(struct mce *m)
{
	/*
	 * The archived text read "_cpu(injectm, ...)"; the "&per" prefix of
	 * the per-CPU accessor was lost during extraction and is restored here.
	 */
	struct mce *i = &per_cpu(injectm, m->extcpu);

	/* Make sure no one reads partially written injectm */
	i->finished = 0;
	mb();
	m->finished = 0;
	/* First set the fields after finished */
	i->extcpu = m->extcpu;
	mb();
	/* Now write record in order, finished last (except above) */
	memcpy(i, m, sizeof(struct mce));
	/* Finally activate it */
	mb();
	i->finished = 1;
}

/*
 * Build a fake machine-check record for bank 4 with MASK_OVERFLOW set in
 * its misc value, store it through inject_mce(), and then call
 * raise_amd_threshold_event() to trigger handling of the injected record.
 */
static void raise_mce(void)
{
	struct mce m;

	/* Both helpers take the record by pointer; pass &m explicitly. */
	mce_setup(&m);
	/*
	 * NOTE(review): the status/misc constants below (and MASK_OVERFLOW)
	 * look truncated by the mail archive - confirm against the original
	 * posting before relying on the exact bit patterns.
	 */
	m.status = 0X8C00;
	m.misc = 0XC008 | MASK_OVERFLOW;
	//m.misc = 0XC008;
	m.bank = 4;
	m.addr = 0xabcdef;
	inject_mce(&m);

	raise_amd_threshold_event();
}

/*
 * Module entry point (registered with module_init() below).
 *
 * Raises the fake MCE once via raise_mce(), logs that the module was
 * loaded, and always returns 0.
 */
static int __init amd_inject_init(void)
{
	raise_mce();
	pr_info("amd_inject module loaded ...\n");

	return 0;
}

/*
 * Module exit hook (registered with module_exit() below).
 *
 * Only logs that the module was unloaded; nothing is torn down here.
 */
static void __exit amd_inject_exit(void)
{
	pr_info("amd_inject module unloaded ...\n");
}

module_init(amd_inject_init);
module_exit(amd_inject_exit);

/*
 * Cannot tolerate unloading currently because we cannot
 * guarantee all openers of mce_chrdev will get a reference to us.
 */
MODULE_LICENSE("GPL");

Re: [PATCH] x86, MCE, AMD: save IA32_MCi_STATUS before machine_check_poll() resets it

2014-09-30 Thread Chen Yucong

On Tue, 2014-09-30 at 12:09 +0200, Borislav Petkov wrote:
> On Tue, Sep 30, 2014 at 05:56:31PM +0800, Chen Yucong wrote:
> > I just clear it to avoid that the mce_log() call logs the above
> > threshold event again in machine_check_poll().
> 
> Ok, that's a good point, please put it in the commit message.
> 
> > It is just used for scanning other banks for recording other valid
> > error information.
> 
> This is actually not what we want - we want to log the errors which
> cause the overflow first and then the rest. So you don't need the goto
> but simply have the machine_check_poll() at the end. 


From: Chen Yucong 

machine_check_poll() will reset IA32_MCi_STATUS register to zero.
So we need to save the content of IA32_MCi_STATUS MSRs before
calling machine_check_poll() for logging threshold interrupt event.

mce_setup() does not gather the content of IA32_MCG_STATUS, so it
should be read explicitly. Moreover, we need to clear IA32_MCx_STATUS
so that mce_log() does not log the already-processed threshold event
again the next time.

Signed-off-by: Chen Yucong 
---
 arch/x86/kernel/cpu/mcheck/mce_amd.c |   18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c 
b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f8c56bd..643e6a2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -274,6 +274,7 @@ static void amd_threshold_interrupt(void)
struct mce m;
 
mce_setup(&m);
+   rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
 
/* assume first bank caused it */
for (bank = 0; bank < mca_cfg.banks; ++bank) {
@@ -305,24 +306,27 @@ static void amd_threshold_interrupt(void)
 (high & MASK_LOCKED_HI))
continue;
 
-   /*
-* Log the machine check that caused the threshold
-* event.
-*/
-   machine_check_poll(MCP_TIMESTAMP,
-   this_cpu_ptr(&mce_poll_banks));
-
if (high & MASK_OVERFLOW_HI) {
rdmsrl(address, m.misc);
rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
+   if (m.status & MCI_STATUS_ADDRV)
+   rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
m.bank = K8_MCE_THRESHOLD_BASE
   + bank * NR_BLOCKS
   + block;
mce_log(&m);
+
+   wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
return;
}
}
}
+
+   /*
+* Log the machine check that caused the threshold event.
+*/
+   machine_check_poll(MCP_TIMESTAMP,
+   this_cpu_ptr(&mce_poll_banks));
 }
 
 /*
-- 
1.7.10.4



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

1 2 3 >

1 - 100 of 218 matches

Mail list logo