RE: [PATCH 01/43] x86/decoder: Add new TEST instruction pattern

2017-12-01 Thread Robert Elliott (Persistent Memory)
> -----Original Message-----
> From: linux-kernel-ow...@vger.kernel.org [mailto:linux-kernel-
> ow...@vger.kernel.org] On Behalf Of Ingo Molnar
> Sent: Friday, November 24, 2017 3:14 AM
> To: linux-kernel@vger.kernel.org
> Subject: [PATCH 01/43] x86/decoder: Add new TEST instruction pattern
> 
> From: Masami Hiramatsu <mhira...@kernel.org>
> 
...
> diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
> index 12e377184ee4..c4d55919fac1 100644

I think this patch (commit 12a78d43de76, also posted for 3.18, 4.4, and 4.9) 
also needs to update these:
tools/objtool/arch/x86/lib/x86-opcode-map.txt
tools/perf/util/intel-pt-decoder/x86-opcode-map.txt

to avoid warnings like:

Warning: synced file at 'tools/objtool/arch/x86/lib/x86-opcode-map.txt' differs 
from latest kernel version at 'arch/x86/lib/x86-opcode-map.txt'
  LINK /home/user/linux/tools/objtool/objtool


---
Robert Elliott, HPE Persistent Memory




RE: [PATCH 01/43] x86/decoder: Add new TEST instruction pattern

2017-12-01 Thread Robert Elliott (Persistent Memory)
> -----Original Message-----
> From: linux-kernel-ow...@vger.kernel.org [mailto:linux-kernel-
> ow...@vger.kernel.org] On Behalf Of Ingo Molnar
> Sent: Friday, November 24, 2017 3:14 AM
> To: linux-kernel@vger.kernel.org
> Subject: [PATCH 01/43] x86/decoder: Add new TEST instruction pattern
> 
> From: Masami Hiramatsu <mhira...@kernel.org>
> 
...
> diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
> index 12e377184ee4..c4d55919fac1 100644

I think this patch (commit 12a78d43de76, also posted for 3.18, 4.4, and 4.9) 
also needs to update these:
tools/objtool/arch/x86/lib/x86-opcode-map.txt
tools/perf/util/intel-pt-decoder/x86-opcode-map.txt

to avoid warnings like:

Warning: synced file at 'tools/objtool/arch/x86/lib/x86-opcode-map.txt' differs 
from latest kernel version at 'arch/x86/lib/x86-opcode-map.txt'
  LINK /home/user/linux/tools/objtool/objtool


---
Robert Elliott, HPE Persistent Memory




[tip:efi/core] efi: Add Persistent Memory type name

2016-02-03 Thread tip-bot for Robert Elliott
Commit-ID:  35575e0e8ba633fc8276509a21f89b599b4f9006
Gitweb:     http://git.kernel.org/tip/35575e0e8ba633fc8276509a21f89b599b4f9006
Author:     Robert Elliott <elli...@hpe.com>
AuthorDate: Mon, 1 Feb 2016 22:07:07 +0000
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Wed, 3 Feb 2016 11:41:20 +0100

efi: Add Persistent Memory type name

Add the "Persistent Memory" string for type 14 introduced in
UEFI 2.5.  This is used when printing the UEFI memory map.

old:
  efi: mem61: [type=14|   |  |  |  |  |  | |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

new:
  efi: mem61: [Persistent Memory  |   |  |  |  |  |  | |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

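Signed-off-by: Robert Elliott <elli...@hpe.com>
Signed-off-by: Matt Fleming <m...@codeblueprint.co.uk>
Reviewed-by: Laszlo Ersek <ler...@redhat.com>
Cc: Andy Lutomirski <l...@amacapital.net>
Cc: Ard Biesheuvel <ard.biesheu...@linaro.org>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Brian Gerst <brge...@gmail.com>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: Denys Vlasenko <dvlas...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Ross Zwisler <ross.zwis...@linux.intel.com>
Cc: Taku Izumi <izumi.t...@jp.fujitsu.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: linux-...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1454364428-494-14-git-send-email-m...@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mi...@kernel.org>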
---
 drivers/firmware/efi/efi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index f437048..3a69ed5 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -555,7 +555,8 @@ static __initdata char memory_type_name[][20] = {
"ACPI Memory NVS",
"Memory Mapped I/O",
"MMIO Port Space",
-   "PAL Code"
+   "PAL Code",
+   "Persistent Memory",
 };
 
 char * __init efi_md_typeattr_format(char *buf, size_t size,


[tip:efi/core] efi: Add NV memory attribute

2016-02-03 Thread tip-bot for Robert Elliott
Commit-ID:  c016ca08f89c6c78ed815f025262bdb87aba3f4c
Gitweb:     http://git.kernel.org/tip/c016ca08f89c6c78ed815f025262bdb87aba3f4c
Author:     Robert Elliott <elli...@hpe.com>
AuthorDate: Mon, 1 Feb 2016 22:07:06 +0000
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Wed, 3 Feb 2016 11:41:20 +0100

efi: Add NV memory attribute

Add the NV memory attribute introduced in UEFI 2.5 and add a
column for it in the types and attributes string used when
printing the UEFI memory map.

old:
  efi: mem61: [type=14|   |  |  |  |  |  | |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

new:
  efi: mem61: [type=14|   |  |NV|  |  |  |  | |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

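Signed-off-by: Robert Elliott <elli...@hpe.com>
Signed-off-by: Matt Fleming <m...@codeblueprint.co.uk>
Reviewed-by: Laszlo Ersek <ler...@redhat.com>
Cc: Andy Lutomirski <l...@amacapital.net>
Cc: Ard Biesheuvel <ard.biesheu...@linaro.org>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Brian Gerst <brge...@gmail.com>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: Denys Vlasenko <dvlas...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Ross Zwisler <ross.zwis...@linux.intel.com>
Cc: Taku Izumi <izumi.t...@jp.fujitsu.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: linux-...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1454364428-494-13-git-send-email-m...@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mi...@kernel.org>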
---
 drivers/firmware/efi/efi.c | 5 ++++-
 include/linux/efi.h        | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 20451c2..f437048 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -582,13 +582,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t size,
if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
 EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO |
 EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP |
+EFI_MEMORY_NV |
 EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE))
snprintf(pos, size, "|attr=0x%016llx]",
 (unsigned long long)attr);
else
-   snprintf(pos, size, "|%3s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
+   snprintf(pos, size,
+"|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
 attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
 attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "",
+attr & EFI_MEMORY_NV  ? "NV"  : "",
 attr & EFI_MEMORY_XP  ? "XP"  : "",
 attr & EFI_MEMORY_RP  ? "RP"  : "",
 attr & EFI_MEMORY_WP  ? "WP"  : "",
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 09f1559..3c6cbbd 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -97,6 +97,7 @@ typedef   struct {
 #define EFI_MEMORY_WP  ((u64)0x0000000000001000ULL) /* write-protect */
 #define EFI_MEMORY_RP  ((u64)0x0000000000002000ULL) /* read-protect */
 #define EFI_MEMORY_XP  ((u64)0x0000000000004000ULL) /* execute-protect */
+#define EFI_MEMORY_NV  ((u64)0x0000000000008000ULL) /* non-volatile */
 #define EFI_MEMORY_MORE_RELIABLE \
                        ((u64)0x0000000000010000ULL) /* higher reliability */
 #define EFI_MEMORY_RO  ((u64)0x0000000000020000ULL) /* read-only */


[tip:efi/core] x86/efi: Show actual ending addresses in efi_print_memmap

2016-02-03 Thread tip-bot for Robert Elliott
Commit-ID:  1e82b94790709fb2a22d16d53bb04d751fb3878d
Gitweb:     http://git.kernel.org/tip/1e82b94790709fb2a22d16d53bb04d751fb3878d
Author:     Robert Elliott <elli...@hpe.com>
AuthorDate: Mon, 1 Feb 2016 22:07:05 +0000
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Wed, 3 Feb 2016 11:41:20 +0100

x86/efi: Show actual ending addresses in efi_print_memmap

Adjust efi_print_memmap to print the real end address of each
range, not 1 byte beyond. This matches other prints like those
for SRAT and nosave memory.

While investigating grub persistent memory corruption issues, it
was helpful to make this table match the ending address
convention used by:
* the kernel's e820 table prints
BIOS-e820: [mem 0x0000001680000000-0x0000001c7fffffff] reserved
* the kernel's nosave memory prints
PM: Registered nosave memory: [mem 0x880000000-0xc7fffffff]
* the kernel's ACPI System Resource Affinity Table prints
SRAT: Node 1 PXM 1 [mem 0x480000000-0x87fffffff]
* grub's lsmmap and lsefimmap commands
reserved  0000001680000000-0000001c7fffffff 00600000 24GiB UC WC WT WB NV
* the UEFI shell's memmap command
Reserved   000000007FC00000-000000007FFFFFFF 0000000000000400 0000000000000001

For example, if you grep all the various logs for c7fffffff, you
won't find the kernel's line if it uses c80000000.

Also, change the closing ) to ] to match the opening [.

old:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c80000000) (16384MB)

new:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff] (16384MB)

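Signed-off-by: Robert Elliott <elli...@hpe.com>
Signed-off-by: Matt Fleming <m...@codeblueprint.co.uk>
Reviewed-by: Laszlo Ersek <ler...@redhat.com>
Cc: Andy Lutomirski <l...@amacapital.net>
Cc: Ard Biesheuvel <ard.biesheu...@linaro.org>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Brian Gerst <brge...@gmail.com>
Cc: Denys Vlasenko <dvlas...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Leif Lindholm <leif.lindh...@linaro.org>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: linux-...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1454364428-494-12-git-send-email-m...@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mi...@kernel.org>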
---
 arch/x86/platform/efi/efi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index bdd9477..e80826e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -235,10 +235,10 @@ void __init efi_print_memmap(void)
char buf[64];
 
md = p;
-   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n",
+   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
i, efi_md_typeattr_format(buf, sizeof(buf), md),
md->phys_addr,
-   md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+   md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
}
 #endif  /*  EFI_DEBUG  */


[tip:efi/core] efi: Add Persistent Memory type name

2016-02-03 Thread tip-bot for Robert Elliott
Commit-ID:  35575e0e8ba633fc8276509a21f89b599b4f9006
Gitweb:     http://git.kernel.org/tip/35575e0e8ba633fc8276509a21f89b599b4f9006
Author:     Robert Elliott <elli...@hpe.com>
AuthorDate: Mon, 1 Feb 2016 22:07:07 +0000
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Wed, 3 Feb 2016 11:41:20 +0100

efi: Add Persistent Memory type name

Add the "Persistent Memory" string for type 14 introduced in
UEFI 2.5.  This is used when printing the UEFI memory map.

old:
  efi: mem61: [type=14|   |  |  |  |  |  | |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

new:
  efi: mem61: [Persistent Memory  |   |  |  |  |  |  | |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

Signed-off-by: Robert Elliott <elli...@hpe.com>
Signed-off-by: Matt Fleming <m...@codeblueprint.co.uk>
Reviewed-by: Laszlo Ersek <ler...@redhat.com>
Cc: Andy Lutomirski <l...@amacapital.net>
Cc: Ard Biesheuvel <ard.biesheu...@linaro.org>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Brian Gerst <brge...@gmail.com>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: Denys Vlasenko <dvlas...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Ross Zwisler <ross.zwis...@linux.intel.com>
Cc: Taku Izumi <izumi.t...@jp.fujitsu.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: linux-...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1454364428-494-14-git-send-email-m...@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 drivers/firmware/efi/efi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index f437048..3a69ed5 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -555,7 +555,8 @@ static __initdata char memory_type_name[][20] = {
"ACPI Memory NVS",
"Memory Mapped I/O",
"MMIO Port Space",
-   "PAL Code"
+   "PAL Code",
+   "Persistent Memory",
 };
 
 char * __init efi_md_typeattr_format(char *buf, size_t size,


[tip:efi/core] x86/efi: Show actual ending addresses in efi_print_memmap

2016-02-03 Thread tip-bot for Robert Elliott
Commit-ID:  1e82b94790709fb2a22d16d53bb04d751fb3878d
Gitweb:     http://git.kernel.org/tip/1e82b94790709fb2a22d16d53bb04d751fb3878d
Author:     Robert Elliott <elli...@hpe.com>
AuthorDate: Mon, 1 Feb 2016 22:07:05 +0000
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Wed, 3 Feb 2016 11:41:20 +0100

x86/efi: Show actual ending addresses in efi_print_memmap

Adjust efi_print_memmap to print the real end address of each
range, not 1 byte beyond. This matches other prints like those
for SRAT and nosave memory.

While investigating grub persistent memory corruption issues, it
was helpful to make this table match the ending address
convention used by:
* the kernel's e820 table prints
BIOS-e820: [mem 0x0000001680000000-0x0000001c7fffffff] reserved
* the kernel's nosave memory prints
PM: Registered nosave memory: [mem 0x880000000-0xc7fffffff]
* the kernel's ACPI System Resource Affinity Table prints
SRAT: Node 1 PXM 1 [mem 0x480000000-0x87fffffff]
* grub's lsmmap and lsefimmap commands
reserved  0000001680000000-0000001c7fffffff 00600000 24GiB UC WC WT WB NV
* the UEFI shell's memmap command
Reserved   000000007FC00000-000000007FFFFFFF 0000000000000400 0000000000000001

For example, if you grep all the various logs for c7fffffff, you
won't find the kernel's line if it uses c80000000.

Also, change the closing ) to ] to match the opening [.

old:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c80000000) (16384MB)

new:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff] (16384MB)

Signed-off-by: Robert Elliott <elli...@hpe.com>
Signed-off-by: Matt Fleming <m...@codeblueprint.co.uk>
Reviewed-by: Laszlo Ersek <ler...@redhat.com>
Cc: Andy Lutomirski <l...@amacapital.net>
Cc: Ard Biesheuvel <ard.biesheu...@linaro.org>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Brian Gerst <brge...@gmail.com>
Cc: Denys Vlasenko <dvlas...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Leif Lindholm <leif.lindh...@linaro.org>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: linux-...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1454364428-494-12-git-send-email-m...@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 arch/x86/platform/efi/efi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index bdd9477..e80826e 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -235,10 +235,10 @@ void __init efi_print_memmap(void)
char buf[64];
 
md = p;
-   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n",
+   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
i, efi_md_typeattr_format(buf, sizeof(buf), md),
md->phys_addr,
-   md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+   md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
}
 #endif  /*  EFI_DEBUG  */


[tip:efi/core] efi: Add NV memory attribute

2016-02-03 Thread tip-bot for Robert Elliott
Commit-ID:  c016ca08f89c6c78ed815f025262bdb87aba3f4c
Gitweb:     http://git.kernel.org/tip/c016ca08f89c6c78ed815f025262bdb87aba3f4c
Author:     Robert Elliott <elli...@hpe.com>
AuthorDate: Mon, 1 Feb 2016 22:07:06 +0000
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Wed, 3 Feb 2016 11:41:20 +0100

efi: Add NV memory attribute

Add the NV memory attribute introduced in UEFI 2.5 and add a
column for it in the types and attributes string used when
printing the UEFI memory map.

old:
  efi: mem61: [type=14|   |  |  |  |  |  | |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

new:
  efi: mem61: [type=14|   |  |NV|  |  |  |  | |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

Signed-off-by: Robert Elliott <elli...@hpe.com>
Signed-off-by: Matt Fleming <m...@codeblueprint.co.uk>
Reviewed-by: Laszlo Ersek <ler...@redhat.com>
Cc: Andy Lutomirski <l...@amacapital.net>
Cc: Ard Biesheuvel <ard.biesheu...@linaro.org>
Cc: Borislav Petkov <b...@alien8.de>
Cc: Brian Gerst <brge...@gmail.com>
Cc: Dan Williams <dan.j.willi...@intel.com>
Cc: Denys Vlasenko <dvlas...@redhat.com>
Cc: H. Peter Anvin <h...@zytor.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Ross Zwisler <ross.zwis...@linux.intel.com>
Cc: Taku Izumi <izumi.t...@jp.fujitsu.com>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: linux-...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1454364428-494-13-git-send-email-m...@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 drivers/firmware/efi/efi.c | 5 ++++-
 include/linux/efi.h        | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 20451c2..f437048 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -582,13 +582,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t size,
if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
 EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO |
 EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP |
+EFI_MEMORY_NV |
 EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE))
snprintf(pos, size, "|attr=0x%016llx]",
 (unsigned long long)attr);
else
-   snprintf(pos, size, "|%3s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
+   snprintf(pos, size,
+"|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
 attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
 attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "",
+attr & EFI_MEMORY_NV  ? "NV"  : "",
 attr & EFI_MEMORY_XP  ? "XP"  : "",
 attr & EFI_MEMORY_RP  ? "RP"  : "",
 attr & EFI_MEMORY_WP  ? "WP"  : "",
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 09f1559..3c6cbbd 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -97,6 +97,7 @@ typedef   struct {
 #define EFI_MEMORY_WP  ((u64)0x0000000000001000ULL) /* write-protect */
 #define EFI_MEMORY_RP  ((u64)0x0000000000002000ULL) /* read-protect */
 #define EFI_MEMORY_XP  ((u64)0x0000000000004000ULL) /* execute-protect */
+#define EFI_MEMORY_NV  ((u64)0x0000000000008000ULL) /* non-volatile */
 #define EFI_MEMORY_MORE_RELIABLE \
                        ((u64)0x0000000000010000ULL) /* higher reliability */
 #define EFI_MEMORY_RO  ((u64)0x0000000000020000ULL) /* read-only */


[PATCH v2 1/4] x86/efi: show actual ending addresses in efi_print_memmap

2015-12-23 Thread Robert Elliott
Adjust efi_print_memmap to print the real end address of each
range, not 1 byte beyond. This matches other prints like those for
SRAT and nosave memory.

While investigating grub persistent memory corruption issues, it
was helpful to make this table match the ending address convention
used by:
* the kernel's e820 table prints
BIOS-e820: [mem 0x0000001680000000-0x0000001c7fffffff] reserved
* the kernel's nosave memory prints
PM: Registered nosave memory: [mem 0x880000000-0xc7fffffff]
* the kernel's ACPI System Resource Affinity Table prints
SRAT: Node 1 PXM 1 [mem 0x480000000-0x87fffffff]
* grub's lsmmap and lsefimmap commands
reserved  0000001680000000-0000001c7fffffff 00600000 24GiB UC WC WT WB NV
* the UEFI shell's memmap command
Reserved   000000007FC00000-000000007FFFFFFF 0000000000000400 0000000000000001

For example, if you grep all the various logs for c7fffffff, you
won't find the kernel's line if it uses c80000000.

Also, change the closing ) to ] to match the opening [.

old:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c80000000) (16384MB)

new:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff] (16384MB)

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
Changes in v2:
 - Expanded rationale in the commit message
---
 arch/x86/platform/efi/efi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ad28540..635a955 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -235,10 +235,10 @@ void __init efi_print_memmap(void)
char buf[64];
 
md = p;
-   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n",
+   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
i, efi_md_typeattr_format(buf, sizeof(buf), md),
md->phys_addr,
-   md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+   md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
}
 #endif  /*  EFI_DEBUG  */
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Documentation/kernel-parameters: update KMG units

2015-12-23 Thread Robert Elliott
Since commit e004f3c7780d ("lib/cmdline.c: add size unit t/p/e to
memparse") expanded memparse() to support T, P, and E units in addition
to K, M, and G, all the kernel parameters that use that function became
capable of more than [KMG] mentioned in kernel-parameters.txt.

Expand the introduction to the units and change all existing [KMG]
descriptions to [KMGTPE].  cma only had [MG]; reservelow only had [K].

Add [KMGTPE] for hugepagesz and memory_corruption_check_size, which also
use memparse().

Update two source code files with comments mentioning [KMG].

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 Documentation/kernel-parameters.txt | 101 +++-
 kernel/crash_dump.c |   2 +-
 mm/page_alloc.c |   2 +-
 3 files changed, 56 insertions(+), 49 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 742f69d..3f77290 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -159,10 +159,16 @@ a fixed number of characters. This limit depends on the 
architecture
 and is between 256 and 4096 characters. It is defined in the file
 ./include/asm/setup.h as COMMAND_LINE_SIZE.
 
-Finally, the [KMG] suffix is commonly described after a number of kernel
-parameter values. These 'K', 'M', and 'G' letters represent the _binary_
-multipliers 'Kilo', 'Mega', and 'Giga', equalling 2^10, 2^20, and 2^30
-bytes respectively. Such letter suffixes can also be entirely omitted.
+Finally, the [KMGTPE] suffix is commonly described after a number
+of kernel parameter values. These letters represent the _binary_
+multipliers:
+   'K' = Ki (2^10)
+   'M' = Mi (2^20)
+   'G' = Gi (2^30)
+   'T' = Ti (2^40)
+   'P' = Pi (2^50)
+   'E' = Ei (2^60)
+Such letter suffixes can also be entirely omitted.
 
 
acpi=   [HW,ACPI,X86,ARM64]
@@ -663,8 +669,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
Also note the kernel might malfunction if you disable
some critical bits.
 
-   cma=nn[MG]@[start[MG][-end[MG]]]
-   [ARM,X86,KNL]
+   cma=nn[KMGTPE]@[start[KMGTPE][-end[KMGTPE]]] [ARM,X86,KNL]
Sets the size of kernel global memory area for
contiguous memory allocations and optionally the
placement constraint by the physical address range of
@@ -679,7 +684,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
a hypervisor.
Default: yes
 
-   coherent_pool=nn[KMG]   [ARM,KNL]
+   coherent_pool=nn[KMGTPE][ARM,KNL]
Sets the size of memory pool for coherent, atomic dma
allocations, by default set to 256K.
 
@@ -763,7 +768,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
Format:
<first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
 
-   crashkernel=size[KMG][@offset[KMG]]
+   crashkernel=size[KMGTPE][@offset[KMGTPE]]
[KNL] Using kexec, Linux can switch to a 'crash kernel'
upon panic. This parameter reserves the physical
memory region [offset, offset + size] for that kernel
@@ -775,18 +780,18 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
[KNL] Same as above, but depends on the memory
in the running system. The syntax of range is
start-[end] where start and end are both
-   a memory unit (amount[KMG]). See also
+   a memory unit (amount[KMGTPE]). See also
Documentation/kdump/kdump.txt for an example.
 
-   crashkernel=size[KMG],high
-   [KNL, x86_64] range could be above 4G. Allow kernel
+   crashkernel=size[KMGTPE],high [KNL, x86_64]
+   range could be above 4G. Allow kernel
to allocate physical memory region from top, so could
be above 4G if system have more than 4G ram installed.
Otherwise memory region will be allocated below 4G, if
available.
It will be ignored if crashkernel=X is specified.
-   crashkernel=size[KMG],low
-   [KNL, x86_64] range under 4G. When crashkernel=X,high
+   crashkernel=size[KMGTPE],low [KNL, x86_64]
+   range under 4G. When crashkernel=X,high
is passed, kernel could allocate physical memory region
above 4G, that cause second kernel crash on system
that require some amount of low memory, e.g. swiotlb
@@ -1111,7 +1116,9 @@ bytes respectively. Such letter suffixes can also be

[PATCH] init, Documentation: Remove ramdisk_blocksize mentions

2015-12-23 Thread Robert Elliott
The brd driver has never supported the ramdisk_blocksize kernel
parameter that was in the rd driver it replaced, so remove
mention of this parameter from comments and Documentation.

Commit 9db5579be4bb ("rewrite rd") replaced rd with brd, keeping
a brd_blocksize variable in struct brd_device but never using it.

Commit a2cba2913c76 ("brd: get rid of unused members from struct
brd_device") removed the unused variable.

Commit f5abc8e75815 ("Documentation/blockdev/ramdisk.txt: updates")
removed mentions of ramdisk_blocksize from that file.

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 Documentation/kernel-parameters.txt | 3 ---
 init/do_mounts_rd.c                 | 7 -------
 2 files changed, 10 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 742f69d..461686e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3050,9 +3050,6 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
raid=   [HW,RAID]
See Documentation/md.txt.
 
-   ramdisk_blocksize=  [RAM]
-   See Documentation/blockdev/ramdisk.txt.
-
ramdisk_size=   [RAM] Sizes of RAM disks in kilobytes
See Documentation/blockdev/ramdisk.txt.
 
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index e5d059e..8a09b32 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -216,13 +216,6 @@ int __init rd_load_image(char *from)
/*
 * NOTE NOTE: nblocks is not actually blocks but
 * the number of kibibytes of data to load into a ramdisk.
-* So any ramdisk block size that is a multiple of 1KiB should
-* work when the appropriate ramdisk_blocksize is specified
-* on the command line.
-*
-* The default ramdisk_blocksize is 1KiB and it is generally
-* silly to use anything else, so make sure to use 1KiB
-* blocksize while generating ext2fs ramdisk-images.
 */
if (sys_ioctl(out_fd, BLKGETSIZE, (unsigned long)&rd_blocks) < 0)
rd_blocks = 0;
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ext2, ext4: include filesystem block size in error messages

2015-12-23 Thread Robert Elliott
Print the problematic value in messages about the filesystem
block size.

Normalize all of the blocksize messages that use "blocksize" to
use "filesystem block size". This helps distinguish this block size
from the underlying block device's logical block size (i.e.,
sector size) and physical block size.

Example old messages:
EXT2-fs (pmem0): unable to set blocksize
EXT4-fs (pmem0): error: unsupported blocksize for dax
EXT4-fs (pmem0): unsupported blocksize for fs encryption
EXT2-fs (pmem0): bad blocksize %d

Example new message:
EXT4-fs (pmem0): error: unsupported filesystem block size 2048 for dax

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 fs/ext2/super.c | 17 +++++++++++------
 fs/ext4/super.c | 33 ++++++++++++++++++++-------------
 2 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 748d35a..1186a5b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include <linux/stringify.h>
 #include 
 #include "ext2.h"
 #include "xattr.h"
@@ -822,7 +823,9 @@ static int ext2_fill_super(struct super_block *sb, void 
*data, int silent)
 */
blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
if (!blocksize) {
-   ext2_msg(sb, KERN_ERR, "error: unable to set blocksize");
+   ext2_msg(sb, KERN_ERR,
+"error: unable to set filesystem block size "
+__stringify(BLOCK_SIZE));
goto failed_sbi;
}
 
@@ -921,7 +924,8 @@ static int ext2_fill_super(struct super_block *sb, void 
*data, int silent)
if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
if (blocksize != PAGE_SIZE) {
ext2_msg(sb, KERN_ERR,
-   "error: unsupported blocksize for dax");
+"error: unsupported filesystem block size %d for dax",
+blocksize);
goto failed_mount;
}
if (!sb->s_bdev->bd_disk->fops->direct_access) {
@@ -937,7 +941,7 @@ static int ext2_fill_super(struct super_block *sb, void 
*data, int silent)
 
if (!sb_set_blocksize(sb, blocksize)) {
ext2_msg(sb, KERN_ERR,
-   "error: bad blocksize %d", blocksize);
+"error: bad filesystem block size %d", blocksize);
goto failed_sbi;
}
 
@@ -1007,14 +1011,15 @@ static int ext2_fill_super(struct super_block *sb, void 
*data, int silent)
 
if (sb->s_blocksize != bh->b_size) {
if (!silent)
-   ext2_msg(sb, KERN_ERR, "error: unsupported blocksize");
+   ext2_msg(sb, KERN_ERR,
+"error: unsupported filesystem block size %lu",
+sb->s_blocksize);
goto failed_mount;
}
 
if (sb->s_blocksize != sbi->s_frag_size) {
ext2_msg(sb, KERN_ERR,
-   "error: fragsize %lu != blocksize %lu"
-   "(not supported yet)",
+"error: fragsize %lu != filesystem block size %lu (not supported yet)",
sbi->s_frag_size, sb->s_blocksize);
goto failed_mount;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c9ab67d..9e1c049 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -42,6 +42,7 @@
 
 #include 
 #include 
+#include <linux/stringify.h>
 
 #include "ext4.h"
 #include "ext4_extents.h"  /* Needed for trace points definition */
@@ -1750,8 +1751,9 @@ static int parse_options(char *options, struct 
super_block *sb,
BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
 
if (blocksize < PAGE_CACHE_SIZE) {
-   ext4_msg(sb, KERN_ERR, "can't mount with "
-"dioread_nolock if block size != PAGE_SIZE");
+   ext4_msg(sb, KERN_ERR,
+   "can't mount with dioread_nolock if filesystem 
block size %d != PAGE_SIZE",
+   blocksize);
return 0;
}
}
@@ -3147,7 +3149,9 @@ static int ext4_fill_super(struct super_block *sb, void 
*data, int silent)
ret = -EINVAL;
blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
if (!blocksize) {
-   ext4_msg(sb, KERN_ERR, "unable to set blocksize");
+   ext4_msg(sb, KERN_ERR,
+"unable to set filesystem block size "
+__stringify(EXT4_MIN_BLOCK_SIZE));

[PATCH v2 1/4] x86/efi: show actual ending addresses in efi_print_memmap

2015-12-23 Thread Robert Elliott
Adjust efi_print_memmap to print the real end address of each
range, not 1 byte beyond. This matches other prints like those for
SRAT and nosave memory.

While investigating grub persistent memory corruption issues, it
was helpful to make this table match the ending address convention
used by:
* the kernel's e820 table prints
BIOS-e820: [mem 0x0000001680000000-0x0000001c7fffffff] reserved
* the kernel's nosave memory prints
PM: Registered nosave memory: [mem 0x880000000-0xc7fffffff]
* the kernel's ACPI System Resource Affinity Table prints
SRAT: Node 1 PXM 1 [mem 0x480000000-0x87fffffff]
* grub's lsmmap and lsefimmap commands
reserved  0000001680000000-0000001c7fffffff 00600000 24GiB UC WC WT WB NV
* the UEFI shell's memmap command
Reserved   000000007FC00000-000000007FFFFFFF 0000000000000400 0000000000000001

For example, if you grep all the various logs for c7fffffff, you
won't find the kernel's line if it uses c80000000.

Also, change the closing ) to ] to match the opening [.

old:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c80000000) (16384MB)

new:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff] (16384MB)

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
Changes in v2:
 - Expanded rationale in the commit message
---
 arch/x86/platform/efi/efi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ad28540..635a955 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -235,10 +235,10 @@ void __init efi_print_memmap(void)
char buf[64];
 
md = p;
-   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n",
+   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
i, efi_md_typeattr_format(buf, sizeof(buf), md),
md->phys_addr,
-   md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+   md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
}
 #endif  /*  EFI_DEBUG  */
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ext2, ext4: include filesystem block size in error messages

2015-12-23 Thread Robert Elliott
Print the problematic value in messages about the filesystem
block size.

Normalize all of the blocksize messages that use "blocksize" to
use "filesystem block size". This helps distinguish this block size
from the underlying block device's logical block size (i.e.,
sector size) and physical block size.

Example old messages:
EXT2-fs (pmem0): unable to set blocksize
EXT4-fs (pmem0): error: unsupported blocksize for dax
EXT4-fs (pmem0): unsupported blocksize for fs encryption
EXT2-fs (pmem0): bad blocksize %d

Example new message:
EXT4-fs (pmem0): error: unsupported filesystem block size 2048 for dax

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 fs/ext2/super.c | 17 +++--
 fs/ext4/super.c | 33 -
 2 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 748d35a..1186a5b 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include "ext2.h"
 #include "xattr.h"
@@ -822,7 +823,9 @@ static int ext2_fill_super(struct super_block *sb, void 
*data, int silent)
 */
blocksize = sb_min_blocksize(sb, BLOCK_SIZE);
if (!blocksize) {
-   ext2_msg(sb, KERN_ERR, "error: unable to set blocksize");
+   ext2_msg(sb, KERN_ERR,
+"error: unable to set filesystem block size "
+__stringify(BLOCK_SIZE));
goto failed_sbi;
}
 
@@ -921,7 +924,8 @@ static int ext2_fill_super(struct super_block *sb, void 
*data, int silent)
if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
if (blocksize != PAGE_SIZE) {
ext2_msg(sb, KERN_ERR,
-   "error: unsupported blocksize for dax");
+"error: unsupported filesystem block size %d for dax",
+blocksize);
goto failed_mount;
}
if (!sb->s_bdev->bd_disk->fops->direct_access) {
@@ -937,7 +941,7 @@ static int ext2_fill_super(struct super_block *sb, void 
*data, int silent)
 
if (!sb_set_blocksize(sb, blocksize)) {
ext2_msg(sb, KERN_ERR,
-   "error: bad blocksize %d", blocksize);
+"error: bad filesystem block size %d", blocksize);
goto failed_sbi;
}
 
@@ -1007,14 +1011,15 @@ static int ext2_fill_super(struct super_block *sb, void 
*data, int silent)
 
if (sb->s_blocksize != bh->b_size) {
if (!silent)
-   ext2_msg(sb, KERN_ERR, "error: unsupported blocksize");
+   ext2_msg(sb, KERN_ERR,
+"error: unsupported filesystem block size %lu",
+sb->s_blocksize);
goto failed_mount;
}
 
if (sb->s_blocksize != sbi->s_frag_size) {
ext2_msg(sb, KERN_ERR,
-   "error: fragsize %lu != blocksize %lu"
-   "(not supported yet)",
+"error: fragsize %lu != filesystem block size %lu (not supported yet)",
sbi->s_frag_size, sb->s_blocksize);
goto failed_mount;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c9ab67d..9e1c049 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -42,6 +42,7 @@
 
 #include 
 #include 
+#include 
 
 #include "ext4.h"
 #include "ext4_extents.h"  /* Needed for trace points definition */
@@ -1750,8 +1751,9 @@ static int parse_options(char *options, struct 
super_block *sb,
BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
 
if (blocksize < PAGE_CACHE_SIZE) {
-   ext4_msg(sb, KERN_ERR, "can't mount with "
-"dioread_nolock if block size != PAGE_SIZE");
+   ext4_msg(sb, KERN_ERR,
+   "can't mount with dioread_nolock if filesystem 
block size %d != PAGE_SIZE",
+   blocksize);
return 0;
}
}
@@ -3147,7 +3149,9 @@ static int ext4_fill_super(struct super_block *sb, void 
*data, int silent)
ret = -EINVAL;
blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
if (!blocksize) {
-   ext4_msg(sb, KERN_ERR, "unable to set blocksize");
+   ext4_msg(sb, KERN_ERR,
+"unable to set filesystem block size "
+__stringify(EXT4_M

[PATCH] init, Documentation: Remove ramdisk_blocksize mentions

2015-12-23 Thread Robert Elliott
The brd driver has never supported the ramdisk_blocksize kernel
parameter that was in the rd driver it replaced, so remove
mention of this parameter from comments and Documentation.

Commit 9db5579be4bb ("rewrite rd") replaced rd with brd, keeping
a brd_blocksize variable in struct brd_device but never using it.

Commit a2cba2913c76 ("brd: get rid of unused members from struct
brd_device") removed the unused variable.

Commit f5abc8e75815 ("Documentation/blockdev/ramdisk.txt: updates")
removed mentions of ramdisk_blocksize from that file.

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 Documentation/kernel-parameters.txt | 3 ---
 init/do_mounts_rd.c | 7 ---
 2 files changed, 10 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 742f69d..461686e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3050,9 +3050,6 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
raid=   [HW,RAID]
See Documentation/md.txt.
 
-   ramdisk_blocksize=  [RAM]
-   See Documentation/blockdev/ramdisk.txt.
-
ramdisk_size=   [RAM] Sizes of RAM disks in kilobytes
See Documentation/blockdev/ramdisk.txt.
 
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index e5d059e..8a09b32 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -216,13 +216,6 @@ int __init rd_load_image(char *from)
/*
 * NOTE NOTE: nblocks is not actually blocks but
 * the number of kibibytes of data to load into a ramdisk.
-* So any ramdisk block size that is a multiple of 1KiB should
-* work when the appropriate ramdisk_blocksize is specified
-* on the command line.
-*
-* The default ramdisk_blocksize is 1KiB and it is generally
-* silly to use anything else, so make sure to use 1KiB
-* blocksize while generating ext2fs ramdisk-images.
 */
if (sys_ioctl(out_fd, BLKGETSIZE, (unsigned long)_blocks) < 0)
rd_blocks = 0;
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Documentation/kernel-parameters: update KMG units

2015-12-23 Thread Robert Elliott
Since commit e004f3c7780d ("lib/cmdline.c: add size unit t/p/e to
memparse") expanded memparse() to support T, P, and E units in addition
to K, M, and G, all the kernel parameters that use that function became
capable of more than [KMG] mentioned in kernel-parameters.txt.

Expand the introduction to the units and change all existing [KMG]
descriptions to [KMGTPE].  cma only had [MG]; reservelow only had [K].

Add [KMGTPE] for hugepagesz and memory_corruption_check_size, which also
use memparse().

Update two source code files with comments mentioning [KMG].

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 Documentation/kernel-parameters.txt | 101 +++-
 kernel/crash_dump.c |   2 +-
 mm/page_alloc.c |   2 +-
 3 files changed, 56 insertions(+), 49 deletions(-)

diff --git a/Documentation/kernel-parameters.txt 
b/Documentation/kernel-parameters.txt
index 742f69d..3f77290 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -159,10 +159,16 @@ a fixed number of characters. This limit depends on the 
architecture
 and is between 256 and 4096 characters. It is defined in the file
 ./include/asm/setup.h as COMMAND_LINE_SIZE.
 
-Finally, the [KMG] suffix is commonly described after a number of kernel
-parameter values. These 'K', 'M', and 'G' letters represent the _binary_
-multipliers 'Kilo', 'Mega', and 'Giga', equalling 2^10, 2^20, and 2^30
-bytes respectively. Such letter suffixes can also be entirely omitted.
+Finally, the [KMGTPE] suffix is commonly described after a number
+of kernel parameter values. These letters represent the _binary_
+multipliers:
+   'K' = Ki (2^10)
+   'M' = Mi (2^20)
+   'G' = Gi (2^30)
+   'T' = Ti (2^40)
+   'P' = Pi (2^50)
+   'E' = Ei (2^60)
+Such letter suffixes can also be entirely omitted.
 
 
acpi=   [HW,ACPI,X86,ARM64]
@@ -663,8 +669,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
Also note the kernel might malfunction if you disable
some critical bits.
 
-   cma=nn[MG]@[start[MG][-end[MG]]]
-   [ARM,X86,KNL]
+   cma=nn[KMGTPE]@[start[KMGTPE][-end[KMGTPE]]] [ARM,X86,KNL]
Sets the size of kernel global memory area for
contiguous memory allocations and optionally the
placement constraint by the physical address range of
@@ -679,7 +684,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
a hypervisor.
Default: yes
 
-   coherent_pool=nn[KMG]   [ARM,KNL]
+   coherent_pool=nn[KMGTPE][ARM,KNL]
Sets the size of memory pool for coherent, atomic dma
allocations, by default set to 256K.
 
@@ -763,7 +768,7 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
Format:
,,,[,]
 
-   crashkernel=size[KMG][@offset[KMG]]
+   crashkernel=size[KMGTPE][@offset[KMGTPE]]
[KNL] Using kexec, Linux can switch to a 'crash kernel'
upon panic. This parameter reserves the physical
memory region [offset, offset + size] for that kernel
@@ -775,18 +780,18 @@ bytes respectively. Such letter suffixes can also be 
entirely omitted.
[KNL] Same as above, but depends on the memory
in the running system. The syntax of range is
start-[end] where start and end are both
-   a memory unit (amount[KMG]). See also
+   a memory unit (amount[KMGTPE]). See also
Documentation/kdump/kdump.txt for an example.
 
-   crashkernel=size[KMG],high
-   [KNL, x86_64] range could be above 4G. Allow kernel
+   crashkernel=size[KMGTPE],high [KNL, x86_64]
+   range could be above 4G. Allow kernel
to allocate physical memory region from top, so could
be above 4G if system have more than 4G ram installed.
Otherwise memory region will be allocated below 4G, if
available.
It will be ignored if crashkernel=X is specified.
-   crashkernel=size[KMG],low
-   [KNL, x86_64] range under 4G. When crashkernel=X,high
+   crashkernel=size[KMGTPE],low [KNL, x86_64]
+   range under 4G. When crashkernel=X,high
is passed, kernel could allocate physical memory region
above 4G, that cause second kernel crash on system
that require some amount of low memory, e.g. swiotlb
@@ -,7 +1116,9 @@ bytes respectively. Such let

[PATCH 1/4] x86/efi: show actual ending addresses in efi_print_memmap

2015-12-17 Thread Robert Elliott
Adjust efi_print_memmap to print the real end address of each
range, not 1 byte beyond. This matches other prints like those for
SRAT and nosave memory.

Change the closing ) to ] to match the opening [.

old:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c80000000) (16384MB)

new:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff] (16384MB)

Example other address range prints:
SRAT: Node 1 PXM 1 [mem 0x480000000-0x87fffffff]
PM: Registered nosave memory: [mem 0x880000000-0xc7fffffff]

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 arch/x86/platform/efi/efi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ad28540..635a955 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -235,10 +235,10 @@ void __init efi_print_memmap(void)
char buf[64];
 
md = p;
-   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n",
+   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
i, efi_md_typeattr_format(buf, sizeof(buf), md),
md->phys_addr,
-   md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+   md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
}
 #endif  /*  EFI_DEBUG  */
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[no subject]

2015-12-17 Thread Robert Elliott
Date: Thu, 17 Dec 2015 17:40:55 -0600
Subject: [PATCH 0/4] x86/efi: support persistent memory in efi_print_memmap

This series adds support for persistent memory type and NV attribute 
in the efi_print_memmap function, which is only used if EFI_DEBUG is true
(which is the case for x86).

Robert Elliott (4):
  x86/efi: show actual ending addresses in efi_print_memmap
  efi: add NV memory attribute
  efi: add Persistent Memory type name
  x86/efi: print size and base in binary units in efi_print_memmap

 arch/x86/platform/efi/efi.c | 29 +
 drivers/firmware/efi/efi.c  |  8 ++--
 include/linux/efi.h |  1 +
 3 files changed, 32 insertions(+), 6 deletions(-)

-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/4] efi: add Persistent Memory type name

2015-12-17 Thread Robert Elliott
Add the "Persistent Memory" string for type 14 introduced in
UEFI 2.5.  This is used when printing the UEFI memory map.

old:
efi: mem61: [type=14|   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

new:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 drivers/firmware/efi/efi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 4dd5464..0b16e88 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -584,7 +584,8 @@ static __initdata char memory_type_name[][20] = {
"ACPI Memory NVS",
"Memory Mapped I/O",
"MMIO Port Space",
-   "PAL Code"
+   "PAL Code",
+   "Persistent Memory"
 };
 
 char * __init efi_md_typeattr_format(char *buf, size_t size,
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/4] efi: add NV memory attribute

2015-12-17 Thread Robert Elliott
Add the NV memory attribute introduced in UEFI 2.5 and add a column
for it in the types and attributes string used when printing the UEFI
memory map.

old:
efi: mem61: [type=14|   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

new:
efi: mem61: [type=14|   |  |NV|  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 drivers/firmware/efi/efi.c | 5 -
 include/linux/efi.h| 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 027ca21..4dd5464 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -611,13 +611,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t 
size,
if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
 EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO |
 EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP |
+EFI_MEMORY_NV |
 EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE))
snprintf(pos, size, "|attr=0x%016llx]",
 (unsigned long long)attr);
else
-   snprintf(pos, size, 
"|%3s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
+   snprintf(pos, size,
+"|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
 attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
 attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "",
+attr & EFI_MEMORY_NV  ? "NV"  : "",
 attr & EFI_MEMORY_XP  ? "XP"  : "",
 attr & EFI_MEMORY_RP  ? "RP"  : "",
 attr & EFI_MEMORY_WP  ? "WP"  : "",
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 569b5a8..9ce9e9e 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -97,6 +97,7 @@ typedef   struct {
 #define EFI_MEMORY_WP  ((u64)0x0000000000001000ULL)/* 
write-protect */
 #define EFI_MEMORY_RP  ((u64)0x0000000000002000ULL)/* read-protect 
*/
 #define EFI_MEMORY_XP  ((u64)0x0000000000004000ULL)/* 
execute-protect */
+#define EFI_MEMORY_NV  ((u64)0x0000000000008000ULL)/* non-volatile 
*/
 #define EFI_MEMORY_MORE_RELIABLE \
((u64)0x0000000000010000ULL)/* higher 
reliability */
 #define EFI_MEMORY_RO  ((u64)0x0000000000020000ULL)/* read-only */
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/4] x86/efi: print size and base in binary units in efi_print_memmap

2015-12-17 Thread Robert Elliott
Print the base address for each range in decimal alongside the size.
Use a "(size @ base)" format similar to the fake_memmap kernel parameter.

Print the range and base in the best-fit B, KiB, MiB, etc. units rather
than always MiB.  This avoids rounding, which can be misleading.

Use proper IEC binary units (KiB, MiB, etc.) rather than misuse SI
decimal units (KB, MB, etc.).

old:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

new:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff] (16 GiB @ 34 GiB)

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 arch/x86/platform/efi/efi.c | 27 ---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 635a955..030ba91 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -222,6 +222,25 @@ int __init efi_memblock_x86_reserve_range(void)
return 0;
 }
 
+char * __init efi_size_format(char *buf, size_t size, u64 bytes)
+{
+   if (!bytes || (bytes & 0x3ff))
+   snprintf(buf, size, "%llu B", bytes);
+   else if (bytes & 0xfffff)
+   snprintf(buf, size, "%llu KiB", bytes >> 10);
+   else if (bytes & 0x3fffffff)
+   snprintf(buf, size, "%llu MiB", bytes >> 20);
+   else if (bytes & 0xffffffffff)
+   snprintf(buf, size, "%llu GiB", bytes >> 30);
+   else if (bytes & 0x3ffffffffffff)
+   snprintf(buf, size, "%llu TiB", bytes >> 40);
+   else if (bytes & 0xfffffffffffffff)
+   snprintf(buf, size, "%llu PiB", bytes >> 50);
+   else
+   snprintf(buf, size, "%llu EiB", bytes >> 60);
+   return buf;
+}
+
 void __init efi_print_memmap(void)
 {
 #ifdef EFI_DEBUG
@@ -232,14 +251,16 @@ void __init efi_print_memmap(void)
for (p = memmap.map, i = 0;
 p < memmap.map_end;
 p += memmap.desc_size, i++) {
-   char buf[64];
+   char buf[64], buf2[32], buf3[32];
 
md = p;
-   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
+   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%s @ %s)\n",
i, efi_md_typeattr_format(buf, sizeof(buf), md),
md->phys_addr,
md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
-   (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
+   efi_size_format(buf3, sizeof(buf3),
+   md->num_pages << EFI_PAGE_SHIFT),
+   efi_size_format(buf2, sizeof(buf2), md->phys_addr));
}
 #endif  /*  EFI_DEBUG  */
 }
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 4/4] x86/efi: print size and base in binary units in efi_print_memmap

2015-12-17 Thread Robert Elliott
Print the base address for each range in decimal alongside the size.
Use a "(size @ base)" format similar to the fake_memmap kernel parameter.

Print the range and base in the best-fit B, KiB, MiB, etc. units rather
than always MiB.  This avoids rounding, which can be misleading.

Use proper IEC binary units (KiB, MiB, etc.) rather than misuse SI
decimal units (KB, MB, etc.).

old:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

new:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff] (16 GiB @ 34 GiB)

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 arch/x86/platform/efi/efi.c | 27 ---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 635a955..030ba91 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -222,6 +222,25 @@ int __init efi_memblock_x86_reserve_range(void)
return 0;
 }
 
+char * __init efi_size_format(char *buf, size_t size, u64 bytes)
+{
+   if (!bytes || (bytes & 0x3ff))
+   snprintf(buf, size, "%llu B", bytes);
+   else if (bytes & 0xfffff)
+   snprintf(buf, size, "%llu KiB", bytes >> 10);
+   else if (bytes & 0x3fffffff)
+   snprintf(buf, size, "%llu MiB", bytes >> 20);
+   else if (bytes & 0xffffffffff)
+   snprintf(buf, size, "%llu GiB", bytes >> 30);
+   else if (bytes & 0x3ffffffffffff)
+   snprintf(buf, size, "%llu TiB", bytes >> 40);
+   else if (bytes & 0xfffffffffffffff)
+   snprintf(buf, size, "%llu PiB", bytes >> 50);
+   else
+   snprintf(buf, size, "%llu EiB", bytes >> 60);
+   return buf;
+}
+
 void __init efi_print_memmap(void)
 {
 #ifdef EFI_DEBUG
@@ -232,14 +251,16 @@ void __init efi_print_memmap(void)
for (p = memmap.map, i = 0;
 p < memmap.map_end;
 p += memmap.desc_size, i++) {
-   char buf[64];
+   char buf[64], buf2[32], buf3[32];
 
md = p;
-   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
+   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%s @ %s)\n",
i, efi_md_typeattr_format(buf, sizeof(buf), md),
md->phys_addr,
md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
-   (md->num_pages >> (20 - EFI_PAGE_SHIFT)));
+   efi_size_format(buf3, sizeof(buf3),
+   md->num_pages << EFI_PAGE_SHIFT),
+   efi_size_format(buf2, sizeof(buf2), md->phys_addr));
}
 #endif  /*  EFI_DEBUG  */
 }
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/4] efi: add NV memory attribute

2015-12-17 Thread Robert Elliott
Add the NV memory attribute introduced in UEFI 2.5 and add a column
for it in the types and attributes string used when printing the UEFI
memory map.

old:
efi: mem61: [type=14|   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

new:
efi: mem61: [type=14|   |  |NV|  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 drivers/firmware/efi/efi.c | 5 -
 include/linux/efi.h| 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 027ca21..4dd5464 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -611,13 +611,16 @@ char * __init efi_md_typeattr_format(char *buf, size_t 
size,
if (attr & ~(EFI_MEMORY_UC | EFI_MEMORY_WC | EFI_MEMORY_WT |
 EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO |
 EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP |
+EFI_MEMORY_NV |
 EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE))
snprintf(pos, size, "|attr=0x%016llx]",
 (unsigned long long)attr);
else
-   snprintf(pos, size, 
"|%3s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
+   snprintf(pos, size,
+"|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]",
 attr & EFI_MEMORY_RUNTIME ? "RUN" : "",
 attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "",
+attr & EFI_MEMORY_NV  ? "NV"  : "",
 attr & EFI_MEMORY_XP  ? "XP"  : "",
 attr & EFI_MEMORY_RP  ? "RP"  : "",
 attr & EFI_MEMORY_WP  ? "WP"  : "",
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 569b5a8..9ce9e9e 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -97,6 +97,7 @@ typedef   struct {
 #define EFI_MEMORY_WP  ((u64)0x0000000000001000ULL)/* 
write-protect */
 #define EFI_MEMORY_RP  ((u64)0x0000000000002000ULL)/* read-protect 
*/
 #define EFI_MEMORY_XP  ((u64)0x0000000000004000ULL)/* 
execute-protect */
+#define EFI_MEMORY_NV  ((u64)0x0000000000008000ULL)/* non-volatile 
*/
 #define EFI_MEMORY_MORE_RELIABLE \
((u64)0x0000000000010000ULL)/* higher 
reliability */
 #define EFI_MEMORY_RO  ((u64)0x0000000000020000ULL)/* read-only */
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/4] x86/efi: show actual ending addresses in efi_print_memmap

2015-12-17 Thread Robert Elliott
Adjust efi_print_memmap to print the real end address of each
range, not 1 byte beyond. This matches other prints like those for
SRAT and nosave memory.

Change the closing ) to ] to match the opening [.

old:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c80000000) (16384MB)

new:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff] (16384MB)

Example other address range prints:
SRAT: Node 1 PXM 1 [mem 0x480000000-0x87fffffff]
PM: Registered nosave memory: [mem 0x880000000-0xc7fffffff]

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 arch/x86/platform/efi/efi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ad28540..635a955 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -235,10 +235,10 @@ void __init efi_print_memmap(void)
char buf[64];
 
md = p;
-   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx) (%lluMB)\n",
+   pr_info("mem%02u: %s range=[0x%016llx-0x%016llx] (%lluMB)\n",
i, efi_md_typeattr_format(buf, sizeof(buf), md),
md->phys_addr,
-   md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
+   md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1,
(md->num_pages >> (20 - EFI_PAGE_SHIFT)));
}
 #endif  /*  EFI_DEBUG  */
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[no subject]

2015-12-17 Thread Robert Elliott
Date: Thu, 17 Dec 2015 17:40:55 -0600
Subject: [PATCH 0/4] x86/efi: support persistent memory in efi_print_memmap

This series adds support for persistent memory type and NV attribute 
in the efi_print_memmap function, which is only used if EFI_DEBUG is true
(which is the case for x86).

Robert Elliott (4):
  x86/efi: show actual ending addresses in efi_print_memmap
  efi: add NV memory attribute
  efi: add Persistent Memory type name
  x86/efi: print size and base in binary units in efi_print_memmap

 arch/x86/platform/efi/efi.c | 29 +
 drivers/firmware/efi/efi.c  |  8 ++--
 include/linux/efi.h |  1 +
 3 files changed, 32 insertions(+), 6 deletions(-)

-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/4] efi: add Persistent Memory type name

2015-12-17 Thread Robert Elliott
Add the "Persistent Memory" string for type 14 introduced in
UEFI 2.5.  This is used when printing the UEFI memory map.

old:
efi: mem61: [type=14|   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

new:
efi: mem61: [Persistent Memory  |   |  |  |  |  |  |   |WB|WT|WC|UC] 
range=[0x0000000880000000-0x0000000c7fffffff) (16384MB)

Signed-off-by: Robert Elliott <elli...@hpe.com>
---
 drivers/firmware/efi/efi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 4dd5464..0b16e88 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -584,7 +584,8 @@ static __initdata char memory_type_name[][20] = {
"ACPI Memory NVS",
"Memory Mapped I/O",
"MMIO Port Space",
-   "PAL Code"
+   "PAL Code",
+   "Persistent Memory"
 };
 
 char * __init efi_md_typeattr_format(char *buf, size_t size,
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] block: return error if too many reserved tags are requested

2014-09-09 Thread Robert Elliott
From: Robert Elliott <elli...@hp.com>

Make blk_mq_alloc_tag_set return an error if set->reserved_tags
is greater than BLK_MQ_MAX_DEPTH minus the minimum number of
tags, since:
* set->queue_depth is truncated to that value
* set->reserved_tags needs to be less than set->queue_depth

Signed-off-by: Robert Elliott <elli...@hp.com>
---
 block/blk-mq.c |8 +---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index c49fe00..dc2970d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1936,16 +1936,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
return -EINVAL;
if (!set->queue_depth)
return -EINVAL;
+   if (set->reserved_tags > BLK_MQ_MAX_DEPTH - BLK_MQ_TAG_MIN)
+   return -EINVAL;
if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
return -EINVAL;
 
if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
return -EINVAL;
 
-   if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
+   if (set->queue_depth > BLK_MQ_MAX_DEPTH - set->reserved_tags) {
+   set->queue_depth = BLK_MQ_MAX_DEPTH - set->reserved_tags;
pr_info("blk-mq: reduced tag depth to %u\n",
-   BLK_MQ_MAX_DEPTH);
-   set->queue_depth = BLK_MQ_MAX_DEPTH;
+   set->queue_depth);
}
 
set->tags = kmalloc_node(set->nr_hw_queues *

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] block: rq_affinity default and reserved tag limits

2014-09-09 Thread Robert Elliott
The following series changes the default blk-mq rq_affinity
to handling completions on the submitting CPU, and handles
requests for too many reserved tags.

---

Robert Elliott (2):
  block: default to rq_affinity=2 for blk-mq
  block: return error if too many reserved tags are requested


 block/blk-mq.c |8 +---
 include/linux/blkdev.h |3 ++-
 2 files changed, 7 insertions(+), 4 deletions(-)

-- 
Robert Elliott, HP Server Storage
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] block: default to rq_affinity=2 for blk-mq

2014-09-09 Thread Robert Elliott
From: Robert Elliott <elli...@hp.com>

One change introduced by blk-mq is that it does all
the completion work in hard irq context rather than
soft irq context.

On a 6 core system, if all interrupts are routed to
one CPU, then you can easily run into this:
* 5 CPUs submitting IOs
* 1 CPU spending 100% of its time in hard irq context
processing IO completions, not able to submit anything
itself

Example with CPU5 receiving all interrupts:
   CPU usage:   CPU0   CPU1   CPU2   CPU3   CPU4   CPU5
%usr:   0.00   3.03   1.01   2.02   2.00   0.00
%sys:  14.58  75.76  14.14   4.04  78.00   0.00
%irq:   0.00   0.00   0.00   1.01   0.00 100.00
   %soft:   0.00   0.00   0.00   0.00   0.00   0.00
%iowait idle:  85.42  21.21  84.85  92.93  20.00   0.00
   %idle:   0.00   0.00   0.00   0.00   0.00   0.00

When the submitting CPUs are forced to process their own
completion interrupts, this steals time from new
submissions and self-throttles them.

Without that, there is no direct feedback to the
submitters to slow down.  The only feedback is:
* reaching max queue depth
* lots of timeouts, resulting in aborts, resets, soft
  lockups and self-detected stalls on CPU5, bogus
  clocksource tsc unstable reports, network
  drop-offs, etc.

The SCSI LLD can set affinity_hint for each of its
interrupts to request that a program like irqbalance
route the interrupts back to the submitting CPU.
The latest version of irqbalance ignores those hints,
though, instead offering an option to run a policy
script that could honor them. Otherwise, it balances
them based on its own algorithms. So, we cannot rely
on this.

Hardware might perform interrupt coalescing to help,
but it cannot help 1 CPU keep up with the work
generated by many other CPUs.

rq_affinity=2 helps by pushing most of the block layer
and SCSI midlayer completion work back to the submitting
CPU (via an IPI).

Change the default rq_affinity=2 under blk-mq
so there's at least some feedback to slow down the
submitters.

Signed-off-by: Robert Elliott <elli...@hp.com>
---
 include/linux/blkdev.h |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 518b465..9f41a02 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -522,7 +522,8 @@ struct request_queue {
 (1 << QUEUE_FLAG_ADD_RANDOM))
 
 #define QUEUE_FLAG_MQ_DEFAULT  ((1 << QUEUE_FLAG_IO_STAT) |\
-(1 << QUEUE_FLAG_SAME_COMP))
+(1 << QUEUE_FLAG_SAME_COMP)|   \
+(1 << QUEUE_FLAG_SAME_FORCE))
 
 static inline void queue_lockdep_assert_held(struct request_queue *q)
 {

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] block: rq_affinity default and reserved tag limits

2014-09-09 Thread Robert Elliott
The following series changes the default blk-mq rq_affinity
to handle completions on the submitting CPU, and handles
requests for too many reserved tags.

---

Robert Elliott (2):
  block: default to rq_affinity=2 for blk-mq
  block: return error if too many reserved tags are requested


 block/blk-mq.c |8 +---
 include/linux/blkdev.h |3 ++-
 2 files changed, 7 insertions(+), 4 deletions(-)

-- 
Robert Elliott, HP Server Storage
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] block: default to rq_affinity=2 for blk-mq

2014-09-09 Thread Robert Elliott
From: Robert Elliott elli...@hp.com

One change introduced by blk-mq is that it does all
the completion work in hard irq context rather than
soft irq context.

On a 6 core system, if all interrupts are routed to
one CPU, then you can easily run into this:
* 5 CPUs submitting IOs
* 1 CPU spending 100% of its time in hard irq context
processing IO completions, not able to submit anything
itself

Example with CPU5 receiving all interrupts:
   CPU usage:   CPU0   CPU1   CPU2   CPU3   CPU4   CPU5
        %usr:   0.00   3.03   1.01   2.02   2.00   0.00
        %sys:  14.58  75.76  14.14   4.04  78.00   0.00
        %irq:   0.00   0.00   0.00   1.01   0.00 100.00
       %soft:   0.00   0.00   0.00   0.00   0.00   0.00
%iowait idle:  85.42  21.21  84.85  92.93  20.00   0.00
       %idle:   0.00   0.00   0.00   0.00   0.00   0.00

When the submitting CPUs are forced to process their own
completion interrupts, this steals time from new
submissions and self-throttles them.

Without that, there is no direct feedback to the
submitters to slow down.  The only feedback is:
* reaching max queue depth
* lots of timeouts, resulting in aborts, resets, soft
  lockups and self-detected stalls on CPU5, bogus
  clocksource tsc unstable reports, network
  drop-offs, etc.

The SCSI LLD can set affinity_hint for each of its
interrupts to request that a program like irqbalance
route the interrupts back to the submitting CPU.
The latest version of irqbalance ignores those hints,
though, instead offering an option to run a policy
script that could honor them. Otherwise, it balances
them based on its own algorithms. So, we cannot rely
on this.

Hardware might perform interrupt coalescing to help,
but it cannot help 1 CPU keep up with the work
generated by many other CPUs.

rq_affinity=2 helps by pushing most of the block layer
and SCSI midlayer completion work back to the submitting
CPU (via an IPI).

Change the default to rq_affinity=2 under blk-mq
so there's at least some feedback to slow down the
submitters.

Signed-off-by: Robert Elliott elli...@hp.com
---
 include/linux/blkdev.h |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 518b465..9f41a02 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -522,7 +522,8 @@ struct request_queue {
 (1 << QUEUE_FLAG_ADD_RANDOM))
 
 #define QUEUE_FLAG_MQ_DEFAULT  ((1 << QUEUE_FLAG_IO_STAT) |\
-(1 << QUEUE_FLAG_SAME_COMP))
+(1 << QUEUE_FLAG_SAME_COMP)|   \
+(1 << QUEUE_FLAG_SAME_FORCE))
 
 static inline void queue_lockdep_assert_held(struct request_queue *q)
 {

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] block: return error if too many reserved tags are requested

2014-09-09 Thread Robert Elliott
From: Robert Elliott elli...@hp.com

Make blk_mq_alloc_tag_set return an error if set->reserved_tags
is greater than BLK_MQ_MAX_DEPTH minus the minimum number of
tags, since:
* set->queue_depth is truncated to that value
* set->reserved_tags needs to be less than set->queue_depth

Signed-off-by: Robert Elliott elli...@hp.com
---
 block/blk-mq.c |8 +---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index c49fe00..dc2970d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1936,16 +1936,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
return -EINVAL;
if (!set->queue_depth)
return -EINVAL;
+   if (set->reserved_tags > BLK_MQ_MAX_DEPTH - BLK_MQ_TAG_MIN)
+   return -EINVAL;
if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
return -EINVAL;
 
if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
return -EINVAL;
 
-   if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
+   if (set->queue_depth > BLK_MQ_MAX_DEPTH - set->reserved_tags) {
+   set->queue_depth = BLK_MQ_MAX_DEPTH - set->reserved_tags;
pr_info("blk-mq: reduced tag depth to %u\n",
-   BLK_MQ_MAX_DEPTH);
-   set->queue_depth = BLK_MQ_MAX_DEPTH;
+   set->queue_depth);
}
 
set->tags = kmalloc_node(set->nr_hw_queues *

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] fs: merge I/O error prints into one line

2014-08-27 Thread Robert Elliott
The following series merges I/O error prints into one
line and makes rate limited messages clearer.

---

Robert Elliott (2):
  fs: merge I/O error prints into one line
  fs: clarify rate limit suppressed buffer I/O errors


 fs/buffer.c |   38 +-
 1 files changed, 9 insertions(+), 29 deletions(-)

-- 
Rob Elliott, HP Server Storage  elli...@hp.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] fs: clarify rate limit suppressed buffer I/O errors

2014-08-27 Thread Robert Elliott
When quiet_error applies rate limiting to buffer_io_error calls, what the
"callbacks suppressed" messages apply to is unclear because the name is so generic, particularly
if the messages are interleaved with others:

[ 1936.063572] quiet_error: 664293 callbacks suppressed
[ 1936.065297] Buffer I/O error on dev sdr, logical block 257429952, lost async 
page write
[ 1936.067814] Buffer I/O error on dev sdr, logical block 257429953, lost async 
page write

Also, the function uses printk_ratelimit(), although printk.h includes a
comment advising "Please don't use... Instead use printk_ratelimited()."

Change buffer_io_error to check the BH_Quiet bit itself, drop the
printk_ratelimit call, and print using printk_ratelimited.

This makes the messages look like:

[  387.208839] buffer_io_error: 676394 callbacks suppressed
[  387.210693] Buffer I/O error on dev sdr, logical block 211291776, lost async 
page write
[  387.213432] Buffer I/O error on dev sdr, logical block 211291777, lost async 
page write

Signed-off-by: Robert Elliott 
Reviewed-by: Webb Scales 
---
 fs/buffer.c |   23 +++
 1 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index c6cb0ee..3710a68 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -128,19 +128,13 @@ __clear_page_buffers(struct page *page)
page_cache_release(page);
 }
 
-
-static int quiet_error(struct buffer_head *bh)
-{
-   if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
-   return 0;
-   return 1;
-}
-
-
 static void buffer_io_error(struct buffer_head *bh, char *msg)
 {
char b[BDEVNAME_SIZE];
-   printk(KERN_ERR "Buffer I/O error on dev %s, logical block %llu%s\n",
+
+   if (!test_bit(BH_Quiet, &bh->b_state))
+   printk_ratelimited(KERN_ERR
+   "Buffer I/O error on dev %s, logical block %llu%s\n",
bdevname(bh->b_bdev, b),
(unsigned long long)bh->b_blocknr, msg);
 }
@@ -180,8 +174,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int 
uptodate)
if (uptodate) {
set_buffer_uptodate(bh);
} else {
-   if (!quiet_error(bh))
-   buffer_io_error(bh, ", lost sync page write");
+   buffer_io_error(bh, ", lost sync page write");
set_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);
}
@@ -298,8 +291,7 @@ static void end_buffer_async_read(struct buffer_head *bh, 
int uptodate)
set_buffer_uptodate(bh);
} else {
clear_buffer_uptodate(bh);
-   if (!quiet_error(bh))
-   buffer_io_error(bh, ", async page read");
+   buffer_io_error(bh, ", async page read");
SetPageError(page);
}
 
@@ -358,8 +350,7 @@ void end_buffer_async_write(struct buffer_head *bh, int 
uptodate)
if (uptodate) {
set_buffer_uptodate(bh);
} else {
-   if (!quiet_error(bh))
-   buffer_io_error(bh, ", lost async page write");
+   buffer_io_error(bh, ", lost async page write");
set_bit(AS_EIO, &page->mapping->flags);
set_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] block: make blk_update_request print prefix match ratelimited prefix

2014-08-27 Thread Robert Elliott
In blk_update_request, change the printk_ratelimited
prefix from end_request to blk_update_request so it
matches the name printed if rate limiting occurs.

Old:
[10234.933106] blk_update_request: 174 callbacks suppressed
[10234.934940] end_request: critical target error, dev sdr, sector 16
[10234.949788] end_request: critical target error, dev sdr, sector 16

New:
[16863.445173] blk_update_request: 398 callbacks suppressed
[16863.447029] blk_update_request: critical target error, dev sdr, sector
1442066176
[16863.449383] blk_update_request: critical target error, dev sdr, sector
802802888
[16863.451680] blk_update_request: critical target error, dev sdr, sector
1609535456

Signed-off-by: Robert Elliott 
Reviewed-by: Webb Scales 
---
 block/blk-core.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index c359d72..9c5a5b9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2450,8 +2450,8 @@ bool blk_update_request(struct request *req, int error, 
unsigned int nr_bytes)
error_type = "I/O";
break;
}
-   printk_ratelimited(KERN_ERR "end_request: %s error, dev %s, 
sector %llu\n",
-  error_type, req->rq_disk ?
+   printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector 
%llu\n",
+  __func__, error_type, req->rq_disk ?
   req->rq_disk->disk_name : "?",
   (unsigned long long)blk_rq_pos(req));
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] block: improve I/O error print consistency

2014-08-27 Thread Robert Elliott
The following series improves some of the ratelimited
I/O error prints so what is being ratelimited is clearer.

---

Robert Elliott (2):
  block: make blk_update_request print prefix match ratelimited prefix
  block: include func name in __get_request prints


 block/blk-core.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

-- 
Rob Elliott, HP Server Storage  elli...@hp.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] fs: merge I/O error prints into one line

2014-08-27 Thread Robert Elliott
buffer.c uses two printk calls to print these messages:
[67353.422338] Buffer I/O error on device sdr, logical block 212868488
[67353.422338] lost page write due to I/O error on sdr

In a busy system, they may be interleaved with other prints,
losing the context for the second message.  Merge them into
one line with one printk call so the prints are atomic.

Also, differentiate between async page writes, sync page writes, and
async page reads.

Also, shorten "device" to "dev" to match the block layer prints:
[67353.467906] blk_update_request: critical target error, dev sdr, sector
1707107328

Also, use %llu rather than %Lu.

Resulting prints look like:
[ 1356.437006] blk_update_request: critical target error, dev sdr, sector 
1719693992
[ 1361.383522] quiet_error: 659876 callbacks suppressed
[ 1361.385816] Buffer I/O error on dev sdr, logical block 256902912, lost async 
page write
[ 1361.385819] Buffer I/O error on dev sdr, logical block 256903644, lost async 
page write

Signed-off-by: Robert Elliott 
Reviewed-by: Webb Scales 
---
 fs/buffer.c |   27 ---
 1 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 8f05111..c6cb0ee 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -137,12 +137,12 @@ static int quiet_error(struct buffer_head *bh)
 }
 
 
-static void buffer_io_error(struct buffer_head *bh)
+static void buffer_io_error(struct buffer_head *bh, char *msg)
 {
char b[BDEVNAME_SIZE];
-   printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
+   printk(KERN_ERR "Buffer I/O error on dev %s, logical block %llu%s\n",
bdevname(bh->b_bdev, b),
-   (unsigned long long)bh->b_blocknr);
+   (unsigned long long)bh->b_blocknr, msg);
 }
 
 /*
@@ -177,17 +177,11 @@ EXPORT_SYMBOL(end_buffer_read_sync);
 
 void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 {
-   char b[BDEVNAME_SIZE];
-
if (uptodate) {
set_buffer_uptodate(bh);
} else {
-   if (!quiet_error(bh)) {
-   buffer_io_error(bh);
-   printk(KERN_WARNING "lost page write due to "
-   "I/O error on %s\n",
-  bdevname(bh->b_bdev, b));
-   }
+   if (!quiet_error(bh))
+   buffer_io_error(bh, ", lost sync page write");
set_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);
}
@@ -305,7 +299,7 @@ static void end_buffer_async_read(struct buffer_head *bh, 
int uptodate)
} else {
clear_buffer_uptodate(bh);
if (!quiet_error(bh))
-   buffer_io_error(bh);
+   buffer_io_error(bh, ", async page read");
SetPageError(page);
}
 
@@ -353,7 +347,6 @@ still_busy:
  */
 void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 {
-   char b[BDEVNAME_SIZE];
unsigned long flags;
struct buffer_head *first;
struct buffer_head *tmp;
@@ -365,12 +358,8 @@ void end_buffer_async_write(struct buffer_head *bh, int 
uptodate)
if (uptodate) {
set_buffer_uptodate(bh);
} else {
-   if (!quiet_error(bh)) {
-   buffer_io_error(bh);
-   printk(KERN_WARNING "lost page write due to "
-   "I/O error on %s\n",
-  bdevname(bh->b_bdev, b));
-   }
+   if (!quiet_error(bh))
+   buffer_io_error(bh, ", lost async page write");
set_bit(AS_EIO, &page->mapping->flags);
set_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] block: include func name in __get_request prints

2014-08-27 Thread Robert Elliott
In __get_request calls to printk_ratelimited, include the function name so
the callbacks suppressed message matches the messages that are printed,
and add "dev" before the device name so it matches other block layer
messages.

Signed-off-by: Robert Elliott 
Reviewed-by: Webb Scales 
---
 block/blk-core.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 9c5a5b9..204cbd3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1065,8 +1065,8 @@ fail_elvpriv:
 * shouldn't stall IO.  Treat this request as !elvpriv.  This will
 * disturb iosched and blkcg but weird is bettern than dead.
 */
-   printk_ratelimited(KERN_WARNING "%s: request aux data allocation 
failed, iosched may be disturbed\n",
-  dev_name(q->backing_dev_info.dev));
+   printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data 
allocation failed, iosched may be disturbed\n",
+  __func__, dev_name(q->backing_dev_info.dev));
 
rq->cmd_flags &= ~REQ_ELVPRIV;
rq->elv.icq = NULL;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] fs: merge I/O error prints into one line

2014-08-27 Thread Robert Elliott
buffer.c uses two printk calls to print these messages:
[67353.422338] Buffer I/O error on device sdr, logical block 212868488
[67353.422338] lost page write due to I/O error on sdr

In a busy system, they may be interleaved with other prints,
losing the context for the second message.  Merge them into
one line with one printk call so the prints are atomic.

Also, differentiate between async page writes, sync page writes, and
async page reads.

Also, shorten "device" to "dev" to match the block layer prints:
[67353.467906] blk_update_request: critical target error, dev sdr, sector
1707107328

Also, use %llu rather than %Lu.

Resulting prints look like:
[ 1356.437006] blk_update_request: critical target error, dev sdr, sector 
1719693992
[ 1361.383522] quiet_error: 659876 callbacks suppressed
[ 1361.385816] Buffer I/O error on dev sdr, logical block 256902912, lost async 
page write
[ 1361.385819] Buffer I/O error on dev sdr, logical block 256903644, lost async 
page write

Signed-off-by: Robert Elliott elli...@hp.com
Reviewed-by: Webb Scales web...@hp.com
---
 fs/buffer.c |   27 ---
 1 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index 8f05111..c6cb0ee 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -137,12 +137,12 @@ static int quiet_error(struct buffer_head *bh)
 }
 
 
-static void buffer_io_error(struct buffer_head *bh)
+static void buffer_io_error(struct buffer_head *bh, char *msg)
 {
char b[BDEVNAME_SIZE];
-   printk(KERN_ERR "Buffer I/O error on device %s, logical block %Lu\n",
+   printk(KERN_ERR "Buffer I/O error on dev %s, logical block %llu%s\n",
bdevname(bh->b_bdev, b),
-   (unsigned long long)bh->b_blocknr);
+   (unsigned long long)bh->b_blocknr, msg);
 }
 
 /*
@@ -177,17 +177,11 @@ EXPORT_SYMBOL(end_buffer_read_sync);
 
 void end_buffer_write_sync(struct buffer_head *bh, int uptodate)
 {
-   char b[BDEVNAME_SIZE];
-
if (uptodate) {
set_buffer_uptodate(bh);
} else {
-   if (!quiet_error(bh)) {
-   buffer_io_error(bh);
-   printk(KERN_WARNING "lost page write due to "
-   "I/O error on %s\n",
-  bdevname(bh->b_bdev, b));
-   }
+   if (!quiet_error(bh))
+   buffer_io_error(bh, ", lost sync page write");
set_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);
}
@@ -305,7 +299,7 @@ static void end_buffer_async_read(struct buffer_head *bh, 
int uptodate)
} else {
clear_buffer_uptodate(bh);
if (!quiet_error(bh))
-   buffer_io_error(bh);
+   buffer_io_error(bh, ", async page read");
SetPageError(page);
}
 
@@ -353,7 +347,6 @@ still_busy:
  */
 void end_buffer_async_write(struct buffer_head *bh, int uptodate)
 {
-   char b[BDEVNAME_SIZE];
unsigned long flags;
struct buffer_head *first;
struct buffer_head *tmp;
@@ -365,12 +358,8 @@ void end_buffer_async_write(struct buffer_head *bh, int 
uptodate)
if (uptodate) {
set_buffer_uptodate(bh);
} else {
-   if (!quiet_error(bh)) {
-   buffer_io_error(bh);
-   printk(KERN_WARNING "lost page write due to "
-   "I/O error on %s\n",
-  bdevname(bh->b_bdev, b));
-   }
+   if (!quiet_error(bh))
+   buffer_io_error(bh, ", lost async page write");
set_bit(AS_EIO, &page->mapping->flags);
set_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] block: include func name in __get_request prints

2014-08-27 Thread Robert Elliott
In __get_request calls to printk_ratelimited, include the function name so
the callbacks suppressed message matches the messages that are printed,
and add "dev" before the device name so it matches other block layer
messages.

Signed-off-by: Robert Elliott elli...@hp.com
Reviewed-by: Webb Scales web...@hp.com
---
 block/blk-core.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 9c5a5b9..204cbd3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1065,8 +1065,8 @@ fail_elvpriv:
 * shouldn't stall IO.  Treat this request as !elvpriv.  This will
 * disturb iosched and blkcg but weird is bettern than dead.
 */
-   printk_ratelimited(KERN_WARNING "%s: request aux data allocation 
failed, iosched may be disturbed\n",
-  dev_name(q->backing_dev_info.dev));
+   printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data 
allocation failed, iosched may be disturbed\n",
+  __func__, dev_name(q->backing_dev_info.dev));
 
rq->cmd_flags &= ~REQ_ELVPRIV;
rq->elv.icq = NULL;

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] block: make blk_update_request print prefix match ratelimited prefix

2014-08-27 Thread Robert Elliott
In blk_update_request, change the printk_ratelimited
prefix from end_request to blk_update_request so it
matches the name printed if rate limiting occurs.

Old:
[10234.933106] blk_update_request: 174 callbacks suppressed
[10234.934940] end_request: critical target error, dev sdr, sector 16
[10234.949788] end_request: critical target error, dev sdr, sector 16

New:
[16863.445173] blk_update_request: 398 callbacks suppressed
[16863.447029] blk_update_request: critical target error, dev sdr, sector
1442066176
[16863.449383] blk_update_request: critical target error, dev sdr, sector
802802888
[16863.451680] blk_update_request: critical target error, dev sdr, sector
1609535456

Signed-off-by: Robert Elliott elli...@hp.com
Reviewed-by: Webb Scales web...@hp.com
---
 block/blk-core.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index c359d72..9c5a5b9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2450,8 +2450,8 @@ bool blk_update_request(struct request *req, int error, 
unsigned int nr_bytes)
error_type = "I/O";
break;
}
-   printk_ratelimited(KERN_ERR "end_request: %s error, dev %s, 
sector %llu\n",
-  error_type, req->rq_disk ?
+   printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector 
%llu\n",
+  __func__, error_type, req->rq_disk ?
   req->rq_disk->disk_name : "?",
   (unsigned long long)blk_rq_pos(req));
 

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] block: improve I/O error print consistency

2014-08-27 Thread Robert Elliott
The following series improves some of the ratelimited
I/O error prints so what is being ratelimited is clearer.

---

Robert Elliott (2):
  block: make blk_update_request print prefix match ratelimited prefix
  block: include func name in __get_request prints


 block/blk-core.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

-- 
Rob Elliott, HP Server Storage  elli...@hp.com
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] fs: merge I/O error prints into one line

2014-08-27 Thread Robert Elliott
The following series merges I/O error prints into one
line and makes rate limited messages clearer.

---

Robert Elliott (2):
  fs: merge I/O error prints into one line
  fs: clarify rate limit suppressed buffer I/O errors


 fs/buffer.c |   38 +-
 1 files changed, 9 insertions(+), 29 deletions(-)

-- 
Rob Elliott, HP Server Storage  elli...@hp.com
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] fs: clarify rate limit suppressed buffer I/O errors

2014-08-27 Thread Robert Elliott
When quiet_error applies rate limiting to buffer_io_error calls, what the
"callbacks suppressed" messages apply to is unclear because the name is so generic, particularly
if the messages are interleaved with others:

[ 1936.063572] quiet_error: 664293 callbacks suppressed
[ 1936.065297] Buffer I/O error on dev sdr, logical block 257429952, lost async 
page write
[ 1936.067814] Buffer I/O error on dev sdr, logical block 257429953, lost async 
page write

Also, the function uses printk_ratelimit(), although printk.h includes a
comment advising "Please don't use... Instead use printk_ratelimited()."

Change buffer_io_error to check the BH_Quiet bit itself, drop the
printk_ratelimit call, and print using printk_ratelimited.

This makes the messages look like:

[  387.208839] buffer_io_error: 676394 callbacks suppressed
[  387.210693] Buffer I/O error on dev sdr, logical block 211291776, lost async 
page write
[  387.213432] Buffer I/O error on dev sdr, logical block 211291777, lost async 
page write

Signed-off-by: Robert Elliott elli...@hp.com
Reviewed-by: Webb Scales web...@hp.com
---
 fs/buffer.c |   23 +++
 1 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/fs/buffer.c b/fs/buffer.c
index c6cb0ee..3710a68 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -128,19 +128,13 @@ __clear_page_buffers(struct page *page)
page_cache_release(page);
 }
 
-
-static int quiet_error(struct buffer_head *bh)
-{
-   if (!test_bit(BH_Quiet, &bh->b_state) && printk_ratelimit())
-   return 0;
-   return 1;
-}
-
-
 static void buffer_io_error(struct buffer_head *bh, char *msg)
 {
char b[BDEVNAME_SIZE];
-   printk(KERN_ERR "Buffer I/O error on dev %s, logical block %llu%s\n",
+
+   if (!test_bit(BH_Quiet, &bh->b_state))
+   printk_ratelimited(KERN_ERR
+   "Buffer I/O error on dev %s, logical block %llu%s\n",
bdevname(bh->b_bdev, b),
(unsigned long long)bh->b_blocknr, msg);
 }
@@ -180,8 +174,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int 
uptodate)
if (uptodate) {
set_buffer_uptodate(bh);
} else {
-   if (!quiet_error(bh))
-   buffer_io_error(bh, ", lost sync page write");
+   buffer_io_error(bh, ", lost sync page write");
set_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);
}
@@ -298,8 +291,7 @@ static void end_buffer_async_read(struct buffer_head *bh, 
int uptodate)
set_buffer_uptodate(bh);
} else {
clear_buffer_uptodate(bh);
-   if (!quiet_error(bh))
-   buffer_io_error(bh, ", async page read");
+   buffer_io_error(bh, ", async page read");
SetPageError(page);
}
 
@@ -358,8 +350,7 @@ void end_buffer_async_write(struct buffer_head *bh, int 
uptodate)
if (uptodate) {
set_buffer_uptodate(bh);
} else {
-   if (!quiet_error(bh))
-   buffer_io_error(bh, ", lost async page write");
+   buffer_io_error(bh, ", lost async page write");
set_bit(AS_EIO, &page->mapping->flags);
set_buffer_write_io_error(bh);
clear_buffer_uptodate(bh);

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/3] mpt3sas, mpt2sas: fix scsi_add_host error handling problems in _scsih_probe

2014-07-17 Thread Robert Elliott
In _scsih_probe, propagate the return value from scsi_add_host.
In mpt3sas, avoid calling list_del twice if that returns an
error, which causes list_del corruption warnings if an error
is returned.

Tested with blk-mq and scsi-mq patches to properly cleanup
from and propagate blk_mq_init_rq_map errors.

Signed-off-by: Robert Elliott 
---
 drivers/scsi/mpt2sas/mpt2sas_scsih.c |8 ++--
 drivers/scsi/mpt3sas/mpt3sas_scsih.c |9 ++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c 
b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 4f8a45f..7110e75 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -8123,6 +8123,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
 {
struct MPT2SAS_ADAPTER *ioc;
struct Scsi_Host *shost;
+   int rv;
 
shost = scsi_host_alloc(&scsih_driver_template,
sizeof(struct MPT2SAS_ADAPTER));
@@ -8218,6 +8219,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
if (!ioc->firmware_event_thread) {
printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
+   rv = -ENODEV;
goto out_thread_fail;
}
 
@@ -8225,6 +8227,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
if ((mpt2sas_base_attach(ioc))) {
printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
+   rv = -ENODEV;
goto out_attach_fail;
}
 
@@ -8242,7 +8245,8 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
} else
ioc->hide_drives = 0;
 
-   if ((scsi_add_host(shost, &pdev->dev))) {
+   rv = scsi_add_host(shost, &pdev->dev);
+   if (rv) {
printk(MPT2SAS_ERR_FMT "failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
goto out_add_shost_fail;
@@ -8259,7 +8263,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
  out_thread_fail:
list_del(&ioc->list);
scsi_host_put(shost);
-   return -ENODEV;
+   return rv;
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c 
b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index d2e95ff..07454f0 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -7727,6 +7727,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
 {
struct MPT3SAS_ADAPTER *ioc;
struct Scsi_Host *shost;
+   int rv;
 
shost = scsi_host_alloc(&scsih_driver_template,
sizeof(struct MPT3SAS_ADAPTER));
@@ -7819,6 +7820,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
if (!ioc->firmware_event_thread) {
pr_err(MPT3SAS_FMT "failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
+   rv = -ENODEV;
goto out_thread_fail;
}
 
@@ -7826,13 +7828,14 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
if ((mpt3sas_base_attach(ioc))) {
pr_err(MPT3SAS_FMT "failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
+   rv = -ENODEV;
goto out_attach_fail;
}
 
-   if ((scsi_add_host(shost, &pdev->dev))) {
+   rv = scsi_add_host(shost, &pdev->dev);
+   if (rv) {
pr_err(MPT3SAS_FMT "failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
-   list_del(&ioc->list);
goto out_add_shost_fail;
}
 
@@ -7846,7 +7849,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
  out_thread_fail:
list_del(&ioc->list);
scsi_host_put(shost);
-   return -ENODEV;
+   return rv;
 }
 
 #ifdef CONFIG_PM

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/3] blk-mq: pass along blk_mq_alloc_tag_set return values

2014-07-17 Thread Robert Elliott
Two of the blk-mq based drivers do not pass back the return value
from blk_mq_alloc_tag_set, instead just returning -ENOMEM.

blk_mq_alloc_tag_set returns -EINVAL if the number of queues or
queue depth is bad.  -ENOMEM implies that retrying after freeing some
memory might be more successful, but that won't ever change
in the -EINVAL cases.

Change the null_blk and mtip32xx drivers to pass along
the return value.

Signed-off-by: Robert Elliott 
---
 drivers/block/mtip32xx/mtip32xx.c |1 -
 drivers/block/null_blk.c  |   29 +
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c 
b/drivers/block/mtip32xx/mtip32xx.c
index 295f3af..af72232 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3918,7 +3918,6 @@ skip_create_disk:
if (rv) {
dev_err(>pdev->dev,
"Unable to allocate request queue\n");
-   rv = -ENOMEM;
goto block_queue_alloc_init_error;
}
 
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index a3b042c..00d469c 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -462,17 +462,21 @@ static int null_add_dev(void)
struct gendisk *disk;
struct nullb *nullb;
sector_t size;
+   int rv;
 
nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
-   if (!nullb)
+   if (!nullb) {
+   rv = -ENOMEM;
goto out;
+   }
 
spin_lock_init(>lock);
 
if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
submit_queues = nr_online_nodes;
 
-   if (setup_queues(nullb))
+   rv = setup_queues(nullb);
+   if (rv)
goto out_free_nullb;
 
if (queue_mode == NULL_Q_MQ) {
@@ -484,22 +488,29 @@ static int null_add_dev(void)
nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
nullb->tag_set.driver_data = nullb;
 
-   if (blk_mq_alloc_tag_set(>tag_set))
+   rv = blk_mq_alloc_tag_set(>tag_set);
+   if (rv)
goto out_cleanup_queues;
 
nullb->q = blk_mq_init_queue(>tag_set);
-   if (!nullb->q)
+   if (!nullb->q) {
+   rv = -ENOMEM;
goto out_cleanup_tags;
+   }
} else if (queue_mode == NULL_Q_BIO) {
nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
-   if (!nullb->q)
+   if (!nullb->q) {
+   rv = -ENOMEM;
goto out_cleanup_queues;
+   }
blk_queue_make_request(nullb->q, null_queue_bio);
init_driver_queues(nullb);
} else {
nullb->q = blk_init_queue_node(null_request_fn, >lock, 
home_node);
-   if (!nullb->q)
+   if (!nullb->q) {
+   rv = -ENOMEM;
goto out_cleanup_queues;
+   }
blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
init_driver_queues(nullb);
@@ -509,8 +520,10 @@ static int null_add_dev(void)
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
 
disk = nullb->disk = alloc_disk_node(1, home_node);
-   if (!disk)
+   if (!disk) {
+   rv = -ENOMEM;
goto out_cleanup_blk_queue;
+   }
 
mutex_lock();
list_add_tail(>list, _list);
@@ -544,7 +557,7 @@ out_cleanup_queues:
 out_free_nullb:
kfree(nullb);
 out:
-   return -ENOMEM;
+   return rv;
 }
 
 static int __init null_init(void)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3] blk-mq: cleanup after blk_mq_init_rq_map failures

2014-07-17 Thread Robert Elliott
In blk-mq.c blk_mq_alloc_tag_set, if:
set->tags = kmalloc_node()
succeeds, but one of the blk_mq_init_rq_map() calls fails,
goto out_unwind;
needs to free set->tags so the caller is not obligated
to do so.  None of the current callers (null_blk,
mtip32xx, virtio_blk, or the forthcoming scsi-mq)
do so.

set->tags needs to be set to NULL after doing so,
so other tag cleanup logic doesn't try to free
a stale pointer later.  Also set it to NULL
in blk_mq_free_tag_set.

Tested with error injection on the forthcoming
scsi-mq + hpsa combination.

Signed-off-by: Robert Elliott 
---
 block/blk-mq.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index ad69ef6..4a24b97 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1996,6 +1996,8 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 out_unwind:
while (--i >= 0)
blk_mq_free_rq_map(set, set->tags[i], i);
+   kfree(set->tags);
+   set->tags = NULL;
 out:
return -ENOMEM;
 }
@@ -2011,6 +2013,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
}
 
kfree(set->tags);
+   set->tags = NULL;
 }
 EXPORT_SYMBOL(blk_mq_free_tag_set);
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/3] blk-mq: blk_mq_init_rq_map error handling

2014-07-17 Thread Robert Elliott
The following series cleans up blk_mq_init_rq_map failures
properly.

---

Robert Elliott (3):
  blk-mq: cleanup after blk_mq_init_rq_map failures
  blk-mq: pass along blk_mq_alloc_tag_set return values
  mpt3sas,mpt2sas: fix scsi_add_host error handling problems in _scsih_probe


 block/blk-mq.c   |3 +++
 drivers/block/mtip32xx/mtip32xx.c|1 -
 drivers/block/null_blk.c |   29 +
 drivers/scsi/mpt2sas/mpt2sas_scsih.c |8 ++--
 drivers/scsi/mpt3sas/mpt3sas_scsih.c |9 ++---
 5 files changed, 36 insertions(+), 14 deletions(-)

-- 
Rob Elliott, HP Server Storage
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/3] blk-mq: blk_mq_init_rq_map error handling

2014-07-17 Thread Robert Elliott
The following series cleans up blk_mq_init_rq_map failures
properly.

---

Robert Elliott (3):
  blk-mq: cleanup after blk_mq_init_rq_map failures
  blk-mq: pass along blk_mq_alloc_tag_set return values
  mpt3sas,mpt2sas: fix scsi_add_host error handling problems in _scsih_probe


 block/blk-mq.c   |3 +++
 drivers/block/mtip32xx/mtip32xx.c|1 -
 drivers/block/null_blk.c |   29 +
 drivers/scsi/mpt2sas/mpt2sas_scsih.c |8 ++--
 drivers/scsi/mpt3sas/mpt3sas_scsih.c |9 ++---
 5 files changed, 36 insertions(+), 14 deletions(-)

-- 
Rob Elliott, HP Server Storage
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/3] blk-mq: cleanup after blk_mq_init_rq_map failures

2014-07-17 Thread Robert Elliott
In blk-mq.c blk_mq_alloc_tag_set, if:
set->tags = kmalloc_node()
succeeds, but one of the blk_mq_init_rq_map() calls fails,
goto out_unwind;
needs to free set->tags so the caller is not obligated
to do so.  None of the current callers (null_blk,
mtip32xx, virtio_blk, or the forthcoming scsi-mq)
do so.

set->tags needs to be set to NULL after doing so,
so other tag cleanup logic doesn't try to free
a stale pointer later.  Also set it to NULL
in blk_mq_free_tag_set.

Tested with error injection on the forthcoming
scsi-mq + hpsa combination.

Signed-off-by: Robert Elliott elli...@hp.com
---
 block/blk-mq.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index ad69ef6..4a24b97 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1996,6 +1996,8 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 out_unwind:
while (--i = 0)
blk_mq_free_rq_map(set, set-tags[i], i);
+   kfree(set-tags);
+   set-tags = NULL;
 out:
return -ENOMEM;
 }
@@ -2011,6 +2013,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
}
 
kfree(set-tags);
+   set-tags = NULL;
 }
 EXPORT_SYMBOL(blk_mq_free_tag_set);
 

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/3] blk-mq: pass along blk_mq_alloc_tag_set return values

2014-07-17 Thread Robert Elliott
Two of the blk-mq based drivers do not pass back the return value
from blk_mq_alloc_tag_set, instead just returning -ENOMEM.

blk_mq_alloc_tag_set returns -EINVAL if the number of queues or
queue depth is bad.  -ENOMEM implies that retrying after freeing some
memory might be more successful, but that won't ever change
in the -EINVAL cases.

Change the null_blk and mtip32xx drivers to pass along
the return value.

Signed-off-by: Robert Elliott elli...@hp.com
---
 drivers/block/mtip32xx/mtip32xx.c |1 -
 drivers/block/null_blk.c  |   29 +
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c 
b/drivers/block/mtip32xx/mtip32xx.c
index 295f3af..af72232 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3918,7 +3918,6 @@ skip_create_disk:
if (rv) {
dev_err(dd-pdev-dev,
Unable to allocate request queue\n);
-   rv = -ENOMEM;
goto block_queue_alloc_init_error;
}
 
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index a3b042c..00d469c 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -462,17 +462,21 @@ static int null_add_dev(void)
struct gendisk *disk;
struct nullb *nullb;
sector_t size;
+   int rv;
 
nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
-   if (!nullb)
+   if (!nullb) {
+   rv = -ENOMEM;
goto out;
+   }
 
spin_lock_init(nullb-lock);
 
if (queue_mode == NULL_Q_MQ  use_per_node_hctx)
submit_queues = nr_online_nodes;
 
-   if (setup_queues(nullb))
+   rv = setup_queues(nullb);
+   if (rv)
goto out_free_nullb;
 
if (queue_mode == NULL_Q_MQ) {
@@ -484,22 +488,29 @@ static int null_add_dev(void)
nullb-tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
nullb-tag_set.driver_data = nullb;
 
-   if (blk_mq_alloc_tag_set(nullb-tag_set))
+   rv = blk_mq_alloc_tag_set(nullb-tag_set);
+   if (rv)
goto out_cleanup_queues;
 
nullb-q = blk_mq_init_queue(nullb-tag_set);
-   if (!nullb-q)
+   if (!nullb-q) {
+   rv = -ENOMEM;
goto out_cleanup_tags;
+   }
} else if (queue_mode == NULL_Q_BIO) {
nullb-q = blk_alloc_queue_node(GFP_KERNEL, home_node);
-   if (!nullb-q)
+   if (!nullb-q) {
+   rv = -ENOMEM;
goto out_cleanup_queues;
+   }
blk_queue_make_request(nullb-q, null_queue_bio);
init_driver_queues(nullb);
} else {
nullb-q = blk_init_queue_node(null_request_fn, nullb-lock, 
home_node);
-   if (!nullb-q)
+   if (!nullb-q) {
+   rv = -ENOMEM;
goto out_cleanup_queues;
+   }
blk_queue_prep_rq(nullb-q, null_rq_prep_fn);
blk_queue_softirq_done(nullb-q, null_softirq_done_fn);
init_driver_queues(nullb);
@@ -509,8 +520,10 @@ static int null_add_dev(void)
queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb-q);
 
disk = nullb-disk = alloc_disk_node(1, home_node);
-   if (!disk)
+   if (!disk) {
+   rv = -ENOMEM;
goto out_cleanup_blk_queue;
+   }
 
mutex_lock(lock);
list_add_tail(nullb-list, nullb_list);
@@ -544,7 +557,7 @@ out_cleanup_queues:
 out_free_nullb:
kfree(nullb);
 out:
-   return -ENOMEM;
+   return rv;
 }
 
 static int __init null_init(void)

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 3/3] mpt3sas, mpt2sas: fix scsi_add_host error handling problems in _scsih_probe

2014-07-17 Thread Robert Elliott
In _scsih_probe, propagate the return value from scsi_add_host.
In mpt3sas, avoid calling list_del twice when scsi_add_host
returns an error; the second call causes list_del corruption
warnings.

Tested with blk-mq and scsi-mq patches to properly cleanup
from and propagate blk_mq_init_rq_map errors.

Signed-off-by: Robert Elliott elli...@hp.com
---
 drivers/scsi/mpt2sas/mpt2sas_scsih.c |8 ++--
 drivers/scsi/mpt3sas/mpt3sas_scsih.c |9 ++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c 
b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 4f8a45f..7110e75 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
@@ -8123,6 +8123,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
 {
struct MPT2SAS_ADAPTER *ioc;
struct Scsi_Host *shost;
+   int rv;
 
shost = scsi_host_alloc(scsih_driver_template,
sizeof(struct MPT2SAS_ADAPTER));
@@ -8218,6 +8219,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
if (!ioc-firmware_event_thread) {
printk(MPT2SAS_ERR_FMT failure at %s:%d/%s()!\n,
ioc-name, __FILE__, __LINE__, __func__);
+   rv = -ENODEV;
goto out_thread_fail;
}
 
@@ -8225,6 +8227,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
if ((mpt2sas_base_attach(ioc))) {
printk(MPT2SAS_ERR_FMT failure at %s:%d/%s()!\n,
ioc-name, __FILE__, __LINE__, __func__);
+   rv = -ENODEV;
goto out_attach_fail;
}
 
@@ -8242,7 +8245,8 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
} else
ioc-hide_drives = 0;
 
-   if ((scsi_add_host(shost, pdev-dev))) {
+   rv = scsi_add_host(shost, pdev-dev);
+   if (rv) {
printk(MPT2SAS_ERR_FMT failure at %s:%d/%s()!\n,
ioc-name, __FILE__, __LINE__, __func__);
goto out_add_shost_fail;
@@ -8259,7 +8263,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
  out_thread_fail:
list_del(ioc-list);
scsi_host_put(shost);
-   return -ENODEV;
+   return rv;
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c 
b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index d2e95ff..07454f0 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -7727,6 +7727,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
 {
struct MPT3SAS_ADAPTER *ioc;
struct Scsi_Host *shost;
+   int rv;
 
shost = scsi_host_alloc(scsih_driver_template,
sizeof(struct MPT3SAS_ADAPTER));
@@ -7819,6 +7820,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
if (!ioc-firmware_event_thread) {
pr_err(MPT3SAS_FMT failure at %s:%d/%s()!\n,
ioc-name, __FILE__, __LINE__, __func__);
+   rv = -ENODEV;
goto out_thread_fail;
}
 
@@ -7826,13 +7828,14 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
if ((mpt3sas_base_attach(ioc))) {
pr_err(MPT3SAS_FMT failure at %s:%d/%s()!\n,
ioc-name, __FILE__, __LINE__, __func__);
+   rv = -ENODEV;
goto out_attach_fail;
}
 
-   if ((scsi_add_host(shost, pdev-dev))) {
+   rv = scsi_add_host(shost, pdev-dev);
+   if (rv) {
pr_err(MPT3SAS_FMT failure at %s:%d/%s()!\n,
ioc-name, __FILE__, __LINE__, __func__);
-   list_del(ioc-list);
goto out_add_shost_fail;
}
 
@@ -7846,7 +7849,7 @@ _scsih_probe(struct pci_dev *pdev, const struct 
pci_device_id *id)
  out_thread_fail:
list_del(ioc-list);
scsi_host_put(shost);
-   return -ENODEV;
+   return rv;
 }
 
 #ifdef CONFIG_PM

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 2/2] ftrace: Add funcgraph_tail option to print function name after closing braces

2014-05-20 Thread Robert Elliott
In the function-graph tracer, add a funcgraph-tail option
to print the function name on all } lines, not just
functions whose first line is no longer in the trace
buffer.

If a function calls other traced functions, its total
time appears on its } line.  This change allows grep
to be used to determine the function to which the
line corresponds.

Update Documentation/trace/ftrace.txt to describe
this new option.

Signed-off-by: Robert Elliott 
---
 Documentation/trace/ftrace.txt   |   26 ++
 kernel/trace/trace.h |1 +
 kernel/trace/trace_functions_graph.c |9 ++---
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index bd36598..2479b2a 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -2003,6 +2003,32 @@ want, depending on your needs.
   360.774530 |   1)   0.594 us|  
__phys_addr();
 
 
+The function name is always displayed after the closing bracket
+for a function if the start of that function is not in the
+trace buffer.
+
+Display of the function name after the closing bracket may be
+enabled for functions whose start is in the trace buffer,
+allowing easier searching with grep for function durations.
+It is default disabled.
+
+   hide: echo nofuncgraph-tail > trace_options
+   show: echo funcgraph-tail > trace_options
+
+  Example with nofuncgraph-tail (default):
+  0)   |  putname() {
+  0)   |kmem_cache_free() {
+  0)   0.518 us|  __phys_addr();
+  0)   1.757 us|}
+  0)   2.861 us|  }
+
+  Example with funcgraph-tail:
+  0)   |  putname() {
+  0)   |kmem_cache_free() {
+  0)   0.518 us|  __phys_addr();
+  0)   1.757 us|} /* kmem_cache_free() */
+  0)   2.861 us|  } /* putname() */
+
 You can put some comments on specific functions by using
 trace_printk() For example, if you want to put a comment inside
 the __might_sleep() function, you just have to include
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 861668a..5ca365f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -725,6 +725,7 @@ extern unsigned long trace_flags;
 #define TRACE_GRAPH_PRINT_DURATION  0x10
 #define TRACE_GRAPH_PRINT_ABS_TIME  0x20
 #define TRACE_GRAPH_PRINT_IRQS  0x40
+#define TRACE_GRAPH_PRINT_TAIL  0x80
 #define TRACE_GRAPH_PRINT_FILL_SHIFT   28
 #define TRACE_GRAPH_PRINT_FILL_MASK(0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
 
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index ab78221..ba10bb0 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -55,11 +55,13 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
/* Display interrupts */
{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
+   /* Display function name after trailing } */
+   { TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
{ } /* Empty entry */
 };
 
 static struct tracer_flags tracer_flags = {
-   /* Don't display overruns and proc by default */
+   /* Don't display overruns, proc, or tail by default */
.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
   TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
.opts = trace_opts
@@ -1167,9 +1169,10 @@ print_graph_return(struct ftrace_graph_ret *trace, 
struct trace_seq *s,
 * If the return function does not have a matching entry,
 * then the entry was lost. Instead of just printing
 * the '}' and letting the user guess what function this
-* belongs to, write out the function name.
+* belongs to, write out the function name. Always do
+* that if the funcgraph-tail option is enabled.
 */
-   if (func_match) {
+   if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) {
ret = trace_seq_puts(s, "}\n");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 1/2] ftrace: Eliminate duplicate TRACE_GRAPH_PRINT_xx defines

2014-05-20 Thread Robert Elliott
Eliminate duplicate TRACE_GRAPH_PRINT_xx defines
in trace_functions_graph.c that are already in
trace.h.

Add TRACE_GRAPH_PRINT_IRQS to trace.h, which is
the only one that is missing.

Signed-off-by: Robert Elliott 
---
 kernel/trace/trace.h |1 +
 kernel/trace/trace_functions_graph.c |9 -
 2 files changed, 1 insertions(+), 9 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2e29d7b..861668a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -724,6 +724,7 @@ extern unsigned long trace_flags;
 #define TRACE_GRAPH_PRINT_PROC  0x8
 #define TRACE_GRAPH_PRINT_DURATION  0x10
 #define TRACE_GRAPH_PRINT_ABS_TIME  0x20
+#define TRACE_GRAPH_PRINT_IRQS  0x40
 #define TRACE_GRAPH_PRINT_FILL_SHIFT   28
 #define TRACE_GRAPH_PRINT_FILL_MASK(0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
 
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index deff112..ab78221 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -38,15 +38,6 @@ struct fgraph_data {
 
 #define TRACE_GRAPH_INDENT 2
 
-/* Flag options */
-#define TRACE_GRAPH_PRINT_OVERRUN  0x1
-#define TRACE_GRAPH_PRINT_CPU  0x2
-#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
-#define TRACE_GRAPH_PRINT_PROC 0x8
-#define TRACE_GRAPH_PRINT_DURATION 0x10
-#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
-#define TRACE_GRAPH_PRINT_IRQS 0x40
-
 static unsigned int max_depth;
 
 static struct tracer_opt trace_opts[] = {

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 0/2] ftrace: Add funcgraph-tail option

2014-05-20 Thread Robert Elliott
Using ftrace function-graph to examine the times consumed by
functions, the time shows up on the line where the call is made
if no other traceable functions were called by that function:
 11)   0.672 us|cmd_alloc [hpsa]();

but the time shows up down by the } if there were other traceable
functions called by that function:
 11)   |  cmd_alloc [hpsa]() {
 11)   0.129 us|cmd_free [hpsa]();
 11)   0.106 us|cmd_free [hpsa]();
 11)   2.014 us|  }

On its own, the } line doesn't indicate which function it is
closing, so grep cannot be used to search for all the times
for this function. You have to write a parser.

The function name does get printed on those lines when the
start of the function is off the trace.

This patch series adds a funcgraph-tail option to do that
in all cases.

echo 1 > /sys/kernel/debug/tracing/options/funcgraph-tail
or
echo funcgraph-tail > /sys/kernel/debug/tracing/trace_options
yields:
 11)   |hpsa_scsi_ioaccel_raid_map [hpsa]() {
 11)   |  hpsa_scsi_ioaccel_queue_command [hpsa]() {
 11)   |hpsa_scsi_ioaccel2_queue_command [hpsa]() {
 11)   0.067 us|  fixup_ioaccel_cdb [hpsa]();
 11)   0.053 us|  set_encrypt_ioaccel2 [hpsa]();
 11)   0.199 us|  enqueue_cmd_and_start_io [hpsa]();
 11)   1.952 us|} /* hpsa_scsi_ioaccel2_queue_command [hpsa] */
 11)   2.501 us|  } /* hpsa_scsi_ioaccel_queue_command [hpsa] */
 11)   3.093 us|} /* hpsa_scsi_ioaccel_raid_map [hpsa] */
 11)   4.667 us|  } /* hpsa_scsi_queue_command [hpsa] */

echo 0 > /sys/kernel/debug/tracing/options/funcgraph-tail
or
echo nofuncgraph-tail > /sys/kernel/debug/tracing/trace_options
(which is the default setting) still yields:
 11)   |hpsa_scsi_ioaccel_raid_map [hpsa]() {
 11)   |  hpsa_scsi_ioaccel_queue_command [hpsa]() {
 11)   |hpsa_scsi_ioaccel2_queue_command [hpsa]() {
 11)   0.067 us|  fixup_ioaccel_cdb [hpsa]();
 11)   0.053 us|  set_encrypt_ioaccel2 [hpsa]();
 11)   0.199 us|  enqueue_cmd_and_start_io [hpsa]();
 11)   1.952 us|}
 11)   2.501 us|  }
 11)   3.093 us|}
 11)   4.667 us|  }


---

Robert Elliott (2):
  ftrace: Eliminate duplicate TRACE_GRAPH_PRINT_xx defines
  ftrace: Add funcgraph_tail option to print function name after closing 
braces


 Documentation/trace/ftrace.txt   |   26 ++
 kernel/trace/trace.h |2 ++
 kernel/trace/trace_functions_graph.c |   18 ++
 3 files changed, 34 insertions(+), 12 deletions(-)

-- 
Rob Elliott, HP Server Storage
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 1/2] ftrace: Eliminate duplicate TRACE_GRAPH_PRINT_xx defines

2014-05-20 Thread Robert Elliott
Eliminate duplicate TRACE_GRAPH_PRINT_xx defines
in trace_functions_graph.c that are already in
trace.h.

Add TRACE_GRAPH_PRINT_IRQS to trace.h, which is
the only one that is missing.

Signed-off-by: Robert Elliott elli...@hp.com
---
 kernel/trace/trace.h |1 +
 kernel/trace/trace_functions_graph.c |9 -
 2 files changed, 1 insertions(+), 9 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2e29d7b..861668a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -724,6 +724,7 @@ extern unsigned long trace_flags;
 #define TRACE_GRAPH_PRINT_PROC  0x8
 #define TRACE_GRAPH_PRINT_DURATION  0x10
 #define TRACE_GRAPH_PRINT_ABS_TIME  0x20
+#define TRACE_GRAPH_PRINT_IRQS  0x40
 #define TRACE_GRAPH_PRINT_FILL_SHIFT   28
 #define TRACE_GRAPH_PRINT_FILL_MASK(0x3  TRACE_GRAPH_PRINT_FILL_SHIFT)
 
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index deff112..ab78221 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -38,15 +38,6 @@ struct fgraph_data {
 
 #define TRACE_GRAPH_INDENT 2
 
-/* Flag options */
-#define TRACE_GRAPH_PRINT_OVERRUN  0x1
-#define TRACE_GRAPH_PRINT_CPU  0x2
-#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
-#define TRACE_GRAPH_PRINT_PROC 0x8
-#define TRACE_GRAPH_PRINT_DURATION 0x10
-#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
-#define TRACE_GRAPH_PRINT_IRQS 0x40
-
 static unsigned int max_depth;
 
 static struct tracer_opt trace_opts[] = {

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 0/2] ftrace: Add funcgraph-tail option

2014-05-20 Thread Robert Elliott
Using ftrace function-graph to examine the times consumed by
functions, the time shows up on the line where the call is made
if no other traceable functions were called by that function:
 11)   0.672 us|cmd_alloc [hpsa]();

but the time shows up down by the } if there were other traceable
functions called by that function:
 11)   |  cmd_alloc [hpsa]() {
 11)   0.129 us|cmd_free [hpsa]();
 11)   0.106 us|cmd_free [hpsa]();
 11)   2.014 us|  }

On its own, the } line doesn't indicate which function it is
closing, so grep cannot be used to search for all the times
for this function. You have to write a parser.

The function name does get printed on those lines when the
start of the function is off the trace.

This patch series adds a funcgraph-tail option to do that
in all cases.

echo 1 > /sys/kernel/debug/tracing/options/funcgraph-tail
or
echo funcgraph-tail > /sys/kernel/debug/tracing/trace_options
yields:
 11)   |hpsa_scsi_ioaccel_raid_map [hpsa]() {
 11)   |  hpsa_scsi_ioaccel_queue_command [hpsa]() {
 11)   |hpsa_scsi_ioaccel2_queue_command [hpsa]() {
 11)   0.067 us|  fixup_ioaccel_cdb [hpsa]();
 11)   0.053 us|  set_encrypt_ioaccel2 [hpsa]();
 11)   0.199 us|  enqueue_cmd_and_start_io [hpsa]();
 11)   1.952 us|} /* hpsa_scsi_ioaccel2_queue_command [hpsa] */
 11)   2.501 us|  } /* hpsa_scsi_ioaccel_queue_command [hpsa] */
 11)   3.093 us|} /* hpsa_scsi_ioaccel_raid_map [hpsa] */
 11)   4.667 us|  } /* hpsa_scsi_queue_command [hpsa] */

echo 0 > /sys/kernel/debug/tracing/options/funcgraph-tail
or
echo nofuncgraph-tail > /sys/kernel/debug/tracing/trace_options
(which is the default setting) still yields:
 11)   |hpsa_scsi_ioaccel_raid_map [hpsa]() {
 11)   |  hpsa_scsi_ioaccel_queue_command [hpsa]() {
 11)   |hpsa_scsi_ioaccel2_queue_command [hpsa]() {
 11)   0.067 us|  fixup_ioaccel_cdb [hpsa]();
 11)   0.053 us|  set_encrypt_ioaccel2 [hpsa]();
 11)   0.199 us|  enqueue_cmd_and_start_io [hpsa]();
 11)   1.952 us|}
 11)   2.501 us|  }
 11)   3.093 us|}
 11)   4.667 us|  }


---

Robert Elliott (2):
  ftrace: Eliminate duplicate TRACE_GRAPH_PRINT_xx defines
  ftrace: Add funcgraph_tail option to print function name after closing 
braces


 Documentation/trace/ftrace.txt   |   26 ++
 kernel/trace/trace.h |2 ++
 kernel/trace/trace_functions_graph.c |   18 ++
 3 files changed, 34 insertions(+), 12 deletions(-)

-- 
Rob Elliott, HP Server Storage
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND 2/2] ftrace: Add funcgraph_tail option to print function name after closing braces

2014-05-20 Thread Robert Elliott
In the function-graph tracer, add a funcgraph-tail option
to print the function name on all } lines, not just
functions whose first line is no longer in the trace
buffer.

If a function calls other traced functions, its total
time appears on its } line.  This change allows grep
to be used to determine the function to which the
line corresponds.

Update Documentation/trace/ftrace.txt to describe
this new option.

Signed-off-by: Robert Elliott elli...@hp.com
---
 Documentation/trace/ftrace.txt   |   26 ++
 kernel/trace/trace.h |1 +
 kernel/trace/trace_functions_graph.c |9 ++---
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index bd36598..2479b2a 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -2003,6 +2003,32 @@ want, depending on your needs.
   360.774530 |   1)   0.594 us|  
__phys_addr();
 
 
+The function name is always displayed after the closing bracket
+for a function if the start of that function is not in the
+trace buffer.
+
+Display of the function name after the closing bracket may be
+enabled for functions whose start is in the trace buffer,
+allowing easier searching with grep for function durations.
+It is default disabled.
+
+   hide: echo nofuncgraph-tail  trace_options
+   show: echo funcgraph-tail  trace_options
+
+  Example with nofuncgraph-tail (default):
+  0)   |  putname() {
+  0)   |kmem_cache_free() {
+  0)   0.518 us|  __phys_addr();
+  0)   1.757 us|}
+  0)   2.861 us|  }
+
+  Example with funcgraph-tail:
+  0)   |  putname() {
+  0)   |kmem_cache_free() {
+  0)   0.518 us|  __phys_addr();
+  0)   1.757 us|} /* kmem_cache_free() */
+  0)   2.861 us|  } /* putname() */
+
 You can put some comments on specific functions by using
 trace_printk() For example, if you want to put a comment inside
 the __might_sleep() function, you just have to include
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 861668a..5ca365f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -725,6 +725,7 @@ extern unsigned long trace_flags;
 #define TRACE_GRAPH_PRINT_DURATION  0x10
 #define TRACE_GRAPH_PRINT_ABS_TIME  0x20
 #define TRACE_GRAPH_PRINT_IRQS  0x40
+#define TRACE_GRAPH_PRINT_TAIL  0x80
 #define TRACE_GRAPH_PRINT_FILL_SHIFT   28
 #define TRACE_GRAPH_PRINT_FILL_MASK(0x3  TRACE_GRAPH_PRINT_FILL_SHIFT)
 
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index ab78221..ba10bb0 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -55,11 +55,13 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
/* Display interrupts */
{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
+   /* Display function name after trailing } */
+   { TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
{ } /* Empty entry */
 };
 
 static struct tracer_flags tracer_flags = {
-   /* Don't display overruns and proc by default */
+   /* Don't display overruns, proc, or tail by default */
.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
   TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
.opts = trace_opts
@@ -1167,9 +1169,10 @@ print_graph_return(struct ftrace_graph_ret *trace, 
struct trace_seq *s,
 * If the return function does not have a matching entry,
 * then the entry was lost. Instead of just printing
 * the '}' and letting the user guess what function this
-* belongs to, write out the function name.
+* belongs to, write out the function name. Always do
+* that if the funcgraph-tail option is enabled.
 */
-   if (func_match) {
+   if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) {
ret = trace_seq_puts(s, "}\n");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] In the function-graph tracer, add a funcgraph_tail option

2014-05-12 Thread Robert Elliott
to print the function name on all } lines, not just
functions whose first line is no longer in the trace
buffer.

If a function calls other traced functions, its total
time appears on its } line.  This change allows grep
to be used to determine the function for which the
line corresponds.

Update Documentation/trace/ftrace.txt to describe
this new option.

Signed-off-by: Robert Elliott 
---
 Documentation/trace/ftrace.txt   |   26 ++
 kernel/trace/trace.h |1 +
 kernel/trace/trace_functions_graph.c |9 ++---
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index bd36598..2479b2a 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -2003,6 +2003,32 @@ want, depending on your needs.
   360.774530 |   1)   0.594 us|  
__phys_addr();
 
 
+The function name is always displayed after the closing bracket
+for a function if the start of that function is not in the
+trace buffer.
+
+Display of the function name after the closing bracket may be
+enabled for functions whose start is in the trace buffer,
+allowing easier searching with grep for function durations.
+It is default disabled.
+
+   hide: echo nofuncgraph-tail > trace_options
+   show: echo funcgraph-tail > trace_options
+
+  Example with nofuncgraph-tail (default):
+  0)   |  putname() {
+  0)   |kmem_cache_free() {
+  0)   0.518 us|  __phys_addr();
+  0)   1.757 us|}
+  0)   2.861 us|  }
+
+  Example with funcgraph-tail:
+  0)   |  putname() {
+  0)   |kmem_cache_free() {
+  0)   0.518 us|  __phys_addr();
+  0)   1.757 us|} /* kmem_cache_free() */
+  0)   2.861 us|  } /* putname() */
+
 You can put some comments on specific functions by using
 trace_printk() For example, if you want to put a comment inside
 the __might_sleep() function, you just have to include
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 861668a..5ca365f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -725,6 +725,7 @@ extern unsigned long trace_flags;
 #define TRACE_GRAPH_PRINT_DURATION  0x10
 #define TRACE_GRAPH_PRINT_ABS_TIME  0x20
 #define TRACE_GRAPH_PRINT_IRQS  0x40
+#define TRACE_GRAPH_PRINT_TAIL  0x80
 #define TRACE_GRAPH_PRINT_FILL_SHIFT   28
 #define TRACE_GRAPH_PRINT_FILL_MASK(0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
 
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index ab78221..ba10bb0 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -55,11 +55,13 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
/* Display interrupts */
{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
+   /* Display function name after trailing } */
+   { TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
{ } /* Empty entry */
 };
 
 static struct tracer_flags tracer_flags = {
-   /* Don't display overruns and proc by default */
+   /* Don't display overruns, proc, or tail by default */
.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
   TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
.opts = trace_opts
@@ -1167,9 +1169,10 @@ print_graph_return(struct ftrace_graph_ret *trace, 
struct trace_seq *s,
 * If the return function does not have a matching entry,
 * then the entry was lost. Instead of just printing
 * the '}' and letting the user guess what function this
-* belongs to, write out the function name.
+* belongs to, write out the function name. Always do
+* that if the funcgraph-tail option is enabled.
 */
-   if (func_match) {
+   if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) {
ret = trace_seq_puts(s, "}\n");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] ftrace: Add funcgraph_tail option

2014-05-12 Thread Robert Elliott
Using ftrace function-graph to examine the times consumed by
functions, the time shows up on the line where the call is made
if no other traceable functions were called by that function:
 11)   0.672 us|cmd_alloc [hpsa]();

but the time shows up down by the } if there were other traceable
functions called by that function:
 11)   |  cmd_alloc [hpsa]() {
 11)   0.129 us|cmd_free [hpsa]();
 11)   0.106 us|cmd_free [hpsa]();
 11)   2.014 us|  }

On its own, the } line doesn't indicate which function it is
closing, so grep cannot be used to search for all the times
for this function. You have to write a parser.

The function name does get printed on those lines when the
start of the function is off the trace.

This patch series adds a funcgraph-tail option to do that
in all cases.

echo 1 > /sys/kernel/debug/tracing/options/funcgraph-tail
or
echo funcgraph-tail > /sys/kernel/debug/tracing/trace_options
yields:
 11)   |hpsa_scsi_ioaccel_raid_map [hpsa]() {
 11)   |  hpsa_scsi_ioaccel_queue_command [hpsa]() {
 11)   |hpsa_scsi_ioaccel2_queue_command [hpsa]() {
 11)   0.067 us|  fixup_ioaccel_cdb [hpsa]();
 11)   0.053 us|  set_encrypt_ioaccel2 [hpsa]();
 11)   0.199 us|  enqueue_cmd_and_start_io [hpsa]();
 11)   1.952 us|} /* hpsa_scsi_ioaccel2_queue_command [hpsa] */
 11)   2.501 us|  } /* hpsa_scsi_ioaccel_queue_command [hpsa] */
 11)   3.093 us|} /* hpsa_scsi_ioaccel_raid_map [hpsa] */
 11)   4.667 us|  } /* hpsa_scsi_queue_command [hpsa] */

echo 0 > /sys/kernel/debug/tracing/options/funcgraph-tail
or
echo nofuncgraph-tail > /sys/kernel/debug/tracing/trace_options
(which is the default setting) still yields:
 11)   |hpsa_scsi_ioaccel_raid_map [hpsa]() {
 11)   |  hpsa_scsi_ioaccel_queue_command [hpsa]() {
 11)   |hpsa_scsi_ioaccel2_queue_command [hpsa]() {
 11)   0.067 us|  fixup_ioaccel_cdb [hpsa]();
 11)   0.053 us|  set_encrypt_ioaccel2 [hpsa]();
 11)   0.199 us|  enqueue_cmd_and_start_io [hpsa]();
 11)   1.952 us|}
 11)   2.501 us|  }
 11)   3.093 us|}
 11)   4.667 us|  }


---

Robert Elliott (2):
  Eliminate duplicate TRACE_GRAPH_PRINT_xx defines
  In the function-graph tracer, add a funcgraph_tail option


 Documentation/trace/ftrace.txt   |   26 ++
 kernel/trace/trace.h |2 ++
 kernel/trace/trace_functions_graph.c |   18 ++
 3 files changed, 34 insertions(+), 12 deletions(-)

--
Rob Elliott   HP Server Storage
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] Eliminate duplicate TRACE_GRAPH_PRINT_xx defines

2014-05-12 Thread Robert Elliott
in trace_functions_graph.c that are already in
trace.h.

Add TRACE_GRAPH_PRINT_IRQS to trace.h, which is
the only one that is missing.

Signed-off-by: Robert Elliott 
---
 kernel/trace/trace.h |1 +
 kernel/trace/trace_functions_graph.c |9 -
 2 files changed, 1 insertions(+), 9 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2e29d7b..861668a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -724,6 +724,7 @@ extern unsigned long trace_flags;
 #define TRACE_GRAPH_PRINT_PROC  0x8
 #define TRACE_GRAPH_PRINT_DURATION  0x10
 #define TRACE_GRAPH_PRINT_ABS_TIME  0x20
+#define TRACE_GRAPH_PRINT_IRQS  0x40
 #define TRACE_GRAPH_PRINT_FILL_SHIFT   28
 #define TRACE_GRAPH_PRINT_FILL_MASK(0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
 
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index deff112..ab78221 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -38,15 +38,6 @@ struct fgraph_data {
 
 #define TRACE_GRAPH_INDENT 2
 
-/* Flag options */
-#define TRACE_GRAPH_PRINT_OVERRUN  0x1
-#define TRACE_GRAPH_PRINT_CPU  0x2
-#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
-#define TRACE_GRAPH_PRINT_PROC 0x8
-#define TRACE_GRAPH_PRINT_DURATION 0x10
-#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
-#define TRACE_GRAPH_PRINT_IRQS 0x40
-
 static unsigned int max_depth;
 
 static struct tracer_opt trace_opts[] = {

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] ftrace: Add funcgraph_tail option

2014-05-12 Thread Robert Elliott
Using ftrace function-graph to examine the times consumed by
functions, the time shows up on the line where the call is made
if no other traceable functions were called by that function:
 11)   0.672 us|cmd_alloc [hpsa]();

but the time shows up down by the } if there were other traceable
functions called by that function:
 11)   |  cmd_alloc [hpsa]() {
 11)   0.129 us|cmd_free [hpsa]();
 11)   0.106 us|cmd_free [hpsa]();
 11)   2.014 us|  }

On its own, the } line doesn't indicate which function it is
closing, so grep cannot be used to search for all the times
for this function. You have to write a parser.

The function name does get printed on those lines when the
start of the function is off the trace.

This patch series adds a funcgraph-tail option to do that
in all cases.

echo 1 > /sys/kernel/debug/tracing/options/funcgraph-tail
or
echo funcgraph-tail > /sys/kernel/debug/tracing/trace_options
yields:
 11)   |hpsa_scsi_ioaccel_raid_map [hpsa]() {
 11)   |  hpsa_scsi_ioaccel_queue_command [hpsa]() {
 11)   |hpsa_scsi_ioaccel2_queue_command [hpsa]() {
 11)   0.067 us|  fixup_ioaccel_cdb [hpsa]();
 11)   0.053 us|  set_encrypt_ioaccel2 [hpsa]();
 11)   0.199 us|  enqueue_cmd_and_start_io [hpsa]();
 11)   1.952 us|} /* hpsa_scsi_ioaccel2_queue_command [hpsa] */
 11)   2.501 us|  } /* hpsa_scsi_ioaccel_queue_command [hpsa] */
 11)   3.093 us|} /* hpsa_scsi_ioaccel_raid_map [hpsa] */
 11)   4.667 us|  } /* hpsa_scsi_queue_command [hpsa] */

echo 0 > /sys/kernel/debug/tracing/options/funcgraph-tail
or
echo nofuncgraph-tail > /sys/kernel/debug/tracing/trace_options
(which is the default setting) still yields:
 11)   |hpsa_scsi_ioaccel_raid_map [hpsa]() {
 11)   |  hpsa_scsi_ioaccel_queue_command [hpsa]() {
 11)   |hpsa_scsi_ioaccel2_queue_command [hpsa]() {
 11)   0.067 us|  fixup_ioaccel_cdb [hpsa]();
 11)   0.053 us|  set_encrypt_ioaccel2 [hpsa]();
 11)   0.199 us|  enqueue_cmd_and_start_io [hpsa]();
 11)   1.952 us|}
 11)   2.501 us|  }
 11)   3.093 us|}
 11)   4.667 us|  }


---

Robert Elliott (2):
  Eliminate duplicate TRACE_GRAPH_PRINT_xx defines
  In the function-graph tracer, add a funcgraph_tail option


 Documentation/trace/ftrace.txt   |   26 ++
 kernel/trace/trace.h |2 ++
 kernel/trace/trace_functions_graph.c |   18 ++
 3 files changed, 34 insertions(+), 12 deletions(-)

--
Rob Elliott   HP Server Storage
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] Eliminate duplicate TRACE_GRAPH_PRINT_xx defines

2014-05-12 Thread Robert Elliott
in trace_functions_graph.c that are already in
trace.h.

Add TRACE_GRAPH_PRINT_IRQS to trace.h, which is
the only one that is missing.

Signed-off-by: Robert Elliott <elli...@hp.com>
---
 kernel/trace/trace.h |1 +
 kernel/trace/trace_functions_graph.c |9 -
 2 files changed, 1 insertions(+), 9 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2e29d7b..861668a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -724,6 +724,7 @@ extern unsigned long trace_flags;
 #define TRACE_GRAPH_PRINT_PROC  0x8
 #define TRACE_GRAPH_PRINT_DURATION  0x10
 #define TRACE_GRAPH_PRINT_ABS_TIME  0x20
+#define TRACE_GRAPH_PRINT_IRQS  0x40
 #define TRACE_GRAPH_PRINT_FILL_SHIFT   28
 #define TRACE_GRAPH_PRINT_FILL_MASK(0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
 
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index deff112..ab78221 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -38,15 +38,6 @@ struct fgraph_data {
 
 #define TRACE_GRAPH_INDENT 2
 
-/* Flag options */
-#define TRACE_GRAPH_PRINT_OVERRUN  0x1
-#define TRACE_GRAPH_PRINT_CPU  0x2
-#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
-#define TRACE_GRAPH_PRINT_PROC 0x8
-#define TRACE_GRAPH_PRINT_DURATION 0x10
-#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
-#define TRACE_GRAPH_PRINT_IRQS 0x40
-
 static unsigned int max_depth;
 
 static struct tracer_opt trace_opts[] = {

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/2] In the function-graph tracer, add a funcgraph_tail option

2014-05-12 Thread Robert Elliott
to print the function name on all } lines, not just
functions whose first line is no longer in the trace
buffer.

If a function calls other traced functions, its total
time appears on its } line.  This change allows grep
to be used to determine the function for which the
line corresponds.

Update Documentation/trace/ftrace.txt to describe
this new option.

Signed-off-by: Robert Elliott <elli...@hp.com>
---
 Documentation/trace/ftrace.txt   |   26 ++
 kernel/trace/trace.h |1 +
 kernel/trace/trace_functions_graph.c |9 ++---
 3 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index bd36598..2479b2a 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -2003,6 +2003,32 @@ want, depending on your needs.
   360.774530 |   1)   0.594 us|  
__phys_addr();
 
 
+The function name is always displayed after the closing bracket
+for a function if the start of that function is not in the
+trace buffer.
+
+Display of the function name after the closing bracket may be
+enabled for functions whose start is in the trace buffer,
+allowing easier searching with grep for function durations.
+It is default disabled.
+
+   hide: echo nofuncgraph-tail > trace_options
+   show: echo funcgraph-tail > trace_options
+
+  Example with nofuncgraph-tail (default):
+  0)   |  putname() {
+  0)   |kmem_cache_free() {
+  0)   0.518 us|  __phys_addr();
+  0)   1.757 us|}
+  0)   2.861 us|  }
+
+  Example with funcgraph-tail:
+  0)   |  putname() {
+  0)   |kmem_cache_free() {
+  0)   0.518 us|  __phys_addr();
+  0)   1.757 us|} /* kmem_cache_free() */
+  0)   2.861 us|  } /* putname() */
+
 You can put some comments on specific functions by using
 trace_printk() For example, if you want to put a comment inside
 the __might_sleep() function, you just have to include
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 861668a..5ca365f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -725,6 +725,7 @@ extern unsigned long trace_flags;
 #define TRACE_GRAPH_PRINT_DURATION  0x10
 #define TRACE_GRAPH_PRINT_ABS_TIME  0x20
 #define TRACE_GRAPH_PRINT_IRQS  0x40
+#define TRACE_GRAPH_PRINT_TAIL  0x80
 #define TRACE_GRAPH_PRINT_FILL_SHIFT   28
 #define TRACE_GRAPH_PRINT_FILL_MASK(0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
 
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index ab78221..ba10bb0 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -55,11 +55,13 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
/* Display interrupts */
{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
+   /* Display function name after trailing } */
+   { TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
{ } /* Empty entry */
 };
 
 static struct tracer_flags tracer_flags = {
-   /* Don't display overruns and proc by default */
+   /* Don't display overruns, proc, or tail by default */
.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
   TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
.opts = trace_opts
@@ -1167,9 +1169,10 @@ print_graph_return(struct ftrace_graph_ret *trace, 
struct trace_seq *s,
 * If the return function does not have a matching entry,
 * then the entry was lost. Instead of just printing
 * the '}' and letting the user guess what function this
-* belongs to, write out the function name.
+* belongs to, write out the function name. Always do
+* that if the funcgraph-tail option is enabled.
 */
-   if (func_match) {
+   if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) {
ret = trace_seq_puts(s, "}\n");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/