We currently leak kernel memory to user space, because memory offlining
doesn't do any implicit clearing of memory and we are missing explicit
clearing of memory.
Let's keep it simple and clear pages before removing the linear mapping.
Reproduced in QEMU/TCG with 10 GiB of main memory:
[root@localhost ~]# dd obs=9G if=/dev/urandom of=/dev/null
[... wait until "free -m" used counter no longer changes and cancel]
19665802+0 records in
1+0 records out
9663676416 bytes (9.7 GB, 9.0 GiB) copied, 135.548 s, 71.3 MB/s
[root@localhost ~]# cat /sys/devices/system/memory/block_size_bytes
40000000
[root@localhost ~]# echo 0x40000000 > /sys/kernel/debug/powerpc/memtrace/enable
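[ 402.978663][ T1086] page:000000001bc4bc74 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x24900
[ 402.980063][ T1086] flags: 0x7ffff000001000(reserved)
[ 402.980415][ T1086] raw: 007ffff000001000 c00c000000924008 c00c000000924008 0000000000000000
[ 402.980627][ T1086] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000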
[ 402.980845][ T1086] page dumped because: unmovable page
[ 402.989608][ T1086] Offlined Pages 16384
[ 403.324155][ T1086] memtrace: Allocated trace memory on node 0 at 0x0000000200000000
Before this patch:
[root@localhost ~]# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace | head
00000000  c8 25 72 51 4d 26 36 c5  5c c2 56 15 d5 1a cd 10  |.%rQM&6.\.V.....|
00000010  19 b9 50 b2 cb e3 60 b8  ec 0a f3 ec 4b 3c 39 f0  |..P...`.....K<9.|
00000020  4e 5a 4c cf bd 26 19 ff  37 79 13 67 24 b7 b8 57  |NZL..&..7y.g$..W|
00000030  98 3e f5 be 6f 14 6a bd  a4 52 bc 6e e9 e0 c1 5d  |.>..o.j..R.n...]|
00000040  76 b3 ae b5 88 d7 da e3  64 23 85 2c 10 88 07 b6  |v.......d#.,....|
00000050  9a d8 91 de f7 50 27 69  2e 64 9c 6f d3 19 45 79  |.....P'i.d.o..Ey|
00000060  6a 6f 8a 61 71 19 1f c7  f1 df 28 26 ca 0f 84 55  |jo.aq.....(&...U|
00000070  01 3f be e4 e2 e1 da ff  7b 8c 8e 32 37 b4 24 53  |.?......{..27.$S|
00000080  1b 70 30 45 56 e6 8c c4  0e b5 4c fb 9f dd 88 06  |.p0EV.....L.....|
00000090  ef c4 18 79 f1 60 b1 5c  79 59 4d f4 36 d7 4a 5c  |...y.`.\yYM.6.J\|
After this patch:
[root@localhost ~]# hexdump -C /sys/kernel/debug/powerpc/memtrace/00000000/trace | head
00000000  00 00 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
*
40000000
Reported-by: Michael Ellerman
Fixes: 9d5171a8f248 ("powerpc/powernv: Enable removal of memory for in memory tracing")
Cc: stable@vger.kernel.org # v4.14+
Cc: Benjamin Herrenschmidt
Cc: Paul Mackerras
Cc: Rashmica Gupta
Cc: Andrew Morton
Cc: Mike Rapoport
Cc: Michal Hocko
Cc: Oscar Salvador
Cc: Wei Yang
Signed-off-by: David Hildenbrand
---
arch/powerpc/platforms/powernv/memtrace.c | 22 ++
1 file changed, 22 insertions(+)
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 6828108486f8..eea1f94482ff 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -67,6 +67,23 @@ static int change_memblock_state(struct memory_block *mem, void *arg)
return 0;
}
+static void memtrace_clear_range(unsigned long start_pfn,
+unsigned long nr_pages)
+{
+ unsigned long pfn;
+
+ /*
+* As pages are offline, we cannot trust the memmap anymore. As HIGHMEM
+* does not apply, avoid passing around "struct page" and use
+* clear_page() instead directly.
+*/
+ for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
+ if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
+ cond_resched();
+ clear_page(__va(PFN_PHYS(pfn)));
+ }
+}
+
/* called with device_hotplug_lock held */
static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
{
@@ -111,6 +128,11 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
lock_device_hotplug();
for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
+ /*
+* Clear the range while we still have a linear
+* mapping.
+*/
+ memtrace_clear_range(base_pfn, nr_pages);
/*
* Remove memory in memory block size chunks so that
* iomem resources are always split to the same size and
--
2.26.2