[PATCH 5.7 022/112] powerpc/kvm/book3s: Add helper to walk partition scoped linux page table.

2020-07-07 Thread Greg Kroah-Hartman
From: Aneesh Kumar K.V 

[ Upstream commit 4b99412ed6972cc77c1f16009e1d00323fcef9ab ]

The locking rules for walking the partition scoped table are different from
those for the process scoped table. Hence add a helper for the secondary linux
page table walk and also add a check that we are holding the right locks.

Signed-off-by: Aneesh Kumar K.V 
Signed-off-by: Michael Ellerman 
Link: 
https://lore.kernel.org/r/20200505071729.54912-10-aneesh.ku...@linux.ibm.com
Signed-off-by: Sasha Levin 
---
 arch/powerpc/include/asm/kvm_book3s_64.h | 13 +++++++++++++
 arch/powerpc/kvm/book3s_64_mmu_radix.c   | 12 ++++++------
 arch/powerpc/kvm/book3s_hv_nested.c  |  2 +-
 3 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h 
b/arch/powerpc/include/asm/kvm_book3s_64.h
index 04b2b927bb5ae..2c2635967d6e0 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include <asm/pte-walk.h>
 
 #ifdef CONFIG_PPC_PSERIES
 static inline bool kvmhv_on_pseries(void)
@@ -634,6 +635,18 @@ extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
unsigned long gpa, unsigned long hpa,
unsigned long nbytes);
 
+static inline pte_t *find_kvm_secondary_pte(struct kvm *kvm, unsigned long ea,
+   unsigned *hshift)
+{
+   pte_t *pte;
+
+   VM_WARN(!spin_is_locked(&kvm->mmu_lock),
+   "%s called with kvm mmu_lock not held \n", __func__);
+   pte = __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift);
+
+   return pte;
+}
+
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index bc6c1aa3d0e92..e9b3622405b1d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -993,11 +993,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct 
kvm_memory_slot *memslot,
return 0;
}
 
-   ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+   ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
 kvm->arch.lpid);
-   return 0;   
+   return 0;
 }
 
 /* Called with kvm->mmu_lock held */
@@ -1013,7 +1013,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot 
*memslot,
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
return ref;
 
-   ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+   ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
  gpa, shift);
@@ -1040,7 +1040,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct 
kvm_memory_slot *memslot,
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
return ref;
 
-   ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+   ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep) && pte_young(*ptep))
ref = 1;
return ref;
@@ -1060,7 +1060,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
return ret;
 
-   ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+   ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
ret = 1;
if (shift)
@@ -1121,7 +1121,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
gpa = memslot->base_gfn << PAGE_SHIFT;
spin_lock(&kvm->mmu_lock);
for (n = memslot->npages; n; --n) {
-   ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+   ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
 kvm->arch.lpid);
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
b/arch/powerpc/kvm/book3s_hv_nested.c
index dc97e5be76f61..7f1fc5db13eab 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -1362,7 +1362,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run 
*run,
/* See if can find translation in our partition scoped tables for L1 */
pte = __pte(0);
spin_lock(&kvm->mmu_lock);
-   pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
+   pte_p = find_kvm_secondary_pte(kvm, gpa, &shift);
if (!shift)
shift = PAGE_SHIFT;
if (pte_p)
-- 
2.25.1





[PATCH 4.11 35/78] powerpc/mm: Add physical address to Linux page table dump

2017-06-19 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Oliver O'Halloran 

commit aaa229529244a1135b29353fefb001c430db79f0 upstream.

The current page table dumper scans the Linux page tables and coalesces mappings
with adjacent virtual addresses and similar PTE flags. This behaviour is
somewhat broken when you consider the IOREMAP space where entirely unrelated
mappings will appear to be virtually contiguous. This patch modifies the range
coalescing so that only ranges that are both physically and virtually contiguous
are combined. This patch also adds to the dump output the physical address at
the start of each range.

Fixes: 8eb07b187000 ("powerpc/mm: Dump linux pagetables")
Signed-off-by: Oliver O'Halloran 
[mpe: Print the physical address with 0x like the other addresses]
Signed-off-by: Michael Ellerman 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/powerpc/mm/dump_linuxpagetables.c |   18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -57,6 +57,8 @@ struct pg_state {
struct seq_file *seq;
const struct addr_marker *marker;
unsigned long start_address;
+   unsigned long start_pa;
+   unsigned long last_pa;
unsigned int level;
u64 current_flags;
 };
@@ -253,7 +255,9 @@ static void dump_addr(struct pg_state *s
const char *unit = units;
unsigned long delta;
 
-   seq_printf(st->seq, "0x%016lx-0x%016lx   ", st->start_address, addr-1);
+   seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr-1);
+   seq_printf(st->seq, "0x%016lx ", st->start_pa);
+
delta = (addr - st->start_address) >> 10;
/* Work out what appropriate unit to use */
while (!(delta & 1023) && unit[1]) {
@@ -268,11 +272,15 @@ static void note_page(struct pg_state *s
   unsigned int level, u64 val)
 {
u64 flag = val & pg_level[level].mask;
+   u64 pa = val & PTE_RPN_MASK;
+
/* At first no level is set */
if (!st->level) {
st->level = level;
st->current_flags = flag;
st->start_address = addr;
+   st->start_pa = pa;
+   st->last_pa = pa;
seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
/*
 * Dump the section of virtual memory when:
@@ -280,9 +288,11 @@ static void note_page(struct pg_state *s
 *   - we change levels in the tree.
 *   - the address is in a different section of memory and is thus
 *   used for a different purpose, regardless of the flags.
+*   - the pa of this page is not adjacent to the last inspected page
 */
} else if (flag != st->current_flags || level != st->level ||
-  addr >= st->marker[1].start_address) {
+  addr >= st->marker[1].start_address ||
+  pa != st->last_pa + PAGE_SIZE) {
 
/* Check the PTE flags */
if (st->current_flags) {
@@ -306,8 +316,12 @@ static void note_page(struct pg_state *s
seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
}
st->start_address = addr;
+   st->start_pa = pa;
+   st->last_pa = pa;
st->current_flags = flag;
st->level = level;
+   } else {
+   st->last_pa = pa;
}
 }
 




[PATCH 4.11 35/78] powerpc/mm: Add physical address to Linux page table dump

2017-06-19 Thread Greg Kroah-Hartman
4.11-stable review patch.  If anyone has any objections, please let me know.

--

From: Oliver O'Halloran 

commit aaa229529244a1135b29353fefb001c430db79f0 upstream.

The current page table dumper scans the Linux page tables and coalesces mappings
with adjacent virtual addresses and similar PTE flags. This behaviour is
somewhat broken when you consider the IOREMAP space where entirely unrelated
mappings will appear to be virtually contiguous. This patch modifies the range
coalescing so that only ranges that are both physically and virtually contiguous
are combined. This patch also adds to the dump output the physical address at
the start of each range.

Fixes: 8eb07b187000 ("powerpc/mm: Dump linux pagetables")
Signed-off-by: Oliver O'Halloran 
[mpe: Print the physical address with 0x like the other addresses]
Signed-off-by: Michael Ellerman 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/powerpc/mm/dump_linuxpagetables.c |   18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

--- a/arch/powerpc/mm/dump_linuxpagetables.c
+++ b/arch/powerpc/mm/dump_linuxpagetables.c
@@ -57,6 +57,8 @@ struct pg_state {
struct seq_file *seq;
const struct addr_marker *marker;
unsigned long start_address;
+   unsigned long start_pa;
+   unsigned long last_pa;
unsigned int level;
u64 current_flags;
 };
@@ -253,7 +255,9 @@ static void dump_addr(struct pg_state *s
const char *unit = units;
unsigned long delta;
 
-   seq_printf(st->seq, "0x%016lx-0x%016lx   ", st->start_address, addr-1);
+   seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr-1);
+   seq_printf(st->seq, "0x%016lx ", st->start_pa);
+
delta = (addr - st->start_address) >> 10;
/* Work out what appropriate unit to use */
while (!(delta & 1023) && unit[1]) {
@@ -268,11 +272,15 @@ static void note_page(struct pg_state *s
   unsigned int level, u64 val)
 {
u64 flag = val & pg_level[level].mask;
+   u64 pa = val & PTE_RPN_MASK;
+
/* At first no level is set */
if (!st->level) {
st->level = level;
st->current_flags = flag;
st->start_address = addr;
+   st->start_pa = pa;
+   st->last_pa = pa;
seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
/*
 * Dump the section of virtual memory when:
@@ -280,9 +288,11 @@ static void note_page(struct pg_state *s
 *   - we change levels in the tree.
 *   - the address is in a different section of memory and is thus
 *   used for a different purpose, regardless of the flags.
+*   - the pa of this page is not adjacent to the last inspected page
 */
} else if (flag != st->current_flags || level != st->level ||
-  addr >= st->marker[1].start_address) {
+  addr >= st->marker[1].start_address ||
+  pa != st->last_pa + PAGE_SIZE) {
 
/* Check the PTE flags */
if (st->current_flags) {
@@ -306,8 +316,12 @@ static void note_page(struct pg_state *s
seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
}
st->start_address = addr;
+   st->start_pa = pa;
+   st->last_pa = pa;
st->current_flags = flag;
st->level = level;
+   } else {
+   st->last_pa = pa;
}
 }
 




Re: [PATCH V3] powerpc/thp: Serialize pmd clear against a linux page table walk.

2015-05-11 Thread Aneesh Kumar K.V
"Kirill A. Shutemov"  writes:

> On Mon, May 11, 2015 at 11:56:01AM +0530, Aneesh Kumar K.V wrote:
>> Serialize against find_linux_pte_or_hugepte which does lock-less
>> lookup in page tables with local interrupts disabled. For huge pages
>> it casts pmd_t to pte_t. Since format of pte_t is different from
>> pmd_t we want to prevent transit from pmd pointing to page table
>> to pmd pointing to huge page (and back) while interrupts are disabled.
>> We clear pmd to possibly replace it with page table pointer in
>> different code paths. So make sure we wait for the parallel
>> find_linux_pte_or_hugepage to finish.
>> 
>> Without this patch, a find_linux_pte_or_hugepte running in parallel to
>> __split_huge_zero_page_pmd or do_huge_pmd_wp_page_fallback or zap_huge_pmd
>> can run into the above issue. With __split_huge_zero_page_pmd and
>> do_huge_pmd_wp_page_fallback we clear the hugepage pte before inserting
>> the pmd entry with a regular pgtable address. Such a clear need to
>> wait for the parallel find_linux_pte_or_hugepte to finish.
>> 
>> With zap_huge_pmd, we can run into issues, with a hugepage pte
>> getting zapped due to a MADV_DONTNEED while other cpu fault it
>> in as small pages.
>> 
>> Reported-by: Kirill A. Shutemov 
>> Signed-off-by: Aneesh Kumar K.V 
>
> Reviewed-by: Kirill A. Shutemov 
>
> CC: stable@ ?

Yes, We also need to pick,


dac5657067919161eb3273ca787d8ae9814801e7
691e95fd7396905a38d98919e9c150dbc3ea21a3
7d6e7f7ffaba4e013c7a0589140431799bc17985


But that may need me to do a backport, because we have dependencies in kvm
and a cherry-pick may not work.

Will work with Michael Ellerman to find out what needs to be done.

-aneesh

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V3] powerpc/thp: Serialize pmd clear against a linux page table walk.

2015-05-11 Thread Kirill A. Shutemov
On Mon, May 11, 2015 at 11:56:01AM +0530, Aneesh Kumar K.V wrote:
> Serialize against find_linux_pte_or_hugepte which does lock-less
> lookup in page tables with local interrupts disabled. For huge pages
> it casts pmd_t to pte_t. Since format of pte_t is different from
> pmd_t we want to prevent transit from pmd pointing to page table
> to pmd pointing to huge page (and back) while interrupts are disabled.
> We clear pmd to possibly replace it with page table pointer in
> different code paths. So make sure we wait for the parallel
> find_linux_pte_or_hugepage to finish.
> 
> Without this patch, a find_linux_pte_or_hugepte running in parallel to
> __split_huge_zero_page_pmd or do_huge_pmd_wp_page_fallback or zap_huge_pmd
> can run into the above issue. With __split_huge_zero_page_pmd and
> do_huge_pmd_wp_page_fallback we clear the hugepage pte before inserting
> the pmd entry with a regular pgtable address. Such a clear need to
> wait for the parallel find_linux_pte_or_hugepte to finish.
> 
> With zap_huge_pmd, we can run into issues, with a hugepage pte
> getting zapped due to a MADV_DONTNEED while other cpu fault it
> in as small pages.
> 
> Reported-by: Kirill A. Shutemov 
> Signed-off-by: Aneesh Kumar K.V 

Reviewed-by: Kirill A. Shutemov 

CC: stable@ ?

-- 
 Kirill A. Shutemov
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V2 2/2] powerpc/thp: Serialize pmd clear against a linux page table walk.

2015-05-11 Thread Aneesh Kumar K.V
Andrew Morton  writes:

> On Thu,  7 May 2015 12:53:28 +0530 "Aneesh Kumar K.V" 
>  wrote:
>
>> Serialize against find_linux_pte_or_hugepte which does lock-less
>> lookup in page tables with local interrupts disabled. For huge pages
>> it casts pmd_t to pte_t. Since format of pte_t is different from
>> pmd_t we want to prevent transit from pmd pointing to page table
>> to pmd pointing to huge page (and back) while interrupts are disabled.
>> We clear pmd to possibly replace it with page table pointer in
>> different code paths. So make sure we wait for the parallel
>> find_linux_pte_or_hugepage to finish.
>
> I'm not seeing here any description of the problem which is being
> fixed.  Does the patch make the machine faster?  Does the machine
> crash?

I sent v3 with updated commit message. Adding that below.

powerpc/thp: Serialize pmd clear against a linux page table walk.

Serialize against find_linux_pte_or_hugepte which does lock-less
lookup in page tables with local interrupts disabled. For huge pages
it casts pmd_t to pte_t. Since format of pte_t is different from
pmd_t we want to prevent transit from pmd pointing to page table
to pmd pointing to huge page (and back) while interrupts are disabled.
We clear pmd to possibly replace it with page table pointer in
different code paths. So make sure we wait for the parallel
find_linux_pte_or_hugepage to finish.

Without this patch, a find_linux_pte_or_hugepte running in parallel to
__split_huge_zero_page_pmd or do_huge_pmd_wp_page_fallback or zap_huge_pmd
can run into the above issue. With __split_huge_zero_page_pmd and
do_huge_pmd_wp_page_fallback we clear the hugepage pte before inserting
the pmd entry with a regular pgtable address. Such a clear need to
wait for the parallel find_linux_pte_or_hugepte to finish.

With zap_huge_pmd, we can run into issues, with a hugepage pte
getting zapped due to a MADV_DONTNEED while other cpu fault it
in as small pages.

-aneesh

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V3] powerpc/thp: Serialize pmd clear against a linux page table walk.

2015-05-11 Thread Aneesh Kumar K.V
Serialize against find_linux_pte_or_hugepte which does lock-less
lookup in page tables with local interrupts disabled. For huge pages
it casts pmd_t to pte_t. Since format of pte_t is different from
pmd_t we want to prevent transit from pmd pointing to page table
to pmd pointing to huge page (and back) while interrupts are disabled.
We clear pmd to possibly replace it with page table pointer in
different code paths. So make sure we wait for the parallel
find_linux_pte_or_hugepage to finish.

Without this patch, a find_linux_pte_or_hugepte running in parallel to
__split_huge_zero_page_pmd or do_huge_pmd_wp_page_fallback or zap_huge_pmd
can run into the above issue. With __split_huge_zero_page_pmd and
do_huge_pmd_wp_page_fallback we clear the hugepage pte before inserting
the pmd entry with a regular pgtable address. Such a clear need to
wait for the parallel find_linux_pte_or_hugepte to finish.

With zap_huge_pmd, we can run into issues, with a hugepage pte
getting zapped due to a MADV_DONTNEED while other cpu fault it
in as small pages.

Reported-by: Kirill A. Shutemov 
Signed-off-by: Aneesh Kumar K.V 
---
Changes from V2:
* Drop the cleanup patch
  Will send this as a separate patch and not as a bug fix.
* Update commit message

 arch/powerpc/mm/pgtable_64.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index b651179ac4da..1325be89e670 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -845,6 +845,17 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm,
 * hash fault look at them.
 */
memset(pgtable, 0, PTE_FRAG_SIZE);
+   /*
+* Serialize against find_linux_pte_or_hugepte which does lock-less
+* lookup in page tables with local interrupts disabled. For huge pages
+* it casts pmd_t to pte_t. Since format of pte_t is different from
+* pmd_t we want to prevent transit from pmd pointing to page table
+* to pmd pointing to huge page (and back) while interrupts are disabled.
+* We clear pmd to possibly replace it with page table pointer in
+* different code paths. So make sure we wait for the parallel
+* find_linux_pte_or_hugepage to finish.
+*/
+   kick_all_cpus_sync();
return old_pmd;
 }
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V2 2/2] powerpc/thp: Serialize pmd clear against a linux page table walk.

2015-05-11 Thread Aneesh Kumar K.V
Andrew Morton a...@linux-foundation.org writes:

 On Thu,  7 May 2015 12:53:28 +0530 Aneesh Kumar K.V 
 aneesh.ku...@linux.vnet.ibm.com wrote:

 Serialize against find_linux_pte_or_hugepte which does lock-less
 lookup in page tables with local interrupts disabled. For huge pages
 it casts pmd_t to pte_t. Since format of pte_t is different from
 pmd_t we want to prevent transit from pmd pointing to page table
 to pmd pointing to huge page (and back) while interrupts are disabled.
 We clear pmd to possibly replace it with page table pointer in
 different code paths. So make sure we wait for the parallel
 find_linux_pte_or_hugepage to finish.

 I'm not seeing here any description of the problem which is being
 fixed.  Does the patch make the machine faster?  Does the machine
 crash?

I sent v3 with updated commit message. Adding that below.

powerpc/thp: Serialize pmd clear against a linux page table walk.

Serialize against find_linux_pte_or_hugepte which does lock-less
lookup in page tables with local interrupts disabled. For huge pages
it casts pmd_t to pte_t. Since format of pte_t is different from
pmd_t we want to prevent transit from pmd pointing to page table
to pmd pointing to huge page (and back) while interrupts are disabled.
We clear pmd to possibly replace it with page table pointer in
different code paths. So make sure we wait for the parallel
find_linux_pte_or_hugepage to finish.

Without this patch, a find_linux_pte_or_hugepte running in parallel to
__split_huge_zero_page_pmd or do_huge_pmd_wp_page_fallback or zap_huge_pmd
can run into the above issue. With __split_huge_zero_page_pmd and
do_huge_pmd_wp_page_fallback we clear the hugepage pte before inserting
the pmd entry with a regular pgtable address. Such a clear need to
wait for the parallel find_linux_pte_or_hugepte to finish.

With zap_huge_pmd, we can run into issues, with a hugepage pte
getting zapped due to a MADV_DONTNEED while other cpu fault it
in as small pages.

-aneesh

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V3] powerpc/thp: Serialize pmd clear against a linux page table walk.

2015-05-11 Thread Aneesh Kumar K.V
Serialize against find_linux_pte_or_hugepte which does lock-less
lookup in page tables with local interrupts disabled. For huge pages
it casts pmd_t to pte_t. Since format of pte_t is different from
pmd_t we want to prevent transit from pmd pointing to page table
to pmd pointing to huge page (and back) while interrupts are disabled.
We clear pmd to possibly replace it with page table pointer in
different code paths. So make sure we wait for the parallel
find_linux_pte_or_hugepage to finish.

Without this patch, a find_linux_pte_or_hugepte running in parallel to
__split_huge_zero_page_pmd or do_huge_pmd_wp_page_fallback or zap_huge_pmd
can run into the above issue. With __split_huge_zero_page_pmd and
do_huge_pmd_wp_page_fallback we clear the hugepage pte before inserting
the pmd entry with a regular pgtable address. Such a clear need to
wait for the parallel find_linux_pte_or_hugepte to finish.

With zap_huge_pmd, we can run into issues, with a hugepage pte
getting zapped due to a MADV_DONTNEED while other cpu fault it
in as small pages.

Reported-by: Kirill A. Shutemov kirill.shute...@linux.intel.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
Changes from V2:
* Drop the cleanup patch
  Will send this as a separate patch and not as a bug fix.
* Update commit message

 arch/powerpc/mm/pgtable_64.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index b651179ac4da..1325be89e670 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -845,6 +845,17 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm,
 * hash fault look at them.
 */
memset(pgtable, 0, PTE_FRAG_SIZE);
+   /*
+* Serialize against find_linux_pte_or_hugepte which does lock-less
+* lookup in page tables with local interrupts disabled. For huge pages
+* it casts pmd_t to pte_t. Since format of pte_t is different from
+* pmd_t we want to prevent transit from pmd pointing to page table
+* to pmd pointing to huge page (and back) while interrupts are disabled.
+* We clear pmd to possibly replace it with page table pointer in
+* different code paths. So make sure we wait for the parallel
+* find_linux_pte_or_hugepage to finish.
+*/
+   kick_all_cpus_sync();
return old_pmd;
 }
 
-- 
2.1.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V3] powerpc/thp: Serialize pmd clear against a linux page table walk.

2015-05-11 Thread Kirill A. Shutemov
On Mon, May 11, 2015 at 11:56:01AM +0530, Aneesh Kumar K.V wrote:
 Serialize against find_linux_pte_or_hugepte which does lock-less
 lookup in page tables with local interrupts disabled. For huge pages
 it casts pmd_t to pte_t. Since format of pte_t is different from
 pmd_t we want to prevent transit from pmd pointing to page table
 to pmd pointing to huge page (and back) while interrupts are disabled.
 We clear pmd to possibly replace it with page table pointer in
 different code paths. So make sure we wait for the parallel
 find_linux_pte_or_hugepage to finish.
 
 Without this patch, a find_linux_pte_or_hugepte running in parallel to
 __split_huge_zero_page_pmd or do_huge_pmd_wp_page_fallback or zap_huge_pmd
 can run into the above issue. With __split_huge_zero_page_pmd and
 do_huge_pmd_wp_page_fallback we clear the hugepage pte before inserting
 the pmd entry with a regular pgtable address. Such a clear need to
 wait for the parallel find_linux_pte_or_hugepte to finish.
 
 With zap_huge_pmd, we can run into issues, with a hugepage pte
 getting zapped due to a MADV_DONTNEED while other cpu fault it
 in as small pages.
 
 Reported-by: Kirill A. Shutemov kirill.shute...@linux.intel.com
 Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com

Reviewed-by: Kirill A. Shutemov kirill.shute...@linux.intel.com

CC: stable@ ?

-- 
 Kirill A. Shutemov
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V3] powerpc/thp: Serialize pmd clear against a linux page table walk.

2015-05-11 Thread Aneesh Kumar K.V
Kirill A. Shutemov kir...@shutemov.name writes:

 On Mon, May 11, 2015 at 11:56:01AM +0530, Aneesh Kumar K.V wrote:
 Serialize against find_linux_pte_or_hugepte which does lock-less
 lookup in page tables with local interrupts disabled. For huge pages
 it casts pmd_t to pte_t. Since format of pte_t is different from
 pmd_t we want to prevent transit from pmd pointing to page table
 to pmd pointing to huge page (and back) while interrupts are disabled.
 We clear pmd to possibly replace it with page table pointer in
 different code paths. So make sure we wait for the parallel
 find_linux_pte_or_hugepage to finish.
 
 Without this patch, a find_linux_pte_or_hugepte running in parallel to
 __split_huge_zero_page_pmd or do_huge_pmd_wp_page_fallback or zap_huge_pmd
 can run into the above issue. With __split_huge_zero_page_pmd and
 do_huge_pmd_wp_page_fallback we clear the hugepage pte before inserting
 the pmd entry with a regular pgtable address. Such a clear need to
 wait for the parallel find_linux_pte_or_hugepte to finish.
 
 With zap_huge_pmd, we can run into issues, with a hugepage pte
 getting zapped due to a MADV_DONTNEED while other cpu fault it
 in as small pages.
 
 Reported-by: Kirill A. Shutemov kirill.shute...@linux.intel.com
 Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com

 Reviewed-by: Kirill A. Shutemov kirill.shute...@linux.intel.com

 CC: stable@ ?

Yes, We also need to pick,


dac5657067919161eb3273ca787d8ae9814801e7
691e95fd7396905a38d98919e9c150dbc3ea21a3
7d6e7f7ffaba4e013c7a0589140431799bc17985


But that may need me to do a backport, because we have dependencies in kvm
and a cherry-pick may not work.

Will work with Michael Ellerman to find out what needs to be done.

-aneesh

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V2 2/2] powerpc/thp: Serialize pmd clear against a linux page table walk.

2015-05-08 Thread Andrew Morton
On Thu,  7 May 2015 12:53:28 +0530 "Aneesh Kumar K.V" 
 wrote:

> Serialize against find_linux_pte_or_hugepte which does lock-less
> lookup in page tables with local interrupts disabled. For huge pages
> it casts pmd_t to pte_t. Since format of pte_t is different from
> pmd_t we want to prevent transit from pmd pointing to page table
> to pmd pointing to huge page (and back) while interrupts are disabled.
> We clear pmd to possibly replace it with page table pointer in
> different code paths. So make sure we wait for the parallel
> find_linux_pte_or_hugepage to finish.

I'm not seeing here any description of the problem which is being
fixed.  Does the patch make the machine faster?  Does the machine
crash?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH V2 2/2] powerpc/thp: Serialize pmd clear against a linux page table walk.

2015-05-08 Thread Andrew Morton
On Thu,  7 May 2015 12:53:28 +0530 Aneesh Kumar K.V 
aneesh.ku...@linux.vnet.ibm.com wrote:

 Serialize against find_linux_pte_or_hugepte which does lock-less
 lookup in page tables with local interrupts disabled. For huge pages
 it casts pmd_t to pte_t. Since format of pte_t is different from
 pmd_t we want to prevent transit from pmd pointing to page table
 to pmd pointing to huge page (and back) while interrupts are disabled.
 We clear pmd to possibly replace it with page table pointer in
 different code paths. So make sure we wait for the parallel
 find_linux_pte_or_hugepage to finish.

I'm not seeing here any description of the problem which is being
fixed.  Does the patch make the machine faster?  Does the machine
crash?
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V2 2/2] powerpc/thp: Serialize pmd clear against a linux page table walk.

2015-05-07 Thread Aneesh Kumar K.V
Serialize against find_linux_pte_or_hugepte which does lock-less
lookup in page tables with local interrupts disabled. For huge pages
it casts pmd_t to pte_t. Since format of pte_t is different from
pmd_t we want to prevent transit from pmd pointing to page table
to pmd pointing to huge page (and back) while interrupts are disabled.
We clear pmd to possibly replace it with page table pointer in
different code paths. So make sure we wait for the parallel
find_linux_pte_or_hugepage to finish.

Reported-by: Kirill A. Shutemov 
Signed-off-by: Aneesh Kumar K.V 
---
Changes from v1:
* Move kick_all_cpus_sync to pmdp_get_and_clear so that it handles zap_huge_pmd
  case also.

 arch/powerpc/mm/pgtable_64.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 9171c1a37290..049d961802aa 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -845,6 +845,17 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm,
 * hash fault look at them.
 */
memset(pgtable, 0, PTE_FRAG_SIZE);
+   /*
+* Serialize against find_linux_pte_or_hugepte which does lock-less
+* lookup in page tables with local interrupts disabled. For huge pages
+* it casts pmd_t to pte_t. Since format of pte_t is different from
+* pmd_t we want to prevent transit from pmd pointing to page table
+* to pmd pointing to huge page (and back) while interrupts are disabled.
+* We clear pmd to possibly replace it with page table pointer in
+* different code paths. So make sure we wait for the parallel
+* find_linux_pte_or_hugepte to finish.
+*/
+   kick_all_cpus_sync();
return old_pmd;
 }
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH V2 2/2] powerpc/thp: Serialize pmd clear against a linux page table walk.

2015-05-07 Thread Aneesh Kumar K.V
Serialize against find_linux_pte_or_hugepte which does lock-less
lookup in page tables with local interrupts disabled. For huge pages
it casts pmd_t to pte_t. Since format of pte_t is different from
pmd_t we want to prevent transit from pmd pointing to page table
to pmd pointing to huge page (and back) while interrupts are disabled.
We clear pmd to possibly replace it with page table pointer in
different code paths. So make sure we wait for the parallel
find_linux_pte_or_hugepte to finish.

Reported-by: Kirill A. Shutemov kirill.shute...@linux.intel.com
Signed-off-by: Aneesh Kumar K.V aneesh.ku...@linux.vnet.ibm.com
---
Changes from v1:
* Move kick_all_cpus_sync to pmdp_get_and_clear so that it handles the
  zap_huge_pmd case also.

 arch/powerpc/mm/pgtable_64.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 9171c1a37290..049d961802aa 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -845,6 +845,17 @@ pmd_t pmdp_get_and_clear(struct mm_struct *mm,
 * hash fault look at them.
 */
memset(pgtable, 0, PTE_FRAG_SIZE);
+   /*
+* Serialize against find_linux_pte_or_hugepte which does lock-less
+* lookup in page tables with local interrupts disabled. For huge pages
+* it casts pmd_t to pte_t. Since format of pte_t is different from
+* pmd_t we want to prevent transit from pmd pointing to page table
+* to pmd pointing to huge page (and back) while interrupts are disabled.
+* We clear pmd to possibly replace it with page table pointer in
+* different code paths. So make sure we wait for the parallel
+* find_linux_pte_or_hugepte to finish.
+*/
+   kick_all_cpus_sync();
return old_pmd;
 }
 
-- 
2.1.4

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux page table

2012-09-04 Thread Cong Wang

On 09/03/2012 02:26 AM, Jiri Kosina wrote:

On Sun, 2 Sep 2012, Xin Tong wrote:


3. can two different processes have their CR3 being the same value
even though they have different first level page tables ?


Yes, if they are created by clone(CLONE_VM). In such case they share the
same mm_struct, and therefore mm_struct->pgd (which is exactly what is
loaded into cr3 in switch_mm()) is the same.



Is this the COW mechanism in linux. what if the cloned process need to
have set of its own pages later. do the CR3s for the 2 processes
become different at that point ?


That is a different story. COW is applied on fork() (i.e. spawning new
process), not on clone(CLONE_VM) (i.e. spawning new thread).



Yeah, and unshare(2) does not implement the flag that reverses the 
effects of CLONE_VM.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux page table

2012-09-04 Thread Cong Wang

On 09/03/2012 02:26 AM, Jiri Kosina wrote:

On Sun, 2 Sep 2012, Xin Tong wrote:


3. can two different processes have their CR3 being the same value
even though they have different first level page tables ?


Yes, if they are created by clone(CLONE_VM). In such case they share the
same mm_struct, and therefore mm_struct->pgd (which is exactly what is
loaded into cr3 in switch_mm()) is the same.



Is this the COW mechanism in linux. what if the cloned process need to
have set of its own pages later. do the CR3s for the 2 processes
become different at that point ?


That is a different story. COW is applied on fork() (i.e. spawning new
process), not on clone(CLONE_VM) (i.e. spawning new thread).



Yeah, and unshare(2) does not implement the flag that reverses the 
effects of CLONE_VM.


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux page table

2012-09-02 Thread Jiri Kosina
On Sun, 2 Sep 2012, Xin Tong wrote:

> >> 3. can two different processes have their CR3 being the same value
> >> even though they have different first level page tables ?
> >
> > Yes, if they are created by clone(CLONE_VM). In such case they share the
> > same mm_struct, and therefore mm_struct->pgd (which is exactly what is
> > loaded into cr3 in switch_mm()) is the same.
> >
> 
> Is this the COW mechanism in linux. what if the cloned process need to
> have set of its own pages later. do the CR3s for the 2 processes
> become different at that point ?

That is a different story. COW is applied on fork() (i.e. spawning new 
process), not on clone(CLONE_VM) (i.e. spawning new thread).

-- 
Jiri Kosina
SUSE Labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux page table

2012-09-02 Thread Xin Tong
On Sun, Sep 2, 2012 at 1:10 AM, Jiri Kosina  wrote:
> On Sat, 1 Sep 2012, Xin Tong wrote:
>
>> When a process is created in Linux, corresponding page table is
>> implemented. In the current x86 linux, the page table is a multi-level
>> page table and CR3 points to the first level of the page table.  I
>> have 2 questions.
>>
>> 1. is the value in CR3 virtual address or physical address ?
>
> Physical, otherwise you will have chicken-egg problem.
>
>> 2. can the address of the first level of the page table during a
>> process's lifetime change ?
>
> In theory it would be possible to implement. But I don't see a scenario
> when it might be useful.
>
>> 3. can two different processes have their CR3 being the same value
>> even though they have different first level page tables ?
>
> Yes, if they are created by clone(CLONE_VM). In such case they share the
> same mm_struct, and therefore mm_struct->pgd (which is exactly what is
> loaded into cr3 in switch_mm()) is the same.
>

Is this the COW mechanism in linux. what if the cloned process need to
have set of its own pages later. do the CR3s for the 2 processes
become different at that point ?

> LKML is however very inappropriate list for such questions. Please ask on
> kernelnewbies list next time.
>

Thank you for letting me know. Will do next time.
> --
> Jiri Kosina
> SUSE Labs
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux page table

2012-09-02 Thread Jiri Kosina
On Sat, 1 Sep 2012, Xin Tong wrote:

> When a process is created in Linux, corresponding page table is
> implemented. In the current x86 linux, the page table is a multi-level
> page table and CR3 points to the first level of the page table.  I
> have 2 questions.
> 
> 1. is the value in CR3 virtual address or physical address ?

Physical, otherwise you will have chicken-egg problem.

> 2. can the address of the first level of the page table during a
> process's lifetime change ?

In theory it would be possible to implement. But I don't see a scenario 
when it might be useful.

> 3. can two different processes have their CR3 being the same value
> even though they have different first level page tables ?

Yes, if they are created by clone(CLONE_VM). In such case they share the 
same mm_struct, and therefore mm_struct->pgd (which is exactly what is 
loaded into cr3 in switch_mm()) is the same.

LKML is however very inappropriate list for such questions. Please ask on 
kernelnewbies list next time.

-- 
Jiri Kosina
SUSE Labs

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux page table

2012-09-02 Thread Jiri Kosina
On Sat, 1 Sep 2012, Xin Tong wrote:

> When a process is created in Linux, corresponding page table is
> implemented. In the current x86 linux, the page table is a multi-level
> page table and CR3 points to the first level of the page table.  I
> have 2 questions.
> 
> 1. is the value in CR3 virtual address or physical address ?

Physical, otherwise you will have chicken-egg problem.

> 2. can the address of the first level of the page table during a
> process's lifetime change ?

In theory it would be possible to implement. But I don't see a scenario 
when it might be useful.

> 3. can two different processes have their CR3 being the same value
> even though they have different first level page tables ?

Yes, if they are created by clone(CLONE_VM). In such case they share the 
same mm_struct, and therefore mm_struct->pgd (which is exactly what is 
loaded into cr3 in switch_mm()) is the same.

LKML is however very inappropriate list for such questions. Please ask on 
kernelnewbies list next time.

-- 
Jiri Kosina
SUSE Labs

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux page table

2012-09-02 Thread Xin Tong
On Sun, Sep 2, 2012 at 1:10 AM, Jiri Kosina jkos...@suse.cz wrote:
 On Sat, 1 Sep 2012, Xin Tong wrote:

 When a process is created in Linux, corresponding page table is
 implemented. In the current x86 linux, the page table is a multi-level
 page table and CR3 points to the first level of the page table.  I
 have 2 questions.

 1. is the value in CR3 virtual address or physical address ?

 Physical, otherwise you will have chicken-egg problem.

 2. can the address of the first level of the page table during a
 process's lifetime change ?

 In theory it would be possible to implement. But I don't see a scenario
 when it might be useful.

 3. can two different processes have their CR3 being the same value
 even though they have different first level page tables ?

 Yes, if they are created by clone(CLONE_VM). In such case they share the
 same mm_struct, and therefore mm_struct->pgd (which is exactly what is
 loaded into cr3 in switch_mm()) is the same.


Is this the COW mechanism in linux. what if the cloned process need to
have set of its own pages later. do the CR3s for the 2 processes
become different at that point ?

 LKML is however very inappropriate list for such questions. Please ask on
 kernelnewbies list next time.


Thank you for letting me know. Will do next time.
 --
 Jiri Kosina
 SUSE Labs

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux page table

2012-09-02 Thread Jiri Kosina
On Sun, 2 Sep 2012, Xin Tong wrote:

> >> 3. can two different processes have their CR3 being the same value
> >> even though they have different first level page tables ?
> >
> > Yes, if they are created by clone(CLONE_VM). In such case they share the
> > same mm_struct, and therefore mm_struct->pgd (which is exactly what is
> > loaded into cr3 in switch_mm()) is the same.
> >
> 
> Is this the COW mechanism in linux. what if the cloned process need to
> have set of its own pages later. do the CR3s for the 2 processes
> become different at that point ?

That is a different story. COW is applied on fork() (i.e. spawning new 
process), not on clone(CLONE_VM) (i.e. spawning new thread).

-- 
Jiri Kosina
SUSE Labs
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux page table

2012-09-01 Thread Xin Tong
On Sat, Sep 1, 2012 at 1:01 PM, Shentino  wrote:
> On Sat, Sep 1, 2012 at 12:30 PM, Xin Tong  wrote:
>> When a process is created in Linux, corresponding page table is
>> implemented. In the current x86 linux, the page table is a multi-level
>> page table and CR3 points to the first level of the page table.  I
>> have 2 questions.
>>
>> 1. is the value in CR3 virtual address or physical address ?
>
> It's a physical address.  It points the CPU to it in physical memory.
>
> More generally, all addresses in page tables, directories, etc are
> physical addresses.
>
>> 2. can the address of the first level of the page table during a
>> process's lifetime change ?
>
> This I don't know.
>
>> 3. can two different processes have their CR3 being the same value
>> even though they have different first level page tables ?
>
> I'm not sure about this, but I think CR3 is actually bound to the
> mm_struct and not the process.
>
> Think about separate processes with the same address space, such as
> multithreaded processes.

Do not all the threads share the same address space in a multithreaded program ?

Xin

>
>>
>> Thanks
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux page table

2012-09-01 Thread Shentino
On Sat, Sep 1, 2012 at 12:30 PM, Xin Tong  wrote:
> When a process is created in Linux, corresponding page table is
> implemented. In the current x86 linux, the page table is a multi-level
> page table and CR3 points to the first level of the page table.  I
> have 2 questions.
>
> 1. is the value in CR3 virtual address or physical address ?

It's a physical address.  It points the CPU to it in physical memory.

More generally, all addresses in page tables, directories, etc are
physical addresses.

> 2. can the address of the first level of the page table during a
> process's lifetime change ?

This I don't know.

> 3. can two different processes have their CR3 being the same value
> even though they have different first level page tables ?

I'm not sure about this, but I think CR3 is actually bound to the
mm_struct and not the process.

Think about separate processes with the same address space, such as
multithreaded processes.

>
> Thanks
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux page table

2012-09-01 Thread Xin Tong
When a process is created in Linux, corresponding page table is
implemented. In the current x86 linux, the page table is a multi-level
page table and CR3 points to the first level of the page table.  I
have 2 questions.

1. is the value in CR3 virtual address or physical address ?
2. can the address of the first level of the page table during a
process's lifetime change ?
3. can two different processes have their CR3 being the same value
even though they have different first level page tables ?

Thanks
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux page table

2012-09-01 Thread Xin Tong
When a process is created in Linux, corresponding page table is
implemented. In the current x86 linux, the page table is a multi-level
page table and CR3 points to the first level of the page table.  I
have 2 questions.

1. is the value in CR3 virtual address or physical address ?
2. can the address of the first level of the page table during a
process's lifetime change ?
3. can two different processes have their CR3 being the same value
even though they have different first level page tables ?

Thanks
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux page table

2012-09-01 Thread Shentino
On Sat, Sep 1, 2012 at 12:30 PM, Xin Tong xerox.time.t...@gmail.com wrote:
 When a process is created in Linux, corresponding page table is
 implemented. In the current x86 linux, the page table is a multi-level
 page table and CR3 points to the first level of the page table.  I
 have 2 questions.

 1. is the value in CR3 virtual address or physical address ?

It's a physical address.  It points the CPU to it in physical memory.

More generally, all addresses in page tables, directories, etc are
physical addresses.

 2. can the address of the first level of the page table during a
 process's lifetime change ?

This I don't know.

 3. can two different processes have their CR3 being the same value
 even though they have different first level page tables ?

I'm not sure about this, but I think CR3 is actually bound to the
mm_struct and not the process.

Think about separate processes with the same address space, such as
multithreaded processes.


 Thanks
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: linux page table

2012-09-01 Thread Xin Tong
On Sat, Sep 1, 2012 at 1:01 PM, Shentino shent...@gmail.com wrote:
 On Sat, Sep 1, 2012 at 12:30 PM, Xin Tong xerox.time.t...@gmail.com wrote:
 When a process is created in Linux, corresponding page table is
 implemented. In the current x86 linux, the page table is a multi-level
 page table and CR3 points to the first level of the page table.  I
 have 2 questions.

 1. is the value in CR3 virtual address or physical address ?

 It's a physical address.  It points the CPU to it in physical memory.

 More generally, all addresses in page tables, directories, etc are
 physical addresses.

 2. can the address of the first level of the page table during a
 process's lifetime change ?

 This I don't know.

 3. can two different processes have their CR3 being the same value
 even though they have different first level page tables ?

 I'm not sure about this, but I think CR3 is actually bound to the
 mm_struct and not the process.

 Think about separate processes with the same address space, such as
 multithreaded processes.

Do not all the threads share the same address space in a multithreaded program ?

Xin



 Thanks
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/