On 2026-01-15 at 05:19 +1100, Jan Kara <[email protected]> wrote...
> On Wed 14-01-26 17:49:30, Seunguk Shin wrote:
> > Trying to convert zero or empty xarray entry causes kernel panic.
> > 
> > [    0.737679] EXT4-fs (pmem0p1): mounted filesystem
> > 79676804-7c8b-491a-b2a6-9bae3c72af70 ro with ordered data mode. Quota mode:
> > disabled.
> > [    0.737891] VFS: Mounted root (ext4 filesystem) readonly on device 259:1.
> > [    0.739119] devtmpfs: mounted
> > [    0.739476] Freeing unused kernel memory: 1920K
> > [    0.740156] Run /sbin/init as init process
> > [    0.740229]   with arguments:
> > [    0.740286]     /sbin/init
> > [    0.740321]   with environment:
> > [    0.740369]     HOME=/
> > [    0.740400]     TERM=linux
> > [    0.743162] Unable to handle kernel paging request at virtual address
> > fffffdffbf000008
> > [    0.743285] Mem abort info:
> > [    0.743316]   ESR = 0x0000000096000006
> > [    0.743371]   EC = 0x25: DABT (current EL), IL = 32 bits
> > [    0.743444]   SET = 0, FnV = 0
> > [    0.743489]   EA = 0, S1PTW = 0
> > [    0.743545]   FSC = 0x06: level 2 translation fault
> > [    0.743610] Data abort info:
> > [    0.743656]   ISV = 0, ISS = 0x00000006, ISS2 = 0x00000000
> > [    0.743720]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> > [    0.743785]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> > [    0.743848] swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000b9d17000
> > [    0.743931] [fffffdffbf000008] pgd=10000000bfa3d403,
> > p4d=10000000bfa3d403, pud=1000000040bfe403, pmd=0000000000000000
> > [    0.744070] Internal error: Oops: 0000000096000006 [#1]  SMP
> > [    0.748888] CPU: 0 UID: 0 PID: 1 Comm: init Not tainted 6.18.4 #1 NONE
> > [    0.749421] pstate: 004000c5 (nzcv daIF +PAN -UAO -TCO -DIT -SSBS
> > BTYPE=--)
> > [    0.749969] pc : dax_disassociate_entry.constprop.0+0x20/0x50
> > [    0.750444] lr : dax_insert_entry+0xcc/0x408
> > [    0.750802] sp : ffff80008000b9e0
> > [    0.751083] x29: ffff80008000b9e0 x28: 0000000000000000 x27:
> > 0000000000000000
> > [    0.751682] x26: 0000000001963d01 x25: ffff0000004f7d90 x24:
> > 0000000000000000
> > [    0.752264] x23: 0000000000000000 x22: ffff80008000bcc8 x21:
> > 0000000000000011
> > [    0.752836] x20: ffff80008000ba90 x19: 0000000001963d01 x18:
> > 0000000000000000
> > [    0.753407] x17: 0000000000000000 x16: 0000000000000000 x15:
> > 0000000000000000
> > [    0.753970] x14: ffffbf3154b9ae70 x13: 0000000000000000 x12:
> > ffffbf3154b9ae70
> > [    0.754548] x11: ffffffffffffffff x10: 0000000000000000 x9 :
> > 0000000000000000
> > [    0.755122] x8 : 000000000000000d x7 : 000000000000001f x6 :
> > 0000000000000000
> > [    0.755707] x5 : 0000000000000000 x4 : 0000000000000000 x3 :
> > fffffdffc0000000
> > [    0.756287] x2 : 0000000000000008 x1 : 0000000040000000 x0 :
> > fffffdffbf000000
> > [    0.756871] Call trace:
> > [    0.757107]  dax_disassociate_entry.constprop.0+0x20/0x50 (P)
> > [    0.757592]  dax_iomap_pte_fault+0x4fc/0x808
> > [    0.757951]  dax_iomap_fault+0x28/0x30
> > [    0.758258]  ext4_dax_huge_fault+0x80/0x2dc
> > [    0.758594]  ext4_dax_fault+0x10/0x3c
> > [    0.758892]  __do_fault+0x38/0x12c
> > [    0.759175]  __handle_mm_fault+0x530/0xcf0
> > [    0.759518]  handle_mm_fault+0xe4/0x230
> > [    0.759833]  do_page_fault+0x17c/0x4dc
> > [    0.760144]  do_translation_fault+0x30/0x38
> > [    0.760483]  do_mem_abort+0x40/0x8c
> > [    0.760771]  el0_ia+0x4c/0x170
> > [    0.761032]  el0t_64_sync_handler+0xd8/0xdc
> > [    0.761371]  el0t_64_sync+0x168/0x16c
> > [    0.761677] Code: f9453021 f2dfbfe3 cb813080 8b001860 (f9400401)
> > [    0.762168] ---[ end trace 0000000000000000 ]---
> > [    0.762550] note: init[1] exited with irqs disabled
> > [    0.762631] Kernel panic - not syncing: Attempted to kill init!
> > exitcode=0x0000000b
> > 
> > This patch just reorders checking and converting.
> > 
> > Signed-off-by: Seunguk Shin <[email protected]>
> 
> I think this was introduced by Alistair's patches (added to CC) and as such
> we should add:
> 
> Fixes: 38607c62b34b ("fs/dax: properly refcount fs dax pages")

Ack, thanks for fixing this. I think I see what happened - prior to this change
these three functions called pfn_folio in a loop using for_each_mapped_pfn()
which filtered out empty/zero entries:

static unsigned long dax_end_pfn(void *entry)
{
        return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
}

/*
 * Iterate through all mapped pfns represented by an entry, i.e. skip
 * 'empty' and 'zero' entries.
 */
#define for_each_mapped_pfn(entry, pfn) \
        for (pfn = dax_to_pfn(entry); \
                        pfn < dax_end_pfn(entry); pfn++)

That function did what it says on the tin, but it's subtle because it relied on
dax_end_pfn() for a zero/empty entry returning zero. Obviously I missed that in
the conversion but the fix looks good to me so:

Reviewed-by: Alistair Popple <[email protected]>

> 
> tag here. Otherwise the fix looks good to me. Feel free to add:
> 
> Reviewed-by: Jan Kara <[email protected]>
> 
>                                                               Honza
> 
> > ---
> >  fs/dax.c | 9 ++++++---
> >  1 file changed, 6 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/dax.c b/fs/dax.c
> > index 289e6254aa30..de316be2cc4e 100644
> > --- a/fs/dax.c
> > +++ b/fs/dax.c
> > @@ -443,11 +443,12 @@ static void dax_associate_entry(void *entry, struct
> > address_space *mapping,
> >                                 unsigned long address, bool shared)
> >  {
> >         unsigned long size = dax_entry_size(entry), index;
> > -       struct folio *folio = dax_to_folio(entry);
> > +       struct folio *folio;
> > 
> >         if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
> >                 return;
> > 
> > +       folio = dax_to_folio(entry);
> >         index = linear_page_index(vma, address & ~(size - 1));
> >         if (shared && (folio->mapping || dax_folio_is_shared(folio))) {
> >                 if (folio->mapping)
> > @@ -468,21 +469,23 @@ static void dax_associate_entry(void *entry, struct
> > address_space *mapping,
> >  static void dax_disassociate_entry(void *entry, struct address_space
> > *mapping,
> >                                 bool trunc)
> >  {
> > -       struct folio *folio = dax_to_folio(entry);
> > +       struct folio *folio;
> > 
> >         if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
> >                 return;
> > 
> > +       folio = dax_to_folio(entry);
> >         dax_folio_put(folio);
> >  }
> > 
> >  static struct page *dax_busy_page(void *entry)
> >  {
> > -       struct folio *folio = dax_to_folio(entry);
> > +       struct folio *folio;
> > 
> >         if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
> >                 return NULL;
> > 
> > +       folio = dax_to_folio(entry);
> >         if (folio_ref_count(folio) - folio_mapcount(folio))
> >                 return &folio->page;
> >         else
> > --
> > 2.34.1
> > 
> -- 
> Jan Kara <[email protected]>
> SUSE Labs, CR

Reply via email to