On 11/21/25 8:55 PM, Tom Rini wrote:

Hello Tom,

Synchronize local copy of DTC with Linux 6.17 , using commits picked
from Linux kernel. This also includes two fix up patches to make the
DM core work with new 8-byte alignment checking in libfdt and another
fix for NULL pointer check that is missing in libfdt.

This depends on the following patches sent separately, which fix
various 8-byte alignment problems in the code base:

  - boot: android: Always use 8-byte aligned DT with libfdt
  - test/py: android: Point fdt command to aligned addresses
  - test/py: Use aligned address for overlays in 'extension' test
  - sandbox: Fix DT compiler address warnings in sandbox DTs
  - sandbox: Fix DT compiler pin warnings in sandbox DTs
  - boot: Assure FDT is always at 8-byte aligned address
  - arm: qemu: Eliminate fdt_high and initrd_high misuse
  - efi_loader: Assure fitImage from capsule is used from 8-byte aligned address
  - MIPS: Assure end of U-Boot is at 8-byte aligned offset

So, taking a look at the test branch you pointed me at, my big concern
is size growth. On imx8mp_dhcom_drc02 (where we're already LTO'ing),
with the CI gcc-14.2.0 toolchain full U-Boot grows by more than 6KiB and
SPL by a bit more than 2KiB. This is a bit of a worst-case, imx8mp_navqp
is a bit more than 3KiB / 548 bytes, with the average feeling like
~4KiB/1KiB for aarch64.

I'm coming back to this to try and better understand things. And one
problem here is that upstream dtc changes really trip up LTO. I made as
a local hack, a change for imx8mp_dhcom_pdk2 to NOT use LTO (and so SPL
fails to link, but it's about the same growth, given the change in
overflows sram by numbers). This brought the size change down from ~6KiB
to ~4KiB.  Since this was already a hack just for investigation, I then
started out with giving full U-Boot the "assume perfect dtb" mask. This
reduces growth by 900 bytes.

A better test case is pinephone because it's aarch64 but not LTO. And
with a full mask in U-Boot hack, the size growth for full U-Boot is
around 1000 bytes and 300 bytes in SPL.

And so to me, there's a few questions. The first of which is, how is
what's being done so terrible for LTO. It's not good for normal
optimizations either, but it's really bad with LTO. The second of which
is, is there something being done with how the sanity checks are
performed that can be re-examined? Take fdt_get_string for example,
which grows by 120 bytes without changing the mask at all, and the code
changes are trivial switches to the new FDT_ASSUME mechanic and dropping
extra parens. That shouldn't have size growth, I would expect.

Does this patch below make your size problem go away ?

"
diff --git a/scripts/dtc/libfdt/libfdt.h b/scripts/dtc/libfdt/libfdt.h
index d7cf74722b2..7f8bb05c545 100644
--- a/scripts/dtc/libfdt/libfdt.h
+++ b/scripts/dtc/libfdt/libfdt.h
@@ -140,12 +140,7 @@ static inline uint16_t fdt16_ld(const fdt16_t *p)

 static inline uint32_t fdt32_ld(const fdt32_t *p)
 {
-       const uint8_t *bp = (const uint8_t *)p;
-
-       return ((uint32_t)bp[0] << 24)
-               | ((uint32_t)bp[1] << 16)
-               | ((uint32_t)bp[2] << 8)
-               | bp[3];
+       return fdt32_to_cpu(*p);
 }

 static inline void fdt32_st(void *property, uint32_t value)
@@ -160,16 +155,7 @@ static inline void fdt32_st(void *property, uint32_t value)

 static inline uint64_t fdt64_ld(const fdt64_t *p)
 {
-       const uint8_t *bp = (const uint8_t *)p;
-
-       return ((uint64_t)bp[0] << 56)
-               | ((uint64_t)bp[1] << 48)
-               | ((uint64_t)bp[2] << 40)
-               | ((uint64_t)bp[3] << 32)
-               | ((uint64_t)bp[4] << 24)
-               | ((uint64_t)bp[5] << 16)
-               | ((uint64_t)bp[6] << 8)
-               | bp[7];
+       return fdt64_to_cpu(*p);
 }

 static inline void fdt64_st(void *property, uint64_t value)
"

You can see the difference caused by these new unaligned-access functions in the disassembly (objdump -lSD) of u-boot with (top) and without (bottom) the DTC 1.7.2 patch:

"
00000000402984e0 <fdt_get_string>:
fdt_get_string():
/tmp/dtc-yes/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:35
{
    402984e0:   a9be7bfd        stp     x29, x30, [sp, #-32]!
    402984e4:   aa0003e3        mov     x3, x0
    402984e8:   2a0103e4        mov     w4, w1
    402984ec:   910003fd        mov     x29, sp
    402984f0:   a90153f3        stp     x19, x20, [sp, #16]
    402984f4:   aa0203f4        mov     x20, x2
/tmp/dtc-yes/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:49
        totalsize = fdt_ro_probe_(fdt);
    402984f8:   97fffe4b        bl      40297e24 <fdt_ro_probe_>
/tmp/dtc-yes/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:51
        if (totalsize < 0)
402984fc: 37f80a40 tbnz w0, #31, 40298644 <fdt_get_string+0x164>
vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv #1
/tmp/dtc-yes/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:55
        absoffset = stroffset + fdt_off_dt_strings(fdt);
    40298500:   39403061        ldrb    w1, [x3, #12]
    40298504:   39403462        ldrb    w2, [x3, #13]
    40298508:   39403c73        ldrb    w19, [x3, #15]
    4029850c:   aa022022        orr     x2, x1, x2, lsl #8
    40298510:   39403861        ldrb    w1, [x3, #14]
    40298514:   aa014041        orr     x1, x2, x1, lsl #16
    40298518:   aa136033        orr     x19, x1, x19, lsl #24
    4029851c:   5ac00a73        rev     w19, w19
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ #1
/tmp/dtc-yes/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:55 (discriminator 1)
    40298520:   0b130093        add     w19, w4, w19
/tmp/dtc-yes/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:56
        if (absoffset >= (unsigned)totalsize)
    40298524:   6b13001f        cmp     w0, w19
40298528: 54000969 b.ls 40298654 <fdt_get_string+0x174> // b.plast
/tmp/dtc-yes/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:58
        len = totalsize - absoffset;
    4029852c:   39400062        ldrb    w2, [x3]
    40298530:   4b130000        sub     w0, w0, w19
/tmp/dtc-yes/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:60
        if (fdt_magic(fdt) == FDT_MAGIC) {
vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv #2
    40298534:   39400461        ldrb    w1, [x3, #1]
    40298538:   aa012041        orr     x1, x2, x1, lsl #8
    4029853c:   39400862        ldrb    w2, [x3, #2]
    40298540:   aa024022        orr     x2, x1, x2, lsl #16
    40298544:   39400c61        ldrb    w1, [x3, #3]
    40298548:   aa016041        orr     x1, x2, x1, lsl #24
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ #2
"

"
0000000040297804 <fdt_get_string>:
fdt_get_string():
/tmp/dtc-no/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:35
{
    40297804:   a9bd7bfd        stp     x29, x30, [sp, #-48]!
    40297808:   910003fd        mov     x29, sp
    4029780c:   a90153f3        stp     x19, x20, [sp, #16]
    40297810:   2a0103f4        mov     w20, w1
    40297814:   a9025bf5        stp     x21, x22, [sp, #32]
    40297818:   aa0003f6        mov     x22, x0
    4029781c:   aa0203f5        mov     x21, x2
/tmp/dtc-no/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:49
        totalsize = fdt_ro_probe_(fdt);
    40297820:   97fff833        bl      402958ec <fdt_ro_probe_>
/tmp/dtc-no/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:51
        if (totalsize < 0)
40297824: 37f806a0 tbnz w0, #31, 402978f8 <fdt_get_string+0xf4>
/tmp/dtc-no/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:55
        absoffset = stroffset + fdt_off_dt_strings(fdt);
vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv #1
    40297828:   b9400ed3        ldr     w19, [x22, #12]
    4029782c:   5ac00a73        rev     w19, w19
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ #1
/tmp/dtc-no/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:55 (discriminator 6)
    40297830:   0b130293        add     w19, w20, w19
/tmp/dtc-no/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:56
        if (absoffset >= (unsigned)totalsize)
    40297834:   6b13001f        cmp     w0, w19
40297838: 54000689 b.ls 40297908 <fdt_get_string+0x104> // b.plast
/tmp/dtc-no/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:58
        len = totalsize - absoffset;
    4029783c:   b94002c1        ldr     w1, [x22]
/tmp/dtc-no/lib/libfdt/../../scripts/dtc/libfdt/fdt_ro.c:60 (discriminator 6)
        if (fdt_magic(fdt) == FDT_MAGIC) {
vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv #2
40297840: 5281ba02 mov w2, #0xdd0 // #3536
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ #2
"

But the question now is, how should we handle this ? These unaligned access functions are there for a reason.
diff --git a/scripts/dtc/libfdt/libfdt.h b/scripts/dtc/libfdt/libfdt.h
index d7cf74722b2..7f8bb05c545 100644
--- a/scripts/dtc/libfdt/libfdt.h
+++ b/scripts/dtc/libfdt/libfdt.h
@@ -140,12 +140,7 @@ static inline uint16_t fdt16_ld(const fdt16_t *p)
 
 static inline uint32_t fdt32_ld(const fdt32_t *p)
 {
-	const uint8_t *bp = (const uint8_t *)p;
-
-	return ((uint32_t)bp[0] << 24)
-		| ((uint32_t)bp[1] << 16)
-		| ((uint32_t)bp[2] << 8)
-		| bp[3];
+	return fdt32_to_cpu(*p);
 }
 
 static inline void fdt32_st(void *property, uint32_t value)
@@ -160,16 +155,7 @@ static inline void fdt32_st(void *property, uint32_t value)
 
 static inline uint64_t fdt64_ld(const fdt64_t *p)
 {
-	const uint8_t *bp = (const uint8_t *)p;
-
-	return ((uint64_t)bp[0] << 56)
-		| ((uint64_t)bp[1] << 48)
-		| ((uint64_t)bp[2] << 40)
-		| ((uint64_t)bp[3] << 32)
-		| ((uint64_t)bp[4] << 24)
-		| ((uint64_t)bp[5] << 16)
-		| ((uint64_t)bp[6] << 8)
-		| bp[7];
+	return fdt64_to_cpu(*p);
 }
 
 static inline void fdt64_st(void *property, uint64_t value)

Reply via email to