The branch main has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=2ccbf06c0285ca1c06681e7212da8e7d1e87fe19

commit 2ccbf06c0285ca1c06681e7212da8e7d1e87fe19
Author:     Sarah Walker <[email protected]>
AuthorDate: 2026-01-28 16:22:50 +0000
Commit:     Andrew Turner <[email protected]>
CommitDate: 2026-02-10 15:39:56 +0000

    arm64: Add MOPS implementations of memset(), memcpy() and memmove()
    
    Enable the use of MOPS implementations of memset, memcpy and memmove within
    the kernel. Fix pre-ifunc resolution uses of these functions.
    
    Reported by:    andrew
    Sponsored by:   Arm Ltd
    Differential Revision:  https://reviews.freebsd.org/D55051
---
 sys/arm64/arm64/identcpu.c     |  5 +++--
 sys/arm64/arm64/machdep.c      | 38 ++++++++++++++++++++++++++++++++++++--
 sys/arm64/arm64/machdep_boot.c |  2 +-
 sys/arm64/arm64/memcpy.S       | 24 ++++++++++++++++++++----
 sys/arm64/arm64/memset.S       | 12 ++++++++++--
 sys/arm64/arm64/pmap.c         | 10 +++++-----
 sys/arm64/include/cpu.h        |  7 ++++++-
 7 files changed, 81 insertions(+), 17 deletions(-)

diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
index 91078a411b88..e2f09fcb7f52 100644
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -2675,14 +2675,15 @@ update_special_regs(u_int cpu)
 
        if (cpu == 0) {
                /* Create a user visible cpu description with safe values */
-               memset(&user_cpu_desc, 0, sizeof(user_cpu_desc));
+               memset_early(&user_cpu_desc, 0, sizeof(user_cpu_desc));
                /* Safe values for these registers */
                user_cpu_desc.id_aa64pfr0 = ID_AA64PFR0_AdvSIMD_NONE |
                    ID_AA64PFR0_FP_NONE | ID_AA64PFR0_EL1_64 |
                    ID_AA64PFR0_EL0_64;
                user_cpu_desc.id_aa64dfr0 = ID_AA64DFR0_DebugVer_8;
                /* Create the Linux user visible cpu description */
-               memcpy(&l_user_cpu_desc, &user_cpu_desc, sizeof(user_cpu_desc));
+               memcpy_early(&l_user_cpu_desc, &user_cpu_desc,
+                   sizeof(user_cpu_desc));
        }
 
        desc = get_cpu_desc(cpu);
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index 5e6a39381e84..ffe9acb0cfa4 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -80,6 +80,7 @@
 #include <machine/cpu_feat.h>
 #include <machine/debug_monitor.h>
 #include <machine/hypervisor.h>
+#include <machine/ifunc.h>
 #include <machine/kdb.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
@@ -807,6 +808,9 @@ initarm(struct arm64_bootparams *abp)
 
        update_special_regs(0);
 
+       sched_instance_select();
+       link_elf_ireloc();
+
        /* Set the pcpu data, this is needed by pmap_bootstrap */
        pcpup = &pcpu0;
        pcpu_init(pcpup, 0, sizeof(struct pcpu));
@@ -823,8 +827,6 @@ initarm(struct arm64_bootparams *abp)
        PCPU_SET(curthread, &thread0);
        PCPU_SET(midr, get_midr());
 
-       sched_instance_select();
-       link_elf_ireloc();
 #ifdef FDT
        try_load_dtb();
 #endif
@@ -1076,3 +1078,35 @@ DB_SHOW_COMMAND(vtop, db_show_vtop)
                db_printf("show vtop <virt_addr>\n");
 }
 #endif
+
+#undef memset
+#undef memmove
+#undef memcpy
+
+void   *memset_std(void *buf, int c, size_t len);
+void   *memset_mops(void *buf, int c, size_t len);
+void    *memmove_std(void * _Nonnull dst, const void * _Nonnull src,
+           size_t len);
+void    *memmove_mops(void * _Nonnull dst, const void * _Nonnull src,
+           size_t len);
+void    *memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
+           size_t len);
+void    *memcpy_mops(void * _Nonnull dst, const void * _Nonnull src,
+           size_t len);
+
+DEFINE_IFUNC(, void *, memset, (void *, int, size_t))
+{
+       return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memset_mops : memset_std);
+}
+
+DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull,
+    size_t))
+{
+       return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memmove_mops : memmove_std);
+}
+
+DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,
+    size_t))
+{
+       return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memcpy_mops : memcpy_std);
+}
diff --git a/sys/arm64/arm64/machdep_boot.c b/sys/arm64/arm64/machdep_boot.c
index 1c5e8189e436..0ccfd1b67a39 100644
--- a/sys/arm64/arm64/machdep_boot.c
+++ b/sys/arm64/arm64/machdep_boot.c
@@ -115,7 +115,7 @@ fake_preload_metadata(void *dtb_ptr, size_t dtb_size)
                PRELOAD_PUSH_VALUE(uint32_t, MODINFO_METADATA | MODINFOMD_DTBP);
                PRELOAD_PUSH_VALUE(uint32_t, sizeof(uint64_t));
                PRELOAD_PUSH_VALUE(uint64_t, (uint64_t)lastaddr);
-               memmove((void *)lastaddr, dtb_ptr, dtb_size);
+               memmove_early((void *)lastaddr, dtb_ptr, dtb_size);
                lastaddr += dtb_size;
                lastaddr = roundup(lastaddr, sizeof(int));
        }
diff --git a/sys/arm64/arm64/memcpy.S b/sys/arm64/arm64/memcpy.S
index 01daa8e1c228..3c408d2836aa 100644
--- a/sys/arm64/arm64/memcpy.S
+++ b/sys/arm64/arm64/memcpy.S
@@ -57,8 +57,8 @@
    The loop tail is handled by always copying 64 bytes from the end.
 */
 
-EENTRY(memmove)
-ENTRY(memcpy)
+EENTRY(memmove_std)
+ENTRY(memcpy_std)
        add     srcend, src, count
        add     dstend, dstin, count
        cmp     count, 128
@@ -239,7 +239,23 @@ L(copy64_from_start):
        stp     B_l, B_h, [dstin, 16]
        stp     C_l, C_h, [dstin]
        ret
-END(memcpy)
-EEND(memmove)
+END(memcpy_std)
+EEND(memmove_std)
+
+ENTRY(memcpy_mops)
+       mov     x3, x0
+       .inst   0x19010443      /* cpyfp   [x3]!, [x1]!, x2!  */
+       .inst   0x19410443      /* cpyfm   [x3]!, [x1]!, x2!  */
+       .inst   0x19810443      /* cpyfe   [x3]!, [x1]!, x2!  */
+       ret
+END(memcpy_mops)
+
+ENTRY(memmove_mops)
+       mov     x3, x0
+       .inst   0x1d010443      /* cpyp    [x3]!, [x1]!, x2!  */
+       .inst   0x1d410443      /* cpym    [x3]!, [x1]!, x2!  */
+       .inst   0x1d810443      /* cpye    [x3]!, [x1]!, x2!  */
+       ret
+END(memmove_mops)
 
 GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)
diff --git a/sys/arm64/arm64/memset.S b/sys/arm64/arm64/memset.S
index f52bfd62cc54..f226e8de1e95 100644
--- a/sys/arm64/arm64/memset.S
+++ b/sys/arm64/arm64/memset.S
@@ -51,7 +51,7 @@
 #define dst            x8
 #define tmp3w          w9
 
-ENTRY(memset)
+ENTRY(memset_std)
 
        mov     dst, dstin              /* Preserve return value.  */
        ands    A_lw, val, #255
@@ -196,6 +196,14 @@ ENTRY(memset)
        ands    count, count, zva_bits_x
        b.ne    .Ltail_maybe_long
        ret
-END(memset)
+END(memset_std)
+
+ENTRY(memset_mops)
+       mov     x3, x0
+       .inst   0x19c10443      /* setp    [x3]!, x2!, x1  */
+       .inst   0x19c14443      /* setm    [x3]!, x2!, x1  */
+       .inst   0x19c18443      /* sete    [x3]!, x2!, x1  */
+       ret
+END(memset_mops)
 
 GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 680209efd881..e865569ac377 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1015,7 +1015,7 @@ pmap_bootstrap_l0_table(struct pmap_bootstrap_state *state)
 
                /* Create a new L0 table entry */
                state->l1 = (pt_entry_t *)state->freemempos;
-               memset(state->l1, 0, PAGE_SIZE);
+               memset_early(state->l1, 0, PAGE_SIZE);
                state->freemempos += PAGE_SIZE;
 
                l1_pa = pmap_early_vtophys((vm_offset_t)state->l1);
@@ -1063,7 +1063,7 @@ pmap_bootstrap_l1_table(struct pmap_bootstrap_state *state)
 
                /* Create a new L1 table entry */
                state->l2 = (pt_entry_t *)state->freemempos;
-               memset(state->l2, 0, PAGE_SIZE);
+               memset_early(state->l2, 0, PAGE_SIZE);
                state->freemempos += PAGE_SIZE;
 
                l2_pa = pmap_early_vtophys((vm_offset_t)state->l2);
@@ -1107,7 +1107,7 @@ pmap_bootstrap_l2_table(struct pmap_bootstrap_state *state)
 
                /* Create a new L2 table entry */
                state->l3 = (pt_entry_t *)state->freemempos;
-               memset(state->l3, 0, PAGE_SIZE);
+               memset_early(state->l3, 0, PAGE_SIZE);
                state->freemempos += PAGE_SIZE;
 
                l3_pa = pmap_early_vtophys((vm_offset_t)state->l3);
@@ -1406,7 +1406,7 @@ pmap_bootstrap(void)
 #define alloc_pages(var, np)                                           \
        (var) = bs_state.freemempos;                                    \
        bs_state.freemempos += (np * PAGE_SIZE);                        \
-       memset((char *)(var), 0, ((np) * PAGE_SIZE));
+       memset_early((char *)(var), 0, ((np) * PAGE_SIZE));
 
        /* Allocate dynamic per-cpu area. */
        alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
@@ -1444,7 +1444,7 @@ pmap_bootstrap_allocate_san_l2(vm_paddr_t start_pa, vm_paddr_t end_pa,
                        continue;
                }
 
-               bzero((void *)PHYS_TO_DMAP(pa), L2_SIZE);
+               bzero_early((void *)PHYS_TO_DMAP(pa), L2_SIZE);
                physmem_exclude_region(pa, L2_SIZE, EXFLAG_NOALLOC);
                pmap_store(l2, PHYS_TO_PTE(pa) | PMAP_SAN_PTE_BITS | L2_BLOCK);
        }
diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h
index 9f1db23744d4..05844ad63036 100644
--- a/sys/arm64/include/cpu.h
+++ b/sys/arm64/include/cpu.h
@@ -328,7 +328,12 @@ ADDRESS_TRANSLATE_FUNC(s1e1r)
 ADDRESS_TRANSLATE_FUNC(s1e1w)
 
 #endif /* !__ASSEMBLER__ */
-#endif
+
+#define MEMSET_EARLY_FUNC      memset_std
+#define MEMCPY_EARLY_FUNC      memcpy_std
+#define MEMMOVE_EARLY_FUNC     memmove_std
+
+#endif /* _KERNEL */
 
 #endif /* !_MACHINE_CPU_H_ */
 

Reply via email to