On Sat, Mar 30, 2013 at 04:15:54PM +0100, Francesco Lavra wrote: > On 03/24/2013 06:01 PM, Leif Lindholm wrote: > [...] > > === added file 'grub-core/kern/arm/cache.S' > > --- grub-core/kern/arm/cache.S 1970-01-01 00:00:00 +0000 > > +++ grub-core/kern/arm/cache.S 2013-03-24 12:56:20 +0000 > > @@ -0,0 +1,251 @@ > > +/* > > + * GRUB -- GRand Unified Bootloader > > + * Copyright (C) 2013 Free Software Foundation, Inc. > > + * > > + * GRUB is free software: you can redistribute it and/or modify > > + * it under the terms of the GNU General Public License as published by > > + * the Free Software Foundation, either version 3 of the License, or > > + * (at your option) any later version. > > + * > > + * GRUB is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + * GNU General Public License for more details. > > + * > > + * You should have received a copy of the GNU General Public License > > + * along with GRUB. If not, see <http://www.gnu.org/licenses/>. > > + */ > > + > > +#include <grub/symbol.h> > > +#include <grub/dl.h> > > This include is not needed. > > > + > > + .file "cache.S" > > + .text > > + .syntax unified > > +#if !defined (__thumb2__) > > + .arm > > +#define ARM(x...) x > > +#define THUMB(x...) > > +#else > > + .thumb > > +#define THUMB(x...) x > > +#define ARM(x...) > > +#endif > > + > > + .align 2 > > + > > +/* > > + * Simple cache maintenance functions > > + */ > > + > > +@ r0 - *beg (inclusive) > > +@ r1 - *end (exclusive) > > +clean_dcache_range: > > + @ Clean data cache range for range to point-of-unification > > + ldr r2, dlinesz > > +1: cmp r0, r1 > > + bge 2f > > +#ifdef DEBUG > > + push {r0-r2, lr} > > + mov r1, r2 > > + mov r2, r0 > > + ldr r0, =dcstr > > + bl EXT_C(grub_printf) > > + pop {r0-r2, lr} > > +#endif > > + mcr p15, 0, r0, c7, c11, 1 @ DCCMVAU > > + add r0, r0, r2 @ Next line > > + b 1b > > +2: dsb > > + bx lr > > If the start address (initial value of r0 in this function) is not > aligned to a cache line boundary, the last cache line in the range to be > cleaned might not be cleaned, depending on the value of r1 (the end > address). To handle correctly such cases, the initial value of r0 should > be aligned to the cache line boundary. ACK. Fixed, will be in next version.
> > + > > +@ r0 - *beg (inclusive) > > +@ r1 - *end (exclusive) > > +invalidate_icache_range: > > + @ Invalidate instruction cache for range to point-of-unification > > + ldr r2, ilinesz > > +1: cmp r0, r1 > > + bge 2f > > +#ifdef DEBUG > > + push {r0-r2, lr} > > + mov r1, r2 > > + mov r2, r0 > > + ldr r0, =icstr > > + bl EXT_C(grub_printf) > > + pop {r0-r2, lr} > > +#endif > > + mcr p15, 0, r0, c7, c5, 1 @ ICIMVAU > > + add r0, r0, r2 @ Next line > > + b 1b > > The same applies here: the initial value of r0 should be aligned to the > cache line boundary. And again. > > + @ Branch predictor invalidate all > > +2: mcr p15, 0, r0, c7, c5, 6 @ BPIALL > > + dsb > > + isb > > + bx lr > > + > > +@void __wrap___clear_cache(char *beg, char *end); > > +FUNCTION(__wrap___clear_cache) > > + dmb > > + dsb > > + push {r4-r6, lr} > > + ldr r2, probed @ If first call, probe cache sizes > > + cmp r2, #0 > > If the -mimplicit-it=thumb assembler option is removed from patch 1/7 > (as I think it should), then here goes the instruction: > > it eq > > > + bleq probe_caches @ This call corrupts r3 > > + mov r4, r0 > > + mov r5, r1 > > + bl clean_dcache_range > > + mov r0, r4 > > + mov r1, r5 > > + bl invalidate_icache_range > > + pop {r4-r6, pc} > > + > > +probe_caches: > > + push {r4-r6, lr} > > + mrc p15, 0, r4, c0, c0, 1 @ Read Cache Type Register > > + mov r5, #1 > > + ubfx r6, r4, #16, #4 @ Extract min D-cache num word log2 > > + add r6, r6, #2 @ words->bytes > > + lsl r6, r5, r6 @ Convert to num bytes > > + ldr r3, =dlinesz > > + str r6, [r3] > > + and r6, r4, #0xf @ Extract min I-cache num word log2 > > + add r6, r6, #2 @ words->bytes > > + lsl r6, r5, r6 @ Convert to num bytes > > + ldr r3, =ilinesz > > + str r6, [r3] > > + ldr r3, =probed @ Flag cache probing done > > + str r5, [r3] > > + pop {r4-r6, pc} > > + > > +#ifdef DEBUG > > +dcstr: .asciz "cleaning %d bytes of D cache @ 0x%08x\n" > > +icstr: .asciz "invalidating %d bytes of I cache @ 0x%08x\n" > > +#endif > > + > > + .align 3 > > +probed: .long 0 > > +dlinesz: > > + .long 0 > > +ilinesz: > > + .long 0 > > + > > +@void grub_arch_sync_caches (void *address, grub_size_t len) > > +FUNCTION(grub_arch_sync_caches) > > + add r1, r0, r1 > > + b __wrap___clear_cache > > + > > + @ r0 - CLIDR > > + @ r1 - LoC > > + @ r2 - current level > > + @ r3 - num sets > > + @ r4 - num ways > > + @ r5 - current set > > + @ r6 - current way > > + @ r7 - line size > > + @ r8 - scratch > > + @ r9 - scratch > > + @ r10 - scratch > > + @ r11 - scratch > > +clean_invalidate_dcache: > > + push {r4-r12, lr} > > + mrc p15, 1, r0, c0, c0, 1 @ Read CLIDR > > + ubfx r1, r0, #24, #3 @ Extract LoC > > + > > + mov r2, #0 @ First level, L1 > > +2: and r8, r0, #7 @ cache type at current level > > + cmp r8, #2 > > + blt 5f @ instruction only, or none, skip level > > + > > + @ set current cache level/type (for CSSIDR read) > > + lsl r8, r2, #1 > > + mcr p15, 2, r8, c0, c0, 0 @ Write CSSELR (level, type: data/uni) > > + > > + @ read current cache information > > + mrc p15, 1, r8, c0, c0, 0 @ Read CSSIDR > > + ubfx r3, r8, #13, #14 @ Number of sets -1 > > + ubfx r4, r8, #3, #9 @ Number of ways -1 > > + and r7, r8, #7 @ log2(line size in words) - 2 > > + add r7, r7, #2 @ adjust > > + mov r8, #1 > > + lsl r7, r8, r7 @ -> line size in words > > + lsl r7, r7, #2 @ -> bytes > > + > > + @ set loop > > + mov r5, #0 @ current set = 0 > > +3: lsl r8, r2, #1 @ insert level > > + clz r9, r7 @ calculate set field offset > > + mov r10, #31 > > + sub r9, r10, r9 > > + lsl r10, r5, r9 > > + orr r8, r8, r10 @ insert set field > > + > > + @ way loop > > + @ calculate way field offset > > + mov r6, #0 @ current way = 0 > > + add r10, r4, #1 > > + clz r9, r10 @ r9 = way field offset > > + add r9, r9, #1 > > +4: lsl r10, r6, r9 > > + orr r11, r8, r10 @ insert way field > > + > > + @ clean line by set/way > > Nitpick: the comment should be "clean and invalidate line by set/way". Definitely, the current comment is incorrect. > [...] > > === added file 'grub-core/kern/arm/dl.c' > > --- grub-core/kern/arm/dl.c 1970-01-01 00:00:00 +0000 > > +++ grub-core/kern/arm/dl.c 2013-03-24 12:56:20 +0000 > [...] > > +/* > > + * R_ARM_THM_JUMP19 > > + * > > + * Relocate conditional Thumb (T32) B<c>.W > > + */ > > +grub_err_t > > +reloc_thm_jump19 (grub_uint16_t *addr, Elf32_Addr sym_addr) > > +{ > > + grub_int32_t offset; > > + grub_uint32_t insword, insmask; > > + > > + /* Extract instruction word in alignment-safe manner */ > > + insword = (*addr) << 16 | *(addr + 1); > > + insmask = 0xfbc0d800; > > Shouldn't it be 0xfbc0d000? Bit 11 of the second half-word corresponds > to field J2, and as such is part of the offset. Err, yes - certainly. > > + > > + /* Extract and sign extend offset */ > > + offset = ((insword >> 26) & 1) << 18 > > + | ((insword >> 11) & 1) << 17 > > + | ((insword >> 13) & 1) << 16 > > + | ((insword >> 16) & 0x3f) << 11 > > + | (insword & 0x7ff); > > + offset <<= 1; > > + if (offset & (1 << 19)) > > + offset -= (1 << 20); > > It looks to me like some shifts are wrong here. It should be: > > offset = ((insword >> 26) & 1) << 19 > | ((insword >> 11) & 1) << 18 > | ((insword >> 13) & 1) << 17 > | ((insword >> 16) & 0x3f) << 11 > | (insword & 0x7ff); > offset <<= 1; > if (offset & (1 << 20)) > offset -= (1 << 21); ACK. > > + > > + /* Adjust and re-truncate offset */ > > +#ifdef GRUB_UTIL > > + offset += sym_addr; > > +#else > > + offset += sym_addr - (grub_uint32_t) addr; > > +#endif > > + if ((offset > 1048574) || (offset < -1048576)) > > + { > > + return grub_error > > + (GRUB_ERR_OUT_OF_RANGE, N_("THM_JUMP19 Relocation out of range.")); > > + } > > + > > + offset >>= 1; > > + offset &= 0x7ffff; > > The mask should be 0xfffff. ACK. > > + > > + /* Reassemble instruction word and write back */ > > + insword &= insmask; > > + insword |= ((offset >> 18) & 1) << 26 > > + | ((offset >> 17) & 1) << 11 > > + | ((offset >> 16) & 1) << 13 > > + | ((offset >> 11) & 0x3f) << 16 > > + | (offset & 0x7ff); > > As above, it looks like some shifts are wrong. It should be: > > insword |= ((offset >> 19) & 1) << 26 > | ((offset >> 18) & 1) << 11 > | ((offset >> 17) & 1) << 13 > | ((offset >> 11) & 0x3f) << 16 > | (offset & 0x7ff); ACK. The scary thing is I have a GRUB happily running under UEFI, successfully loading a Linux kernel from MMC, using this code... Fixing for next version. > > + *addr = insword >> 16; > > + *(addr + 1) = insword & 0xffff; > > + return GRUB_ERR_NONE; > > +} > > > > > > > > /*********************************************************** > > * ARM (A32) relocations: * > > * * > > * ARM instructions are 32-bit in size and 32-bit aligned. * > > ***********************************************************/ > > > > /* > > * R_ARM_JUMP24 > > * > > * Relocate ARM (A32) B > > */ > > grub_err_t > > reloc_jump24 (grub_uint32_t *addr, Elf32_Addr sym_addr) > > { > > grub_uint32_t insword; > > grub_int32_t offset; > > > > insword = *addr; > > > > offset = (insword & 0x00ffffff) << 2; > > if (offset & 0x02000000) > > offset -= 0x04000000; > > #ifdef GRUB_UTIL > > offset += sym_addr; > > #else > > offset += sym_addr - (grub_uint32_t) addr; > > #endif > > > > insword &= 0xff000000; > > insword |= (offset >> 2) & 0x00ffffff; > > > > *addr = insword; > > > > return GRUB_ERR_NONE; > > } > > If sym_addr has its least significant bit set, it means it addresses a > Thumb instruction, and in this case an inter-working veneer must be used > to effect the transition from ARM to Thumb state. Implementing veneers > is probably overkill here, but I think in this case this function should > error out as the relocation is not supported, instead of silently > performing a wrong relocation. Yes, fair enough - I did it for R_THM_JUMP24, so just being lazy here. > [...] > > +/* > > + * do_relocations(): > > + * Iterate over all relocations in section, calling appropriate functions > > + * for patching. > > + */ > > +static grub_err_t > > +do_relocations (Elf_Shdr * relhdr, Elf_Ehdr * e, grub_dl_t mod) > > +{ > > + grub_dl_segment_t seg; > > + Elf_Rel *rel; > > + Elf_Sym *sym; > > + int i, entnum; > > + > > + entnum = relhdr->sh_size / sizeof (Elf_Rel); > > + > > + /* Find the target segment for this relocation section. */ > > + seg = find_segment (mod->segment, relhdr->sh_info); > > + if (!seg) > > + return grub_error (GRUB_ERR_EOF, N_("relocation segment not found")); > > + > > + rel = (Elf_Rel *) ((grub_addr_t) e + relhdr->sh_offset); > > + > > + /* Step through all relocations */ > > + for (i = 0, sym = mod->symtab; i < entnum; i++) > > + { > > + Elf_Addr *target, sym_addr; > > + int relsym, reltype; > > + grub_err_t retval; > > + > > + if (seg->size < rel[i].r_offset) > > + return grub_error (GRUB_ERR_BAD_MODULE, > > + "reloc offset is out of the segment"); > > + relsym = ELF_R_SYM (rel[i].r_info); > > + reltype = ELF_R_TYPE (rel[i].r_info); > > + target = (Elf_Word *) ((grub_addr_t) seg->addr + rel[i].r_offset); > > target is declared as Elf_Addr *, so the cast to Elf_Word * seems > inappropriate, even though they are practically the same type. > Since target can point at either a 32 bit word or a 16 bit half-word, > I'm wondering whether void * would be a more appropriate type here. void * makes sense to me. / Leif _______________________________________________ Grub-devel mailing list Grub-devel@gnu.org https://lists.gnu.org/mailman/listinfo/grub-devel