On Thu, Sep 08, 2016 at 06:32:05PM +0100, Ard Biesheuvel wrote: > The UEFI spec stipulates that unaligned accesses should be enabled > on CPUs that support them, which means all of them, given that we > no longer support pre-v7 ARM cores, and the AARCH64 bindings mandate > support for unaligned accesses unconditionally. > > This means that one should not assume that CopyMem () is safe to call > on regions that may be mapped using device attributes, which is the > case for the NOR flash. Since we have no control over the mappings when > running under the OS, and given that write accesses require device > mappings, we should not call CopyMem () in the read path either, but > use our own implementation that is guaranteed to take alignment into > account.
This is an important fix - but one comment (and a tiny bit of bikeshedding) below: > Contributed-under: TianoCore Contribution Agreement 1.0 > Signed-off-by: Ard Biesheuvel <[email protected]> > --- > ArmPlatformPkg/Drivers/NorFlashDxe/NorFlashDxe.c | 130 +++++++++++++++++++- > 1 file changed, 128 insertions(+), 2 deletions(-) > > diff --git a/ArmPlatformPkg/Drivers/NorFlashDxe/NorFlashDxe.c > b/ArmPlatformPkg/Drivers/NorFlashDxe/NorFlashDxe.c > index 3abbe5cb32bc..45d1f0c20d52 100644 > --- a/ArmPlatformPkg/Drivers/NorFlashDxe/NorFlashDxe.c > +++ b/ArmPlatformPkg/Drivers/NorFlashDxe/NorFlashDxe.c > @@ -744,6 +744,132 @@ NorFlashWriteBlocks ( > return Status; > } > > +/** > + Copy Length bytes from Source to Destination, using aligned accesses only Maybe a comment here clarifying that we're following CopyMem semantics (buffers may overlap) rather than memcpy semantics (buffers must not overlap)? > + > + @param DestinationBuffer The target of the copy request. > + @param SourceBuffer The place to copy from. > + @param Length The number of bytes to copy. > + > + @return Destination > + > +**/ > +STATIC > +VOID * > +AlignedCopyMem ( > + OUT VOID *DestinationBuffer, > + IN CONST VOID *SourceBuffer, > + IN UINTN Length > + ) > +{ > + UINT8 *Destination8; > + CONST UINT8 *Source8; > + UINT32 *Destination32; > + CONST UINT32 *Source32; > + UINT64 *Destination64; > + CONST UINT64 *Source64; Can we be sure the compiler won't occasionally do something "clever" unless the above are all pointer-to-volatile? > + UINTN Alignment; > + > + if ((((UINTN)DestinationBuffer & 0x7) == 0) && (((UINTN)SourceBuffer & > 0x7) == 0) && (Length >= 8)) { Could we have a macro in here to keep the line length down? Like BOTH_ALIGNED(x, y, z) (!(((UINTN)x & ((z >> 3) - 1)) | ((UINTN)y & ((z >> 3) - 1)))) called as BOTH_ALIGNED(DestinationBuffer, SourceBuffer, 64) BOTH_ALIGNED(DestinationBuffer, SourceBuffer, 32) > + if (SourceBuffer > DestinationBuffer) { > + Destination64 = (UINT64*)DestinationBuffer; > + Source64 = (CONST UINT64*)SourceBuffer; > + while (Length >= 8) { > + *(Destination64++) = *(Source64++); > + Length -= 8; > + } > + > + // Finish if there are still some bytes to copy > + Destination8 = (UINT8*)Destination64; > + Source8 = (CONST UINT8*)Source64; > + while (Length-- != 0) { > + *(Destination8++) = *(Source8++); > + } > + } else if (SourceBuffer < DestinationBuffer) { > + Destination64 = (UINT64*)((UINTN)DestinationBuffer + Length); > + Source64 = (CONST UINT64*)((UINTN)SourceBuffer + Length); > + > + // Destination64 and Source64 were aligned on a 64-bit boundary > + // but if length is not a multiple of 8 bytes then they won't be > + // anymore. > + > + Alignment = Length & 0x7; > + if (Alignment != 0) { > + Destination8 = (UINT8*)Destination64; > + Source8 = (CONST UINT8*)Source64; > + > + while (Alignment-- != 0) { > + *(--Destination8) = *(--Source8); > + --Length; > + } > + Destination64 = (UINT64*)Destination8; > + Source64 = (CONST UINT64*)Source8; > + } > + > + while (Length > 0) { > + *(--Destination64) = *(--Source64); > + Length -= 8; > + } > + } > + } else if ((((UINTN)DestinationBuffer & 0x3) == 0) && > (((UINTN)SourceBuffer & 0x3) == 0) && (Length >= 4)) { > + if (SourceBuffer > DestinationBuffer) { > + Destination32 = (UINT32*)DestinationBuffer; > + Source32 = (CONST UINT32*)SourceBuffer; > + while (Length >= 4) { > + *(Destination32++) = *(Source32++); > + Length -= 4; > + } > + > + // Finish if there are still some bytes to copy > + Destination8 = (UINT8*)Destination32; > + Source8 = (CONST UINT8*)Source32; > + while (Length-- != 0) { > + *(Destination8++) = *(Source8++); > + } > + } else if (SourceBuffer < DestinationBuffer) { > + Destination32 = (UINT32*)((UINTN)DestinationBuffer + Length); > + Source32 = (CONST UINT32*)((UINTN)SourceBuffer + Length); > + > + // Destination32 and Source32 were aligned on a 32-bit boundary > + // but if length is not a multiple of 4 bytes then they won't be > + // anymore. > + > + Alignment = Length & 0x3; > + if (Alignment != 0) { > + Destination8 = (UINT8*)Destination32; > + Source8 = (CONST UINT8*)Source32; > + > + while (Alignment-- != 0) { > + *(--Destination8) = *(--Source8); > + --Length; > + } > + Destination32 = (UINT32*)Destination8; > + Source32 = (CONST UINT32*)Source8; > + } > + > + while (Length > 0) { > + *(--Destination32) = *(--Source32); > + Length -= 4; > + } > + } > + } else { > + if (SourceBuffer > DestinationBuffer) { > + Destination8 = (UINT8*)DestinationBuffer; > + Source8 = (CONST UINT8*)SourceBuffer; > + while (Length-- != 0) { > + *(Destination8++) = *(Source8++); > + } > + } else if (SourceBuffer < DestinationBuffer) { > + Destination8 = (UINT8*)DestinationBuffer + Length; > + Source8 = (CONST UINT8*)SourceBuffer + Length; > + while (Length-- != 0) { > + *(--Destination8) = *(--Source8); > + } > + } > + } > + return DestinationBuffer; > +} > + > EFI_STATUS > NorFlashReadBlocks ( > IN NOR_FLASH_INSTANCE *Instance, > @@ -791,7 +917,7 @@ NorFlashReadBlocks ( > SEND_NOR_COMMAND (Instance->DeviceBaseAddress, 0, P30_CMD_READ_ARRAY); > > // Readout the data > - CopyMem(Buffer, (UINTN *)StartAddress, BufferSizeInBytes); > + AlignedCopyMem (Buffer, (UINTN *)StartAddress, BufferSizeInBytes); Since you're changing these call sites anyway, can we make the casts to VOID * for clarity? > > return EFI_SUCCESS; > } > @@ -832,7 +958,7 @@ NorFlashRead ( > SEND_NOR_COMMAND (Instance->DeviceBaseAddress, 0, P30_CMD_READ_ARRAY); > > // Readout the data > - CopyMem (Buffer, (UINTN *)(StartAddress + Offset), BufferSizeInBytes); > + AlignedCopyMem (Buffer, (UINTN *)(StartAddress + Offset), > BufferSizeInBytes); / Leif > > return EFI_SUCCESS; > } > -- > 2.7.4 > _______________________________________________ edk2-devel mailing list [email protected] https://lists.01.org/mailman/listinfo/edk2-devel

