Optimized version of memset() in memset.S if called as:
memset(foo, 0, size)
will try to explicitly zero out data cache with:
dc zva, dst
which will result in an Alignment Exception (DABT) if the MMU is not
enabled.
For more info see:
- C4.4.8 "DC ZVA, Data Cache Zero by VA"
- D5.2.8 "The effects of disabling a stage of address translation"
in "ARM Architecture Reference Manual. ARMv8, for ARMv8-A architecture
profile"
In a similar vein, using the optimized version of memcpy() could lead to an
unaligned 16-byte write (using 'stp'), which is not allowed for
Device-nGnRnE type of memory (see D5.2.8) and would lead to an
Alignment Exception.
To fix both problems, expose non-optimized and optimized versions of
the functions and create a wrapper to dispatch the call to either one
based on whether the MMU is enabled or not.
Signed-off-by: Andrey Smirnov <[email protected]>
---
arch/arm/Kconfig | 7 +++++++
arch/arm/lib64/Makefile | 2 +-
arch/arm/lib64/memcpy.S | 6 +++---
arch/arm/lib64/memset.S | 4 ++--
arch/arm/lib64/string.c | 22 ++++++++++++++++++++++
include/string.h | 3 +++
lib/string.c | 18 ++++++++++++------
7 files changed, 50 insertions(+), 12 deletions(-)
create mode 100644 arch/arm/lib64/string.c
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index bd736284f..c330a5a18 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -353,6 +353,13 @@ menu "ARM specific settings"
config ARM_OPTIMZED_STRING_FUNCTIONS
bool "use assembler optimized string functions"
+ #
+ # memset() and memcpy() in arm/lib64/mem[set|cpy].S are
+ # written with assumption of enabled MMU and cache. Depending
+ # on the inputs it may fail with Alignment exception if used
+ # without MMU
+ #
+ depends on !CPU_V8 || MMU
help
Say yes here to use assembler optimized memcpy / memset functions.
These functions work much faster than the normal versions but
diff --git a/arch/arm/lib64/Makefile b/arch/arm/lib64/Makefile
index 77647128a..4c0019fab 100644
--- a/arch/arm/lib64/Makefile
+++ b/arch/arm/lib64/Makefile
@@ -2,7 +2,7 @@ obj-y += stacktrace.o
obj-$(CONFIG_ARM_LINUX) += armlinux.o
obj-y += div0.o
obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS) += memcpy.o
-obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS) += memset.o
+obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS) += memset.o string.o
extra-y += barebox.lds
obj-pbl-y += runtime-offset.o
diff --git a/arch/arm/lib64/memcpy.S b/arch/arm/lib64/memcpy.S
index cfed3191c..a70e96ca2 100644
--- a/arch/arm/lib64/memcpy.S
+++ b/arch/arm/lib64/memcpy.S
@@ -67,8 +67,8 @@
stp \ptr, \regB, [\regC], \val
.endm
- .weak memcpy
-ENTRY(memcpy)
+ .weak __arch_memcpy
+ENTRY(__arch_memcpy)
#include "copy_template.S"
ret
-ENDPROC(memcpy)
+ENDPROC(__arch_memcpy)
diff --git a/arch/arm/lib64/memset.S b/arch/arm/lib64/memset.S
index 380a54097..d17bcc612 100644
--- a/arch/arm/lib64/memset.S
+++ b/arch/arm/lib64/memset.S
@@ -54,7 +54,7 @@ tmp3w .req w9
tmp3 .req x9
.weak memset
-ENTRY(memset)
+ENTRY(__arch_memset)
mov dst, dstin /* Preserve return value. */
and A_lw, val, #255
orr A_lw, A_lw, A_lw, lsl #8
@@ -212,4 +212,4 @@ ENTRY(memset)
ands count, count, zva_bits_x
b.ne .Ltail_maybe_long
ret
-ENDPROC(memset)
+ENDPROC(__arch_memset)
diff --git a/arch/arm/lib64/string.c b/arch/arm/lib64/string.c
new file mode 100644
index 000000000..cb2633152
--- /dev/null
+++ b/arch/arm/lib64/string.c
@@ -0,0 +1,22 @@
+#include <common.h>
+#include <asm/system.h>
+#include <string.h>
+
+void *__arch_memset(void *dst, int c, __kernel_size_t size);
+void *__arch_memcpy(void * dest, const void *src, size_t count);
+
+void *memset(void *dst, int c, __kernel_size_t size)
+{
+ if (likely(get_cr() & CR_M))
+ return __arch_memset(dst, c, size);
+
+ return __default_memset(dst, c, size);
+}
+
+void *memcpy(void * dest, const void *src, size_t count)
+{
+ if (likely(get_cr() & CR_M))
+ return __arch_memcpy(dest, src, count);
+
+ return __default_memcpy(dest, src, count);
+}
\ No newline at end of file
diff --git a/include/string.h b/include/string.h
index 0c557d6f1..6ceb33224 100644
--- a/include/string.h
+++ b/include/string.h
@@ -6,4 +6,7 @@
void *memdup(const void *, size_t);
int strtobool(const char *str, int *val);
+void *__default_memset(void *, int, __kernel_size_t);
+void *__default_memcpy(void * dest,const void *src,size_t count);
+
#endif /* __STRING_H */
diff --git a/lib/string.c b/lib/string.c
index f588933e8..717b59aa5 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -479,7 +479,6 @@ char *strswab(const char *s)
}
#endif
-#ifndef __HAVE_ARCH_MEMSET
/**
* memset - Fill a region of memory with the given value
* @s: Pointer to the start of the area.
@@ -488,7 +487,7 @@ char *strswab(const char *s)
*
* Do not use memset() to access IO space, use memset_io() instead.
*/
-void * memset(void * s,int c,size_t count)
+void *__default_memset(void * s,int c,size_t count)
{
char *xs = (char *) s;
@@ -497,10 +496,12 @@ void * memset(void * s,int c,size_t count)
return s;
}
+EXPORT_SYMBOL(__default_memset);
+
+#ifndef __HAVE_ARCH_MEMSET
+void *memset(void *s, int c, size_t count) __alias(__default_memset);
#endif
-EXPORT_SYMBOL(memset);
-#ifndef __HAVE_ARCH_MEMCPY
/**
* memcpy - Copy one area of memory to another
* @dest: Where to copy to
@@ -510,7 +511,7 @@ EXPORT_SYMBOL(memset);
* You should not use this function to access IO space, use memcpy_toio()
* or memcpy_fromio() instead.
*/
-void * memcpy(void * dest,const void *src,size_t count)
+void *__default_memcpy(void * dest,const void *src,size_t count)
{
char *tmp = (char *) dest, *s = (char *) src;
@@ -519,9 +520,14 @@ void * memcpy(void * dest,const void *src,size_t count)
return dest;
}
-#endif
EXPORT_SYMBOL(memcpy);
+#ifndef __HAVE_ARCH_MEMCPY
+void *memcpy(void * dest, const void *src, size_t count)
+ __alias(__default_memcpy);
+#endif
+
+
#ifndef __HAVE_ARCH_MEMMOVE
/**
* memmove - Copy one area of memory to another
--
2.17.0
_______________________________________________
barebox mailing list
[email protected]
http://lists.infradead.org/mailman/listinfo/barebox