Hi,

While still playing with the llvmlinux patches against Linux v4.3-rc2+, I
wondered about the varied usage of memcpy() in several string*.[ch]
files below arch/x86.

Just FYI: I am here on Ubuntu/precise AMD64.

The background is that my build breaks again due to this commit (see [1])...
     "x86, efi, kasan: #undef memset/memcpy/memmove per arch"
...with its #undefs, especially the one for memcpy.
( I have reverted it for now. )

There is an LLVM bug report, PR18415 (see [2]), and llvmlinux has a workaround
(see [3]) for this issue.
I have attached an older alternative patch [4] from the PaX Team.

So let's look at my build log when building with Clang v3.7...
...
  CC      arch/x86/kernel/setup.o
...
In file included from arch/x86/kernel/setup.c:96:
./arch/x86/include/asm/desc.h:121:2: error: implicit declaration of
function 'memcpy' [-Werror,-Wimplicit-function-declaration]
        memcpy(&idt[entry], gate, sizeof(*gate));
        ^
1 error generated.
...

Investigating the source-code...

[ arch/x86/kernel/setup.c:96 ] (see [5])
#include <asm/desc.h>

[ arch/x86/include/asm/desc.h:121 ] (see [6])
static inline void native_write_idt_entry(gate_desc *idt, int entry,
const gate_desc *gate)
{
memcpy(&idt[entry], gate, sizeof(*gate)); <--- XXX: Line #121
}

Checking the includes...

$ grep ^#include arch/x86/kernel/setup.c | egrep 'efi|string'
#include <linux/efi.h>
#include <asm/efi.h> <--- XXX: undefs of commit 769a8089c1fd moved here

Furthermore, Andrey comments in his patch...
[ arch/x86/include/asm/efi.h ]
...
+/*
+ * CONFIG_KASAN may redefine memset to __memset. __memset function is present
+ * only in kernel binary. Since the EFI stub linked into a separate binary it
+ * doesn't have __memset(). So we should use standard memset from
+ * arch/x86/boot/compressed/string.c. The same applies to memcpy and memmove.
+ */
+#undef memcpy
+#undef memset
+#undef memmove
...

This statement is confirmed in...

[ arch/x86/boot/string.h ]
...
/* Undef any of these macros coming from string_32.h. */
#undef memcpy
#undef memset
#undef memcmp
...
/*
 * Access builtin version by default. If one needs to use optimized version,
 * do "undef memcpy" in .c file and link against right string.c
 */
#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
#define memset(d,c,l) __builtin_memset(d,c,l)
#define memcmp  __builtin_memcmp
...

More confirmation comes from looking at the patch (see [7])...
     "x86, boot: Move optimized memcpy() 32/64 bit versions to
compressed/string.c"

My 1st question...
Is the file "arch/x86/boot/compressed/string.c" the central place for
memcpy(), or only for the "optimized" version?
BTW, why is it a .c file and not a header like arch/x86/boot/compressed/string.h?

2nd question...
For an alternative implementation like the one in [4], which file
would be the right place?

Why do we have all of these in arch/x86...
    __builtin_memcpy() |  __inline_memcpy() | __memcpy() |  memcpy() ?
Some of the comments say "faster implementation".

Just curious...
Doesn't that cry out for a "simplification" or "centralization" of
memcpy() | memset() | memcmp() ?

Any enlightenment is welcome! Thoughts?

Thanks in advance.

Hoping to get less confused,
- Sedat -

[1] 
http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=769a8089c1fd2fe94c13e66fe6e03d7820953ee3
[2] https://llvm.org/bugs/show_bug.cgi?id=18415
[3] 
http://git.linuxfoundation.org/?p=llvmlinux.git;a=blob_plain;f=arch/x86_64/patches/boot-workaround-PR18415.patch
[4] 
http://git.linuxfoundation.org/?p=llvmlinux.git;a=blob_plain;f=arch/x86_64/patches/ARCHIVE/0026-Add-own-versions-of-memcpy-and-memset-for-compilatio.patch
[5] 
http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/arch/x86/kernel/setup.c#n96
[6] 
http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/arch/x86/include/asm/desc.h#n121
[7] 
http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=820e8feca06ff744f60e5036c3178dde40b91afc

[ INVESTIGATIONS ]

$ cd arch/x86/

$ for p in __builtin_memcpy __inline_memcpy __memcpy memcpy ; do echo
[ $p ] ; LC_ALL=C grep $p -nr ./ | grep -v "Binary file" | grep string
| sort ; echo "" ; done

[ __builtin_memcpy ]
./boot/string.h:19:#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
./include/asm/string_32.h:182:#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
./include/asm/string_64.h:43:           __ret =
__builtin_memcpy((dst), (src), __len);  \

[ __inline_memcpy ]
./include/asm/string_64.h:52:#define memcpy(dst, src, len)
__inline_memcpy((dst), (src), (len))
./include/asm/string_64.h:9:static __always_inline void
*__inline_memcpy(void *to, const void *from, size_t n)

[ __memcpy ]
./include/asm/string_32.h:161:static inline void *__memcpy3d(void *to,
const void *from, size_t len)
./include/asm/string_32.h:164:          return __memcpy(to, from, len);
./include/asm/string_32.h:171:   : __memcpy3d((t), (f), (n)))
./include/asm/string_32.h:187:   : __memcpy((t), (f), (n)))
./include/asm/string_32.h:194:#define memcpy(t, f, n) __memcpy((t), (f), (n))
./include/asm/string_32.h:32:static __always_inline void
*__memcpy(void *to, const void *from, size_t n)
./include/asm/string_64.h:30:extern void *__memcpy(void *to, const
void *from, size_t len);
./include/asm/string_64.h:41:           __ret = __memcpy((dst), (src),
__len);          \
./include/asm/string_64.h:77:#define memcpy(dst, src, len)
__memcpy(dst, src, len)

[ memcpy ]
./boot/compressed/string.c:19:static inline void *memcpy(void *dest,
const void *src, size_t n)
./boot/compressed/string.c:29:void *memcpy(void *dest, const void
*src, size_t n)
./boot/compressed/string.c:4:void *memcpy(void *dest, const void *src, size_t n)
./boot/string.h:15: * do "undef memcpy" in .c file and link against
right string.c
./boot/string.h:19:#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
./boot/string.h:5:#undef memcpy
./boot/string.h:9:void *memcpy(void *dst, const void *src, size_t len);
./include/asm/string_32.h:154:static inline void
*__constant_memcpy3d(void *to, const void *from, size_t len)
./include/asm/string_32.h:157:          return __constant_memcpy(to, from, len);
./include/asm/string_32.h:158:  return _mmx_memcpy(to, from, len);
./include/asm/string_32.h:161:static inline void *__memcpy3d(void *to,
const void *from, size_t len)
./include/asm/string_32.h:164:          return __memcpy(to, from, len);
./include/asm/string_32.h:165:  return _mmx_memcpy(to, from, len);
./include/asm/string_32.h:168:#define memcpy(t, f, n)
         \
./include/asm/string_32.h:170:   ? __constant_memcpy3d((t), (f), (n))   \
./include/asm/string_32.h:171:   : __memcpy3d((t), (f), (n)))
./include/asm/string_32.h:182:#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
./include/asm/string_32.h:184:#define memcpy(t, f, n)
         \
./include/asm/string_32.h:186:   ? __constant_memcpy((t), (f), (n))     \
./include/asm/string_32.h:187:   : __memcpy((t), (f), (n)))
./include/asm/string_32.h:194:#define memcpy(t, f, n) __memcpy((t), (f), (n))
./include/asm/string_32.h:32:static __always_inline void
*__memcpy(void *to, const void *from, size_t n)
./include/asm/string_32.h:51:static __always_inline void
*__constant_memcpy(void *to, const void *from,
./include/asm/string_64.h:30:extern void *__memcpy(void *to, const
void *from, size_t len);
./include/asm/string_64.h:34:extern void *memcpy(void *to, const void
*from, size_t len);
./include/asm/string_64.h:36:#define memcpy(dst, src, len)
                         \
./include/asm/string_64.h:41:           __ret = __memcpy((dst), (src),
__len);          \
./include/asm/string_64.h:43:           __ret =
__builtin_memcpy((dst), (src), __len);  \
./include/asm/string_64.h:52:#define memcpy(dst, src, len)
__inline_memcpy((dst), (src), (len))
./include/asm/string_64.h:76:#undef memcpy
./include/asm/string_64.h:77:#define memcpy(dst, src, len)
__memcpy(dst, src, len)
./include/asm/string_64.h:9:static __always_inline void
*__inline_memcpy(void *to, const void *from, size_t n)
./lib/memcpy_64.S:44: * memcpy_erms() - enhanced fast string memcpy.
This is faster and

$ grep ^#undef -nr arch/x86 | egrep 'memcpy|memcmp|memset' | sort
arch/x86/boot/compressed/misc.c:101:#undef memcpy
arch/x86/boot/compressed/misc.c:108:#undef memset
arch/x86/boot/string.h:5:#undef memcpy
arch/x86/boot/string.h:6:#undef memset
arch/x86/boot/string.h:7:#undef memcmp
arch/x86/include/asm/string_64.h:76:#undef memcpy
arch/x86/kernel/x8664_ksyms_64.c:49:#undef memcpy
arch/x86/kernel/x8664_ksyms_64.c:50:#undef memset
arch/x86/lib/memcpy_32.c:4:#undef memcpy
arch/x86/lib/memcpy_32.c:5:#undef memset

[ / INVESTIGATIONS ]
From 81aa8c99b5cb439e97e8896319a29d8d25b916e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan-Simon=20M=C3=B6ller?= <[email protected]>
Date: Wed, 14 Nov 2012 12:46:46 +0100
Subject: [PATCH 22/39] x86: LLVMLinux: Add own versions of memcpy and memset
 for compilation with clang
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When using clang to compile the kernel, things like memcpy and memset need to
be provided. With clang these are normally provided by the LLVM compiler-rt
library which isn't used with the Linux kernel.

Author:  PaX Team <[email protected]>
ML-Post: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120507/142707.html
URL:     http://llvm.linuxfoundation.org
Merge:   Jan-Simon Möller <[email protected]>
Signed-off-by: Jan-Simon Möller <[email protected]>

---
 arch/x86/boot/boot.h            | 36 +++++++++++++++++++++++++++++++++---
 arch/x86/boot/compressed/misc.c |  8 ++++----
 arch/x86/boot/memory.c          |  2 +-
 arch/x86/boot/string.c          |  6 +++---
 4 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 5b75319..e02f05d 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -30,6 +30,12 @@
 #include <asm/processor-flags.h>
 #include "ctype.h"
 
+#ifdef CONFIG_X86_32
+#define asmlinkage __attribute__((regparm(0)))
+#else
+#define asmlinkage
+#endif
+
 /* Useful macros */
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
 
@@ -229,12 +235,36 @@ void copy_to_fs(addr_t dst, void *src, size_t len);
 void *copy_from_fs(void *dst, addr_t src, size_t len);
 void copy_to_gs(addr_t dst, void *src, size_t len);
 void *copy_from_gs(void *dst, addr_t src, size_t len);
+
+#ifndef __clang__
 void *memcpy(void *dst, void *src, size_t len);
 void *memset(void *dst, int c, size_t len);
 
 #define memcpy(d,s,l) __builtin_memcpy(d,s,l)
 #define memset(d,c,l) __builtin_memset(d,c,l)
 
+#else
+static inline void *memcpy(void *d, const void *s, size_t l)
+{
+	int d0, d1, d2;
+	asm volatile("rep ; addr32 movsb\n\t"
+		     : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+		     : "0" (l), "1" ((long)d), "2" ((long)s)
+		     : "memory");
+	return d;
+}
+
+static inline void *memset(void *d, char c, size_t l)
+{
+	int d0, d1;
+	asm volatile("rep ; addr32 stosb\n\t"
+		     : "=&c" (d0), "=&D" (d1)
+		     : "0" (l), "1" (d), "a" (c)
+		     : "memory");
+	return d;
+}
+#endif
+
 /* a20.c */
 int enable_a20(void);
 
@@ -350,9 +380,9 @@ int printf(const char *fmt, ...);
 void initregs(struct biosregs *regs);
 
 /* string.c */
-int strcmp(const char *str1, const char *str2);
-int strncmp(const char *cs, const char *ct, size_t count);
-size_t strnlen(const char *s, size_t maxlen);
+asmlinkage int strcmp(const char *str1, const char *str2);
+asmlinkage int strncmp(const char *cs, const char *ct, size_t count);
+asmlinkage size_t strnlen(const char *s, size_t maxlen);
 unsigned int atou(const char *s);
 unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);
 
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 0319c88..de331e5 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -109,8 +109,8 @@ static void error(char *m);
  */
 struct boot_params *real_mode;		/* Pointer to real-mode data */
 
-void *memset(void *s, int c, size_t n);
-void *memcpy(void *dest, const void *src, size_t n);
+asmlinkage void *memset(void *s, int c, size_t n);
+asmlinkage void *memcpy(void *dest, const void *src, size_t n);
 
 #ifdef CONFIG_X86_64
 #define memptr long
@@ -222,7 +222,7 @@ void __putstr(const char *s)
 	outb(0xff & (pos >> 1), vidport+1);
 }
 
-void *memset(void *s, int c, size_t n)
+asmlinkage void *memset(void *s, int c, size_t n)
 {
 	int i;
 	char *ss = s;
@@ -232,7 +232,7 @@ void *memset(void *s, int c, size_t n)
 	return s;
 }
 #ifdef CONFIG_X86_32
-void *memcpy(void *dest, const void *src, size_t n)
+asmlinkage void *memcpy(void *dest, const void *src, size_t n)
 {
 	int d0, d1, d2;
 	asm volatile(
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index db75d07..65607e5 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -64,7 +64,7 @@ static int detect_memory_e820(void)
 			break;
 		}
 
-		*desc++ = buf;
+		memcpy(desc++, &buf, sizeof(buf));
 		count++;
 	} while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));
 
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 574dedf..476ddea 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -14,7 +14,7 @@
 
 #include "boot.h"
 
-int strcmp(const char *str1, const char *str2)
+asmlinkage int strcmp(const char *str1, const char *str2)
 {
 	const unsigned char *s1 = (const unsigned char *)str1;
 	const unsigned char *s2 = (const unsigned char *)str2;
@@ -30,7 +30,7 @@ int strcmp(const char *str1, const char *str2)
 	return 0;
 }
 
-int strncmp(const char *cs, const char *ct, size_t count)
+asmlinkage int strncmp(const char *cs, const char *ct, size_t count)
 {
 	unsigned char c1, c2;
 
@@ -46,7 +46,7 @@ int strncmp(const char *cs, const char *ct, size_t count)
 	return 0;
 }
 
-size_t strnlen(const char *s, size_t maxlen)
+asmlinkage size_t strnlen(const char *s, size_t maxlen)
 {
 	const char *es = s;
 	while (*es && maxlen) {
-- 
1.8.1.2

Reply via email to