On Tue, Dec 16, 2008 at 05:36:32PM -0800, [email protected] wrote: >Author: vda >Date: 2008-12-16 17:36:31 -0800 (Tue, 16 Dec 2008) >New Revision: 24435 > >Log: >since gcc -Os hates us and does not inline string ops, >implement inline versions of some of them. >Enable only those which result roughly in the same >code size as using out-of-line versions.
This should be fixed properly on gcc trunk and perhaps even 4.3.2, shouldn't it? Given that it didn't compile for me ¹) and that it's the compiler's duty to handle them, I admit that I do not like this patch too much. Also, is there a special reason why you didn't use a .macro for small_store and small_move? Finally, I think that we have a config knob for arch-specific string ops: UCLIBC_HAS_STRING_ARCH_OPT. It sounds a bit like you would not honour this option. So, just to be sure: did you (when using a current compiler) have UCLIBC_HAS_STRING_ARCH_OPT turned on? Thanks, ¹) Index: libc/string/memcpy.c =================================================================== --- libc/string/memcpy.c (revision 24439) +++ libc/string/memcpy.c (working copy) @@ -8,8 +8,10 @@ #include "_string.h" #ifdef WANT_WIDE +# undef wmemcpy # define Wmemcpy wmemcpy #else +# undef memcpy # define Wmemcpy memcpy #endif Index: libc/string/memset.c =================================================================== --- libc/string/memset.c (revision 24439) +++ libc/string/memset.c (working copy) @@ -8,9 +8,10 @@ #include "_string.h" #ifdef WANT_WIDE +# undef wmemset # define Wmemset wmemset #else -/* Experimentally off - libc_hidden_proto(memset) */ +# undef memset # define Wmemset memset #endif > >None of this affects users, installed headers won't have >any trace of it. 
> > > >Added: > trunk/uClibc/include/libc-string_i386.h > >Modified: > trunk/uClibc/include/string.h > trunk/uClibc/libc/string/generic/memchr.c > trunk/uClibc/libc/string/generic/mempcpy.c > trunk/uClibc/libc/string/i386/memcpy.c > trunk/uClibc/libc/string/i386/memset.c > trunk/uClibc/libc/string/i386/strcpy.c > trunk/uClibc/libc/string/i386/strlen.c > trunk/uClibc/libc/string/memchr.c > trunk/uClibc/libc/string/mempcpy.c > trunk/uClibc/libc/string/stpcpy.c > > >Changeset: >Added: trunk/uClibc/include/libc-string_i386.h >=================================================================== >--- trunk/uClibc/include/libc-string_i386.h (rev 0) >+++ trunk/uClibc/include/libc-string_i386.h 2008-12-17 01:36:31 UTC (rev >24435) >@@ -0,0 +1,314 @@ >+/* >+ * Copyright (C) 2008 Denys Vlasenko <[email protected]> >+ * >+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball >+ */ >+ >+#if !defined _STRING_H >+#error "Never use <libc-string_i386.h> directly; include <string.h> instead" >+#endif >+ >+#ifndef _LIBC_STRING_i386_H >+#define _LIBC_STRING_i386_H 1 >+ >+static __always_inline >+void *inlined_memset_const_c_count4(void *s, unsigned eax, unsigned count) >+{ >+ int ecx, edi; >+ >+ if (count == 0) >+ return s; >+ >+ /* Very small (2 stores or less) are best done with direct >+ * mov <const>,<mem> instructions (they do not clobber registers) */ >+ if (count == 1) { >+ *(char *)(s + 0) = eax; >+ return s; >+ } >+ >+ eax *= 0x01010101; /* done at compile time */ >+ >+ if (count == 2) { >+ *(short *)(s + 0) = eax; >+ return s; >+ } >+ if (count == 3) { >+ *(short *)(s + 0) = eax; >+ *(char *) (s + 2) = eax; >+ return s; >+ } >+ if (count == 1*4 + 0) { >+ *(int *)(s + 0) = eax; >+ return s; >+ } >+ if (count == 1*4 + 1) { >+ *(int *) (s + 0) = eax; >+ *(char *)(s + 4) = eax; >+ return s; >+ } >+ if (count == 1*4 + 2) { >+ *(int *) (s + 0) = eax; >+ *(short *)(s + 4) = eax; >+ return s; >+ } >+ >+ /* Small string stores: don't clobber ecx >+ * (clobbers only 
eax and edi) */ >+#define small_store(arg) { \ >+ __asm__ __volatile__( \ >+ arg \ >+ : "=&D" (edi) \ >+ : "a" (eax), "0" (s) \ >+ : "memory" \ >+ ); \ >+ return s; \ >+} >+ if (count == 1*4 + 3) small_store("stosl; stosw; stosb"); >+ if (count == 2*4 + 0) { >+ ((int *)s)[0] = eax; >+ ((int *)s)[1] = eax; >+ return s; >+ } >+ if (count == 2*4 + 1) small_store("stosl; stosl; stosb"); >+ if (count == 2*4 + 2) small_store("stosl; stosl; stosw"); >+ if (count == 2*4 + 3) small_store("stosl; stosl; stosw; stosb"); >+ if (count == 3*4 + 0) small_store("stosl; stosl; stosl"); >+ if (count == 3*4 + 1) small_store("stosl; stosl; stosl; stosb"); >+ if (count == 3*4 + 2) small_store("stosl; stosl; stosl; stosw"); >+ if (count == 3*4 + 3) small_store("stosl; stosl; stosl; stosw; stosb"); >+ if (count == 4*4 + 0) small_store("stosl; stosl; stosl; stosl"); >+ if (count == 4*4 + 1) small_store("stosl; stosl; stosl; stosl; stosb"); >+ /* going over 7 bytes is suboptimal */ >+ /* stosw is 2-byte insn, so this one takes 6 bytes: */ >+ if (count == 4*4 + 2) small_store("stosl; stosl; stosl; stosl; stosw"); >+ /* 7 bytes */ >+ if (count == 4*4 + 3) small_store("stosl; stosl; stosl; stosl; stosw; >stosb"); >+ /* 5 bytes */ >+ if (count == 5*4 + 0) small_store("stosl; stosl; stosl; stosl; stosl"); >+ /* 6 bytes */ >+ if (count == 5*4 + 1) small_store("stosl; stosl; stosl; stosl; stosl; >stosb"); >+ /* 7 bytes */ >+ if (count == 5*4 + 2) small_store("stosl; stosl; stosl; stosl; stosl; >stosw"); >+ /* 8 bytes, but oh well... */ >+ if (count == 5*4 + 3) small_store("stosl; stosl; stosl; stosl; stosl; >stosw; stosb"); >+ /* 6 bytes */ >+ if (count == 6*4 + 0) small_store("stosl; stosl; stosl; stosl; stosl; >stosl"); >+ /* the rest would be 7+ bytes and is handled below instead */ >+#undef small_store >+ >+ /* Not small, but multiple-of-4 store. 
>+ * "mov <const>,%ecx; rep; stosl" sequence is 7 bytes */ >+ __asm__ __volatile__( >+ " rep; stosl\n" >+ : "=&c" (ecx), "=&D" (edi) >+ : "a" (eax), "0" (count / 4), "1" (s) >+ : "memory" >+ ); >+ return s; >+} >+#if 1 /* -51 bytes on shared i386 build with gcc 4.3.0 */ >+#define memset(s, c, count) ( \ >+ ( !(__builtin_constant_p(c) && __builtin_constant_p(count)) \ >+ || ((count) > (6*4 + 0) && ((count) % 4) != 0) \ >+ ) \ >+ ? memset((s), (c), (count)) \ >+ : inlined_memset_const_c_count4((s), (c), (count)) \ >+ ) >+#endif >+ >+ >+static __always_inline >+void *inlined_mempcpy_const_count4(void *d, const void *s, unsigned count) >+{ >+ int ecx; >+ char *esi, *edi; >+ >+ if (count == 0) >+ return d; >+ >+ if (count == 1) { >+ *(char *)d = *(char *)s; >+ return d + 1; >+ } >+ if (count == 2) { >+ *(short *)d = *(short *)s; >+ return d + 2; >+ } >+ /* Small string moves: don't clobber ecx >+ * (clobbers only esi and edi) */ >+#define small_move(arg) { \ >+ __asm__ __volatile__( \ >+ arg \ >+ : "=&S" (esi), "=&D" (edi) \ >+ : "0" (s), "1" (d) \ >+ : "memory" \ >+ ); \ >+ return edi; \ >+} >+ if (count == 3) small_move("movsw; movsb"); >+ if (count == 1*4 + 0) { >+ *(int *)d = *(int *)s; >+ return d + 4; >+ } >+ if (count == 1*4 + 1) small_move("movsl; movsb"); >+ if (count == 1*4 + 2) small_move("movsl; movsw"); >+ if (count == 1*4 + 3) small_move("movsl; movsw; movsb"); >+ if (count == 2*4 + 0) small_move("movsl; movsl"); >+ if (count == 2*4 + 1) small_move("movsl; movsl; movsb"); >+ if (count == 2*4 + 2) small_move("movsl; movsl; movsw"); >+ if (count == 2*4 + 3) small_move("movsl; movsl; movsw; movsb"); >+ if (count == 3*4 + 0) small_move("movsl; movsl; movsl"); >+ if (count == 3*4 + 1) small_move("movsl; movsl; movsl; movsb"); >+ if (count == 3*4 + 2) small_move("movsl; movsl; movsl; movsw"); >+ if (count == 3*4 + 3) small_move("movsl; movsl; movsl; movsw; movsb"); >+ if (count == 4*4 + 0) small_move("movsl; movsl; movsl; movsl"); >+ if (count == 4*4 + 1) 
small_move("movsl; movsl; movsl; movsl; movsb"); >+ /* going over 7 bytes is suboptimal */ >+ /* movsw is 2-byte insn, so this one takes 6 bytes: */ >+ if (count == 4*4 + 2) small_move("movsl; movsl; movsl; movsl; movsw"); >+ /* 7 bytes */ >+ if (count == 4*4 + 3) small_move("movsl; movsl; movsl; movsl; movsw; >movsb"); >+ /* 5 bytes */ >+ if (count == 5*4 + 0) small_move("movsl; movsl; movsl; movsl; movsl"); >+ /* 6 bytes */ >+ if (count == 5*4 + 1) small_move("movsl; movsl; movsl; movsl; movsl; >movsb"); >+ /* 7 bytes */ >+ if (count == 5*4 + 2) small_move("movsl; movsl; movsl; movsl; movsl; >movsw"); >+ /* 8 bytes, but oh well... */ >+ if (count == 5*4 + 3) small_move("movsl; movsl; movsl; movsl; movsl; >movsw; movsb"); >+ /* 6 bytes */ >+ if (count == 6*4 + 0) small_move("movsl; movsl; movsl; movsl; movsl; >movsl"); >+ /* the rest would be 7+ bytes and is handled below instead */ >+#undef small_move >+ >+ /* Not small, but multiple-of-4 move. >+ * "mov <const>,%ecx; rep; movsl" sequence is 7 bytes */ >+ __asm__ __volatile__( >+ " rep; movsl\n" >+ : "=&c" (ecx), "=&S" (esi), "=&D" (edi) >+ : "0" (count / 4), "1" (s), "2" (d) >+ : "memory" >+ ); >+ return edi; >+} >+static __always_inline >+void *inlined_memcpy_const_count4(void *d, const void *s, unsigned count) >+{ >+ inlined_mempcpy_const_count4(d, s, count); >+ return d; >+} >+#if 1 /* +34 bytes on shared i386 build with gcc 4.3.0 */ >+#define mempcpy(d, s, count) ( \ >+ ( !(__builtin_constant_p(count)) \ >+ || ((count) > (6*4 + 0) && ((count) % 4) != 0) \ >+ ) \ >+ ? mempcpy((d), (s), (count)) \ >+ : inlined_mempcpy_const_count4((d), (s), (count)) \ >+ ) >+#define memcpy(d, s, count) ( \ >+ ( !(__builtin_constant_p(count)) \ >+ || ((count) > (6*4 + 0) && ((count) % 4) != 0) \ >+ ) \ >+ ? 
memcpy((d), (s), (count)) \ >+ : inlined_memcpy_const_count4((d), (s), (count)) \ >+ ) >+#endif >+ >+ >+static __always_inline >+size_t inlined_strlen(const char *s) >+{ >+ int edi; >+ int ecx; >+ __asm__ __volatile__( >+ " repne; scasb\n" >+ /* " notl %0\n" */ >+ /* " decl %0\n" */ >+ : "=c" (ecx), "=&D" (edi) >+ : "1" (s), "a" (0), "0" (0xffffffffu) >+ /* : no clobbers */ >+ ); >+ return -ecx - 1; >+} >+#if 0 /* +1108 bytes on shared i386 build with gcc 4.3.0 */ >+#define strlen(s) inlined_strlen(s) >+#endif >+ >+ >+static __always_inline >+char *inlined_stpcpy(char *dest, const char *src) >+{ >+ char *esi, *edi; >+ int eax; >+ __asm__ __volatile__( >+ "1: lodsb\n" >+ " stosb\n" >+ " testb %%al, %%al\n" >+ " jnz 1b\n" >+ : "=&S" (esi), "=&D" (edi), "=&a" (eax) >+ : "0" (src), "1" (dest) >+ : "memory" >+ ); >+ return edi - 1; >+} >+static __always_inline >+char *inlined_strcpy(char *dest, const char *src) >+{ >+ inlined_stpcpy(dest, src); >+ return dest; >+} >+#if 0 /* +562 bytes on shared i386 build with gcc 4.3.0 */ >+#define stpcpy(dest, src) inlined_stpcpy(dest, src) >+#define strcpy(dest, src) inlined_strcpy(dest, src) >+#endif >+ >+ >+static __always_inline >+void *inlined_memchr(const void *s, int c, size_t count) >+{ >+ void *edi; >+ int ecx; >+ /* Unfortunately, c gets loaded to %eax (wide insn), not %al */ >+ __asm__ __volatile__( >+ " jecxz 1f\n" >+ " repne; scasb\n" >+ " leal -1(%%edi), %%edi\n" >+ " je 2f\n" >+ "1:\n" >+ " xorl %%edi, %%edi\n" >+ "2:\n" >+ : "=&D" (edi), "=&c" (ecx) >+ : "a" (c), "0" (s), "1" (count) >+ /* : no clobbers */ >+ ); >+ return edi; >+} >+static __always_inline >+void *inlined_memchr_const_c(const void *s, int c, size_t count) >+{ >+ void *edi; >+ int ecx, eax; >+ __asm__ __volatile__( >+ " jecxz 1f\n" >+ " movb %4, %%al\n" /* const c to %%al */ >+ " repne; scasb\n" >+ " leal -1(%%edi), %%edi\n" >+ " je 2f\n" >+ "1:\n" >+ " xorl %%edi, %%edi\n" >+ "2:\n" >+ : "=&D" (edi), "=&c" (ecx), "=&a" (eax) >+ : "0" (s), "i" (c), "1" 
(count) >+ /* : no clobbers */ >+ ); >+ return edi; >+} >+#if 1 /* +2 bytes on shared i386 build with gcc 4.3.0 */ >+#define memchr(s, c, count) ( \ >+ __builtin_constant_p(c) \ >+ ? inlined_memchr_const_c(s, (c) & 0xff, count) \ >+ : inlined_memchr(s, c, count) \ >+ ) >+#endif >+ >+#endif /* _LIBC_STRING_i386_H */ > >Modified: trunk/uClibc/include/string.h >=================================================================== >--- trunk/uClibc/include/string.h 2008-12-17 01:31:29 UTC (rev 24434) >+++ trunk/uClibc/include/string.h 2008-12-17 01:36:31 UTC (rev 24435) >@@ -378,7 +378,7 @@ > > /* The following two functions are non-standard but necessary for non-32 bit > platforms. */ >-#if 0 /*def __USE_GNU*/ >+# if 0 /*#ifdef __USE_GNU*/ > extern int ffsl (long int __l) __THROW __attribute__ ((__const__)); > # ifdef __GNUC__ > __extension__ extern int ffsll (long long int __ll) >@@ -422,44 +422,44 @@ > > #ifdef __USE_GNU > /* Compare S1 and S2 as strings holding name & indices/version numbers. */ >-#if 0 >+# if 0 > extern int strverscmp (__const char *__s1, __const char *__s2) > __THROW __attribute_pure__ __nonnull ((1, 2)); > libc_hidden_proto(strverscmp) >-#endif >+# endif > > /* Return a string describing the meaning of the signal number in SIG. */ > extern char *strsignal (int __sig) __THROW; > libc_hidden_proto(strsignal) > > /* Copy SRC to DEST, returning the address of the terminating '\0' in DEST. > */ >-#if 0 /* uClibc: disabled */ >+# if 0 /* uClibc: disabled */ > extern char *__stpcpy (char *__restrict __dest, __const char *__restrict > __src) > __THROW __nonnull ((1, 2)); >-#endif >+# endif > extern char *stpcpy (char *__restrict __dest, __const char *__restrict __src) > __THROW __nonnull ((1, 2)); > libc_hidden_proto(stpcpy) > > /* Copy no more than N characters of SRC to DEST, returning the address of > the last character written into DEST. 
*/ >-#if 0 /* uClibc: disabled */ >+# if 0 /* uClibc: disabled */ > extern char *__stpncpy (char *__restrict __dest, > __const char *__restrict __src, size_t __n) > __THROW __nonnull ((1, 2)); >-#endif >+# endif > extern char *stpncpy (char *__restrict __dest, > __const char *__restrict __src, size_t __n) > __THROW __nonnull ((1, 2)); > libc_hidden_proto(stpncpy) > >-#if 0 /* uClibc does not >support strfry or memfrob. */ >+# if 0 /* uClibc does not support strfry or memfrob. */ > /* Sautee STRING briskly. */ > extern char *strfry (char *__string) __THROW __nonnull ((1)); > > /* Frobnicate N bytes of S. */ > extern void *memfrob (void *__s, size_t __n) __THROW __nonnull ((1)); >-#endif >+# endif > > # ifndef basename > /* Return the file name within directory of FILENAME. We don't >@@ -469,7 +469,7 @@ > extern char *basename (__const char *__filename) __THROW __nonnull ((1)); > libc_hidden_proto(basename) > # endif >-#endif >+#endif /* __USE_GNU */ > > > #ifdef __USE_BSD >@@ -484,4 +484,11 @@ > > __END_DECLS > >-#endif /* string.h */ >+ >+#ifdef UCLIBC_INTERNAL >+# if defined __i386__ >+# include <libc-string_i386.h> >+# endif >+#endif >+ >+#endif /* string.h */ > >Modified: trunk/uClibc/libc/string/generic/memchr.c >=================================================================== >--- trunk/uClibc/libc/string/generic/memchr.c 2008-12-17 01:31:29 UTC (rev >24434) >+++ trunk/uClibc/libc/string/generic/memchr.c 2008-12-17 01:36:31 UTC (rev >24435) >@@ -25,14 +25,12 @@ > #include <stdlib.h> > #include <limits.h> > >-/* Experimentally off - libc_hidden_proto(memchr) */ >-/* libc_hidden_proto(abort) */ >- > #include "memcopy.h" > > #define LONG_MAX_32_BITS 2147483647 > > /* Search no more than N bytes of S for C. 
*/ >+#undef memchr > void *memchr (const void * s, int c_in, size_t n) > { > const unsigned char *char_ptr; > >Modified: trunk/uClibc/libc/string/generic/mempcpy.c >=================================================================== >--- trunk/uClibc/libc/string/generic/mempcpy.c 2008-12-17 01:31:29 UTC (rev >24434) >+++ trunk/uClibc/libc/string/generic/mempcpy.c 2008-12-17 01:36:31 UTC (rev >24435) >@@ -8,9 +8,8 @@ > #include <string.h> > > #ifdef __USE_GNU >-/* Experimentally off - libc_hidden_proto(mempcpy) */ >-/* Experimentally off - libc_hidden_proto(memcpy) */ > >+# undef mempcpy > void *mempcpy (void *dstpp, const void *srcpp, size_t len) > { > memcpy(dstpp, srcpp, len); > >Modified: trunk/uClibc/libc/string/i386/memcpy.c >=================================================================== >--- trunk/uClibc/libc/string/i386/memcpy.c 2008-12-17 01:31:29 UTC (rev >24434) >+++ trunk/uClibc/libc/string/i386/memcpy.c 2008-12-17 01:36:31 UTC (rev >24435) >@@ -32,7 +32,7 @@ > > #include <string.h> > >-/* Experimentally off - libc_hidden_proto(memcpy) */ >+#undef memcpy > void *memcpy(void * to, const void * from, size_t n) > { > int d0, d1, d2; > >Modified: trunk/uClibc/libc/string/i386/memset.c >=================================================================== >--- trunk/uClibc/libc/string/i386/memset.c 2008-12-17 01:31:29 UTC (rev >24434) >+++ trunk/uClibc/libc/string/i386/memset.c 2008-12-17 01:36:31 UTC (rev >24435) >@@ -33,6 +33,7 @@ > #include <string.h> > > /* Experimentally off - libc_hidden_proto(memset) */ >+#undef memset > void *memset(void *s, int c, size_t count) > { > int d0, d1; > >Modified: trunk/uClibc/libc/string/i386/strcpy.c >=================================================================== >--- trunk/uClibc/libc/string/i386/strcpy.c 2008-12-17 01:31:29 UTC (rev >24434) >+++ trunk/uClibc/libc/string/i386/strcpy.c 2008-12-17 01:36:31 UTC (rev >24435) >@@ -32,7 +32,7 @@ > > #include <string.h> > >-/* Experimentally off - 
libc_hidden_proto(strcpy) */ >+#undef strcpy > char *strcpy(char * dest, const char * src) > { > int d0, d1, d2; > >Modified: trunk/uClibc/libc/string/i386/strlen.c >=================================================================== >--- trunk/uClibc/libc/string/i386/strlen.c 2008-12-17 01:31:29 UTC (rev >24434) >+++ trunk/uClibc/libc/string/i386/strlen.c 2008-12-17 01:36:31 UTC (rev >24435) >@@ -32,7 +32,7 @@ > > #include <string.h> > >-/* Experimentally off - libc_hidden_proto(strlen) */ >+#undef strlen > size_t strlen(const char *s) > { > int d0; > >Modified: trunk/uClibc/libc/string/memchr.c >=================================================================== >--- trunk/uClibc/libc/string/memchr.c 2008-12-17 01:31:29 UTC (rev 24434) >+++ trunk/uClibc/libc/string/memchr.c 2008-12-17 01:36:31 UTC (rev 24435) >@@ -10,6 +10,7 @@ > #ifdef WANT_WIDE > # define Wmemchr wmemchr > #else >+# undef memchr > # define Wmemchr memchr > #endif > > >Modified: trunk/uClibc/libc/string/mempcpy.c >=================================================================== >--- trunk/uClibc/libc/string/mempcpy.c 2008-12-17 01:31:29 UTC (rev 24434) >+++ trunk/uClibc/libc/string/mempcpy.c 2008-12-17 01:36:31 UTC (rev 24435) >@@ -12,6 +12,7 @@ > #ifdef WANT_WIDE > # define Wmempcpy wmempcpy > #else >+# undef mempcpy > # define Wmempcpy mempcpy > #endif > > >Modified: trunk/uClibc/libc/string/stpcpy.c >=================================================================== >--- trunk/uClibc/libc/string/stpcpy.c 2008-12-17 01:31:29 UTC (rev 24434) >+++ trunk/uClibc/libc/string/stpcpy.c 2008-12-17 01:36:31 UTC (rev 24435) >@@ -10,7 +10,7 @@ > #ifdef WANT_WIDE > # define Wstpcpy wcpcpy > #else >-/* Experimentally off - libc_hidden_proto(stpcpy) */ >+# undef stpcpy > # define Wstpcpy stpcpy > #endif > > >_______________________________________________ >uClibc-cvs mailing list >[email protected] >http://busybox.net/cgi-bin/mailman/listinfo/uclibc-cvs _______________________________________________ 
uClibc mailing list [email protected] http://busybox.net/cgi-bin/mailman/listinfo/uclibc
