Title: [8686] trunk/arch/blackfin/lib: Add a little optimization to strncpy - which decreases boot time by 20%.
- Revision
- 8686
- Author
- rgetz
- Date
- 2010-05-04 10:59:21 -0400 (Tue, 04 May 2010)
Log Message
Add a little optimization to strncpy - which decreases boot time by 20%.
When the kernel is booting with an initramfs, it builds up the filesystem
from a cpio archive, by calling strncpy_from_user, in fs/namei.c:do_getname()
for every file in the archive (which can be lots) with a length of PATH_MAX
(1024). This causes the dest of the strncpy to be padded with many null bytes.
With this optimization, most of these null bytes are instead written by
a call to memset(), which is already optimized for filling memory
quickly.
Boot time measured with 'loglevel=0', so UART speed doesn't get in the
way.
Modified Paths
Diff
Modified: trunk/arch/blackfin/lib/memset.S (8685 => 8686)
--- trunk/arch/blackfin/lib/memset.S 2010-05-04 10:36:22 UTC (rev 8685)
+++ trunk/arch/blackfin/lib/memset.S 2010-05-04 14:59:21 UTC (rev 8686)
@@ -20,6 +20,7 @@
* R1 = filler byte
* R2 = count
* Favours word aligned data.
+ * The strncpy assumes that I0 and I1 are not used in this function
*/
ENTRY(_memset)
Modified: trunk/arch/blackfin/lib/strncpy.S (8685 => 8686)
--- trunk/arch/blackfin/lib/strncpy.S 2010-05-04 10:36:22 UTC (rev 8685)
+++ trunk/arch/blackfin/lib/strncpy.S 2010-05-04 14:59:21 UTC (rev 8686)
@@ -10,7 +10,8 @@
* R0 = address (dest)
* R1 = address (src)
* R2 = size
- * Returns a pointer to the destination string dest
+ * Returns a pointer (R0) to the destination string dest
+ * we do this by not changing R0
*/
#ifdef CONFIG_STRNCPY_L1
@@ -24,29 +25,60 @@
ENTRY(_strncpy)
CC = R2 == 0;
if CC JUMP 4f;
+
+ P2 = R2 ; /* size */
P0 = R0 ; /* dst*/
P1 = R1 ; /* src*/
+ LSETUP (1f, 2f) LC0 = P2;
1:
R1 = B [P1++] (Z);
B [P0++] = R1;
- CC = R1;
- if ! cc jump 2f;
- R2 += -1;
- CC = R2 == 0;
- if ! cc jump 1b (bp);
- jump 4f;
+ CC = R1 == 0;
2:
- /* if src is shorter than n, we need to null pad bytes in dest */
- R1 = 0;
+ if CC jump 3f;
+
+ RTS;
+
+ /* if src is shorter than n, we need to null pad bytes in dest
+ * but, we can get here when the last byte is zero, and we don't
+ * want to copy an extra byte at the end, so we need to check
+ */
3:
+ R2 = LC0;
+ CC = R2
+ if ! CC jump 6f;
+
+ /* if the required null padded portion is small, do it here, rather than
+ * handling the overhead of memset (which is OK when things are big).
+ */
+ R3 = 0x20;
+ CC = R2 < R3;
+ IF CC jump 4f;
+
R2 += -1;
- CC = R2 == 0;
- if cc jump 4f;
- B [P0++] = R1;
- jump 3b;
+ /* Set things up for memset
+ * R0 = address
+ * R1 = filler byte (this case it's zero, set above)
+ * R2 = count (set above)
+ */
+
+ I1 = R0;
+ R0 = RETS;
+ I0 = R0;
+ R0 = P0;
+ CALL _memset;
+ R0 = I0;
+ RETS = R0;
+ R0 = I1;
+ RTS;
+
4:
+ LSETUP(5f, 5f) LC0;
+5:
+ B [P0++] = R1;
+6:
RTS;
ENDPROC(_strncpy)
_______________________________________________
Linux-kernel-commits mailing list
[email protected]
https://blackfin.uclinux.org/mailman/listinfo/linux-kernel-commits