These are library functions used by the ARC700 architecture.

The following files were borrowed from Linux kernel sources,
commit 5ee54f38171b9b3541c5e9cf9c3a9e53455fd8b4 (Linux 3.11.10):

 * memcmp.S
 * memcpy-700.S
 * memset.S
 * strchr-700.S
 * strcmp.S
 * strcpy-700.S
 * strlen.S

Signed-off-by: Alexey Brodkin <abrod...@synopsys.com>

Cc: Mischa Jonker <mjon...@synopsys.com>
Cc: Francois Bedard <fbed...@synopsys.com>

Changes for v2:

 * Added commit message
 * Added optimized string routines borrowed from Linux
 * Added explicit mention of files borrowed from Linux sources with
   reference to version/commit in the Linux git repository
---
 arch/arc/lib/Makefile     |  16 ++++++
 arch/arc/lib/bootm.c      | 106 ++++++++++++++++++++++++++++++++++++
 arch/arc/lib/memcmp.S     | 122 ++++++++++++++++++++++++++++++++++++++++++
 arch/arc/lib/memcpy-700.S |  64 ++++++++++++++++++++++
 arch/arc/lib/memset.S     |  59 ++++++++++++++++++++
 arch/arc/lib/relocate.c   |  74 ++++++++++++++++++++++++++
 arch/arc/lib/sections.c   |  21 ++++++++
 arch/arc/lib/strchr-700.S | 133 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/arc/lib/strcmp.S     |  96 +++++++++++++++++++++++++++++++++
 arch/arc/lib/strcpy-700.S |  70 ++++++++++++++++++++++++
 arch/arc/lib/strlen.S     |  83 +++++++++++++++++++++++++++++
 11 files changed, 844 insertions(+)
 create mode 100644 arch/arc/lib/Makefile
 create mode 100644 arch/arc/lib/bootm.c
 create mode 100644 arch/arc/lib/memcmp.S
 create mode 100644 arch/arc/lib/memcpy-700.S
 create mode 100644 arch/arc/lib/memset.S
 create mode 100644 arch/arc/lib/relocate.c
 create mode 100644 arch/arc/lib/sections.c
 create mode 100644 arch/arc/lib/strchr-700.S
 create mode 100644 arch/arc/lib/strcmp.S
 create mode 100644 arch/arc/lib/strcpy-700.S
 create mode 100644 arch/arc/lib/strlen.S

diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile
new file mode 100644
index 0000000..7675f85
--- /dev/null
+++ b/arch/arc/lib/Makefile
@@ -0,0 +1,16 @@
+#
+# Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+#
+# SPDX-License-Identifier:     GPL-2.0+
+#
+
+obj-y += sections.o
+obj-y += relocate.o
+obj-y += strchr-700.o
+obj-y += strcmp.o
+obj-y += strcpy-700.o
+obj-y += strlen.o
+obj-y += memcmp.o
+obj-y += memcpy-700.o
+obj-y += memset.o
+obj-$(CONFIG_CMD_BOOTM) += bootm.o
diff --git a/arch/arc/lib/bootm.c b/arch/arc/lib/bootm.c
new file mode 100644
index 0000000..d185a50
--- /dev/null
+++ b/arch/arc/lib/bootm.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <common.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+static ulong get_sp(void)
+{
+       ulong ret;
+
+       asm("mov %0, sp" : "=r"(ret) : );
+       return ret;
+}
+
+void arch_lmb_reserve(struct lmb *lmb)
+{
+       ulong sp;
+
+       /*
+        * Booting a (Linux) kernel image
+        *
+        * Allocate space for command line and board info - the
+        * address should be as high as possible within the reach of
+        * the kernel (see CONFIG_SYS_BOOTMAPSZ settings), but in unused
+        * memory, which means far enough below the current stack
+        * pointer.
+        */
+       sp = get_sp();
+       debug("## Current stack ends at 0x%08lx ", sp);
+
+       /* adjust sp by 4K to be safe */
+       sp -= 4096;
+       lmb_reserve(lmb, sp, (CONFIG_SYS_SDRAM_BASE + gd->ram_size - sp));
+}
+
+static int cleanup_before_linux(void)
+{
+       disable_interrupts();
+       flush_dcache_all();
+       invalidate_icache_all();
+
+       return 0;
+}
+
+/* Subcommand: PREP */
+static void boot_prep_linux(bootm_headers_t *images)
+{
+       if (image_setup_linux(images))
+               hang();
+}
+
+/* Subcommand: GO */
+static void boot_jump_linux(bootm_headers_t *images, int flag)
+{
+       void (*kernel_entry)(int zero, int arch, uint params);
+       unsigned int r0, r2;
+       int fake = (flag & BOOTM_STATE_OS_FAKE_GO);
+
+       kernel_entry = (void (*)(int, int, uint))images->ep;
+
+       debug("## Transferring control to Linux (at address %08lx)...\n",
+             (ulong) kernel_entry);
+       bootstage_mark(BOOTSTAGE_ID_RUN_OS);
+
+       printf("\nStarting kernel ...%s\n\n", fake ?
+              "(fake run for tracing)" : "");
+       bootstage_mark_name(BOOTSTAGE_ID_BOOTM_HANDOFF, "start_kernel");
+
+       cleanup_before_linux();
+
+       if (IMAGE_ENABLE_OF_LIBFDT && images->ft_len) {
+               r0 = 2;
+               r2 = (unsigned int)images->ft_addr;
+       } else {
+               r0 = 1;
+               r2 = (unsigned int)getenv("bootargs");
+       }
+
+       if (!fake)
+               kernel_entry(r0, 0, r2);
+}
+
+int do_bootm_linux(int flag, int argc, char *argv[], bootm_headers_t *images)
+{
+       /* No need for those on ARC */
+       if ((flag & BOOTM_STATE_OS_BD_T) || (flag & BOOTM_STATE_OS_CMDLINE))
+               return -1;
+
+       if (flag & BOOTM_STATE_OS_PREP) {
+               boot_prep_linux(images);
+               return 0;
+       }
+
+       if (flag & (BOOTM_STATE_OS_GO | BOOTM_STATE_OS_FAKE_GO)) {
+               boot_jump_linux(images, flag);
+               return 0;
+       }
+
+       boot_prep_linux(images);
+       boot_jump_linux(images, flag);
+       return 0;
+}
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
new file mode 100644
index 0000000..c47a271
--- /dev/null
+++ b/arch/arc/lib/memcmp.S
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <asm/linkage.h>
+
+#ifdef __LITTLE_ENDIAN__
+#define WORD2 r2
+#define SHIFT r3
+#else /* BIG ENDIAN */
+#define WORD2 r3
+#define SHIFT r2
+#endif
+
+ARC_ENTRY memcmp
+       or      r12,r0,r1
+       asl_s   r12,r12,30
+       sub     r3,r2,1
+       brls    r2,r12,.Lbytewise
+       ld      r4,[r0,0]
+       ld      r5,[r1,0]
+       lsr.f   lp_count,r3,3
+       lpne    .Loop_end
+       ld_s    WORD2,[r0,4]
+       ld_s    r12,[r1,4]
+       brne    r4,r5,.Leven
+       ld.a    r4,[r0,8]
+       ld.a    r5,[r1,8]
+       brne    WORD2,r12,.Lodd
+.Loop_end:
+       asl_s   SHIFT,SHIFT,3
+       bhs_s   .Last_cmp
+       brne    r4,r5,.Leven
+       ld      r4,[r0,4]
+       ld      r5,[r1,4]
+#ifdef __LITTLE_ENDIAN__
+       nop_s
+       ; one more load latency cycle
+.Last_cmp:
+       xor     r0,r4,r5
+       bset    r0,r0,SHIFT
+       sub_s   r1,r0,1
+       bic_s   r1,r1,r0
+       norm    r1,r1
+       b.d     .Leven_cmp
+       and     r1,r1,24
+.Leven:
+       xor     r0,r4,r5
+       sub_s   r1,r0,1
+       bic_s   r1,r1,r0
+       norm    r1,r1
+       ; slow track insn
+       and     r1,r1,24
+.Leven_cmp:
+       asl     r2,r4,r1
+       asl     r12,r5,r1
+       lsr_s   r2,r2,1
+       lsr_s   r12,r12,1
+       j_s.d   [blink]
+       sub     r0,r2,r12
+       .balign 4
+.Lodd:
+       xor     r0,WORD2,r12
+       sub_s   r1,r0,1
+       bic_s   r1,r1,r0
+       norm    r1,r1
+       ; slow track insn
+       and     r1,r1,24
+       asl_s   r2,r2,r1
+       asl_s   r12,r12,r1
+       lsr_s   r2,r2,1
+       lsr_s   r12,r12,1
+       j_s.d   [blink]
+       sub     r0,r2,r12
+#else /* BIG ENDIAN */
+.Last_cmp:
+       neg_s   SHIFT,SHIFT
+       lsr     r4,r4,SHIFT
+       lsr     r5,r5,SHIFT
+       ; slow track insn
+.Leven:
+       sub.f   r0,r4,r5
+       mov.ne  r0,1
+       j_s.d   [blink]
+       bset.cs r0,r0,31
+.Lodd:
+       cmp_s   WORD2,r12
+
+       mov_s   r0,1
+       j_s.d   [blink]
+       bset.cs r0,r0,31
+#endif /* ENDIAN */
+       .balign 4
+.Lbytewise:
+       breq    r2,0,.Lnil
+       ldb     r4,[r0,0]
+       ldb     r5,[r1,0]
+       lsr.f   lp_count,r3
+       lpne    .Lbyte_end
+       ldb_s   r3,[r0,1]
+       ldb     r12,[r1,1]
+       brne    r4,r5,.Lbyte_even
+       ldb.a   r4,[r0,2]
+       ldb.a   r5,[r1,2]
+       brne    r3,r12,.Lbyte_odd
+.Lbyte_end:
+       bcc     .Lbyte_even
+       brne    r4,r5,.Lbyte_even
+       ldb_s   r3,[r0,1]
+       ldb_s   r12,[r1,1]
+.Lbyte_odd:
+       j_s.d   [blink]
+       sub     r0,r3,r12
+.Lbyte_even:
+       j_s.d   [blink]
+       sub     r0,r4,r5
+.Lnil:
+       j_s.d   [blink]
+       mov     r0,0
+ARC_EXIT memcmp
diff --git a/arch/arc/lib/memcpy-700.S b/arch/arc/lib/memcpy-700.S
new file mode 100644
index 0000000..b5f6151
--- /dev/null
+++ b/arch/arc/lib/memcpy-700.S
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY memcpy
+       or      r3,r0,r1
+       asl_s   r3,r3,30
+       mov_s   r5,r0
+       brls.d  r2,r3,.Lcopy_bytewise
+       sub.f   r3,r2,1
+       ld_s    r12,[r1,0]
+       asr.f   lp_count,r3,3
+       bbit0.d r3,2,.Lnox4
+       bmsk_s  r2,r2,1
+       st.ab   r12,[r5,4]
+       ld.a    r12,[r1,4]
+.Lnox4:
+       lppnz   .Lendloop
+       ld_s    r3,[r1,4]
+       st.ab   r12,[r5,4]
+       ld.a    r12,[r1,8]
+       st.ab   r3,[r5,4]
+.Lendloop:
+       breq    r2,0,.Last_store
+       ld      r3,[r5,0]
+#ifdef __LITTLE_ENDIAN__
+       add3    r2,-1,r2
+       ; uses long immediate
+       xor_s   r12,r12,r3
+       bmsk    r12,r12,r2
+    xor_s      r12,r12,r3
+#else /* BIG ENDIAN */
+       sub3    r2,31,r2
+       ; uses long immediate
+        xor_s  r3,r3,r12
+        bmsk   r3,r3,r2
+        xor_s  r12,r12,r3
+#endif /* ENDIAN */
+.Last_store:
+       j_s.d   [blink]
+       st      r12,[r5,0]
+
+       .balign 4
+.Lcopy_bytewise:
+       jcs     [blink]
+       ldb_s   r12,[r1,0]
+       lsr.f   lp_count,r3
+       bhs_s   .Lnox1
+       stb.ab  r12,[r5,1]
+       ldb.a   r12,[r1,1]
+.Lnox1:
+       lppnz   .Lendbloop
+       ldb_s   r3,[r1,1]
+       stb.ab  r12,[r5,1]
+       ldb.a   r12,[r1,2]
+       stb.ab  r3,[r5,1]
+.Lendbloop:
+       j_s.d   [blink]
+       stb     r12,[r5,0]
+ARC_EXIT memcpy
diff --git a/arch/arc/lib/memset.S b/arch/arc/lib/memset.S
new file mode 100644
index 0000000..9b2d88d
--- /dev/null
+++ b/arch/arc/lib/memset.S
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+#define SMALL  7 /* Must be at least 6 to deal with alignment/loop issues.  */
+
+ARC_ENTRY memset
+       mov_s   r4,r0
+       or      r12,r0,r2
+       bmsk.f  r12,r12,1
+       extb_s  r1,r1
+       asl     r3,r1,8
+       beq.d   .Laligned
+       or_s    r1,r1,r3
+       brls    r2,SMALL,.Ltiny
+       add     r3,r2,r0
+       stb     r1,[r3,-1]
+       bclr_s  r3,r3,0
+       stw     r1,[r3,-2]
+       bmsk.f  r12,r0,1
+       add_s   r2,r2,r12
+       sub.ne  r2,r2,4
+       stb.ab  r1,[r4,1]
+       and     r4,r4,-2
+       stw.ab  r1,[r4,2]
+       and     r4,r4,-4
+.Laligned:     ; This code address should be aligned for speed.
+       asl     r3,r1,16
+       lsr.f   lp_count,r2,2
+       or_s    r1,r1,r3
+       lpne    .Loop_end
+       st.ab   r1,[r4,4]
+.Loop_end:
+       j_s     [blink]
+
+       .balign 4
+.Ltiny:
+       mov.f   lp_count,r2
+       lpne    .Ltiny_end
+       stb.ab  r1,[r4,1]
+.Ltiny_end:
+       j_s     [blink]
+ARC_EXIT memset
+
+; memzero: @r0 = mem, @r1 = size_t
+; memset:  @r0 = mem, @r1 = char, @r2 = size_t
+
+ARC_ENTRY memzero
+    ; adjust bzero args to memset args
+    mov r2, r1
+    mov r1, 0
+    b  memset    ; tail call, so no need to tinker with blink
+ARC_EXIT memzero
diff --git a/arch/arc/lib/relocate.c b/arch/arc/lib/relocate.c
new file mode 100644
index 0000000..710b792
--- /dev/null
+++ b/arch/arc/lib/relocate.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+#include <common.h>
+#include <elf.h>
+#include <asm/sections.h>
+
+DECLARE_GLOBAL_DATA_PTR;
+
+extern char __text_end[];
+
+/*
+ * Base functionality is taken from x86 version with added ARC-specifics
+ */
+int do_elf_reloc_fixups(void)
+{
+       Elf32_Rela *re_src = (Elf32_Rela *)(&__rel_dyn_start);
+       Elf32_Rela *re_end = (Elf32_Rela *)(&__rel_dyn_end);
+
+       Elf32_Addr *offset_ptr_rom, *last_offset = NULL;
+       Elf32_Addr *offset_ptr_ram;
+
+       do {
+               /* Get the location from the relocation entry */
+               offset_ptr_rom = (Elf32_Addr *)re_src->r_offset;
+
+               /* Check that the location of the relocation is in .text */
+               if (offset_ptr_rom >= (Elf32_Addr *)CONFIG_SYS_TEXT_BASE &&
+                   offset_ptr_rom > last_offset) {
+                       unsigned int val;
+                       /* Switch to the in-RAM version */
+                       offset_ptr_ram = (Elf32_Addr *)((ulong)offset_ptr_rom +
+                                                       gd->reloc_off);
+
+                       /*
+                        * Use "memcpy" because target location might be
+                        * 16-bit aligned on ARC so we may need to read
+                        * byte-by-byte; an attempt to read an entire word
+                        * at once makes the CPU throw an exception
+                        */
+                       memcpy(&val, offset_ptr_ram, sizeof(int));
+
+                       /* If location in ".text" section swap value */
+                       if ((unsigned int)offset_ptr_rom <
+                           (unsigned int)&__text_end)
+                               val = (val << 16) | (val >> 16);
+
+                       /* Check that the target points into .text */
+                       if (val >= CONFIG_SYS_TEXT_BASE && val <=
+                           (unsigned int)&__bss_end) {
+                               val += gd->reloc_off;
+                               /* If location in ".text" section swap value */
+                               if ((unsigned int)offset_ptr_rom <
+                                   (unsigned int)&__text_end)
+                                       val = (val << 16) | (val >> 16);
+                               memcpy(offset_ptr_ram, &val, sizeof(int));
+                       } else {
+                               debug("   %p: rom reloc %x, ram %p, value %x, limit %x\n",
+                                     re_src, re_src->r_offset, offset_ptr_ram,
+                                     val, (unsigned int)&__bss_end);
+                       }
+               } else {
+                       debug("   %p: rom reloc %x, last %p\n", re_src,
+                             re_src->r_offset, last_offset);
+               }
+               last_offset = offset_ptr_rom;
+
+       } while (++re_src < re_end);
+
+       return 0;
+}
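
Note on the halfword swapping above: 32-bit values embedded in ARC code are stored with their two 16-bit halves swapped relative to data order, so a relocation target that lives in .text is un-swapped before the offset is added and swapped back before being written out. A minimal C sketch of that fixup (the helper names are invented for illustration and are not part of this patch):

#include <stdint.h>
#include <string.h>

/* Swap the two 16-bit halves of a 32-bit value; the same
 * (val << 16) | (val >> 16) transformation used in do_elf_reloc_fixups(). */
static uint32_t swap_halfwords(uint32_t v)
{
	return (v << 16) | (v >> 16);
}

/* Fix up one relocation target that sits inside .text: read the possibly
 * 16-bit-aligned location via memcpy, recover the value, relocate it and
 * store it back in instruction-stream order. */
static void fixup_text_word(void *loc, uint32_t reloc_off)
{
	uint32_t val;

	memcpy(&val, loc, sizeof(val));	/* may be only 2-byte aligned */
	val = swap_halfwords(val);	/* back to natural byte order */
	val += reloc_off;		/* apply relocation offset */
	val = swap_halfwords(val);	/* back to instruction-stream order */
	memcpy(loc, &val, sizeof(val));
}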
diff --git a/arch/arc/lib/sections.c b/arch/arc/lib/sections.c
new file mode 100644
index 0000000..b0b46a4
--- /dev/null
+++ b/arch/arc/lib/sections.c
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013-2014 Synopsys, Inc. All rights reserved.
+ *
+ * SPDX-License-Identifier:    GPL-2.0+
+ */
+
+/*
+ * For some reason linker sets linker-generated symbols to zero in PIE mode.
+ * A work-around is substitution of linker-generated symbols with
+ * compiler-generated symbols which are properly handled by linker in PIE mode.
+ */
+
+char __bss_start[0] __attribute__((section(".__bss_start")));
+char __bss_end[0] __attribute__((section(".__bss_end")));
+char __image_copy_start[0] __attribute__((section(".__image_copy_start")));
+char __image_copy_end[0] __attribute__((section(".__image_copy_end")));
+char __rel_dyn_start[0] __attribute__((section(".__rel_dyn_start")));
+char __rel_dyn_end[0] __attribute__((section(".__rel_dyn_end")));
+char __text_start[0] __attribute__((section(".__text_start")));
+char __text_end[0] __attribute__((section(".__text_end")));
+char __init_end[0] __attribute__((section(".__init_end")));
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S
new file mode 100644
index 0000000..9c548c7
--- /dev/null
+++ b/arch/arc/lib/strchr-700.S
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* ARC700 has a relatively long pipeline and branch prediction, so we want
+   to avoid branches that are hard to predict.  On the other hand, the
+   presence of the norm instruction makes it easier to operate on whole
+   words branch-free.  */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strchr
+       extb_s  r1,r1
+       asl     r5,r1,8
+       bmsk    r2,r0,1
+       or      r5,r5,r1
+       mov_s   r3,0x01010101
+       breq.d  r2,r0,.Laligned
+       asl     r4,r5,16
+       sub_s   r0,r0,r2
+       asl     r7,r2,3
+       ld_s    r2,[r0]
+#ifdef __LITTLE_ENDIAN__
+       asl     r7,r3,r7
+#else
+       lsr     r7,r3,r7
+#endif
+       or      r5,r5,r4
+       ror     r4,r3
+       sub     r12,r2,r7
+       bic_s   r12,r12,r2
+       and     r12,r12,r4
+       brne.d  r12,0,.Lfound0_ua
+       xor     r6,r2,r5
+       ld.a    r2,[r0,4]
+       sub     r12,r6,r7
+       bic     r12,r12,r6
+#ifdef __LITTLE_ENDIAN__
+       and     r7,r12,r4
+       breq    r7,0,.Loop ; For speed, we want this branch to be unaligned.
+       b       .Lfound_char ; Likewise this one.
+#else
+       and     r12,r12,r4
+       breq    r12,0,.Loop ; For speed, we want this branch to be unaligned.
+       lsr_s   r12,r12,7
+       bic     r2,r7,r6
+       b.d     .Lfound_char_b
+       and_s   r2,r2,r12
+#endif
+; /* We require this code address to be unaligned for speed...  */
+.Laligned:
+       ld_s    r2,[r0]
+       or      r5,r5,r4
+       ror     r4,r3
+; /* ... so that this code address is aligned, for itself and ...  */
+.Loop:
+       sub     r12,r2,r3
+       bic_s   r12,r12,r2
+       and     r12,r12,r4
+       brne.d  r12,0,.Lfound0
+       xor     r6,r2,r5
+       ld.a    r2,[r0,4]
+       sub     r12,r6,r3
+       bic     r12,r12,r6
+       and     r7,r12,r4
+       breq    r7,0,.Loop /* ... so that this branch is unaligned.  */
+       ; Found searched-for character.  r0 has already advanced to next word.
+#ifdef __LITTLE_ENDIAN__
+/* We only need the information about the first matching byte
+   (i.e. the least significant matching byte) to be exact,
+   hence there is no problem with carry effects.  */
+.Lfound_char:
+       sub     r3,r7,1
+       bic     r3,r3,r7
+       norm    r2,r3
+       sub_s   r0,r0,1
+       asr_s   r2,r2,3
+       j.d     [blink]
+       sub_s   r0,r0,r2
+
+       .balign 4
+.Lfound0_ua:
+       mov     r3,r7
+.Lfound0:
+       sub     r3,r6,r3
+       bic     r3,r3,r6
+       and     r2,r3,r4
+       or_s    r12,r12,r2
+       sub_s   r3,r12,1
+       bic_s   r3,r3,r12
+       norm    r3,r3
+       add_s   r0,r0,3
+       asr_s   r12,r3,3
+       asl.f   0,r2,r3
+       sub_s   r0,r0,r12
+       j_s.d   [blink]
+       mov.pl  r0,0
+#else /* BIG ENDIAN */
+.Lfound_char:
+       lsr     r7,r7,7
+
+       bic     r2,r7,r6
+.Lfound_char_b:
+       norm    r2,r2
+       sub_s   r0,r0,4
+       asr_s   r2,r2,3
+       j.d     [blink]
+       add_s   r0,r0,r2
+
+.Lfound0_ua:
+       mov_s   r3,r7
+.Lfound0:
+       asl_s   r2,r2,7
+       or      r7,r6,r4
+       bic_s   r12,r12,r2
+       sub     r2,r7,r3
+       or      r2,r2,r6
+       bic     r12,r2,r12
+       bic.f   r3,r4,r12
+       norm    r3,r3
+
+       add.pl  r3,r3,1
+       asr_s   r12,r3,3
+       asl.f   0,r2,r3
+       add_s   r0,r0,r12
+       j_s.d   [blink]
+       mov.mi  r0,0
+#endif /* ENDIAN */
+ARC_EXIT strchr
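
For reference, strchr above and the strcmp/strcpy/strlen routines that follow all lean on the same branch-free, word-at-a-time zero/match detection that the header comment hints at. A minimal C sketch of the idiom (the helper names are made up for illustration and are not part of this patch):

#include <stdint.h>

/* Non-zero exactly when 'word' contains at least one zero byte.  The two
 * constants are the 0x01010101 value the assembly loads and its
 * rotate-by-one, 0x80808080. */
static int has_zero_byte(uint32_t word)
{
	return ((word - 0x01010101u) & ~word & 0x80808080u) != 0;
}

/* Searching for a particular character is the same test applied to
 * word ^ pattern, where 'pattern' is the character broadcast into all
 * four bytes (what strchr builds up in r5 before its loop). */
static int has_char_byte(uint32_t word, unsigned char c)
{
	uint32_t pattern = 0x01010101u * c;

	return has_zero_byte(word ^ pattern);
}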
diff --git a/arch/arc/lib/strcmp.S b/arch/arc/lib/strcmp.S
new file mode 100644
index 0000000..5dc802b
--- /dev/null
+++ b/arch/arc/lib/strcmp.S
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* This is optimized primarily for the ARC700.
+   It would be possible to speed up the loops by one cycle / word
+   (respectively one cycle / byte) by forcing double source 1 alignment, unrolling
+   by a factor of two, and speculatively loading the second word / byte of
+   source 1; however, that would increase the overhead for loop setup / finish,
+   and strcmp might often terminate early.  */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strcmp
+       or      r2,r0,r1
+       bmsk_s  r2,r2,1
+       brne    r2,0,.Lcharloop
+       mov_s   r12,0x01010101
+       ror     r5,r12
+.Lwordloop:
+       ld.ab   r2,[r0,4]
+       ld.ab   r3,[r1,4]
+       nop_s
+       sub     r4,r2,r12
+       bic     r4,r4,r2
+       and     r4,r4,r5
+       brne    r4,0,.Lfound0
+       breq    r2,r3,.Lwordloop
+#ifdef __LITTLE_ENDIAN__
+       xor     r0,r2,r3        ; mask for difference
+       sub_s   r1,r0,1
+       bic_s   r0,r0,r1        ; mask for least significant difference bit
+       sub     r1,r5,r0
+       xor     r0,r5,r1        ; mask for least significant difference byte
+       and_s   r2,r2,r0
+       and_s   r3,r3,r0
+#endif /* LITTLE ENDIAN */
+       cmp_s   r2,r3
+       mov_s   r0,1
+       j_s.d   [blink]
+       bset.lo r0,r0,31
+
+       .balign 4
+#ifdef __LITTLE_ENDIAN__
+.Lfound0:
+       xor     r0,r2,r3        ; mask for difference
+       or      r0,r0,r4        ; or in zero indicator
+       sub_s   r1,r0,1
+       bic_s   r0,r0,r1        ; mask for least significant difference bit
+       sub     r1,r5,r0
+       xor     r0,r5,r1        ; mask for least significant difference byte
+       and_s   r2,r2,r0
+       and_s   r3,r3,r0
+       sub.f   r0,r2,r3
+       mov.hi  r0,1
+       j_s.d   [blink]
+       bset.lo r0,r0,31
+#else /* BIG ENDIAN */
+       /* The zero-detection above can mis-detect 0x01 bytes as zeroes
+          because of carry propagation from a less significant zero byte.
+          We can compensate for this by checking that bit0 is zero.
+          This compensation is not necessary in the step where we
+          get a low estimate for r2, because in any affected bytes
+          we already have 0x00 or 0x01, which will remain unchanged
+          when bit 7 is cleared.  */
+       .balign 4
+.Lfound0:
+       lsr     r0,r4,8
+       lsr_s   r1,r2
+       bic_s   r2,r2,r0        ; get low estimate for r2 and get ...
+       bic_s   r0,r0,r1        ; <this is the adjusted mask for zeros>
+       or_s    r3,r3,r0        ; ... high estimate r3 so that r2 > r3 will ...
+       cmp_s   r3,r2           ; ... be independent of trailing garbage
+       or_s    r2,r2,r0        ; likewise for r3 > r2
+       bic_s   r3,r3,r0
+       rlc     r0,0            ; r0 := r2 > r3 ? 1 : 0
+       cmp_s   r2,r3
+       j_s.d   [blink]
+       bset.lo r0,r0,31
+#endif /* ENDIAN */
+
+       .balign 4
+.Lcharloop:
+       ldb.ab  r2,[r0,1]
+       ldb.ab  r3,[r1,1]
+       nop_s
+       breq    r2,0,.Lcmpend
+       breq    r2,r3,.Lcharloop
+.Lcmpend:
+       j_s.d   [blink]
+       sub     r0,r2,r3
+ARC_EXIT strcmp
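
The big-endian note above about 0x01 bytes being mis-detected is easy to reproduce; a small self-contained C example (illustrative only, not part of this patch):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Bytes, most to least significant: 'A', 'A', 0x01, 0x00 */
	uint32_t x = 0x41410100;
	uint32_t z = (x - 0x01010101u) & ~x & 0x80808080u;

	assert(z & 0x00000080u);	/* the real zero byte is flagged ... */
	assert(z & 0x00008000u);	/* ... and so is the 0x01 byte, purely
					 * because of the borrow from below;
					 * the code above compensates by also
					 * checking that bit 0 is zero */
	assert(!(z & 0x80800000u));	/* the 'A' bytes are not flagged */
	return 0;
}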
diff --git a/arch/arc/lib/strcpy-700.S b/arch/arc/lib/strcpy-700.S
new file mode 100644
index 0000000..b7ca4ae
--- /dev/null
+++ b/arch/arc/lib/strcpy-700.S
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/* If dst and src are 4 byte aligned, copy 8 bytes at a time.
+   If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
+   it 8 byte aligned.  Thus, we can do a little read-ahead, without
+   dereferencing a cache line that we should not touch.
+   Note that short and long instructions have been scheduled to avoid
+   branch stalls.
+   The beq_s to r3z could be made unaligned & long to avoid a stall
+   there, but it is not likely to be taken often, and it
+   would also be likely to cost an unaligned mispredict at the next call.  */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strcpy
+       or      r2,r0,r1
+       bmsk_s  r2,r2,1
+       brne.d  r2,0,charloop
+       mov_s   r10,r0
+       ld_s    r3,[r1,0]
+       mov     r8,0x01010101
+       bbit0.d r1,2,loop_start
+       ror     r12,r8
+       sub     r2,r3,r8
+       bic_s   r2,r2,r3
+       tst_s   r2,r12
+       bne     r3z
+       mov_s   r4,r3
+       .balign 4
+loop:
+       ld.a    r3,[r1,4]
+       st.ab   r4,[r10,4]
+loop_start:
+       ld.a    r4,[r1,4]
+       sub     r2,r3,r8
+       bic_s   r2,r2,r3
+       tst_s   r2,r12
+       bne_s   r3z
+       st.ab   r3,[r10,4]
+       sub     r2,r4,r8
+       bic     r2,r2,r4
+       tst     r2,r12
+       beq     loop
+       mov_s   r3,r4
+#ifdef __LITTLE_ENDIAN__
+r3z:   bmsk.f  r1,r3,7
+       lsr_s   r3,r3,8
+#else
+r3z:   lsr.f   r1,r3,24
+       asl_s   r3,r3,8
+#endif
+       bne.d   r3z
+       stb.ab  r1,[r10,1]
+       j_s     [blink]
+
+       .balign 4
+charloop:
+       ldb.ab  r3,[r1,1]
+
+
+       brne.d  r3,0,charloop
+       stb.ab  r3,[r10,1]
+       j       [blink]
+ARC_EXIT strcpy
diff --git a/arch/arc/lib/strlen.S b/arch/arc/lib/strlen.S
new file mode 100644
index 0000000..39759e0
--- /dev/null
+++ b/arch/arc/lib/strlen.S
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/linkage.h>
+
+ARC_ENTRY strlen
+       or      r3,r0,7
+       ld      r2,[r3,-7]
+       ld.a    r6,[r3,-3]
+       mov     r4,0x01010101
+       ; uses long immediate
+#ifdef __LITTLE_ENDIAN__
+       asl_s   r1,r0,3
+       btst_s  r0,2
+       asl     r7,r4,r1
+       ror     r5,r4
+       sub     r1,r2,r7
+       bic_s   r1,r1,r2
+       mov.eq  r7,r4
+       sub     r12,r6,r7
+       bic     r12,r12,r6
+       or.eq   r12,r12,r1
+       and     r12,r12,r5
+       brne    r12,0,.Learly_end
+#else /* BIG ENDIAN */
+       ror     r5,r4
+       btst_s  r0,2
+       mov_s   r1,31
+       sub3    r7,r1,r0
+       sub     r1,r2,r4
+       bic_s   r1,r1,r2
+       bmsk    r1,r1,r7
+       sub     r12,r6,r4
+       bic     r12,r12,r6
+       bmsk.ne r12,r12,r7
+       or.eq   r12,r12,r1
+       and     r12,r12,r5
+       brne    r12,0,.Learly_end
+#endif /* ENDIAN */
+
+.Loop:
+       ld_s    r2,[r3,4]
+       ld.a    r6,[r3,8]
+       ; stall for load result
+       sub     r1,r2,r4
+       bic_s   r1,r1,r2
+       sub     r12,r6,r4
+       bic     r12,r12,r6
+       or      r12,r12,r1
+       and     r12,r12,r5
+       breq r12,0,.Loop
+.Lend:
+       and.f   r1,r1,r5
+       sub.ne  r3,r3,4
+       mov.eq  r1,r12
+#ifdef __LITTLE_ENDIAN__
+       sub_s   r2,r1,1
+       bic_s   r2,r2,r1
+       norm    r1,r2
+       sub_s   r0,r0,3
+       lsr_s   r1,r1,3
+       sub         r0,r3,r0
+       j_s.d   [blink]
+       sub         r0,r0,r1
+#else /* BIG ENDIAN */
+       lsr_s   r1,r1,7
+       mov.eq  r2,r6
+       bic_s   r1,r1,r2
+       norm    r1,r1
+       sub         r0,r3,r0
+       lsr_s   r1,r1,3
+       j_s.d   [blink]
+       add         r0,r0,r1
+#endif /* ENDIAN */
+.Learly_end:
+       b.d     .Lend
+       sub_s.ne r1,r1,r1
+ARC_EXIT strlen
-- 
1.8.5.3
