On 8/25/23 03:20, Yeqi Fu wrote:
This commit implements a shared library, where native functions are
rewritten as special instructions. At runtime, user programs load
the shared library, and special instructions are executed when
native functions are called.
Signed-off-by: Yeqi Fu <fufuyqqq...@gmail.com>
...
diff --git a/common-user/native/libnative.S b/common-user/native/libnative.S
new file mode 100644
index 0000000000..3692eaa3cf
--- /dev/null
+++ b/common-user/native/libnative.S
@@ -0,0 +1,69 @@
+#if defined(i386) || defined(x86_64)
+/*
+ * An unused instruction is utilized to mark a native call.
+ */
+#define __SPECIAL_INSTR .byte 0x0f, 0xff;
+#define __RET_INSTR ret;
+#endif
+
+#if defined(arm) || defined(aarch64)
+/*
+ * HLT is an invalid instruction for userspace programs,
+ * and is used to mark a native call.
+ */
+#define __SPECIAL_INSTR hlt 0xffff;
+#if defined(aarch64)
+#define __RET_INSTR ret;
+#else
+#define __RET_INSTR bx lr;
+#endif
+#endif
+
+
+#if defined(mips) || defined(mips64)
+/*
+ * The syscall instruction contains 20 unused bits, which are typically
+ * set to 0. These bits can be used to store non-zero data,
+ * distinguishing them from a regular syscall instruction.
+ */
+#define __SPECIAL_INSTR syscall 0xffff;
+#define __RET_INSTR jr $ra;
+#endif
+
+/* Symbols of native functions */
+.section .data
+sym_memset: .asciz "memset"
+sym_memcpy: .asciz "memcpy"
+sym_strncpy: .asciz "strncpy"
+sym_memcmp: .asciz "memcmp"
+sym_strncmp: .asciz "strncmp"
+sym_strcpy: .asciz "strcpy"
+sym_strcat: .asciz "strcat"
+sym_strcmp: .asciz "strcmp"
+
+.macro define_function name
+\name:
+#if defined(x86_64) || defined(aarch64)
+ __SPECIAL_INSTR
+ .quad sym_\name
+ __RET_INSTR
+#elif defined(mips64)
+.align 4
+ __SPECIAL_INSTR
+ .quad sym_\name
+ __RET_INSTR
+#elif defined(i386) || defined(mips) || defined(arm)
+ __SPECIAL_INSTR
+ .long sym_\name
+ __RET_INSTR
+#endif
+.endm
+
+define_function memcpy
+define_function strncpy
+define_function memset
+define_function memcmp
+define_function strncmp
+define_function strcpy
+define_function strcat
+define_function strcmp
This cannot possibly work: none of the symbols are marked .globl, and therefore none of
them are exported from your libnative.so.
Furthermore, you placed your strings in .data, but then failed to change back to .text, so
none of the instructions are in an executable load segment.
I conclude that your testing succeeded only because no library calls were
replaced.
This is not sufficient testing.
In review of previous versions, I have mentioned that the x86 UD0 instruction is longer
than just the two bytes 0x0f 0xff -- it is at minimum 3 bytes, because a ModRM byte follows
the opcode -- and moreover that its memory operand can be used in the assembler to produce
pc-relative values.
We can clean up the assembly as follows.
r~
-----
/*
 * special_instr sym
 *
 * Emit the marker instruction that flags a native call, together with a
 * pc-relative reference to the label given as "sym".
 *
 *   sym   label whose pc-relative offset is emitted with the marker.
 *
 * On x86 the offset is carried in the memory operand of UD0 itself.
 * On arm, aarch64 and mips the offset is a separate .word placed
 * directly after the marker instruction and measured from the address
 * of that .word.  Any other target stops the build in the preprocessor.
 *
 * NOTE(review): how the emulator decodes these sequences is not visible
 * here -- keep the layout in sync with the decoder.
 */
.macro special_instr sym
#if defined(__i386__)
/* Absolute displacement = sym - label 1, where label 1 sits at the end of the instruction. */
ud0 \sym-1f, %eax; 1:
#elif defined(__x86_64__)
/* RIP-relative memory operand referencing sym. */
ud0 \sym(%rip), %eax
#elif defined(__arm__) || defined(__aarch64__)
/* HLT marker followed by the offset of sym from the .word (label 1 is the .word itself). */
hlt 0xffff
1: .word \sym - 1b
#elif defined(__mips__)
/* SYSCALL with a non-zero code field as marker, then the offset of sym from the .word. */
syscall 0xffff
1: .word \sym - 1b
#else
/* Unsupported target: fail at preprocessing time. */
# error
#endif
.endm
/*
 * ret_instr
 *
 * Emit the plain function-return instruction of the target architecture.
 * Takes no arguments.  A target that matches none of the branches below
 * stops the build in the preprocessor.
 */
.macro ret_instr
#if defined(__mips__)
        jr      $ra
#elif defined(__arm__)
        bx      lr
#elif defined(__aarch64__) || defined(__x86_64__) || defined(__i386__)
        ret
#else
# error
#endif
.endm
/*
 * define_function name
 *
 * Emit an exported stub for one native function.
 *
 *   name   symbol to define; also used verbatim as the NUL-terminated
 *          string that the stub references.
 *
 * Layout produced:
 *   .text    name:  special_instr <string>   (marker + pc-relative ref)
 *                   ret_instr
 *   .rodata  9:     .asciz "name"
 *
 * The symbol is global, typed as a function and sized, so that it is
 * exported from the shared library.  Both sections are entered with
 * .pushsection/.popsection, so the section that was current at the
 * point of invocation is restored when the macro ends.  These are
 * ELF-only directives, like the .type and .size already in use.
 *
 * Numeric label 9 is reserved for the string; special_instr uses label 1.
 */
.macro define_function name
        .pushsection .text
        .globl  \name
        .type   \name, %function
\name:
        special_instr 9f
        ret_instr
        .size   \name, . - \name
        .popsection

        .pushsection .rodata
9:      .asciz  "\name"
        .popsection
.endm
/*
 * Instantiate one stub per function name via define_function.
 * To add another function, append one more define_function line.
 */
define_function memcmp
define_function memcpy
define_function memset
define_function strcat
define_function strcmp
define_function strcpy
define_function strncmp
define_function strncpy