tlively created this revision.
tlively added reviewers: aheejin, sunfish.
Herald added subscribers: cfe-commits, jgravelle-google, sbc100, mgorny, 
dschuff.
Herald added a project: clang.
tlively updated this revision to Diff 253249.
tlively added a comment.

- Update license to match xmmintrin.h format


As the WebAssembly SIMD proposal nears stabilization, there is a desire
to use it with toolchains other than Emscripten. Moving the intrinsics
header into clang will make it available to WASI toolchains as well.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D76959

Files:
  clang/lib/Headers/CMakeLists.txt
  clang/lib/Headers/wasm_simd128.h

Index: clang/lib/Headers/wasm_simd128.h
===================================================================
--- /dev/null
+++ clang/lib/Headers/wasm_simd128.h
@@ -0,0 +1,1240 @@
+/*===---- wasm_simd128.h - WebAssembly portable SIMD intrinsics ------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+// User-facing type
+typedef int32_t v128_t __attribute__((__vector_size__(16), __aligned__(16)));
+
+// Internal types determined by clang builtin definitions
+typedef int32_t __v128_u __attribute__((__vector_size__(16), __aligned__(1)));
+typedef char __i8x16 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef unsigned char __u8x16
+    __attribute__((__vector_size__(16), __aligned__(16)));
+typedef short __i16x8 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef unsigned short __u16x8
+    __attribute__((__vector_size__(16), __aligned__(16)));
+typedef int __i32x4 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef unsigned int __u32x4
+    __attribute__((__vector_size__(16), __aligned__(16)));
+typedef long long __i64x2 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef unsigned long long __u64x2
+    __attribute__((__vector_size__(16), __aligned__(16)));
+typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16)));
+
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("simd128"),        \
+                 __min_vector_width__(128)))
+
+#define __REQUIRE_CONSTANT(e)                                                  \
+  _Static_assert(__builtin_constant_p(e), "Expected constant")
+
+// v128_t wasm_v128_load(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load(const void *__mem) {
+  // UB-free unaligned access copied from xmmintrin.h
+  struct __wasm_v128_load_struct {
+    __v128_u __v;
+  } __attribute__((__packed__, __may_alias__));
+  return ((const struct __wasm_v128_load_struct *)__mem)->__v;
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_v8x16_load_splat(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_v8x16_load_splat(const void *__mem) {
+  struct __wasm_v8x16_load_splat_struct {
+    char __v;
+  } __attribute__((__packed__, __may_alias__));
+  char v = ((const struct __wasm_v8x16_load_splat_struct *)__mem)->__v;
+  return (v128_t)(__i8x16){v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
+}
+
+// v128_t wasm_v16x8_load_splat(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_v16x8_load_splat(const void *__mem) {
+  struct __wasm_v16x8_load_splat_struct {
+    short __v;
+  } __attribute__((__packed__, __may_alias__));
+  short v = ((const struct __wasm_v16x8_load_splat_struct *)__mem)->__v;
+  return (v128_t)(__i16x8){v, v, v, v, v, v, v, v};
+}
+
+// v128_t wasm_v32x4_load_splat(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_v32x4_load_splat(const void *__mem) {
+  struct __wasm_v32x4_load_splat_struct {
+    int __v;
+  } __attribute__((__packed__, __may_alias__));
+  int v = ((const struct __wasm_v32x4_load_splat_struct *)__mem)->__v;
+  return (v128_t)(__i32x4){v, v, v, v};
+}
+
+// v128_t wasm_v64x2_load_splat(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_v64x2_load_splat(const void *__mem) {
+  struct __wasm_v64x2_load_splat_struct {
+    long long __v;
+  } __attribute__((__packed__, __may_alias__));
+  long long v = ((const struct __wasm_v64x2_load_splat_struct *)__mem)->__v;
+  return (v128_t)(__i64x2){v, v};
+}
+
+// v128_t wasm_i16x8_load_8x8(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i16x8_load_8x8(const void *__mem) {
+  typedef signed char __i8x8
+      __attribute__((__vector_size__(8), __aligned__(8)));
+  struct __wasm_i16x8_load_8x8_struct {
+    __i8x8 __v;
+  } __attribute__((__packed__, __may_alias__));
+  __i8x8 v = ((const struct __wasm_i16x8_load_8x8_struct *)__mem)->__v;
+  return (v128_t) __builtin_convertvector(v, __i16x8);
+}
+
+// v128_t wasm_u16x8_load_8x8(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u16x8_load_8x8(const void *__mem) {
+  typedef unsigned char __u8x8
+      __attribute__((__vector_size__(8), __aligned__(8)));
+  struct __wasm_u16x8_load_8x8_struct {
+    __u8x8 __v;
+  } __attribute__((__packed__, __may_alias__));
+  __u8x8 v = ((const struct __wasm_u16x8_load_8x8_struct *)__mem)->__v;
+  return (v128_t) __builtin_convertvector(v, __u16x8);
+}
+
+// v128_t wasm_i32x4_load_16x4(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i32x4_load_16x4(const void *__mem) {
+  typedef short __i16x4 __attribute__((__vector_size__(8), __aligned__(8)));
+  struct __wasm_i32x4_load_16x4_struct {
+    __i16x4 __v;
+  } __attribute__((__packed__, __may_alias__));
+  __i16x4 v = ((const struct __wasm_i32x4_load_16x4_struct *)__mem)->__v;
+  return (v128_t) __builtin_convertvector(v, __i32x4);
+}
+
+// v128_t wasm_u32x4_load_16x4(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u32x4_load_16x4(const void *__mem) {
+  typedef unsigned short __u16x4
+      __attribute__((__vector_size__(8), __aligned__(8)));
+  struct __wasm_u32x4_load_16x4_struct {
+    __u16x4 __v;
+  } __attribute__((__packed__, __may_alias__));
+  __u16x4 v = ((const struct __wasm_u32x4_load_16x4_struct *)__mem)->__v;
+  return (v128_t) __builtin_convertvector(v, __u32x4);
+}
+
+// v128_t wasm_i64x2_load_32x2(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i64x2_load_32x2(const void *__mem) {
+  typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8)));
+  struct __wasm_i64x2_load_32x2_struct {
+    __i32x2 __v;
+  } __attribute__((__packed__, __may_alias__));
+  __i32x2 v = ((const struct __wasm_i64x2_load_32x2_struct *)__mem)->__v;
+  return (v128_t) __builtin_convertvector(v, __i64x2);
+}
+
+// v128_t wasm_u64x2_load_32x2(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u64x2_load_32x2(const void *__mem) {
+  typedef unsigned int __u32x2
+      __attribute__((__vector_size__(8), __aligned__(8)));
+  struct __wasm_u64x2_load_32x2_struct {
+    __u32x2 __v;
+  } __attribute__((__packed__, __may_alias__));
+  __u32x2 v = ((const struct __wasm_u64x2_load_32x2_struct *)__mem)->__v;
+  return (v128_t) __builtin_convertvector(v, __u64x2);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// wasm_v128_store(void* mem, v128_t a)
+static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void *__mem,
+                                                          v128_t __a) {
+  // UB-free unaligned access copied from xmmintrin.h
+  struct __wasm_v128_store_struct {
+    __v128_u __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __wasm_v128_store_struct *)__mem)->__v = __a;
+}
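+
+// A minimal usage sketch (illustrative only): since both the load and the
+// store tolerate unaligned pointers, a 16-byte copy can be written as
+//
+//   void copy16(void *dst, const void *src) {
+//     wasm_v128_store(dst, wasm_v128_load(src));
+//   }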
+
+// wasm_i8x16_make(...)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_make(
+    int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5, int8_t c6,
+    int8_t c7, int8_t c8, int8_t c9, int8_t c10, int8_t c11, int8_t c12,
+    int8_t c13, int8_t c14, int8_t c15) {
+  return (v128_t)(__i8x16){c0, c1, c2,  c3,  c4,  c5,  c6,  c7,
+                           c8, c9, c10, c11, c12, c13, c14, c15};
+}
+
+// wasm_i16x8_make(...)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i16x8_make(int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4,
+                int16_t c5, int16_t c6, int16_t c7) {
+  return (v128_t)(__i16x8){c0, c1, c2, c3, c4, c5, c6, c7};
+}
+
+// wasm_i32x4_make(...)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_make(int32_t c0,
+                                                            int32_t c1,
+                                                            int32_t c2,
+                                                            int32_t c3) {
+  return (v128_t)(__i32x4){c0, c1, c2, c3};
+}
+
+// wasm_f32x4_make(...)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_make(float c0, float c1,
+                                                            float c2,
+                                                            float c3) {
+  return (v128_t)(__f32x4){c0, c1, c2, c3};
+}
+
+// wasm_i64x2_make(...)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_make(int64_t c0,
+                                                            int64_t c1) {
+  return (v128_t)(__i64x2){c0, c1};
+}
+
+// wasm_f64x2_make(...)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_make(double c0,
+                                                            double c1) {
+  return (v128_t)(__f64x2){c0, c1};
+}
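+
+// An illustrative sketch (the helper name is hypothetical): the make
+// intrinsics build a vector from scalar lanes, which need not be
+// compile-time constants:
+//
+//   v128_t iota4(int base) {
+//     return wasm_i32x4_make(base, base + 1, base + 2, base + 3);
+//   }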
+
+// v128_t wasm_i8x16_const(...)
+#define wasm_i8x16_const(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11,     \
+                         c12, c13, c14, c15)                                   \
+  __extension__({                                                              \
+    __REQUIRE_CONSTANT(c0);                                                    \
+    __REQUIRE_CONSTANT(c1);                                                    \
+    __REQUIRE_CONSTANT(c2);                                                    \
+    __REQUIRE_CONSTANT(c3);                                                    \
+    __REQUIRE_CONSTANT(c4);                                                    \
+    __REQUIRE_CONSTANT(c5);                                                    \
+    __REQUIRE_CONSTANT(c6);                                                    \
+    __REQUIRE_CONSTANT(c7);                                                    \
+    __REQUIRE_CONSTANT(c8);                                                    \
+    __REQUIRE_CONSTANT(c9);                                                    \
+    __REQUIRE_CONSTANT(c10);                                                   \
+    __REQUIRE_CONSTANT(c11);                                                   \
+    __REQUIRE_CONSTANT(c12);                                                   \
+    __REQUIRE_CONSTANT(c13);                                                   \
+    __REQUIRE_CONSTANT(c14);                                                   \
+    __REQUIRE_CONSTANT(c15);                                                   \
+    (v128_t)(__i8x16){c0, c1, c2,  c3,  c4,  c5,  c6,  c7,                     \
+                      c8, c9, c10, c11, c12, c13, c14, c15};                   \
+  })
+
+// v128_t wasm_i16x8_const(...)
+#define wasm_i16x8_const(c0, c1, c2, c3, c4, c5, c6, c7)                       \
+  __extension__({                                                              \
+    __REQUIRE_CONSTANT(c0);                                                    \
+    __REQUIRE_CONSTANT(c1);                                                    \
+    __REQUIRE_CONSTANT(c2);                                                    \
+    __REQUIRE_CONSTANT(c3);                                                    \
+    __REQUIRE_CONSTANT(c4);                                                    \
+    __REQUIRE_CONSTANT(c5);                                                    \
+    __REQUIRE_CONSTANT(c6);                                                    \
+    __REQUIRE_CONSTANT(c7);                                                    \
+    (v128_t)(__i16x8){c0, c1, c2, c3, c4, c5, c6, c7};                         \
+  })
+
+// v128_t wasm_i32x4_const(...)
+#define wasm_i32x4_const(c0, c1, c2, c3)                                       \
+  __extension__({                                                              \
+    __REQUIRE_CONSTANT(c0);                                                    \
+    __REQUIRE_CONSTANT(c1);                                                    \
+    __REQUIRE_CONSTANT(c2);                                                    \
+    __REQUIRE_CONSTANT(c3);                                                    \
+    (v128_t)(__i32x4){c0, c1, c2, c3};                                         \
+  })
+
+// v128_t wasm_f32x4_const(...)
+#define wasm_f32x4_const(c0, c1, c2, c3)                                       \
+  __extension__({                                                              \
+    __REQUIRE_CONSTANT(c0);                                                    \
+    __REQUIRE_CONSTANT(c1);                                                    \
+    __REQUIRE_CONSTANT(c2);                                                    \
+    __REQUIRE_CONSTANT(c3);                                                    \
+    (v128_t)(__f32x4){c0, c1, c2, c3};                                         \
+  })
+
+// v128_t wasm_i64x2_const(...)
+#define wasm_i64x2_const(c0, c1)                                               \
+  __extension__({                                                              \
+    __REQUIRE_CONSTANT(c0);                                                    \
+    __REQUIRE_CONSTANT(c1);                                                    \
+    (v128_t)(__i64x2){c0, c1};                                                 \
+  })
+
+// v128_t wasm_f64x2_const(...)
+#define wasm_f64x2_const(c0, c1)                                               \
+  __extension__({                                                              \
+    __REQUIRE_CONSTANT(c0);                                                    \
+    __REQUIRE_CONSTANT(c1);                                                    \
+    (v128_t)(__f64x2){c0, c1};                                                 \
+  })
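+
+// Unlike the make intrinsics above, the const macros require every argument
+// to be a compile-time constant, which __REQUIRE_CONSTANT enforces with a
+// _Static_assert. An illustrative sketch, inside some function body:
+//
+//   v128_t ones = wasm_i32x4_const(1, 1, 1, 1); // OK
+//   v128_t bad = wasm_i32x4_const(n, 1, 1, 1);  // error unless n is a
+//                                               // constant expression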
+
+// v128_t wasm_i8x16_splat(int8_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t a) {
+  return (v128_t)(__i8x16){a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a};
+}
+
+// int8_t wasm_i8x16_extract_lane(v128_t a, imm i)
+#define wasm_i8x16_extract_lane(a, i)                                          \
+  (__builtin_wasm_extract_lane_s_i8x16((__i8x16)(a), i))
+
+// uint8_t wasm_u8x16_extract_lane(v128_t a, imm i)
+#define wasm_u8x16_extract_lane(a, i)                                          \
+  (__builtin_wasm_extract_lane_u_i8x16((__i8x16)(a), i))
+
+// v128_t wasm_i8x16_replace_lane(v128_t a, imm i, int8_t b)
+#define wasm_i8x16_replace_lane(a, i, b)                                       \
+  ((v128_t)__builtin_wasm_replace_lane_i8x16((__i8x16)(a), i, b))
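+
+// Lane access sketch (illustrative only); the lane index must be an integer
+// constant expression:
+//
+//   v128_t v = wasm_i8x16_splat(42);
+//   int8_t x = wasm_i8x16_extract_lane(v, 0); // 42
+//   v = wasm_i8x16_replace_lane(v, 0, 7);     // lane 0 becomes 7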
+
+// v128_t wasm_i16x8_splat(int16_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t a) {
+  return (v128_t)(__i16x8){a, a, a, a, a, a, a, a};
+}
+
+// int16_t wasm_i16x8_extract_lane(v128_t a, imm i)
+#define wasm_i16x8_extract_lane(a, i)                                          \
+  (__builtin_wasm_extract_lane_s_i16x8((__i16x8)(a), i))
+
+// uint16_t wasm_u16x8_extract_lane(v128_t a, imm i)
+#define wasm_u16x8_extract_lane(a, i)                                          \
+  (__builtin_wasm_extract_lane_u_i16x8((__i16x8)(a), i))
+
+// v128_t wasm_i16x8_replace_lane(v128_t a, imm i, int16_t b)
+#define wasm_i16x8_replace_lane(a, i, b)                                       \
+  ((v128_t)__builtin_wasm_replace_lane_i16x8((__i16x8)(a), i, b))
+
+// v128_t wasm_i32x4_splat(int32_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t a) {
+  return (v128_t)(__i32x4){a, a, a, a};
+}
+
+// int32_t wasm_i32x4_extract_lane(v128_t a, imm i)
+#define wasm_i32x4_extract_lane(a, i)                                          \
+  (__builtin_wasm_extract_lane_i32x4((__i32x4)(a), i))
+
+// v128_t wasm_i32x4_replace_lane(v128_t a, imm i, int32_t b)
+#define wasm_i32x4_replace_lane(a, i, b)                                       \
+  ((v128_t)__builtin_wasm_replace_lane_i32x4((__i32x4)(a), i, b))
+
+// v128_t wasm_i64x2_splat(int64_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t a) {
+  return (v128_t)(__i64x2){a, a};
+}
+
+// int64_t wasm_i64x2_extract_lane(v128_t a, imm i)
+#define wasm_i64x2_extract_lane(a, i)                                          \
+  (__builtin_wasm_extract_lane_i64x2((__i64x2)(a), i))
+
+// v128_t wasm_i64x2_replace_lane(v128_t a, imm i, int64_t b)
+#define wasm_i64x2_replace_lane(a, i, b)                                       \
+  ((v128_t)__builtin_wasm_replace_lane_i64x2((__i64x2)(a), i, b))
+
+// v128_t wasm_f32x4_splat(float a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_splat(float a) {
+  return (v128_t)(__f32x4){a, a, a, a};
+}
+
+// float wasm_f32x4_extract_lane(v128_t a, imm i)
+#define wasm_f32x4_extract_lane(a, i)                                          \
+  (__builtin_wasm_extract_lane_f32x4((__f32x4)(a), i))
+
+// v128_t wasm_f32x4_replace_lane(v128_t a, imm i, float b)
+#define wasm_f32x4_replace_lane(a, i, b)                                       \
+  ((v128_t)__builtin_wasm_replace_lane_f32x4((__f32x4)(a), i, b))
+
+// v128_t wasm_f64x2_splat(double a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_splat(double a) {
+  return (v128_t)(__f64x2){a, a};
+}
+
+// double wasm_f64x2_extract_lane(v128_t a, imm i)
+#define wasm_f64x2_extract_lane(a, i)                                          \
+  (__builtin_wasm_extract_lane_f64x2((__f64x2)(a), i))
+
+// v128_t wasm_f64x2_replace_lane(v128_t a, imm i, double b)
+#define wasm_f64x2_replace_lane(a, i, b)                                       \
+  ((v128_t)__builtin_wasm_replace_lane_f64x2((__f64x2)(a), i, b))
+
+// v128_t wasm_i8x16_eq(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_eq(v128_t a, v128_t b) {
+  return (v128_t)((__i8x16)a == (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_ne(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ne(v128_t a, v128_t b) {
+  return (v128_t)((__i8x16)a != (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_lt(v128_t a, v128_t b) {
+  return (v128_t)((__i8x16)a < (__i8x16)b);
+}
+
+// v128_t wasm_u8x16_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_lt(v128_t a, v128_t b) {
+  return (v128_t)((__u8x16)a < (__u8x16)b);
+}
+
+// v128_t wasm_i8x16_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_gt(v128_t a, v128_t b) {
+  return (v128_t)((__i8x16)a > (__i8x16)b);
+}
+
+// v128_t wasm_u8x16_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_gt(v128_t a, v128_t b) {
+  return (v128_t)((__u8x16)a > (__u8x16)b);
+}
+
+// v128_t wasm_i8x16_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_le(v128_t a, v128_t b) {
+  return (v128_t)((__i8x16)a <= (__i8x16)b);
+}
+
+// v128_t wasm_u8x16_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_le(v128_t a, v128_t b) {
+  return (v128_t)((__u8x16)a <= (__u8x16)b);
+}
+
+// v128_t wasm_i8x16_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ge(v128_t a, v128_t b) {
+  return (v128_t)((__i8x16)a >= (__i8x16)b);
+}
+
+// v128_t wasm_u8x16_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_ge(v128_t a, v128_t b) {
+  return (v128_t)((__u8x16)a >= (__u8x16)b);
+}
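+
+// Comparison results are lane masks: a lane is all ones where the comparison
+// holds and all zeros where it does not, so they compose directly with
+// wasm_v128_bitselect (defined below). A sketch of a per-lane signed minimum
+// built only from intrinsics in this header:
+//
+//   v128_t min_i8x16(v128_t a, v128_t b) {
+//     return wasm_v128_bitselect(a, b, wasm_i8x16_lt(a, b));
+//   }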
+
+// v128_t wasm_i16x8_eq(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_eq(v128_t a, v128_t b) {
+  return (v128_t)((__i16x8)a == (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_ne(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_ne(v128_t a, v128_t b) {
+  return (v128_t)((__u16x8)a != (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_lt(v128_t a, v128_t b) {
+  return (v128_t)((__i16x8)a < (__i16x8)b);
+}
+
+// v128_t wasm_u16x8_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_lt(v128_t a, v128_t b) {
+  return (v128_t)((__u16x8)a < (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_gt(v128_t a, v128_t b) {
+  return (v128_t)((__i16x8)a > (__i16x8)b);
+}
+
+// v128_t wasm_u16x8_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_gt(v128_t a, v128_t b) {
+  return (v128_t)((__u16x8)a > (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_le(v128_t a, v128_t b) {
+  return (v128_t)((__i16x8)a <= (__i16x8)b);
+}
+
+// v128_t wasm_u16x8_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_le(v128_t a, v128_t b) {
+  return (v128_t)((__u16x8)a <= (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_ge(v128_t a, v128_t b) {
+  return (v128_t)((__i16x8)a >= (__i16x8)b);
+}
+
+// v128_t wasm_u16x8_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_ge(v128_t a, v128_t b) {
+  return (v128_t)((__u16x8)a >= (__u16x8)b);
+}
+
+// v128_t wasm_i32x4_eq(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_eq(v128_t a, v128_t b) {
+  return (v128_t)((__i32x4)a == (__i32x4)b);
+}
+
+// v128_t wasm_i32x4_ne(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_ne(v128_t a, v128_t b) {
+  return (v128_t)((__i32x4)a != (__i32x4)b);
+}
+
+// v128_t wasm_i32x4_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_lt(v128_t a, v128_t b) {
+  return (v128_t)((__i32x4)a < (__i32x4)b);
+}
+
+// v128_t wasm_u32x4_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_lt(v128_t a, v128_t b) {
+  return (v128_t)((__u32x4)a < (__u32x4)b);
+}
+
+// v128_t wasm_i32x4_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_gt(v128_t a, v128_t b) {
+  return (v128_t)((__i32x4)a > (__i32x4)b);
+}
+
+// v128_t wasm_u32x4_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_gt(v128_t a, v128_t b) {
+  return (v128_t)((__u32x4)a > (__u32x4)b);
+}
+
+// v128_t wasm_i32x4_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_le(v128_t a, v128_t b) {
+  return (v128_t)((__i32x4)a <= (__i32x4)b);
+}
+
+// v128_t wasm_u32x4_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_le(v128_t a, v128_t b) {
+  return (v128_t)((__u32x4)a <= (__u32x4)b);
+}
+
+// v128_t wasm_i32x4_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_ge(v128_t a, v128_t b) {
+  return (v128_t)((__i32x4)a >= (__i32x4)b);
+}
+
+// v128_t wasm_u32x4_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_ge(v128_t a, v128_t b) {
+  return (v128_t)((__u32x4)a >= (__u32x4)b);
+}
+
+// v128_t wasm_f32x4_eq(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_eq(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a == (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_ne(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_ne(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a != (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_lt(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a < (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_gt(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a > (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_le(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a <= (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_ge(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a >= (__f32x4)b);
+}
+
+// v128_t wasm_f64x2_eq(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_eq(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a == (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_ne(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_ne(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a != (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_lt(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a < (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_gt(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a > (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_le(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a <= (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_ge(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a >= (__f64x2)b);
+}
+
+// v128_t wasm_v128_not(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_not(v128_t a) {
+  return ~a;
+}
+
+// v128_t wasm_v128_and(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_and(v128_t a, v128_t b) {
+  return a & b;
+}
+
+// v128_t wasm_v128_or(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_or(v128_t a, v128_t b) {
+  return a | b;
+}
+
+// v128_t wasm_v128_xor(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_xor(v128_t a, v128_t b) {
+  return a ^ b;
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_v128_andnot(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_andnot(v128_t a,
+                                                             v128_t b) {
+  return a & ~b;
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_v128_bitselect(v128_t a, v128_t b, v128_t mask)
+// For each bit of `mask`, the corresponding result bit is taken from `a` if
+// the mask bit is 1 and from `b` if it is 0.
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_bitselect(v128_t a,
+                                                                v128_t b,
+                                                                v128_t mask) {
+  return (v128_t)__builtin_wasm_bitselect((__i32x4)a, (__i32x4)b,
+                                          (__i32x4)mask);
+}
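+
+// Because selection is per bit rather than per lane, the mask need not come
+// from a comparison. An illustrative sketch that takes the low nibble of
+// each byte from `a` and the high nibble from `b`:
+//
+//   v128_t merge_nibbles(v128_t a, v128_t b) {
+//     return wasm_v128_bitselect(a, b, wasm_i8x16_splat(0x0F));
+//   }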
+
+// v128_t wasm_i8x16_abs(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_abs(v128_t a) {
+  return (v128_t)__builtin_wasm_abs_i8x16((__i8x16)a);
+}
+
+// v128_t wasm_i8x16_neg(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_neg(v128_t a) {
+  return (v128_t)(-(__u8x16)a);
+}
+
+// bool wasm_i8x16_any_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_any_true(v128_t a) {
+  return __builtin_wasm_any_true_i8x16((__i8x16)a);
+}
+
+// bool wasm_i8x16_all_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_all_true(v128_t a) {
+  return __builtin_wasm_all_true_i8x16((__i8x16)a);
+}
+
+// v128_t wasm_i8x16_shl(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i8x16)a << b);
+}
+
+// v128_t wasm_i8x16_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i8x16)a >> b);
+}
+
+// v128_t wasm_u8x16_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__u8x16)a >> b);
+}
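+
+// The two right shifts differ in sign treatment: wasm_i8x16_shr shifts
+// arithmetically, replicating the sign bit, while wasm_u8x16_shr shifts
+// logically, filling with zeros. For example (values assumed):
+//
+//   v128_t v = wasm_i8x16_splat(-2); // 0xFE in each lane
+//   wasm_i8x16_shr(v, 1);            // each lane is -1  (0xFF)
+//   wasm_u8x16_shr(v, 1);            // each lane is 127 (0x7F)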
+
+// v128_t wasm_i8x16_add(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add(v128_t a, v128_t b) {
+  return (v128_t)((__u8x16)a + (__u8x16)b);
+}
+
+// v128_t wasm_i8x16_add_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_add_saturate_s_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_u8x16_add_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_add_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_add_saturate_u_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_sub(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub(v128_t a, v128_t b) {
+  return (v128_t)((__u8x16)a - (__u8x16)b);
+}
+
+// v128_t wasm_i8x16_sub_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_sub_saturate_s_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_u8x16_sub_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_sub_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_sub_saturate_u_i8x16((__i8x16)a, (__i8x16)b);
+}
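+
+// Saturating arithmetic clamps to the lane's range instead of wrapping.
+// For example (values assumed):
+//
+//   v128_t a = wasm_i8x16_splat(100);
+//   wasm_i8x16_add(a, a);          // lanes wrap to -56
+//   wasm_i8x16_add_saturate(a, a); // lanes clamp to 127
+//   wasm_u8x16_add_saturate(a, a); // lanes hold 200, within u8 range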
+
+// v128_t wasm_i8x16_mul(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_mul(v128_t a, v128_t b) {
+  return (v128_t)((__u8x16)a * (__u8x16)b);
+}
+
+// v128_t wasm_i8x16_min_s(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min_s(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_min_s_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_min_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min_u(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_min_u_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_max_s(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_max_s(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_max_s_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_max_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_max_u(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_max_u_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_avgr_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_avgr_u(v128_t a,
+                                                              v128_t b) {
+  return (v128_t)__builtin_wasm_avgr_u_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i16x8_abs(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_abs(v128_t a) {
+  return (v128_t)__builtin_wasm_abs_i16x8((__i16x8)a);
+}
+
+// v128_t wasm_i16x8_neg(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_neg(v128_t a) {
+  return (v128_t)(-(__u16x8)a);
+}
+
+// bool wasm_i16x8_any_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_any_true(v128_t a) {
+  return __builtin_wasm_any_true_i16x8((__i16x8)a);
+}
+
+// bool wasm_i16x8_all_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_all_true(v128_t a) {
+  return __builtin_wasm_all_true_i16x8((__i16x8)a);
+}
+
+// v128_t wasm_i16x8_shl(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shl(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i16x8)a << b);
+}
+
+// v128_t wasm_i16x8_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i16x8)a >> b);
+}
+
+// v128_t wasm_u16x8_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__u16x8)a >> b);
+}
+
+// v128_t wasm_i16x8_add(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add(v128_t a, v128_t b) {
+  return (v128_t)((__u16x8)a + (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_add_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_add_saturate_s_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_u16x8_add_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_add_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_add_saturate_u_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_sub(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub(v128_t a, v128_t b) {
+  return (v128_t)((__i16x8)a - (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_sub_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_sub_saturate_s_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_u16x8_sub_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_sub_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_sub_saturate_u_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_mul(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_mul(v128_t a, v128_t b) {
+  return (v128_t)((__u16x8)a * (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_min_s(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_min_s(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_min_s_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_min_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_min_u(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_min_u_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_max_s(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_max_s(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_max_s_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_max_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_max_u(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_max_u_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_avgr_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_avgr_u(v128_t a,
+                                                              v128_t b) {
+  return (v128_t)__builtin_wasm_avgr_u_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i32x4_abs(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_abs(v128_t a) {
+  return (v128_t)__builtin_wasm_abs_i32x4((__i32x4)a);
+}
+
+// v128_t wasm_i32x4_neg(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_neg(v128_t a) {
+  return (v128_t)(-(__u32x4)a);
+}
+
+// bool wasm_i32x4_any_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_any_true(v128_t a) {
+  return __builtin_wasm_any_true_i32x4((__i32x4)a);
+}
+
+// bool wasm_i32x4_all_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_all_true(v128_t a) {
+  return __builtin_wasm_all_true_i32x4((__i32x4)a);
+}
+
+// v128_t wasm_i32x4_shl(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shl(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i32x4)a << b);
+}
+
+// v128_t wasm_i32x4_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i32x4)a >> b);
+}
+
+// v128_t wasm_u32x4_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__u32x4)a >> b);
+}
+
+// v128_t wasm_i32x4_add(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_add(v128_t a, v128_t b) {
+  return (v128_t)((__u32x4)a + (__u32x4)b);
+}
+
+// v128_t wasm_i32x4_sub(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_sub(v128_t a, v128_t b) {
+  return (v128_t)((__u32x4)a - (__u32x4)b);
+}
+
+// v128_t wasm_i32x4_mul(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_mul(v128_t a, v128_t b) {
+  return (v128_t)((__u32x4)a * (__u32x4)b);
+}
+
+// v128_t wasm_i32x4_min_s(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_min_s(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_min_s_i32x4((__i32x4)a, (__i32x4)b);
+}
+
+// v128_t wasm_i32x4_min_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_min_u(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_min_u_i32x4((__i32x4)a, (__i32x4)b);
+}
+
+// v128_t wasm_i32x4_max_s(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_max_s(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_max_s_i32x4((__i32x4)a, (__i32x4)b);
+}
+
+// v128_t wasm_i32x4_max_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_max_u(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_max_u_i32x4((__i32x4)a, (__i32x4)b);
+}
+
+// v128_t wasm_i64x2_neg(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_neg(v128_t a) {
+  return (v128_t)(-(__u64x2)a);
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// bool wasm_i64x2_any_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_any_true(v128_t a) {
+  return __builtin_wasm_any_true_i64x2((__i64x2)a);
+}
+
+// bool wasm_i64x2_all_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_all_true(v128_t a) {
+  return __builtin_wasm_all_true_i64x2((__i64x2)a);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_i64x2_shl(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shl(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i64x2)a << (int64_t)b);
+}
+
+// v128_t wasm_i64x2_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i64x2)a >> (int64_t)b);
+}
+
+// v128_t wasm_u64x2_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__u64x2)a >> (int64_t)b);
+}
+
+// v128_t wasm_i64x2_add(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_add(v128_t a, v128_t b) {
+  return (v128_t)((__u64x2)a + (__u64x2)b);
+}
+
+// v128_t wasm_i64x2_sub(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_sub(v128_t a, v128_t b) {
+  return (v128_t)((__u64x2)a - (__u64x2)b);
+}
+
+// v128_t wasm_f32x4_abs(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_abs(v128_t a) {
+  return (v128_t)__builtin_wasm_abs_f32x4((__f32x4)a);
+}
+
+// v128_t wasm_f32x4_neg(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_neg(v128_t a) {
+  return (v128_t)(-(__f32x4)a);
+}
+
+// v128_t wasm_f32x4_sqrt(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_sqrt(v128_t a) {
+  return (v128_t)__builtin_wasm_sqrt_f32x4((__f32x4)a);
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_f32x4_qfma(v128_t a, v128_t b, v128_t c)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_qfma(v128_t a, v128_t b,
+                                                            v128_t c) {
+  return (v128_t)__builtin_wasm_qfma_f32x4((__f32x4)a, (__f32x4)b, (__f32x4)c);
+}
+
+// v128_t wasm_f32x4_qfms(v128_t a, v128_t b, v128_t c)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_qfms(v128_t a, v128_t b,
+                                                            v128_t c) {
+  return (v128_t)__builtin_wasm_qfms_f32x4((__f32x4)a, (__f32x4)b, (__f32x4)c);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_f32x4_add(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_add(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a + (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_sub(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_sub(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a - (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_mul(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_mul(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a * (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_div(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_div(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a / (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_min(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_min(v128_t a, v128_t b) {
+  return (v128_t)__builtin_wasm_min_f32x4((__f32x4)a, (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_max(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_max(v128_t a, v128_t b) {
+  return (v128_t)__builtin_wasm_max_f32x4((__f32x4)a, (__f32x4)b);
+}
+
+// v128_t wasm_f64x2_abs(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t a) {
+  return (v128_t)__builtin_wasm_abs_f64x2((__f64x2)a);
+}
+
+// v128_t wasm_f64x2_neg(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_neg(v128_t a) {
+  return (v128_t)(-(__f64x2)a);
+}
+
+// v128_t wasm_f64x2_sqrt(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_sqrt(v128_t a) {
+  return (v128_t)__builtin_wasm_sqrt_f64x2((__f64x2)a);
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_f64x2_qfma(v128_t a, v128_t b, v128_t c)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_qfma(v128_t a, v128_t b,
+                                                            v128_t c) {
+  return (v128_t)__builtin_wasm_qfma_f64x2((__f64x2)a, (__f64x2)b, (__f64x2)c);
+}
+
+// v128_t wasm_f64x2_qfms(v128_t a, v128_t b, v128_t c)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_qfms(v128_t a, v128_t b,
+                                                            v128_t c) {
+  return (v128_t)__builtin_wasm_qfms_f64x2((__f64x2)a, (__f64x2)b, (__f64x2)c);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_f64x2_add(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_add(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a + (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_sub(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_sub(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a - (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_mul(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_mul(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a * (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_div(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_div(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a / (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_min(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_min(v128_t a, v128_t b) {
+  return (v128_t)__builtin_wasm_min_f64x2((__f64x2)a, (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_max(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_max(v128_t a, v128_t b) {
+  return (v128_t)__builtin_wasm_max_f64x2((__f64x2)a, (__f64x2)b);
+}
+
+// v128_t wasm_i32x4_trunc_saturate_f32x4(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i32x4_trunc_saturate_f32x4(v128_t a) {
+  return (v128_t)__builtin_wasm_trunc_saturate_s_i32x4_f32x4((__f32x4)a);
+}
+
+// v128_t wasm_u32x4_trunc_saturate_f32x4(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u32x4_trunc_saturate_f32x4(v128_t a) {
+  return (v128_t)__builtin_wasm_trunc_saturate_u_i32x4_f32x4((__f32x4)a);
+}
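+
+// Unlike a plain float-to-int cast, these conversions cannot trap: NaN lanes
+// become 0 and out-of-range lanes saturate to the integer extremes. An
+// illustrative sketch:
+//
+//   v128_t v = wasm_f32x4_make(1.5f, -1.0f, 3e9f, 0.0f / 0.0f /* NaN */);
+//   wasm_i32x4_trunc_saturate_f32x4(v); // {1, -1, INT32_MAX, 0}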
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_i64x2_trunc_saturate_f64x2(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i64x2_trunc_saturate_f64x2(v128_t a) {
+  return (v128_t)__builtin_wasm_trunc_saturate_s_i64x2_f64x2((__f64x2)a);
+}
+
+// v128_t wasm_u64x2_trunc_saturate_f64x2(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u64x2_trunc_saturate_f64x2(v128_t a) {
+  return (v128_t)__builtin_wasm_trunc_saturate_u_i64x2_f64x2((__f64x2)a);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_f32x4_convert_i32x4(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_convert_i32x4(v128_t a) {
+  return (v128_t) __builtin_convertvector((__i32x4)a, __f32x4);
+}
+
+// v128_t wasm_f32x4_convert_u32x4(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_convert_u32x4(v128_t a) {
+  return (v128_t) __builtin_convertvector((__u32x4)a, __f32x4);
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_f64x2_convert_i64x2(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_convert_i64x2(v128_t a) {
+  return (v128_t) __builtin_convertvector((__i64x2)a, __f64x2);
+}
+
+// v128_t wasm_f64x2_convert_u64x2(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_convert_u64x2(v128_t a) {
+  return (v128_t) __builtin_convertvector((__u64x2)a, __f64x2);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_v8x16_shuffle(v128_t a, v128_t b, c0, ..., c15)
+#define wasm_v8x16_shuffle(a, b, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10,  \
+                           c11, c12, c13, c14, c15)                            \
+  ((v128_t)(__builtin_shufflevector((__u8x16)(a), (__u8x16)(b), c0, c1, c2,    \
+                                    c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, \
+                                    c13, c14, c15)))
+
+// v128_t wasm_v16x8_shuffle(v128_t a, v128_t b, c0, ..., c7)
+#define wasm_v16x8_shuffle(a, b, c0, c1, c2, c3, c4, c5, c6, c7)               \
+  ((v128_t)(__builtin_shufflevector((__u16x8)(a), (__u16x8)(b), c0, c1, c2,    \
+                                    c3, c4, c5, c6, c7)))
+
+// v128_t wasm_v32x4_shuffle(v128_t a, v128_t b, c0, ..., c3)
+#define wasm_v32x4_shuffle(a, b, c0, c1, c2, c3)                               \
+  ((v128_t)(                                                                   \
+      __builtin_shufflevector((__u32x4)(a), (__u32x4)(b), c0, c1, c2, c3)))
+
+// v128_t wasm_v64x2_shuffle(v128_t a, v128_t b, c0, c1)
+#define wasm_v64x2_shuffle(a, b, c0, c1)                                       \
+  ((v128_t)(__builtin_shufflevector((__u64x2)(a), (__u64x2)(b), c0, c1)))
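+
+// Shuffle lane indices must be integer constant expressions; for v8x16,
+// indices 0-15 pick bytes from `a` and 16-31 pick bytes from `b`. An
+// illustrative byte reversal of a single vector:
+//
+//   v128_t reverse_bytes(v128_t v) {
+//     return wasm_v8x16_shuffle(v, v, 15, 14, 13, 12, 11, 10, 9, 8,
+//                               7, 6, 5, 4, 3, 2, 1, 0);
+//   }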
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_v8x16_swizzle(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v8x16_swizzle(v128_t a,
+                                                               v128_t b) {
+  return (v128_t)__builtin_wasm_swizzle_v8x16((__i8x16)a, (__i8x16)b);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_i8x16_narrow_i16x8(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_narrow_i16x8(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_narrow_s_i8x16_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_u8x16_narrow_i16x8(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_narrow_i16x8(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_narrow_u_i8x16_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_narrow_i32x4(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_narrow_i32x4(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_narrow_s_i16x8_i32x4((__i32x4)a, (__i32x4)b);
+}
+
+// v128_t wasm_u16x8_narrow_i32x4(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_narrow_i32x4(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_narrow_u_i16x8_i32x4((__i32x4)a, (__i32x4)b);
+}
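+
+// Narrowing halves the lane width, saturating each lane into the narrower
+// range; the low lanes of the result come from `a` and the high lanes from
+// `b`. For example (values assumed):
+//
+//   v128_t a = wasm_i16x8_splat(300);
+//   v128_t b = wasm_i16x8_splat(-300);
+//   wasm_i8x16_narrow_i16x8(a, b); // lanes 0-7 are 127, lanes 8-15 are -128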
+
+// v128_t wasm_i16x8_widen_low_i8x16(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i16x8_widen_low_i8x16(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_low_s_i16x8_i8x16((__i8x16)a);
+}
+
+// v128_t wasm_i16x8_widen_high_i8x16(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i16x8_widen_high_i8x16(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_high_s_i16x8_i8x16((__i8x16)a);
+}
+
+// v128_t wasm_i16x8_widen_low_u8x16(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i16x8_widen_low_u8x16(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_low_u_i16x8_i8x16((__i8x16)a);
+}
+
+// v128_t wasm_i16x8_widen_high_u8x16(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i16x8_widen_high_u8x16(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_high_u_i16x8_i8x16((__i8x16)a);
+}
+
+// v128_t wasm_i32x4_widen_low_i16x8(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i32x4_widen_low_i16x8(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_low_s_i32x4_i16x8((__i16x8)a);
+}
+
+// v128_t wasm_i32x4_widen_high_i16x8(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i32x4_widen_high_i16x8(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_high_s_i32x4_i16x8((__i16x8)a);
+}
+
+// v128_t wasm_i32x4_widen_low_u16x8(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i32x4_widen_low_u16x8(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_low_u_i32x4_i16x8((__i16x8)a);
+}
+
+// v128_t wasm_i32x4_widen_high_u16x8(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i32x4_widen_high_u16x8(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_high_u_i32x4_i16x8((__i16x8)a);
+}
Index: clang/lib/Headers/CMakeLists.txt
===================================================================
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -109,6 +109,7 @@
   vecintrin.h
   vpclmulqdqintrin.h
   waitpkgintrin.h
+  wasm_simd128.h
   wbnoinvdintrin.h
   wmmintrin.h
   __wmmintrin_aes.h