On Wednesday 10 March 2010, Laurent GUERBY wrote: > On Wed, 2010-03-10 at 21:54 +0200, Siarhei Siamashka wrote: > > I wonder why the compiler does not use real NEON instructions with > > -ffast-math option, it should be quite useful even for scalar code. > > > > something like: > > > > vld1.32 {d0[0]}, [r0] > > vadd.f32 d0, d0, d0 > > vst1.32 {d0[0]}, [r0] > > > > instead of: > > > > flds s0, [r0] > > fadds s0, s0, s0 > > fsts s0, [r0] > > > > for: > > > > *float_ptr = *float_ptr + *float_ptr; > > > > At least NEON is pipelined and should be a lot faster on more complex > > code examples where it can actually benefit from pipelining. On x86, SSE2 > > is used quite nicely for floating point math. > > Hi, > > Please open a report on http://gcc.gnu.org/bugzilla with your test > sources and command line, at least GCC developers will notice there's > interest :).
This sounds reasonable :) > GCC comes with some builtins for neon, they're defined in arm_neon.h > see below. This does not sound like a good idea. If the code has to be modified and changed into something nonportable, there are way better options than intrinsics. Regarding the use of NEON instructions via C++ operator overloading: a test program is attached. # gcc -O3 -mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp -ffast-math -o neon_float neon_float.cpp === ieee754 floats === real 0m3.396s user 0m3.391s sys 0m0.000s === runfast floats === real 0m2.285s user 0m2.273s sys 0m0.008s === NEON C++ wrapper === real 0m1.312s user 0m1.313s sys 0m0.000s But the quality of generated code is quite bad. That's also something to be reported to gcc bugzilla :) -- Best regards, Siarhei Siamashka
#include <stdio.h> #include <arm_neon.h> #if 1 class fast_float { float32x2_t data; public: fast_float(float x) { data = vset_lane_f32(x, data, 0); } fast_float(const fast_float &x) { data = x.data; } fast_float(const float32x2_t &x) { data = x; } operator float () { return vget_lane_f32(data, 0); } friend fast_float operator+(const fast_float &a, const fast_float &b); friend fast_float operator*(const fast_float &a, const fast_float &b); const fast_float &operator+=(fast_float a) { data = vadd_f32(data, a.data); return *this; } }; fast_float operator+(const fast_float &a, const fast_float &b) { return vadd_f32(a.data, b.data); } fast_float operator*(const fast_float &a, const fast_float &b) { return vmul_f32(a.data, b.data); } #else typedef float fast_float; #endif float f(float *a, float *b) { int i; fast_float accumulator = 0; for (i = 0; i < 1024; i += 16) { accumulator += (fast_float)a[i + 0] * (fast_float)b[i + 0]; accumulator += (fast_float)a[i + 1] * (fast_float)b[i + 1]; accumulator += (fast_float)a[i + 2] * (fast_float)b[i + 2]; accumulator += (fast_float)a[i + 3] * (fast_float)b[i + 3]; accumulator += (fast_float)a[i + 4] * (fast_float)b[i + 4]; accumulator += (fast_float)a[i + 5] * (fast_float)b[i + 5]; accumulator += (fast_float)a[i + 6] * (fast_float)b[i + 6]; accumulator += (fast_float)a[i + 7] * (fast_float)b[i + 7]; accumulator += (fast_float)a[i + 8] * (fast_float)b[i + 8]; accumulator += (fast_float)a[i + 9] * (fast_float)b[i + 9]; accumulator += (fast_float)a[i + 10] * (fast_float)b[i + 10]; accumulator += (fast_float)a[i + 11] * (fast_float)b[i + 11]; accumulator += (fast_float)a[i + 12] * (fast_float)b[i + 12]; accumulator += (fast_float)a[i + 13] * (fast_float)b[i + 13]; accumulator += (fast_float)a[i + 14] * (fast_float)b[i + 14]; accumulator += (fast_float)a[i + 15] * (fast_float)b[i + 15]; } return accumulator; } volatile float dummy; float buf1[1024]; float buf2[1024]; int main() { int i; int tmp; __asm__ volatile( "fmrx %[tmp], fpscr\n" 
"orr %[tmp], %[tmp], #(1 << 24)\n" /* flush-to-zero */ "orr %[tmp], %[tmp], #(1 << 25)\n" /* default NaN */ "bic %[tmp], %[tmp], #((1 << 15) | (1 << 12) | (1 << 11) | (1 << 10) | (1 << 9) | (1 << 8))\n" /* clear exception bits */ "fmxr fpscr, %[tmp]\n" : [tmp] "=r" (tmp) ); for (i = 0; i < 1024; i++) { buf1[i] = buf2[i] = i % 16; } for (i = 0; i < 100000; i++) { dummy = f(buf1, buf2); } printf("%f\n", (double)dummy); return 0; }
_______________________________________________ maemo-developers mailing list maemo-developers@maemo.org https://lists.maemo.org/mailman/listinfo/maemo-developers