On 6/19/06, Richard Guenther <[EMAIL PROTECTED]> wrote:
Using -mfpmath=sse -msse2 is a workaround if you have a processor that supports SSE2 instructions. As opposed to -ffloat-store, it works reliably and with no performance impact.
Such a slab test can be turned into a branchless sequence of SSE min/max, even for filtering infinities around dir ~= 0; it's much simpler and more efficient to intersect 4 rays against one box at once, though. Without intrinsics, a NaN-oblivious version would look like:
// Picks a when a < b, otherwise b. Because the comparison is false whenever
// either operand is NaN, the second operand is what comes back in that case.
static float minf(const float a, const float b)
{
    if (a < b)
        return a;
    return b;
}

// Picks a when a > b, otherwise b. Same NaN behaviour as minf: a false
// comparison always yields the second operand.
static float maxf(const float a, const float b)
{
    if (a > b)
        return a;
    return b;
}

// Slab test of a single ray against an axis-aligned box.
//
// For each axis the two ray parameters (box.min - ray.pos) * ray.inv_dir and
// (box.max - ray.pos) * ray.inv_dir are computed; the running interval
// [lmin, lmax] is the intersection of the three per-axis intervals.
//
// box   box whose min/max corners bound the slabs.
// ray   supplies pos and inv_dir (presumably the per-component reciprocal of
//       the ray direction -- confirm against ray_t).
// lmin  out: largest of the per-axis entry parameters; always written.
// lmax  out: smallest of the per-axis exit parameters; always written.
//
// Returns non-zero when lmax >= lmin and lmax >= 0, zero otherwise. No
// explicit NaN handling is done here; whatever minf/maxf propagate is used.
bool_t intersect_ray_box(const aabb_t &box, const rt::mono::ray_t &ray, float &lmin, float &lmax)
{
    // X axis seeds the interval.
    const float x_at_min = (box.min.x - ray.pos.x) * ray.inv_dir.x;
    const float x_at_max = (box.max.x - ray.pos.x) * ray.inv_dir.x;
    lmin = minf(x_at_min, x_at_max);
    lmax = maxf(x_at_min, x_at_max);

    // Y axis narrows it. The running value is passed second so that it is the
    // operand returned when the comparison inside minf/maxf is false.
    const float y_at_min = (box.min.y - ray.pos.y) * ray.inv_dir.y;
    const float y_at_max = (box.max.y - ray.pos.y) * ray.inv_dir.y;
    lmin = maxf(minf(y_at_min, y_at_max), lmin);
    lmax = minf(maxf(y_at_min, y_at_max), lmax);

    // Z axis narrows it again, same operand order.
    const float z_at_min = (box.min.z - ray.pos.z) * ray.inv_dir.z;
    const float z_at_max = (box.max.z - ray.pos.z) * ray.inv_dir.z;
    lmin = maxf(minf(z_at_min, z_at_max), lmin);
    lmax = minf(maxf(z_at_min, z_at_max), lmax);

    // Bitwise & (not &&) keeps the final combine free of a short-circuit branch.
    const bool interval_not_empty = (lmax >= lmin);
    const bool exit_not_behind_origin = (lmax >= 0.f);
    return interval_not_empty & exit_not_behind_origin;
}