Re: [brlcad-devel] A more efficient algorithm for implementing bn_mat_identity and bn_mat_inverse

Check Nyah Thu, 25 Jul 2013 12:59:27 -0700

Well Sean,
here is the code I used to test the both the new and old algorithms.
Actually I did not write all of it from scratch. But adapted the code to
suit the test i needed to perform. Added a make file to enable compiler
code easily.


Cheers!
Nyah





On Thu, Jul 25, 2013 at 7:50 PM, Christopher Sean Morrison
<[email protected]>wrote:

>
>
> On Jul 25, 2013, at 01:25 PM, Check Nyah <[email protected]> wrote:
>
> Hey Sean,
> Just ran the tests comparing the old and new routines for computing the
> determinant and which gave me the following results:
>
>
> Excellent Nyah.  That definitely looks like a nice improvement.  Can you
> attach the test you used?
>
> Also, were the numbers stable?  That is, your two results are
> approximately 65s vs 55s which is about 15% reduction in time.  If you ran
> the comparison a couple more times with different matrices, is always about
> 15%?
>
> Cheers!
> Sean
>
> p.s. Please remember about bottom-posting and deleting irrelevant portions.
>
>
> ------------------------------------------------------------------------------
> See everything from the browser to the database with AppDynamics
> Get end-to-end visibility with application monitoring from AppDynamics
> Isolate bottlenecks and diagnose root cause in seconds.
> Start your free trial of AppDynamics Pro today!
> http://pubads.g.doubleclick.net/gampad/clk?id=48808831&iu=/4140/ostg.clktrk
> _______________________________________________
> BRL-CAD Developer mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/brlcad-devel
>
>

/**
 *Determinant.c
 *This is the benchmark test for the determinant
 */
 
 
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include "functions.h"

#define   COUNT 1000000

/* CPU cycle counter -- available on x86 and x86-64 architectures.
 */
static inline uint64_t cpu_cycles(void)
{
   register unsigned int hi, lo;
   asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
    return ((uint64_t)lo) | ((uint64_t)hi << 32U);
}

/* Xorshift pseudorandom number generator.
 */
 static uint32_t xorshift_state[4] = { 123456789U, 362436069U, 521288629U, 88675123U };

static inline uint32_t xorshift(void){
    uint32_t  temp;
    temp = xorshift_state[0] ^ (xorshift_state[0] << 11U);
    xorshift_state[0] = xorshift_state[1];
    xorshift_state[1] = xorshift_state[2];
    xorshift_state[2] = xorshift_state[3];
    return xorshift_state[3] ^= (temp >> 8U) ^ temp ^ (xorshift_state[3] >> 19U);
}

static inline double drand(void)
{
  return ((double)xorshift() / 18446744073709551616.0)
          + ((double)xorshift() / 4294967296.0);
}

 static int compare_ints(const void *a, const void *b)
{
     return *(const int *)a - *(const int *)b;
}

 static int compare_fastf(const void *a, const void *b)
{
   const fastf_t d = *(const fastf_t *)a - *(const fastf_t *)b;
   if (d < 0.0)
       return -1;
   else if (d > 0.0)
       return +1;
   else
      return  0;
}

void sort_ints(int *const array, const size_t count)
{
   qsort(array, count, sizeof (int), compare_ints);
}

void sort_fastf(fastf_t *const array, const size_t count)
{
     qsort(array, count, sizeof (fastf_t), compare_fastf);
}

int main(void)
{
    uint64_t cpu_started, cpu_stopped;
    int *my_cycles, *bn_cycles;
    fastf_t *rel_error;
    mat_t my, bn;
    fastf_t my_det, bn_det;
    size_t i, k;

   /* Allocate arrays to hold the cycle counts. */
    my_cycles = malloc(COUNT * sizeof *my_cycles);
    bn_cycles = malloc(COUNT * sizeof *bn_cycles);
    rel_error = malloc(COUNT * sizeof *rel_error);
    if (!my_cycles || !bn_cycles || !rel_error) {
        fprintf(stderr, "%s.\n", strerror(ENOMEM));
          return 1;
    }

    for (i = 0; i < COUNT; i++) {

    /* Fill the matrix with random values. */
    for (k = 0; k < 16; k++)
    my[k] = bn[k] = drand() * 65536.0;

    /* Measure bn result */
    cpu_started = cpu_cycles();
    bn_det = bn_mat_determinant(my);
    cpu_stopped = cpu_cycles();
    bn_cycles[i] = cpu_stopped - cpu_started;

    /* Measure my result. */
    cpu_started = cpu_cycles();
    my_det = my_mat_determinant(my);
    cpu_stopped = cpu_cycles();
    my_cycles[i] = cpu_stopped - cpu_started;

    /* Store the relative error in the results. */
     if (bn_det != 0.0)
         rel_error[i] = (my_det - bn_det) / bn_det;
      else
         rel_error[i] = my_det;
    }

    sort_ints(my_cycles, COUNT);
    sort_ints(bn_cycles, COUNT);
    sort_fastf(rel_error, COUNT);

    printf("My implementation:\n");
    printf("\t%9d cycles minimum\n", my_cycles[0]);
    printf("\t%9d cycles median\n", my_cycles[COUNT/2]);

    printf("libbn implementation:\n");
    printf("\t%9d cycles minimum\n", bn_cycles[0]);
    printf("\t%9d cycles median\n", bn_cycles[COUNT/2]);

    printf("Results (relative differences of calculated determinants):\n");
    printf("\t%.16g minimum\n", rel_error[0]);
    printf("\t%.16g median\n", rel_error[COUNT/2]);
    printf("\t%.16g maximum\n", rel_error[COUNT-1]);

    return 0;
}

/**
 *Functions.c
 *Contains the function definitions used in test.
 */
 
 
#include "functions.h"



#if defined(vax)
/* DEC VAX "D" format, the most restrictive */
#  define MAX_FASTF		1.0e37	/* Very close to the largest number */
#  define SQRT_MAX_FASTF	1.0e18	/* This squared just avoids overflow */
#  define SMALL_FASTF		1.0e-37	/* Anything smaller is zero */
#  define SQRT_SMALL_FASTF	1.0e-18	/* This squared gives zero */
#else
/* IBM format, being the next most restrictive format */
#  define MAX_FASTF		1.0e73	/* Very close to the largest number */
#  define SQRT_MAX_FASTF	1.0e36	/* This squared just avoids overflow */
#  define SMALL_FASTF		1.0e-77	/* Anything smaller is zero */
#  if defined(aux)
#    define SQRT_SMALL_FASTF	1.0e-40 /* _doprnt error in libc */
#  else
#    define SQRT_SMALL_FASTF	1.0e-39	/* This squared gives zero */
#  endif
#endif

/**
 * Return truthfully whether a value is within a specified epsilon
 * distance from zero.
 */
#define NEAR_ZERO(val, epsilon)	(((val) > -epsilon) && ((val) < epsilon))

/**
 * Return truthfully whether a value is within a minimum
 * representation tolerance from zero.
 *
 * Use not recommended due to compilation-variant tolerance.
 */
#define ZERO(_a) NEAR_ZERO((_a), SMALL_FASTF)


/** @brief Copy a matrix. */
#define MAT_COPY(d, s) do { \
	(d)[0] = (s)[0]; \
	(d)[1] = (s)[1]; \
	(d)[2] = (s)[2]; \
	(d)[3] = (s)[3]; \
	(d)[4] = (s)[4]; \
	(d)[5] = (s)[5]; \
	(d)[6] = (s)[6]; \
	(d)[7] = (s)[7]; \
	(d)[8] = (s)[8]; \
	(d)[9] = (s)[9]; \
	(d)[10] = (s)[10]; \
	(d)[11] = (s)[11]; \
	(d)[12] = (s)[12]; \
	(d)[13] = (s)[13]; \
	(d)[14] = (s)[14]; \
	(d)[15] = (s)[15]; \
    } while (0)


/**
 * MY _ M A T _ D E T E R M I N A N T
 *
 * Calculates the determinant of the 4X4 matrix
 * (This implementation requires only 34 multiplications
 *  and 20 additions or substractions, saving 6 multiplications
 *  and 3 additions or substractions compared to the previous one.)
 */
fastf_t
my_mat_determinant(const mat_t input)
{
    const fastf_t  tmp1 = input[12] * input[ 9] - input[13] * input[ 8];
    const fastf_t  tmp2 = input[14] * input[ 8] - input[10] * input[12];
    const fastf_t  tmp3 = input[14] * input[ 9] - input[10] * input[13];

    return ( tmp3 * input[ 0] - tmp2 * input[ 1] - tmp1 * input[ 2] ) * input[ 7]
         - ( tmp3 * input[ 4] - tmp2 * input[ 5] - tmp1 * input[ 6] ) * input[ 3]
         + ( ( input[14] * input[ 1] - input[13] * input[ 2] ) * input[ 4]
           + ( input[12] * input[ 2] - input[ 0] * input[14] ) * input[ 5]
           + ( input[ 0] * input[13] - input[12] * input[ 1] ) * input[ 6] ) * input[11]
         + ( ( input[ 2] * input[ 9] - input[ 1] * input[10] ) * input[ 4]
           + ( input[ 0] * input[10] - input[ 8] * input[ 2] ) * input[ 5]
           + ( input[ 8] * input[ 1] - input[ 0] * input[ 9] ) * input[ 6] ) * input[15];
}


fastf_t
bn_mat_determinant(const mat_t m)
{
    fastf_t det[4];
    fastf_t sum;

    det[0] = m[5] * (m[10]*m[15] - m[11]*m[14])
	-m[6] * (m[ 9]*m[15] - m[11]*m[13])
	+m[7] * (m[ 9]*m[14] - m[10]*m[13]);

    det[1] = m[4] * (m[10]*m[15] - m[11]*m[14])
	-m[6] * (m[ 8]*m[15] - m[11]*m[12])
	+m[7] * (m[ 8]*m[14] - m[10]*m[12]);

    det[2] = m[4] * (m[ 9]*m[15] - m[11]*m[13])
	-m[5] * (m[ 8]*m[15] - m[11]*m[12])
	+m[7] * (m[ 8]*m[13] - m[ 9]*m[12]);

    det[3] = m[4] * (m[ 9]*m[14] - m[10]*m[13])
	-m[5] * (m[ 8]*m[14] - m[10]*m[12])
	+m[6] * (m[ 8]*m[13] - m[ 9]*m[12]);

    sum = m[0]*det[0] - m[1]*det[1] + m[2]*det[2] - m[3]*det[3];

    return sum;

}




int
bn_mat_inverse(register mat_t output, const mat_t input)
{
    register int i, j;	/* Indices */
    int k;		/* Indices */
    int z[4];		/* Temporary */
    fastf_t b[4];	/* Temporary */
    fastf_t c[4];	/* Temporary */

    MAT_COPY(output, input);	/* Duplicate */

    /* Initialization */
    for (j = 0; j < 4; j++)
	z[j] = j;

    /* Main Loop */
    for (i = 0; i < 4; i++) {
	register fastf_t y;		/* local temporary */

	k = i;
	y = output[i*4+i];
	for (j = i+1; j < 4; j++) {
	    register fastf_t w;		/* local temporary */

	    w = output[i*4+j];
	    if (fabs(w) > fabs(y)) {
		k = j;
		y = w;
	    }
	}

	if (ZERO(y)) {
	    /* SINGULAR */
	    return 0;
	}
	y = 1.0 / y;

	for (j = 0; j < 4; j++) {
	    register fastf_t temp;	/* Local */

	    c[j] = output[j*4+k];
	    output[j*4+k] = output[j*4+i];
	    output[j*4+i] = - c[j] * y;
	    temp = output[i*4+j] * y;
	    b[j] = temp;
	    output[i*4+j] = temp;
	}

	output[i*4+i] = y;
	j = z[i];
	z[i] = z[k];
	z[k] = j;
	for (k = 0; k < 4; k++) {
	    if (k == i)
		continue;
	    for (j = 0; j < 4; j++) {
		if (j == i)
		    continue;
		output[k*4+j] = output[k*4+j] - b[j] * c[k];
	    }
	}
    }

    /* Second Loop */
    for (i = 0; i < 4; i++) {
	while ((k = z[i]) != i) {
	    int p;			/* Local temp */

	    for (j = 0; j < 4; j++) {
		register fastf_t w;	/* Local temp */

		w = output[i*4+j];
		output[i*4+j] = output[k*4+j];
		output[k*4+j] = w;
	    }
	    p = z[i];
	    z[i] = z[k];
	    z[k] = p;
	}
    }

    return 1;
}


/**
 * MY _ M A T _ I N V E R S E
 *
 * The matrix pointed at by "input" is inverted and stored in the area
 * pointed at by "output".
 *
 * Invert a 4-by-4 matrix using direct computation.
 * Uses 100 multiplications and 61 additions or substractions, total.
 *
 * @return 1 if OK.
 * @return 0 if matrix is singular.
 */
int
my_mat_inverse(register mat_t output, const mat_t input)
{
    const fastf_t tmp01 = input[ 8] * input[ 2] - input[ 0] * input[10],
                  tmp02 = input[12] * input[ 1] - input[ 0] * input[13],
                  tmp03 = input[ 0] * input[15] - input[12] * input[ 3],
                  tmp04 = input[14] * input[ 1] - input[13] * input[ 2],
                  tmp05 = input[15] * input[ 1] - input[13] * input[ 3],
                  tmp06 = input[14] * input[ 3] - input[15] * input[ 2],
                  tmp07 = input[ 8] * input[ 1] - input[ 0] * input[ 9],
                  tmp08 = input[ 8] * input[ 3] - input[ 0] * input[11],
                  tmp09 = input[ 0] * input[14] - input[12] * input[ 2],
                  tmp10 = input[ 2] * input[ 9] - input[ 1] * input[10],
                  tmp11 = input[ 3] * input[ 9] - input[ 1] * input[11],
                  tmp12 = input[ 3] * input[10] - input[ 2] * input[11],
                  tmp13 = input[12] * input[ 9] - input[13] * input[ 8],
                  tmp14 = input[12] * input[10] - input[14] * input[ 8],
                  tmp15 = input[12] * input[11] - input[15] * input[ 8],
                  tmp16 = input[13] * input[10] - input[14] * input[ 9],
                  tmp17 = input[13] * input[11] - input[15] * input[ 9],
                  tmp18 = input[14] * input[11] - input[15] * input[10];
    const fastf_t det   = ( input[4] * ( tmp18 * input[1]
                                       - tmp17 * input[2]
                                       + tmp16 * input[3] )
                          + input[5] * ( tmp15 * input[2]
                                       - tmp18 * input[0]
                                       - tmp14 * input[3] )
                          + input[6] * ( tmp17 * input[0]
                                       - tmp15 * input[1]
                                       + tmp13 * input[3] )
                          + input[7] * ( tmp14 * input[1]
                                       - tmp16 * input[0]
                                       - tmp13 * input[2] ) );
    if (ZERO(det))
        return 0;
    
    output[ 0] = (tmp17 * input[6] - tmp18 * input[5] - tmp16 * input[7]) / det;
    output[ 1] = (tmp18 * input[1] - tmp17 * input[2] + tmp16 * input[3]) / det;
    output[ 2] = (tmp06 * input[5] - tmp04 * input[7] + tmp05 * input[6]) / det;
    output[ 3] = (tmp11 * input[6] - tmp12 * input[5] - tmp10 * input[7]) / det;
    output[ 4] = (tmp18 * input[4] - tmp15 * input[6] + tmp14 * input[7]) / det;
    output[ 5] = (tmp15 * input[2] - tmp18 * input[0] - tmp14 * input[3]) / det;
    output[ 6] = (tmp09 * input[7] - tmp06 * input[4] - tmp03 * input[6]) / det;
    output[ 7] = (tmp12 * input[4] - tmp08 * input[6] + tmp01 * input[7]) / det;
    output[ 8] = (tmp15 * input[5] - tmp17 * input[4] - tmp13 * input[7]) / det;
    output[ 9] = (tmp17 * input[0] - tmp15 * input[1] + tmp13 * input[3]) / det;
    output[10] = (tmp03 * input[5] - tmp05 * input[4] + tmp02 * input[7]) / det;
    output[11] = (tmp08 * input[5] - tmp11 * input[4] - tmp07 * input[7]) / det;
    output[12] = (tmp16 * input[4] - tmp14 * input[5] + tmp13 * input[6]) / det;
    output[13] = (tmp14 * input[1] - tmp16 * input[0] - tmp13 * input[2]) / det;
    output[14] = (tmp04 * input[4] - tmp09 * input[5] - tmp02 * input[6]) / det;
    output[15] = (tmp10 * input[4] - tmp01 * input[5] + tmp07 * input[6]) / det;

    return 1;
}

/**
 *Functions.h
 *The prototypes for the functions to be used in the test. Actual functions in functions.c
 */


#ifndef   FUNCTIONS_H
#define   FUNCTIONS_H

typedef double  fastf_t;
typedef fastf_t mat_t[16];

fastf_t my_mat_determinant(const mat_t m);
fastf_t bn_mat_determinant(const mat_t m);

int my_mat_inverse(mat_t output, const mat_t input);
int bn_mat_inverse(register mat_t output, const mat_t input);
#endif /* FUNCTIONS_H */

/**
 *inverse.c
 *The benchmark code for the matrix inversion is in inverse.c:
 */
 
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <math.h>
#include "functions.h"

#define   COUNT 100000

typedef struct {
     fastf_t x;
     fastf_t y;
     fastf_t z;
} vec_t;

/* CPU cycle counter -- available on x86 and x86-64 architectures.
 */
static inline uint64_t cpu_cycles(void)
{
    register unsigned int hi, lo;
    asm volatile ("rdtsc" : "=a" (lo), "=d" (hi));
    return ((uint64_t)lo) | ((uint64_t)hi << 32U);
}


/* Xorshift pseudorandom number generator.
*/
 static uint32_t xorshift_state[4] = { 123456789U, 362436069U, 521288629U, 88675123U };

static inline uint32_t xorshift(void)
{
    uint32_t  temp;
    temp = xorshift_state[0] ^ (xorshift_state[0] << 11U);
    xorshift_state[0] = xorshift_state[1];
    xorshift_state[1] = xorshift_state[2];
    xorshift_state[2] = xorshift_state[3];
    return xorshift_state[3] ^= (temp >> 8U) ^ temp ^ (xorshift_state[3] >> 19U);
}

static inline double drand(void)
{
    return ((double)xorshift() / 18446744073709551616.0)
          + ((double)xorshift() / 4294967296.0);
}

static inline vec_t vrand(void)
{
      return (vec_t){ drand(), drand(), drand() };
}

static inline vec_t vsub(const vec_t a, const vec_t b)
{
      return (vec_t){ a.x - b.x, a.y - b.y, a.z - b.z };
}

static inline vec_t vnormalize(const vec_t v)
{
     const fastf_t s = sqrt(v.x*v.x + v.y*v.y + v.z*v.z);
     if (s > 0.0)
        return (vec_t){ v.x / s, v.y / s, v.z / s };
     else
        return (vec_t){ 0.0, 0.0, 0.0 };
}

        static inline fastf_t vdot(const vec_t a, const vec_t b)
        {
            return a.x * b.x + a.y * b.y + a.z * b.z;
        }

        static inline vec_t vscale(const vec_t a, const fastf_t s)
        {
            return (vec_t){ a.x * s, a.y * s, a.z * s };
        }

        static inline vec_t vcross(const vec_t a, const vec_t b)
        {
            return (vec_t){ a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x };
        }

        static int compare_ints(const void *a, const void *b)
        {
            return *(const int *)a - *(const int *)b;
        }

        void sort_ints(int *const array, const size_t count)
        {
            qsort(array, count, sizeof (int), compare_ints);
        }

int main(void)
{
            uint64_t cpu_started, cpu_stopped;
            int *my_cycles, *bn_cycles;
            mat_t my, bn, my_out, bn_out;
            vec_t x, y, z, t;
            size_t i = 0;

            /* Allocate arrays to hold the cycle counts. */
            my_cycles = malloc(COUNT * sizeof *my_cycles);
            bn_cycles = malloc(COUNT * sizeof *bn_cycles);
            if (!my_cycles || !bn_cycles) {
                fprintf(stderr, "%s.\n", strerror(ENOMEM));
                return 1;
            }

            while (i < COUNT) {

                /* Generate a random transformation matrix. */
                x = vnormalize(vrand());
                y = vrand();
                y = vnormalize(vsub(y, vscale(y, vdot(x, y))));
                z = vcross(x, y);
                t = vrand();
                my[ 0] = bn[ 0] = x.x;
                my[ 1] = bn[ 1] = y.x;
                my[ 2] = bn[ 2] = z.x;
                my[ 3] = bn[ 3] = t.x;

                my[ 4] = bn[ 4] = x.y;
                my[ 5] = bn[ 5] = y.y;
                my[ 6] = bn[ 6] = z.y;
                my[ 7] = bn[ 7] = t.y;

                my[ 8] = bn[ 8] = x.z;
                my[ 9] = bn[ 9] = y.z;
                my[10] = bn[10] = z.z;
                my[11] = bn[11] = t.z;

                my[12] = bn[12] = 0.0;
                my[13] = bn[13] = 0.0;
                my[14] = bn[14] = 0.0;
                my[15] = bn[15] = 1.0;

                /* Ignore matrices that are singular. */
                
                /* Measure bn result */
                cpu_started = cpu_cycles();
                if (!bn_mat_inverse(bn_out, bn)) continue;
                cpu_stopped = cpu_cycles();
                bn_cycles[i] = cpu_stopped - cpu_started;

                /* Measure my result. */
                cpu_started = cpu_cycles();
                if (!my_mat_inverse(my_out, my)) continue;
                cpu_stopped = cpu_cycles();
                my_cycles[i] = cpu_stopped - cpu_started;

                /* Next. */
                i++;
            }

            /* Print the last matrix. */
            printf("Example:\n");
            printf("  My implementation:\n");
            printf("\t[ %9.6f %9.6f %9.6g %9.6f ]-1   [ %9.6f %9.6f %9.6g %9.6f ]\n", my[ 0], my[ 1], my[ 2], my[ 3], my_out[ 0], my_out[ 1], my_out[ 2], my_out[ 3]);
            printf("\t[ %9.6f %9.6f %9.6g %9.6f ]     [ %9.6f %9.6f %9.6g %9.6f ]\n", my[ 4], my[ 5], my[ 6], my[ 7], my_out[ 4], my_out[ 5], my_out[ 6], my_out[ 7]);
            printf("\t[ %9.6f %9.6f %9.6g %9.6f ]   = [ %9.6f %9.6f %9.6g %9.6f ]\n", my[ 8], my[ 9], my[10], my[11], my_out[ 8], my_out[ 9], my_out[10], my_out[11]);
            printf("\t[ %9.6f %9.6f %9.6g %9.6f ]     [ %9.6f %9.6f %9.6g %9.6f ]\n", my[12], my[13], my[14], my[15], my_out[12], my_out[13], my_out[14], my_out[15]);
            printf("  libbn implementation:\n");
            printf("\t[ %9.6f %9.6f %9.6g %9.6f ]-1   [ %9.6f %9.6f %9.6g %9.6f ]\n", bn[ 0], bn[ 1], bn[ 2], bn[ 3], bn_out[ 0], bn_out[ 1], bn_out[ 2], bn_out[ 3]);
            printf("\t[ %9.6f %9.6f %9.6g %9.6f ]     [ %9.6f %9.6f %9.6g %9.6f ]\n", bn[ 4], bn[ 5], bn[ 6], bn[ 7], bn_out[ 4], bn_out[ 5], bn_out[ 6], bn_out[ 7]);
            printf("\t[ %9.6f %9.6f %9.6g %9.6f ]   = [ %9.6f %9.6f %9.6g %9.6f ]\n", bn[ 8], bn[ 9], bn[10], bn[11], bn_out[ 8], bn_out[ 9], bn_out[10], bn_out[11]);
            printf("\t[ %9.6f %9.6f %9.6g %9.6f ]     [ %9.6f %9.6f %9.6g %9.6f ]\n", bn[12], bn[13], bn[14], bn[15], bn_out[12], bn_out[13], bn_out[14], bn_out[15]);

            sort_ints(my_cycles, COUNT);
            sort_ints(bn_cycles, COUNT);

            printf("My implementation:\n");
            printf("\t%9d cycles minimum\n", my_cycles[0]);
            printf("\t%9d cycles median\n", my_cycles[COUNT/2]);

            printf("libbn implementation:\n");
            printf("\t%9d cycles minimum\n", bn_cycles[0]);
            printf("\t%9d cycles median\n", bn_cycles[COUNT/2]);

            return 0;
}

Makefile
Description: Binary data

------------------------------------------------------------------------------
See everything from the browser to the database with AppDynamics
Get end-to-end visibility with application monitoring from AppDynamics
Isolate bottlenecks and diagnose root cause in seconds.
Start your free trial of AppDynamics Pro today!
http://pubads.g.doubleclick.net/gampad/clk?id=48808831&iu=/4140/ostg.clktrk

_______________________________________________
BRL-CAD Developer mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/brlcad-devel

Re: [brlcad-devel] A more efficient algorithm for implementing bn_mat_identity and bn_mat_inverse

Reply via email to