Hi All,

I have made a port of gears that uses the OpenGL ES 1.1 Common API subset (using glDrawElements instead of display lists and optionally using vertex buffer objects). It should be visually equivalent to gears.c, although the flat faces are rendered with GL_SMOOTH to avoid additional draw calls.
While testing it, I noticed a severe screen corruption issue when using vertex buffer objects with the intel driver in ubuntu (both ubuntu 9.04 driver and ubuntu xorg-edgers 2.6.99.1+git20090416). I have verified the code is working with the nvidia-180 proprietary driver on another system so I believe it is an intel driver issue. I am wondering if the same issue exists upstream...
The attached gearsvbo.c source code can be compiled with: gcc gearsvbo.c -o gearsvbo -lGL -lglut

It can be run with or without vertex buffer objects (add the -novbo command line option to disable them).
With the Intel driver I see a 40% speedup with glDrawElements alone. With vertex buffer objects enabled I see almost 200% speedup, although the front buffer doesn't appear to be cleared or swapped correctly (see attached png). With the nvidia-180 driver I see no speedup (although it worked correctly with and without vbos).
An interesting discovery was made when changing the code to do a mix of vbo and non-vbo rendering (for example if I change one of the gears to be drawn without using its vbo), then the screen corruption issue goes away. It seems that only when all rendering is vbo based, then the issue arises.
./gears
3779 frames in 5.000 seconds = 755.800 FPS
3901 frames in 5.000 seconds = 780.200 FPS
3926 frames in 5.000 seconds = 785.200 FPS

./gearsvbo -novbo
vertex buffer objects disabled
5246 frames in 5.000 seconds = 1049.200 FPS
5533 frames in 5.000 seconds = 1106.600 FPS
5546 frames in 5.000 seconds = 1109.200 FPS

./gearsvbo
vertex buffer objects enabled
11399 frames in 5.000 seconds = 2279.800 FPS
11758 frames in 5.000 seconds = 2351.600 FPS
12007 frames in 5.000 seconds = 2401.400 FPS

These are the xorg, drm and intel driver packages I have installed while doing the tests (intel driver is from xorg-edgers ppa):
xserver-xorg-video-intel 2:2.6.99.1+git20090416.b9716b83-0ubuntu0tormod xserver-xorg-core 2:1.6.0-0ubuntu14 libdrm-intel1 2.4.9~git20090416.07646002-0ubuntu0tormod libdrm2 2.4.9~git20090416.07646002-0ubuntu0tormod libgl1-mesa-dev 7.4-0ubuntu3 libgl1-mesa-dri 7.4-0ubuntu3 libgl1-mesa-glx 7.4-0ubuntu3 Thanks, Michael.
/* * 3-D gear wheels. This program is in the public domain. * * Command line options: * -info print GL implementation information * -exit automatically exit after 30 seconds * * * Brian Paul */ /* Conversion to GLUT by Mark J. Kilgard */ /* Conversion to use vertex buffer objects by Michael J. Clark */ #include <math.h> #include <stdlib.h> #include <stdio.h> #include <string.h> #include <GL/glut.h> #ifndef M_PI #define M_PI 3.14159265 #endif static GLint T0 = 0; static GLint Frames = 0; static GLint autoexit = 0; static GLint win = 0; static GLboolean Visible = GL_TRUE; static GLboolean Animate = GL_TRUE; static GLfloat viewDist = 40.0; static GLboolean UseVBO = GL_FALSE; typedef struct { GLfloat pos[3]; GLfloat norm[3]; } vertex_t; typedef struct { vertex_t *vertices; GLshort *indices; GLfloat color[4]; int nvertices, nindices; GLuint vbo, ibo; } gear_t; static gear_t *red_gear; static gear_t *green_gear; static gear_t *blue_gear; /** Draw a gear wheel. You'll probably want to call this function when building a display list since we do a lot of trig here. 
Input: inner_radius - radius of hole at center outer_radius - radius at center of teeth width - width of gear teeth - number of teeth tooth_depth - depth of tooth **/ static gear_t* gear(GLfloat inner_radius, GLfloat outer_radius, GLfloat width, GLint teeth, GLfloat tooth_depth, GLfloat color[]) { GLint i, j; GLfloat r0, r1, r2; GLfloat ta, da; GLfloat u1, v1, u2, v2, len; GLfloat cos_ta, cos_ta_1da, cos_ta_2da, cos_ta_3da, cos_ta_4da; GLfloat sin_ta, sin_ta_1da, sin_ta_2da, sin_ta_3da, sin_ta_4da; GLshort ix0, ix1, ix2, ix3, ix4, ix5; vertex_t *vt, *nm; GLshort *ix; gear_t *gear = calloc(1, sizeof(gear_t)); gear->nvertices = teeth * 40; gear->nindices = teeth * 66 * 3; gear->vertices = calloc(gear->nvertices, sizeof(vertex_t)); gear->indices = calloc(gear->nindices, sizeof(GLshort)); memcpy(&gear->color[0], &color[0], sizeof(GLfloat) * 4); r0 = inner_radius; r1 = outer_radius - tooth_depth / 2.0; r2 = outer_radius + tooth_depth / 2.0; da = 2.0 * M_PI / teeth / 4.0; vt = gear->vertices; nm = gear->vertices; ix = gear->indices; #define VERTEX(x,y,z) ((vt->pos[0] = x),(vt->pos[1] = y),(vt->pos[2] = z), \ (vt++ - gear->vertices)) #define NORMAL(x,y,z) ((nm->norm[0] = x),(nm->norm[1] = y),(nm->norm[2] = z), \ (nm++ - gear->vertices)) #define INDEX(a,b,c) ((*ix++ = a),(*ix++ = b),(*ix++ = c)) for (i = 0; i < teeth; i++) { ta = i * 2.0 * M_PI / teeth; cos_ta = cos(ta); cos_ta_1da = cos(ta + da); cos_ta_2da = cos(ta + 2 * da); cos_ta_3da = cos(ta + 3 * da); cos_ta_4da = cos(ta + 4 * da); sin_ta = sin(ta); sin_ta_1da = sin(ta + da); sin_ta_2da = sin(ta + 2 * da); sin_ta_3da = sin(ta + 3 * da); sin_ta_4da = sin(ta + 4 * da); u1 = r2 * cos_ta_1da - r1 * cos_ta; v1 = r2 * sin_ta_1da - r1 * sin_ta; len = sqrt(u1 * u1 + v1 * v1); u1 /= len; v1 /= len; u2 = r1 * cos_ta_3da - r2 * cos_ta_2da; v2 = r1 * sin_ta_3da - r2 * sin_ta_2da; /* front face */ ix0 = VERTEX(r0 * cos_ta, r0 * sin_ta, width * 0.5); ix1 = VERTEX(r1 * cos_ta, r1 * sin_ta, width * 0.5); ix2 = VERTEX(r0 * cos_ta, 
r0 * sin_ta, width * 0.5); ix3 = VERTEX(r1 * cos_ta_3da, r1 * sin_ta_3da, width * 0.5); ix4 = VERTEX(r0 * cos_ta_4da, r0 * sin_ta_4da, width * 0.5); ix5 = VERTEX(r1 * cos_ta_4da, r1 * sin_ta_4da, width * 0.5); for (j = 0; j < 6; j++) { NORMAL(0.0, 0.0, 1.0); } INDEX(ix0, ix1, ix2); INDEX(ix1, ix3, ix2); INDEX(ix2, ix3, ix4); INDEX(ix3, ix5, ix4); /* front sides of teeth */ ix0 = VERTEX(r1 * cos_ta, r1 * sin_ta, width * 0.5); ix1 = VERTEX(r2 * cos_ta_1da, r2 * sin_ta_1da, width * 0.5); ix2 = VERTEX(r1 * cos_ta_3da, r1 * sin_ta_3da, width * 0.5); ix3 = VERTEX(r2 * cos_ta_2da, r2 * sin_ta_2da, width * 0.5); for (j = 0; j < 4; j++) { NORMAL(0.0, 0.0, 1.0); } INDEX(ix0, ix1, ix2); INDEX(ix1, ix3, ix2); /* back face */ ix0 = VERTEX(r1 * cos_ta, r1 * sin_ta, -width * 0.5); ix1 = VERTEX(r0 * cos_ta, r0 * sin_ta, -width * 0.5); ix2 = VERTEX(r1 * cos_ta_3da, r1 * sin_ta_3da, -width * 0.5); ix3 = VERTEX(r0 * cos_ta, r0 * sin_ta, -width * 0.5); ix4 = VERTEX(r1 * cos_ta_4da, r1 * sin_ta_4da, -width * 0.5); ix5 = VERTEX(r0 * cos_ta_4da, r0 * sin_ta_4da, -width * 0.5); for (j = 0; j < 6; j++) { NORMAL(0.0, 0.0, -1.0); } INDEX(ix0, ix1, ix2); INDEX(ix1, ix3, ix2); INDEX(ix2, ix3, ix4); INDEX(ix3, ix5, ix4); /* back sides of teeth */ ix0 = VERTEX(r1 * cos_ta_3da, r1 * sin_ta_3da, -width * 0.5); ix1 = VERTEX(r2 * cos_ta_2da, r2 * sin_ta_2da, -width * 0.5); ix2 = VERTEX(r1 * cos_ta, r1 * sin_ta, -width * 0.5); ix3 = VERTEX(r2 * cos_ta_1da, r2 * sin_ta_1da, -width * 0.5); for (j = 0; j < 4; j++) { NORMAL(0.0, 0.0, -1.0); } INDEX(ix0, ix1, ix2); INDEX(ix1, ix3, ix2); /* draw outward faces of teeth */ ix0 = VERTEX(r1 * cos_ta, r1 * sin_ta, width * 0.5); ix1 = VERTEX(r1 * cos_ta, r1 * sin_ta, -width * 0.5); ix2 = VERTEX(r2 * cos_ta_1da, r2 * sin_ta_1da, width * 0.5); ix3 = VERTEX(r2 * cos_ta_1da, r2 * sin_ta_1da, -width * 0.5); for (j = 0; j < 4; j++) { NORMAL(v1, -u1, 0.0); } INDEX(ix0, ix1, ix2); INDEX(ix1, ix3, ix2); ix0 = VERTEX(r2 * cos_ta_1da, r2 * sin_ta_1da, width * 0.5); ix1 = 
VERTEX(r2 * cos_ta_1da, r2 * sin_ta_1da, -width * 0.5); ix2 = VERTEX(r2 * cos_ta_2da, r2 * sin_ta_2da, width * 0.5); ix3 = VERTEX(r2 * cos_ta_2da, r2 * sin_ta_2da, -width * 0.5); for (j = 0; j < 4; j++) { NORMAL(cos_ta, sin_ta, 0.0); } INDEX(ix0, ix1, ix2); INDEX(ix1, ix3, ix2); ix0 = VERTEX(r2 * cos_ta_2da, r2 * sin_ta_2da, width * 0.5); ix1 = VERTEX(r2 * cos_ta_2da, r2 * sin_ta_2da, -width * 0.5); ix2 = VERTEX(r1 * cos_ta_3da, r1 * sin_ta_3da, width * 0.5); ix3 = VERTEX(r1 * cos_ta_3da, r1 * sin_ta_3da, -width * 0.5); for (j = 0; j < 4; j++) { NORMAL(v2, -u2, 0.0); } INDEX(ix0, ix1, ix2); INDEX(ix1, ix3, ix2); ix0 = VERTEX(r1 * cos_ta_3da, r1 * sin_ta_3da, width * 0.5); ix1 = VERTEX(r1 * cos_ta_3da, r1 * sin_ta_3da, -width * 0.5); ix2 = VERTEX(r1 * cos_ta_4da, r1 * sin_ta_4da, width * 0.5); ix3 = VERTEX(r1 * cos_ta_4da, r1 * sin_ta_4da, -width * 0.5); for (j = 0; j < 4; j++) { NORMAL(cos_ta, sin_ta, 0.0); } INDEX(ix0, ix1, ix2); INDEX(ix1, ix3, ix2); /* draw inside radius cylinder */ ix0 = VERTEX(r0 * cos_ta, r0 * sin_ta, -width * 0.5); ix1 = VERTEX(r0 * cos_ta, r0 * sin_ta, width * 0.5); ix2 = VERTEX(r0 * cos_ta_4da, r0 * sin_ta_4da, -width * 0.5); ix3 = VERTEX(r0 * cos_ta_4da, r0 * sin_ta_4da, width * 0.5); NORMAL(-cos_ta, -sin_ta, 0.0); NORMAL(-cos_ta, -sin_ta, 0.0); NORMAL(-cos_ta_4da, -sin_ta_4da, 0.0); NORMAL(-cos_ta_4da, -sin_ta_4da, 0.0); INDEX(ix0, ix1, ix2); INDEX(ix1, ix3, ix2); } if (UseVBO) { glGenBuffers(1, &gear->vbo); glBindBuffer(GL_ARRAY_BUFFER, gear->vbo); glBufferData(GL_ARRAY_BUFFER, sizeof(vertex_t) * gear->nvertices, &gear->vertices[0].pos[0], GL_STATIC_DRAW); glGenBuffers(1, &gear->ibo); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gear->ibo); glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLshort) * gear->nindices, &gear->indices[0], GL_STATIC_DRAW); } return gear; } void draw_gear(gear_t* gear) { glMaterialfv(GL_FRONT_AND_BACK, GL_AMBIENT_AND_DIFFUSE, gear->color); if (UseVBO) { glBindBuffer(GL_ARRAY_BUFFER, gear->vbo); 
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gear->ibo); glVertexPointer(3, GL_FLOAT, sizeof(vertex_t), (char*)0); glNormalPointer(GL_FLOAT, sizeof(vertex_t), (char*)(sizeof(GLfloat)*3)); glDrawElements(GL_TRIANGLES, gear->nindices/3, GL_UNSIGNED_SHORT, 0); } else { glVertexPointer(3, GL_FLOAT, sizeof(vertex_t), gear->vertices[0].pos); glNormalPointer(GL_FLOAT, sizeof(vertex_t), gear->vertices[0].norm); glDrawElements(GL_TRIANGLES, gear->nindices/3, GL_UNSIGNED_SHORT, gear->indices); } } static GLfloat view_rotx = 20.0, view_roty = 30.0, view_rotz = 0.0; static gear_t *gear1, *gear2, *gear3; static GLfloat angle = 0.0; static void cleanup(void) { glutDestroyWindow(win); } static void draw(void) { glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glPushMatrix(); glTranslatef(0.0, 0.0, -viewDist); glRotatef(view_rotx, 1.0, 0.0, 0.0); glRotatef(view_roty, 0.0, 1.0, 0.0); glRotatef(view_rotz, 0.0, 0.0, 1.0); glPushMatrix(); glTranslatef(-3.0, -2.0, 0.0); glRotatef(angle, 0.0, 0.0, 1.0); draw_gear(gear1); glPopMatrix(); glPushMatrix(); glTranslatef(3.1, -2.0, 0.0); glRotatef(-2.0 * angle - 9.0, 0.0, 0.0, 1.0); draw_gear(gear2); glPopMatrix(); glPushMatrix(); glTranslatef(-3.1, 4.2, 0.0); glRotatef(-2.0 * angle - 25.0, 0.0, 0.0, 1.0); draw_gear(gear3); glPopMatrix(); glPopMatrix(); glutSwapBuffers(); Frames++; { GLint t = glutGet(GLUT_ELAPSED_TIME); if (t - T0 >= 5000) { GLfloat seconds = (t - T0) / 1000.0; GLfloat fps = Frames / seconds; printf("%d frames in %6.3f seconds = %6.3f FPS\n", Frames, seconds, fps); T0 = t; Frames = 0; if ((t >= 999.0 * autoexit) && (autoexit)) { cleanup(); exit(0); } } } } static void idle(void) { static double t0 = -1.; double dt, t = glutGet(GLUT_ELAPSED_TIME) / 1000.0; if (t0 < 0.0) t0 = t; dt = t - t0; t0 = t; angle += 70.0 * dt; /* 70 degrees per second */ angle = fmod(angle, 360.0); /* prevents eventual overflow */ glutPostRedisplay(); } static void update_idle_func(void) { if (Visible && Animate) glutIdleFunc(idle); else 
glutIdleFunc(NULL); } /* change view angle, exit upon ESC */ /* ARGSUSED1 */ static void key(unsigned char k, int x, int y) { switch (k) { case 'z': view_rotz += 5.0; break; case 'Z': view_rotz -= 5.0; break; case 'd': viewDist += 1.0; break; case 'D': viewDist -= 1.0; break; case 'a': Animate = !Animate; update_idle_func(); break; case 27: /* Escape */ cleanup(); exit(0); break; default: return; } glutPostRedisplay(); } /* change view angle */ /* ARGSUSED1 */ static void special(int k, int x, int y) { switch (k) { case GLUT_KEY_UP: view_rotx += 5.0; break; case GLUT_KEY_DOWN: view_rotx -= 5.0; break; case GLUT_KEY_LEFT: view_roty += 5.0; break; case GLUT_KEY_RIGHT: view_roty -= 5.0; break; default: return; } glutPostRedisplay(); } /* new window size or exposure */ static void reshape(int width, int height) { GLfloat h = (GLfloat) height / (GLfloat) width; glViewport(0, 0, (GLint) width, (GLint) height); glMatrixMode(GL_PROJECTION); glLoadIdentity(); glFrustum(-1.0, 1.0, -h, h, 5.0, 200.0); glMatrixMode(GL_MODELVIEW); } static void init(int argc, char *argv[]) { static GLfloat pos[4] = {5.0, 5.0, 10.0, 0.0}; static GLfloat red[4] = {0.8, 0.1, 0.0, 1.0}; static GLfloat green[4] = {0.0, 0.8, 0.2, 1.0}; static GLfloat blue[4] = {0.2, 0.2, 1.0, 1.0}; GLint i; glLightfv(GL_LIGHT0, GL_POSITION, pos); glEnable(GL_CULL_FACE); glEnable(GL_LIGHTING); glEnable(GL_LIGHT0); glEnable(GL_DEPTH_TEST); glShadeModel(GL_SMOOTH); glEnableClientState(GL_NORMAL_ARRAY); glEnableClientState(GL_VERTEX_ARRAY); UseVBO = glutExtensionSupported("GL_ARB_vertex_buffer_object"); for ( i=1; i<argc; i++ ) { if (strcmp(argv[i], "-info")==0) { printf("GL_RENDERER = %s\n", (char *) glGetString(GL_RENDERER)); printf("GL_VERSION = %s\n", (char *) glGetString(GL_VERSION)); printf("GL_VENDOR = %s\n", (char *) glGetString(GL_VENDOR)); printf("GL_EXTENSIONS = %s\n", (char *) glGetString(GL_EXTENSIONS)); } else if (strcmp(argv[i], "-novbo")==0) { UseVBO = GL_FALSE; } else if ( strcmp(argv[i], "-exit")==0) { 
autoexit = 30; printf("Auto Exit after %i seconds.\n", autoexit ); } } /* make the gears */ gear1 = gear(1.0, 4.0, 1.0, 20, 0.7, red); gear2 = gear(0.5, 2.0, 2.0, 10, 0.7, green); gear3 = gear(1.3, 2.0, 0.5, 10, 0.7, blue); printf("vertex buffer objects %s\n", (UseVBO ? "enabled" : "disabled")); } static void visible(int vis) { Visible = vis; update_idle_func(); } int main(int argc, char *argv[]) { glutInit(&argc, argv); glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE); glutInitWindowPosition(0, 0); glutInitWindowSize(300, 300); win = glutCreateWindow("Gears"); init(argc, argv); glutDisplayFunc(draw); glutReshapeFunc(reshape); glutKeyboardFunc(key); glutSpecialFunc(special); glutVisibilityFunc(visible); update_idle_func(); glutMainLoop(); return 0; /* ANSI C requires main to return int. */ }
<<inline: gearsvbo-intel-1.png>>
------------------------------------------------------------------------------ Stay on top of everything new and different, both inside and around Java (TM) technology - register by April 22, and save $200 on the JavaOne (SM) conference, June 2-5, 2009, San Francisco. 300 plus technical and hands-on sessions. Register today. Use priority code J9JMT32. http://p.sf.net/sfu/p
_______________________________________________ Mesa3d-dev mailing list Mesa3d-dev@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/mesa3d-dev