I wanted to see how much faster I could get in C than in Python. The answer so far is only about five times.
// A simple little wave-mechanics display thing in SDL, by Kragen // Javier Sitaker, 2007-12-07 and 08. // Originally I wrote it in Python with PyGame (49 lines of code), but // I was disappointed with how slowly it ran, about 17fps on my // PIII-Coppermine-700 at 320x240. So I thought I'd try writing it in // C to see how much faster it was, and much to my surprise, it was // only something like 50% faster at 26fps --- although the size // penalty was not as bad as I thought, as my first C version is only // about 70 lines of code. And it was fast to write! It took under // an hour. Let's hear it for SDL! // But it seemed like the sqrt and sin functions were taking up a lot // of the run time, and any floating-point math seemed to be kind of // costly, so I wrote this "all-integer" version. In places, the // integers are scaled by 256 so as to be able to represent fractional // values, and there are tables that are initialized (to integers) // using floating-point math. (I thought floating-point was supposed // to be fast now, but I guess my 1999 CPU hasn't gotten the news // yet.) This quadrupled the speed to 105fps. // This version has a couple of interfering waves, one of which has a // moving center, and so it only gets 61fps instead of 105fps at // 320x240 on my laptop. But that's still faster than my screen // refreshes. // Unfortunately the moving center doesn't produce a Doppler effect as // it ought to. That will require a different approach. // I compiled with gcc -Wall -O5 -fomit-frame-pointer -g -lSDL on gcc // 4.1.2. #include <SDL/SDL.h> #include <stdio.h> #include <sys/time.h> #include <time.h> #include <assert.h> #include <math.h> // returns current time in 256ths of a second int getnow_scaled() { struct timeval now; gettimeofday(&now, 0); return (now.tv_sec << 8) + now.tv_usec * 256 / 1000000; } int make_grayscale_component(Uint32 mask, float level) { return (int)(mask * level) & mask; } #define max_grayscale_value 256 short grayscale_palette[max_grayscale_value]; void init_grayscale_palette(struct SDL_PixelFormat *format) { int ii; assert(sizeof(grayscale_palette[0]) == format->BytesPerPixel); for (ii = 0; ii < max_grayscale_value; ii++) { float level = ii / 256.0; grayscale_palette[ii] = make_grayscale_component(format->Rmask, level) + make_grayscale_component(format->Gmask, level) + make_grayscale_component(format->Bmask, level); } } // The idea here is that sqrtx256[x/256] == 256 * sqrt(x), which is // equivalent to saying that sqrtx256[x] == 256 * 16 * sqrt(x), so we // can do exact lookups for small numbers and linear interpolation for // large ones. // The max we'll currently encounter is 640*640 + 480*480 = 640 000, // which is 256 * 2500. #define max_sqrtx256 2500 int sqrtx256[max_sqrtx256]; void init_sqrtx256() { int ii; for (ii = 0; ii < max_sqrtx256; ii++) sqrtx256[ii] = (int)(0.5 + 256 * 16 * sqrt(ii)); } // returns an approximation of 256 * sqrt(sqr) int inline interp_sqrt(int sqr) { // This line eats 10% of performance: if (sqr < max_sqrtx256) return sqrtx256[sqr] >> 4; // the rest of this routine compiles inline to 10 instructions: mov // and sar mov mov sub imul sar lea jmp. int high = sqr >> 8; // we hope high < max_sqrtx256-1 int below = sqrtx256[high]; int above = sqrtx256[high+1]; int spread = above - below; int correction = ((sqr & 0xff) * spread) >> 8; return correction + below; } // 4 * pi * 256, accurate to six places; I'd take things modulo // 2 * pi * 256 but rounding that to an integer introduces 100x as // much error #define fourpi 3217 // rtable: the idea is that you index into rtable with a value that // represents an angle, in radians, scaled by 256, which in this case // comes from the distance from the center of some wave minus the // current time; and you get back a value scaled to the range [0, 256) // representing (1 + sin(theta))/2. unsigned char rtable[fourpi]; void init_rtable() { int ii; for (ii = 0; ii < fourpi; ii++) { int sin_val = 128 + 128 * sin(ii / 256.0) + 0.5; if (sin_val > 255) sin_val = 255; if (sin_val < 0) sin_val = 0; rtable[ii] = sin_val; } } int xorigin=0, yorigin=100; void redraw_world(SDL_Surface *screen) { int xx, yy, w = screen->w, h = screen->h; short *pix; int now_scaled = getnow_scaled(); assert(sizeof(*pix) == screen->format->BytesPerPixel); SDL_LockSurface(screen); pix = screen->pixels; for (yy = 0; yy < h; yy++) { for (xx = 0; xx < w; xx++) { int rx = xx - w/2, ry = yy - h/2; unsigned rscaled = interp_sqrt(rx*rx + ry*ry)/(w/64) - now_scaled; int dx = xx - xorigin, dy = yy - yorigin; unsigned rscaled2= interp_sqrt(dx*dx + dy*dy)/(w/32) - now_scaled * 4; *pix++ = grayscale_palette[(rtable[rscaled % fourpi] + rtable[rscaled2% fourpi])/2]; } } SDL_UnlockSurface(screen); } int lastframe; // global so main() can initialize it #define movement_scaling 16 void update_moving_center(SDL_Surface *screen) { int now = getnow_scaled(); int dt = now - lastframe; // delta time since last frame int dx, dy; static int dx_err = 0, dy_err = 0; // keep track of fractional pixels static int xdir = 1, ydir = 1; lastframe = now; // we only wanted lastframe to get dt dx = xdir * dt + dx_err; // now calculate desired pixel movements dy = ydir * dt + dy_err; xorigin += dx / movement_scaling; // apply them yorigin += dy / movement_scaling; dx_err = dx % movement_scaling; // save up fractional pixels for later dy_err = dy % movement_scaling; if (xorigin > screen->w) xdir = -1; // bounce if need be; it's OK to be if (xorigin < 0) xdir = 1; // a little bit off the screen if (yorigin > screen->h) ydir = -1; if (yorigin < 0) ydir = 1; } int main(int argc, char **argv) { int frames = 0; int start, end; SDL_Surface *screen; SDL_Init(SDL_INIT_EVERYTHING); screen = SDL_SetVideoMode(320, 240, 16, SDL_FULLSCREEN); //screen = SDL_SetVideoMode(640, 480, 16, SDL_FULLSCREEN); if (!screen) abort(); init_grayscale_palette(screen->format); init_sqrtx256(); //memcpy(sqrtx256, init_sqrtx256, 1024); just for fun init_rtable(); lastframe = start = getnow_scaled(); for (;;) { SDL_Event ev; if (SDL_PollEvent(&ev)) { if (ev.type == SDL_MOUSEBUTTONDOWN || ev.type == SDL_QUIT) break; } else { redraw_world(screen); SDL_Flip(screen); frames++; update_moving_center(screen); } } end = getnow_scaled(); SDL_Quit(); printf("%.2f seconds, %.2f fps\n", (end - start)/256.0, 256.0 * frames / (end - start)); return 0; }