I wanted to see how much faster I could get in C than in Python. The
answer so far is only about five times.
// A simple little wave-mechanics display thing in SDL, by Kragen
// Javier Sitaker, 2007-12-07 and 08.
// Originally I wrote it in Python with PyGame (49 lines of code), but
// I was disappointed with how slowly it ran, about 17fps on my
// PIII-Coppermine-700 at 320x240. So I thought I'd try writing it in
// C to see how much faster it was, and much to my surprise, it was
// only something like 50% faster at 26fps --- although the size
// penalty was not as bad as I thought, as my first C version is only
// about 70 lines of code. And it was fast to write! It took under
// an hour. Let's hear it for SDL!
// But it seemed like the sqrt and sin functions were taking up a lot
// of the run time, and any floating-point math seemed to be kind of
// costly, so I wrote this "all-integer" version. In places, the
// integers are scaled by 256 so as to be able to represent fractional
// values, and there are tables that are initialized (to integers)
// using floating-point math. (I thought floating-point was supposed
// to be fast now, but I guess my 1999 CPU hasn't gotten the news
// yet.) This quadrupled the speed to 105fps.
// This version has a couple of interfering waves, one of which has a
// moving center, and so it only gets 61fps instead of 105fps at
// 320x240 on my laptop. But that's still faster than my screen
// refreshes.
// Unfortunately the moving center doesn't produce a Doppler effect as
// it ought to. That will require a different approach.
// I compiled with gcc -Wall -O5 -fomit-frame-pointer -g -lSDL on gcc
// 4.1.2.
#include <SDL/SDL.h>
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <assert.h>
#include <math.h>
// returns current time in 256ths of a second
int getnow_scaled() {
struct timeval now;
gettimeofday(&now, 0);
return (now.tv_sec << 8) + now.tv_usec * 256 / 1000000;
}
int make_grayscale_component(Uint32 mask, float level) {
return (int)(mask * level) & mask;
}
#define max_grayscale_value 256
short grayscale_palette[max_grayscale_value];
void init_grayscale_palette(struct SDL_PixelFormat *format) {
int ii;
assert(sizeof(grayscale_palette[0]) == format->BytesPerPixel);
for (ii = 0; ii < max_grayscale_value; ii++) {
float level = ii / 256.0;
grayscale_palette[ii] =
make_grayscale_component(format->Rmask, level) +
make_grayscale_component(format->Gmask, level) +
make_grayscale_component(format->Bmask, level);
}
}
// The idea here is that sqrtx256[x/256] == 256 * sqrt(x), which is
// equivalent to saying that sqrtx256[x] == 256 * 16 * sqrt(x), so we
// can do exact lookups for small numbers and linear interpolation for
// large ones.
// The max we'll currently encounter is 640*640 + 480*480 = 640 000,
// which is 256 * 2500.
#define max_sqrtx256 2500
int sqrtx256[max_sqrtx256];
void init_sqrtx256() {
int ii;
for (ii = 0; ii < max_sqrtx256; ii++)
sqrtx256[ii] = (int)(0.5 + 256 * 16 * sqrt(ii));
}
// returns an approximation of 256 * sqrt(sqr)
int inline interp_sqrt(int sqr) {
// This line eats 10% of performance:
if (sqr < max_sqrtx256) return sqrtx256[sqr] >> 4;
// the rest of this routine compiles inline to 10 instructions: mov
// and sar mov mov sub imul sar lea jmp.
int high = sqr >> 8; // we hope high < max_sqrtx256-1
int below = sqrtx256[high];
int above = sqrtx256[high+1];
int spread = above - below;
int correction = ((sqr & 0xff) * spread) >> 8;
return correction + below;
}
// 4 * pi * 256, accurate to six places; I'd take things modulo
// 2 * pi * 256 but rounding that to an integer introduces 100x as
// much error
#define fourpi 3217
// rtable: the idea is that you index into rtable with a value that
// represents an angle, in radians, scaled by 256, which in this case
// comes from the distance from the center of some wave minus the
// current time; and you get back a value scaled to the range [0, 256)
// representing (1 + sin(theta))/2.
unsigned char rtable[fourpi];
void init_rtable() {
int ii;
for (ii = 0; ii < fourpi; ii++) {
int sin_val = 128 + 128 * sin(ii / 256.0) + 0.5;
if (sin_val > 255) sin_val = 255;
if (sin_val < 0) sin_val = 0;
rtable[ii] = sin_val;
}
}
int xorigin=0, yorigin=100;
void redraw_world(SDL_Surface *screen) {
int xx, yy, w = screen->w, h = screen->h;
short *pix;
int now_scaled = getnow_scaled();
assert(sizeof(*pix) == screen->format->BytesPerPixel);
SDL_LockSurface(screen);
pix = screen->pixels;
for (yy = 0; yy < h; yy++) {
for (xx = 0; xx < w; xx++) {
int rx = xx - w/2, ry = yy - h/2;
unsigned rscaled = interp_sqrt(rx*rx + ry*ry)/(w/64) - now_scaled;
int dx = xx - xorigin, dy = yy - yorigin;
unsigned rscaled2= interp_sqrt(dx*dx + dy*dy)/(w/32) - now_scaled * 4;
*pix++ = grayscale_palette[(rtable[rscaled % fourpi] +
rtable[rscaled2% fourpi])/2];
}
}
SDL_UnlockSurface(screen);
}
int lastframe; // global so main() can initialize it
#define movement_scaling 16
void update_moving_center(SDL_Surface *screen) {
int now = getnow_scaled();
int dt = now - lastframe; // delta time since last frame
int dx, dy;
static int dx_err = 0, dy_err = 0; // keep track of fractional pixels
static int xdir = 1, ydir = 1;
lastframe = now; // we only wanted lastframe to get dt
dx = xdir * dt + dx_err; // now calculate desired pixel movements
dy = ydir * dt + dy_err;
xorigin += dx / movement_scaling; // apply them
yorigin += dy / movement_scaling;
dx_err = dx % movement_scaling; // save up fractional pixels for later
dy_err = dy % movement_scaling;
if (xorigin > screen->w) xdir = -1; // bounce if need be; it's OK to be
if (xorigin < 0) xdir = 1; // a little bit off the screen
if (yorigin > screen->h) ydir = -1;
if (yorigin < 0) ydir = 1;
}
int main(int argc, char **argv) {
int frames = 0;
int start, end;
SDL_Surface *screen;
SDL_Init(SDL_INIT_EVERYTHING);
screen = SDL_SetVideoMode(320, 240, 16, SDL_FULLSCREEN);
//screen = SDL_SetVideoMode(640, 480, 16, SDL_FULLSCREEN);
if (!screen) abort();
init_grayscale_palette(screen->format);
init_sqrtx256();
//memcpy(sqrtx256, init_sqrtx256, 1024); just for fun
init_rtable();
lastframe = start = getnow_scaled();
for (;;) {
SDL_Event ev;
if (SDL_PollEvent(&ev)) {
if (ev.type == SDL_MOUSEBUTTONDOWN || ev.type == SDL_QUIT) break;
} else {
redraw_world(screen);
SDL_Flip(screen);
frames++;
update_moving_center(screen);
}
}
end = getnow_scaled();
SDL_Quit();
printf("%.2f seconds, %.2f fps\n", (end - start)/256.0,
256.0 * frames / (end - start));
return 0;
}