William Cohen wrote:
I looked at where the processor spends its time when browsing the web.
Hardware configuration:
OLPC Beta 2 machine
Linksys USB200M USB 10/100 for ethernet connection
4GB memorex Mini Travel Drive for storage of image
Software configuration:
/tmp/olpc-redhat-stream-development-build-299-20070308_1417-devel_ext3.img
kernel-2.6.21-20070309.olpc1p.dc5079fafb767e4
oprofile-0.9.2-3.fc6
I reran the experiment on build 301 and installed
xorg-x11-server-debuginfo-1.1.99.3-0.10.2.olpc1.i386.rpm on the OLPC machine so
that I could take a look at where time is being spent in libfb.so.
# opreport -t 1 -l /usr/bin/Xorg
CPU: CPU with timer interrupt, speed 0 MHz (estimated)
Profiling through timer interrupt
samples % image name symbol name
6514 68.1096 libfb.so fbFetchTransformed
613 6.4095 libfb.so fbFetchPixel_x8r8g8b8
446 4.6633 libfb.so fbCompositeSolidMask_nx8x0565mmx
252 2.6349 libfb.so fbStore_r5g6b5
169 1.7670 libfb.so fbRasterizeEdges
137 1.4325 libfb.so fbCompositeSrc_8888x0565mmx
113 1.1815 libfb.so fbCopyAreammx
99 1.0351 libfb.so mmxCombineOverU
The attached file is a portion of the output from opannotate. There is a group
of MOD operations that takes a significant portion of the time. In each annotated
line, the first column is the number of samples and the second column is the
percentage of the samples recorded in fbFetchTransformed.
398 6.1099 : x1 = MOD (x1, pict->pDrawable->width);
383 5.8796 : x2 = MOD (x2, pict->pDrawable->width);
336 5.1581 : y1 = MOD (y1, pict->pDrawable->height);
355 5.4498 : y2 = MOD (y2, pict->pDrawable->height);
Following this, there are also some other expensive operations that compute r
and store it into buffer[i].
-Will
:static void fbFetchTransformed(PicturePtr pict, int x, int y,
int width, CARD32 *buffer)
:{ /* fbFetchTransformed total: 6514 100.000 */
: FbBits *bits;
: FbStride stride;
: int bpp;
: int xoff, yoff, dx, dy;
: fetchPixelProc fetch;
: PictVector v;
: PictVector unit;
: int i;
: BoxRec box;
: miIndexedPtr indexed = (miIndexedPtr)
pict->pFormat->index.devPrivate;
: Bool affine = TRUE;
:
: fetch = fetchPixelProcForPicture(pict);
:
1 0.0154 : fbGetDrawable(pict->pDrawable, bits, stride, bpp, xoff,
yoff);
: x += xoff;
: y += yoff;
:
: dx = pict->pDrawable->x;
: dy = pict->pDrawable->y;
:
: /* reference point is the center of the pixel */
: v.vector[0] = IntToxFixed(x - dx) + xFixed1 / 2;
: v.vector[1] = IntToxFixed(y - dy) + xFixed1 / 2;
: v.vector[2] = xFixed1;
:
: /* when using convolution filters one might get here
without a transform */
: if (pict->transform) {
1 0.0154 : if (!PictureTransformPoint3d (pict->transform, &v))
: return;
: unit.vector[0] = pict->transform->matrix[0][0];
: unit.vector[1] = pict->transform->matrix[1][0];
: unit.vector[2] = pict->transform->matrix[2][0];
: affine = v.vector[2] == xFixed1 && unit.vector[2] == 0;
: } else {
: unit.vector[0] = xFixed1;
: unit.vector[1] = 0;
: unit.vector[2] = 0;
: }
:
: if (pict->filter == PictFilterNearest)
: {
: if (pict->repeatType == RepeatNormal) {
: if (REGION_NUM_RECTS(pict->pCompositeClip) == 1) {
: for (i = 0; i < width; ++i) {
: if (!v.vector[2]) {
: buffer[i] = 0;
: } else {
: if (!affine) {
: y =
MOD(DIV(v.vector[1],v.vector[2]), pict->pDrawable->height);
: x =
MOD(DIV(v.vector[0],v.vector[2]), pict->pDrawable->width);
: } else {
: y = MOD(v.vector[1]>>16,
pict->pDrawable->height);
: x = MOD(v.vector[0]>>16,
pict->pDrawable->width);
: }
: buffer[i] = fetch(bits + (y +
dy)*stride, x + dx, indexed);
: }
: v.vector[0] += unit.vector[0];
: v.vector[1] += unit.vector[1];
: v.vector[2] += unit.vector[2];
: }
: } else {
: for (i = 0; i < width; ++i) {
: if (!v.vector[2]) {
: buffer[i] = 0;
: } else {
: if (!affine) {
: y =
MOD(DIV(v.vector[1],v.vector[2]), pict->pDrawable->height);
: x =
MOD(DIV(v.vector[0],v.vector[2]), pict->pDrawable->width);
: } else {
: y = MOD(v.vector[1]>>16,
pict->pDrawable->height);
: x = MOD(v.vector[0]>>16,
pict->pDrawable->width);
: }
: if (POINT_IN_REGION (0,
pict->pCompositeClip, x + dx, y + dy, &box))
: buffer[i] = fetch(bits + (y +
dy)*stride, x + dx, indexed);
: else
: buffer[i] = 0;
: }
: v.vector[0] += unit.vector[0];
: v.vector[1] += unit.vector[1];
: v.vector[2] += unit.vector[2];
: }
: }
: } else {
: if (REGION_NUM_RECTS(pict->pCompositeClip) == 1) {
: box = pict->pCompositeClip->extents;
: for (i = 0; i < width; ++i) {
: if (!v.vector[2]) {
: buffer[i] = 0;
: } else {
: if (!affine) {
: y = DIV(v.vector[1],v.vector[2]);
: x = DIV(v.vector[0],v.vector[2]);
: } else {
: y = v.vector[1]>>16;
: x = v.vector[0]>>16;
: }
: buffer[i] = ((x < box.x1-dx) | (x >=
box.x2-dx) | (y < box.y1-dy) | (y >= box.y2-dy)) ?
: 0 : fetch(bits + (y +
dy)*stride, x + dx, indexed);
: }
: v.vector[0] += unit.vector[0];
: v.vector[1] += unit.vector[1];
: v.vector[2] += unit.vector[2];
: }
: } else {
: for (i = 0; i < width; ++i) {
: if (!v.vector[2]) {
: buffer[i] = 0;
: } else {
: if (!affine) {
: y = DIV(v.vector[1],v.vector[2]);
: x = DIV(v.vector[0],v.vector[2]);
: } else {
: y = v.vector[1]>>16;
: x = v.vector[0]>>16;
: }
: if (POINT_IN_REGION (0,
pict->pCompositeClip, x + dx, y + dy, &box))
: buffer[i] = fetch(bits + (y +
dy)*stride, x + dx, indexed);
: else
: buffer[i] = 0;
: }
: v.vector[0] += unit.vector[0];
: v.vector[1] += unit.vector[1];
: v.vector[2] += unit.vector[2];
: }
: }
: }
: } else if (pict->filter == PictFilterBilinear) {
: /* adjust vector for maximum contribution at 0.5, 0.5
of each texel. */
1 0.0154 : v.vector[0] -= v.vector[2] / 2;
: v.vector[1] -= v.vector[2] / 2;
2 0.0307 : unit.vector[0] -= unit.vector[2] / 2;
: unit.vector[1] -= unit.vector[2] / 2;
:
: if (pict->repeatType == RepeatNormal) {
: if (REGION_NUM_RECTS(pict->pCompositeClip) == 1) {
45 0.6908 : for (i = 0; i < width; ++i) {
34 0.5220 : if (!v.vector[2]) {
: buffer[i] = 0;
: } else {
: int x1, x2, y1, y2, distx, idistx,
disty, idisty;
: FbBits *b;
: CARD32 tl, tr, bl, br, r;
: CARD32 ft, fb;
:
31 0.4759 : if (!affine) {
: xFixed_48_16 div;
: div = ((xFixed_48_16)v.vector[0] <<
16)/v.vector[2];
: x1 = div >> 16;
: distx = ((xFixed)div >> 8) & 0xff;
: div = ((xFixed_48_16)v.vector[1] <<
16)/v.vector[2];
: y1 = div >> 16;
: disty = ((xFixed)div >> 8) & 0xff;
: } else {
30 0.4605 : x1 = v.vector[0] >> 16;
16 0.2456 : distx = (v.vector[0] >> 8) & 0xff;
15 0.2303 : y1 = v.vector[1] >> 16;
37 0.5680 : disty = (v.vector[1] >> 8) & 0xff;
: }
19 0.2917 : x2 = x1 + 1;
9 0.1382 : y2 = y1 + 1;
:
51 0.7829 : idistx = 256 - distx;
: idisty = 256 - disty;
:
398 6.1099 : x1 = MOD (x1, pict->pDrawable->width);
383 5.8796 : x2 = MOD (x2, pict->pDrawable->width);
336 5.1581 : y1 = MOD (y1, pict->pDrawable->height);
355 5.4498 : y2 = MOD (y2, pict->pDrawable->height);
:
42 0.6448 : b = bits + (y1 + dy)*stride;
:
102 1.5659 : tl = fetch(b, x1 + dx, indexed);
164 2.5177 : tr = fetch(b, x2 + dx, indexed);
101 1.5505 : b = bits + (y2 + dy)*stride;
52 0.7983 : bl = fetch(b, x1 + dx, indexed);
77 1.1821 : br = fetch(b, x2 + dx, indexed);
:
: ft = FbGet8(tl,0) * idistx +
FbGet8(tr,0) * distx;
: fb = FbGet8(bl,0) * idistx +
FbGet8(br,0) * distx;
536 8.2284 : r = (((ft * idisty + fb * disty) >> 16)
& 0xff);
: ft = FbGet8(tl,8) * idistx +
FbGet8(tr,8) * distx;
: fb = FbGet8(bl,8) * idistx +
FbGet8(br,8) * distx;
482 7.3994 : r |= (((ft * idisty + fb * disty) >> 8)
& 0xff00);
: ft = FbGet8(tl,16) * idistx +
FbGet8(tr,16) * distx;
: fb = FbGet8(bl,16) * idistx +
FbGet8(br,16) * distx;
514 7.8907 : r |= (((ft * idisty + fb * disty)) &
0xff0000);
: ft = FbGet8(tl,24) * idistx +
FbGet8(tr,24) * distx;
: fb = FbGet8(bl,24) * idistx +
FbGet8(br,24) * distx;
: r |= (((ft * idisty + fb * disty) << 8)
& 0xff000000);
512 7.8600 : buffer[i] = r;
: }
13 0.1996 : v.vector[0] += unit.vector[0];
11 0.1689 : v.vector[1] += unit.vector[1];
39 0.5987 : v.vector[2] += unit.vector[2];
: }
: } else {
: for (i = 0; i < width; ++i) {
: if (!v.vector[2]) {
: buffer[i] = 0;
: } else {
: int x1, x2, y1, y2, distx, idistx,
disty, idisty;
: FbBits *b;
: CARD32 tl, tr, bl, br, r;
: CARD32 ft, fb;
:
: if (!affine) {
: xFixed_48_16 div;
: div = ((xFixed_48_16)v.vector[0] <<
16)/v.vector[2];
: x1 = div >> 16;
: distx = ((xFixed)div >> 8) & 0xff;
: div = ((xFixed_48_16)v.vector[1] <<
16)/v.vector[2];
: y1 = div >> 16;
: disty = ((xFixed)div >> 8) & 0xff;
: } else {
: x1 = v.vector[0] >> 16;
: distx = (v.vector[0] >> 8) & 0xff;
: y1 = v.vector[1] >> 16;
: disty = (v.vector[1] >> 8) & 0xff;
: }
: x2 = x1 + 1;
: y2 = y1 + 1;
:
: idistx = 256 - distx;
: idisty = 256 - disty;
:
: x1 = MOD (x1, pict->pDrawable->width);
: x2 = MOD (x2, pict->pDrawable->width);
: y1 = MOD (y1, pict->pDrawable->height);
: y2 = MOD (y2, pict->pDrawable->height);
:
: b = bits + (y1 + dy)*stride;
:
: tl = POINT_IN_REGION(0,
pict->pCompositeClip, x1 + dx, y1 + dy, &box)
: ? fetch(b, x1 + dx, indexed) : 0;
: tr = POINT_IN_REGION(0,
pict->pCompositeClip, x2 + dx, y1 + dy, &box)
: ? fetch(b, x2 + dx, indexed) : 0;
: b = bits + (y2 + dy)*stride;
: bl = POINT_IN_REGION(0,
pict->pCompositeClip, x1 + dx, y2 + dy, &box)
: ? fetch(b, x1 + dx, indexed) : 0;
: br = POINT_IN_REGION(0,
pict->pCompositeClip, x2 + dx, y2 + dy, &box)
: ? fetch(b, x2 + dx, indexed) : 0;
:
: ft = FbGet8(tl,0) * idistx +
FbGet8(tr,0) * distx;
: fb = FbGet8(bl,0) * idistx +
FbGet8(br,0) * distx;
: r = (((ft * idisty + fb * disty) >> 16)
& 0xff);
: ft = FbGet8(tl,8) * idistx +
FbGet8(tr,8) * distx;
: fb = FbGet8(bl,8) * idistx +
FbGet8(br,8) * distx;
: r |= (((ft * idisty + fb * disty) >> 8)
& 0xff00);
: ft = FbGet8(tl,16) * idistx +
FbGet8(tr,16) * distx;
: fb = FbGet8(bl,16) * idistx +
FbGet8(br,16) * distx;
: r |= (((ft * idisty + fb * disty)) &
0xff0000);
: ft = FbGet8(tl,24) * idistx +
FbGet8(tr,24) * distx;
: fb = FbGet8(bl,24) * idistx +
FbGet8(br,24) * distx;
: r |= (((ft * idisty + fb * disty) << 8)
& 0xff000000);
: buffer[i] = r;
: }
: v.vector[0] += unit.vector[0];
: v.vector[1] += unit.vector[1];
: v.vector[2] += unit.vector[2];
: }
: }
: } else {
: if (REGION_NUM_RECTS(pict->pCompositeClip) == 1) {
: box = pict->pCompositeClip->extents;
24 0.3684 : for (i = 0; i < width; ++i) {
23 0.3531 : if (!v.vector[2]) {
: buffer[i] = 0;
: } else {
: int x1, x2, y1, y2, distx, idistx,
disty, idisty, x_off;
: FbBits *b;
: CARD32 tl, tr, bl, br, r;
: Bool x1_out, x2_out, y1_out, y2_out;
: CARD32 ft, fb;
:
16 0.2456 : if (!affine) {
: xFixed_48_16 div;
: div = ((xFixed_48_16)v.vector[0] <<
16)/v.vector[2];
: x1 = div >> 16;
: distx = ((xFixed)div >> 8) & 0xff;
: div = ((xFixed_48_16)v.vector[1] <<
16)/v.vector[2];
: y1 = div >> 16;
: disty = ((xFixed)div >> 8) & 0xff;
: } else {
26 0.3991 : x1 = v.vector[0] >> 16;
12 0.1842 : distx = (v.vector[0] >> 8) & 0xff;
14 0.2149 : y1 = v.vector[1] >> 16;
22 0.3377 : disty = (v.vector[1] >> 8) & 0xff;
: }
11 0.1689 : x2 = x1 + 1;
50 0.7676 : y2 = y1 + 1;
:
13 0.1996 : idistx = 256 - distx;
: idisty = 256 - disty;
:
53 0.8136 : b = bits + (y1 + dy)*stride;
38 0.5834 : x_off = x1 + dx;
:
61 0.9364 : x1_out = (x1 < box.x1-dx) | (x1 >=
box.x2-dx);
50 0.7676 : x2_out = (x2 < box.x1-dx) | (x2 >=
box.x2-dx);
43 0.6601 : y1_out = (y1 < box.y1-dy) | (y1 >=
box.y2-dy);
33 0.5066 : y2_out = (y2 < box.y1-dy) | (y2 >=
box.y2-dy);
:
77 1.1821 : tl = x1_out|y1_out ? 0 : fetch(b,
x_off, indexed);
94 1.4430 : tr = x2_out|y1_out ? 0 : fetch(b, x_off
+ 1, indexed);
14 0.2149 : b += stride;
76 1.1667 : bl = x1_out|y2_out ? 0 : fetch(b,
x_off, indexed);
79 1.2128 : br = x2_out|y2_out ? 0 : fetch(b, x_off
+ 1, indexed);
:
: ft = FbGet8(tl,0) * idistx +
FbGet8(tr,0) * distx;
: fb = FbGet8(bl,0) * idistx +
FbGet8(br,0) * distx;
247 3.7918 : r = (((ft * idisty + fb * disty) >> 16)
& 0xff);
: ft = FbGet8(tl,8) * idistx +
FbGet8(tr,8) * distx;
: fb = FbGet8(bl,8) * idistx +
FbGet8(br,8) * distx;
346 5.3116 : r |= (((ft * idisty + fb * disty) >> 8)
& 0xff00);
: ft = FbGet8(tl,16) * idistx +
FbGet8(tr,16) * distx;
: fb = FbGet8(bl,16) * idistx +
FbGet8(br,16) * distx;
257 3.9453 : r |= (((ft * idisty + fb * disty)) &
0xff0000);
: ft = FbGet8(tl,24) * idistx +
FbGet8(tr,24) * distx;
: fb = FbGet8(bl,24) * idistx +
FbGet8(br,24) * distx;
: r |= (((ft * idisty + fb * disty) << 8)
& 0xff000000);
390 5.9871 : buffer[i] = r;
: }
10 0.1535 : v.vector[0] += unit.vector[0];
9 0.1382 : v.vector[1] += unit.vector[1];
17 0.2610 : v.vector[2] += unit.vector[2];
: }
: } else {
: for (i = 0; i < width; ++i) {
: if (!v.vector[2]) {
: buffer[i] = 0;
: } else {
: int x1, x2, y1, y2, distx, idistx,
disty, idisty, x_off;
: FbBits *b;
: CARD32 tl, tr, bl, br, r;
: CARD32 ft, fb;
:
: if (!affine) {
: xFixed_48_16 div;
: div = ((xFixed_48_16)v.vector[0] <<
16)/v.vector[2];
: x1 = div >> 16;
: distx = ((xFixed)div >> 8) & 0xff;
: div = ((xFixed_48_16)v.vector[1] <<
16)/v.vector[2];
: y1 = div >> 16;
: disty = ((xFixed)div >> 8) & 0xff;
: } else {
: x1 = v.vector[0] >> 16;
: distx = (v.vector[0] >> 8) & 0xff;
: y1 = v.vector[1] >> 16;
: disty = (v.vector[1] >> 8) & 0xff;
: }
: x2 = x1 + 1;
: y2 = y1 + 1;
:
: idistx = 256 - distx;
: idisty = 256 - disty;
:
: b = bits + (y1 + dy)*stride;
: x_off = x1 + dx;
:
: tl = POINT_IN_REGION(0,
pict->pCompositeClip, x1 + dx, y1 + dy, &box)
: ? fetch(b, x_off, indexed) : 0;
: tr = POINT_IN_REGION(0,
pict->pCompositeClip, x2 + dx, y1 + dy, &box)
: ? fetch(b, x_off + 1, indexed) : 0;
: b += stride;
: bl = POINT_IN_REGION(0,
pict->pCompositeClip, x1 + dx, y2 + dy, &box)
: ? fetch(b, x_off, indexed) : 0;
: br = POINT_IN_REGION(0,
pict->pCompositeClip, x2 + dx, y2 + dy, &box)
: ? fetch(b, x_off + 1, indexed) : 0;
:
: ft = FbGet8(tl,0) * idistx +
FbGet8(tr,0) * distx;
: fb = FbGet8(bl,0) * idistx +
FbGet8(br,0) * distx;
: r = (((ft * idisty + fb * disty) >> 16)
& 0xff);
: ft = FbGet8(tl,8) * idistx +
FbGet8(tr,8) * distx;
: fb = FbGet8(bl,8) * idistx +
FbGet8(br,8) * distx;
: r |= (((ft * idisty + fb * disty) >> 8)
& 0xff00);
: ft = FbGet8(tl,16) * idistx +
FbGet8(tr,16) * distx;
: fb = FbGet8(bl,16) * idistx +
FbGet8(br,16) * distx;
: r |= (((ft * idisty + fb * disty)) &
0xff0000);
: ft = FbGet8(tl,24) * idistx +
FbGet8(tr,24) * distx;
: fb = FbGet8(bl,24) * idistx +
FbGet8(br,24) * distx;
: r |= (((ft * idisty + fb * disty) << 8)
& 0xff000000);
: buffer[i] = r;
: }
: v.vector[0] += unit.vector[0];
: v.vector[1] += unit.vector[1];
: v.vector[2] += unit.vector[2];
: }
: }
: }
: } else if (pict->filter == PictFilterConvolution) {
: xFixed *params = pict->filter_params;
: INT32 cwidth = xFixedToInt(params[0]);
: INT32 cheight = xFixedToInt(params[1]);
: int xoff = (params[0] - xFixed1) >> 1;
: int yoff = (params[1] - xFixed1) >> 1;
: params += 2;
: for (i = 0; i < width; ++i) {
: if (!v.vector[2]) {
: buffer[i] = 0;
: } else {
: int x1, x2, y1, y2, x, y;
: INT32 srtot, sgtot, sbtot, satot;
: xFixed *p = params;
:
: if (!affine) {
: xFixed_48_16 tmp;
: tmp = ((xFixed_48_16)v.vector[0] <<
16)/v.vector[2] - xoff;
: x1 = xFixedToInt(tmp);
: tmp = ((xFixed_48_16)v.vector[1] <<
16)/v.vector[2] - yoff;
: y1 = xFixedToInt(tmp);
: } else {
: x1 = xFixedToInt(v.vector[0] - xoff);
: y1 = xFixedToInt(v.vector[1] - yoff);
: }
: x2 = x1 + cwidth;
: y2 = y1 + cheight;
:
: srtot = sgtot = sbtot = satot = 0;
:
: for (y = y1; y < y2; y++) {
: int ty = (pict->repeatType == RepeatNormal)
? MOD (y, pict->pDrawable->height) : y;
: for (x = x1; x < x2; x++) {
: if (*p) {
: int tx = (pict->repeatType ==
RepeatNormal) ? MOD (x, pict->pDrawable->width) : x;
: if (POINT_IN_REGION (0,
pict->pCompositeClip, tx + dx, ty + dy, &box)) {
: FbBits *b = bits + (ty +
dy)*stride;
: CARD32 c = fetch(b, tx + dx,
indexed);
:
: srtot += Red(c) * *p;
: sgtot += Green(c) * *p;
: sbtot += Blue(c) * *p;
: satot += Alpha(c) * *p;
: }
: }
: p++;
: }
: }
:
: satot >>= 16;
: srtot >>= 16;
: sgtot >>= 16;
: sbtot >>= 16;
:
: if (satot < 0) satot = 0; else if (satot >
0xff) satot = 0xff;
: if (srtot < 0) srtot = 0; else if (srtot >
0xff) srtot = 0xff;
: if (sgtot < 0) sgtot = 0; else if (sgtot >
0xff) sgtot = 0xff;
: if (sbtot < 0) sbtot = 0; else if (sbtot >
0xff) sbtot = 0xff;
:
: buffer[i] = ((satot << 24) |
: (srtot << 16) |
: (sgtot << 8) |
: (sbtot ));
: }
: v.vector[0] += unit.vector[0];
: v.vector[1] += unit.vector[1];
: v.vector[2] += unit.vector[2];
: }
: }
:}
_______________________________________________
Devel mailing list
[email protected]
http://mailman.laptop.org/mailman/listinfo/devel