In following up on some performance problems on the
zSeries, we've noticed that the zSeries is very
sensitive to working set size, especially for writes.
This may explain some of the "poor" performance that
people ascribe to the zSeries.
Is locality of reference as senstive on other
platforms? How does this effect such things as VM EC
guests, data base programs with large tables, and java
programs?
Consider the loop below:
It riffles through memory sequentially a byte at a
time. It writes (flag=1) to each byte within a page
before going to the next page. In other words, it has
a "nice" worksing set size. (See complete program
below for details).
byteSingle=255;
time(&start);
for (i=1;i<=iter;i++) {
hits=0;
for (page=0;page<bytes;page=page+4096)
for (byteINpage=0;byteINpage<4096;byteINpage++)
{
hits++;
byteAddr=page+byteINpage;
if (flag) byteArray[byteAddr]=byteSingle;
else byteSingle= byteArray[byteAddr];
}
}
time(&finish);
Consider what happens when you reverise the page and
byteINpage loops:
for (byteINpage=0;byteINpage<4096;byteINpage++)
for (page=0;page<bytes;page=page+4096)
where you touch a byte in each page before going to
the second byte. The working set becomes "terrible".
And so does the performance.
Complete program:
/*
MemSeq - memory loop, reading or writing,
sequentially
[EMAIL PROTECTED]
this is uncopyrighted material, August 5, 2003
use at your own risk
*/
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
void usage();
int main(
int argc,
char * argv[])
{
char byteArray[ 512*1024*1024 ];
char byteSingle;
int i,j;
int next;
int mb=1;
int bytes, byteAddr, byteINpage
int hits;
int page;
int iter=0;
int flag=0;
time_t start;
time_t finish;
/* pick apart args */
for (next = 1; next < argc; next++)
if (memcmp(argv[next], "-m", 2) == 0)
{
mb = atol(argv[next][2]? &argv[next][2]:
argv[++next]);
}
else if (memcmp(argv[next], "-i", 2) == 0)
{
iter = atol(argv[next][2]? &argv[next][2]:
argv[++next]);
}
else if (memcmp(argv[next], "-w", 2) == 0) flag=1;
else if (memcmp(argv[next], "-r", 2) == 0) flag=0;
else usage();
bytes = mb*1024*1024;
printf("MemSeq ");
if ( flag ) printf("Write");
else printf("Read " );
printf( " %d MB, %d iterations ", mb,iter);
/* initiliaze memory to something other than all zeros
*/
j=0;
if ( iter > 0)
for (i=0;i<bytes;i=i++) {
byteArray[i]=j;
j++;
if (j == 256) j=0;
}
/* riffle through memory, writing or reading,
sequentially */
/* the page and byteINpage loops are reversed from
MemPage */
byteSingle=255;
time(&start);
for (i=1;i<=iter;i++) {
hits=0;
for (page=0;page<bytes;page=page+4096)
for (byteINpage=0;byteINpage<4096;byteINpage++)
{
hits++;
byteAddr=page+byteINpage;
if (flag) byteArray[byteAddr]=byteSingle;
else byteSingle= byteArray[byteAddr];
}
}
time(&finish);
printf(" %.0f seconds %d bytes/iter
\n",difftime(finish,start),hits);
}
void
usage()
{
printf( "MemSeq USAGE:\n");
printf(
"MemSeq [-i KB ] [-m MB [-w | -r] \n"
" -i iterations (default=0 iterations) \n"
" (default is 1 \n"
" -m bytes to malloc in MB (default=1 MB)\n"
" -w memory write\n"
" -r memory read (default)\n"
);
exit(1);
}
=====
Jim Sibley
Implementor of Linux on zSeries in the beautiful Silicon Valley
"Computer are useless.They can only give answers." Pablo Picasso
__________________________________
Do you Yahoo!?
Yahoo! SiteBuilder - Free, easy-to-use web site design software
http://sitebuilder.yahoo.com