Thanks Matan, I was getting worried that I had some strange computers which
were playing tricks with me ;)

I forgot to mention that I am running Red Hat 7.2 with kernel 2.4.14. My
PIII has 384 MB of DRAM and runs at 667 MHz. The results are completely
reproducible, with nothing else running in the background.

I realised afterwards that I should also have measured the time in the
server, so I attach the fixed program, which still gives the same results.

I guess there have been some major improvements in the kernel code: running
the same program on my laptop (PIII 500 MHz, 128 MB DRAM, kernel 2.4.18)
results in:

Memcpy'ed 2000 blocks of size 1048576 in 11 seconds => 181 Mbytes/second
Sent 2000 blocks of size 1048576 in 5 seconds over unix socket => 400 
Mbytes/second
Received 2097152000 bytes in 5 seconds over unix socket =>  400 
Mbytes/second

Even though zero-copy is not being done, isn't it surprising how much 
faster it
is to send data over a socket than just to copy it from one buffer to 
another ;)


Malcolm

Matan wrote:

>On Sun, 17 Mar 2002, Nadav Har'El wrote:
>
>>On Sun, Mar 17, 2002, guy keren wrote about "Re: pthreads question":
>>
>>>On Sat, 16 Mar 2002, Malcolm Kavalsky wrote:
>>>
>>>>I attach a program benchmark.c that compares speed of memcpy versus data
>>>>transfer
>>>>over unix sockets.
>>>>
>>>on my PC (AMD k-6 II 366MHz w/256MB RAM, kernel 2.2.20):
>>>
>>>[choo@simey ~]$ gcc -O2 benchmark.c
>>>[choo@simey ~]$ ./a.out
>>>Memcpy'ed 2000 blocks of size 1048576 in 38 seconds => 52 Mbytes/second
>>>Sent 2000 blocks of size 1048576 in 94 seconds over unixsocket => 21
>>>Mbytes/second
>>>[choo@simey ~]$
>>>
>>Similarly, on Redhat 7.2 and kernel 2.4.7-10, Pentium III (Katmai) 500 MHz:
>>$ a.out
>>Memcpy'ed 2000 blocks of size 1048576 in 12 seconds => 166 Mbytes/second
>>Sent 2000 blocks of size 1048576 in 14 seconds over unix socket => 142 Mbytes/second
>>
>
>Just for all to know that Kavalsky's results are not unique: this is on
>a MDK8.1 system (glibc-2.2.4, kernel 2.4.17) with Athlon 1GHz and 133MHz
>SDRAM:
>Memcpy'ed 2000 blocks of size 1048576 in 7 seconds => 285 Mbytes/second
>Sent 2000 blocks of size 1048576 in 4 seconds over unix socket => 500
>Mbytes/second
>
>(the 7s is sometimes 8s, and the 4s is sometimes 5s).
>
>So there might be some improvements in the kernel between 2.4.7 and
>2.4.17.
>
>

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include <time.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#define BUFSIZE 0x100000  /* 1 Megabyte */
#define NBLOCKS   2000
#define PORT_NAME    "/tmp/foo"

/*
 * Server side of the unix-socket benchmark.
 *
 * Binds a stream socket to PORT_NAME, accepts a single connection and reads
 * from it until the peer closes, then prints the number of bytes received,
 * the elapsed wall-clock seconds and the resulting throughput.
 *
 * The clock is started only after the first read() returns, so the time
 * spent waiting for the client to connect and start sending is not counted
 * (the bytes delivered by that first read are).
 *
 * Failures are reported on stdout and the function returns early after
 * releasing whatever it had acquired so far.
 */
void server()
{
  struct sockaddr_un sin, from;
  socklen_t len;            /* accept() takes a socklen_t *, not an int * */
  int s, g;
  ssize_t n;
  char *buf;
  double nbytes = 0;        /* double: a float cannot count ~2e9 bytes exactly */
  time_t start_time, elapsed_time;

  buf = malloc( BUFSIZE );
  if( buf == NULL ){
    printf( "Bad malloc\n");
    return;
  }
  /* Create an unbound socket */
  if( (s=socket( PF_UNIX, SOCK_STREAM, 0 )) < 0 ){
    printf( "Bad socket\n");
    free( buf );
    return;
  }
  /* A socket file left behind by an interrupted run would make bind() fail */
  unlink( PORT_NAME );
  /* Zero the whole address so no indeterminate bytes are handed to bind() */
  memset( &sin, 0, sizeof(sin) );
  sin.sun_family = AF_UNIX;
  snprintf( sin.sun_path, sizeof(sin.sun_path), "%s", PORT_NAME );
  if( bind( s, (struct sockaddr *)&sin, sizeof(sin) ) < 0 ){
    printf( "Bad bind\n");
    close( s );
    free( buf );
    return;
  }
  if( listen( s, 5 ) < 0 ){
    printf( "Bad listen\n");
    goto out;
  }
  len = sizeof(from);
  if( (g = accept( s, (struct sockaddr *)&from, &len )) < 0 ){
    printf( "Bad accept\n");
    goto out;
  }

  /* First read blocks until data arrives; timing starts once it has */
  n = read( g, buf, BUFSIZE );
  if( n > 0 )
    nbytes = (double)n;
  start_time = time(0);
  while( n > 0 && (n = read( g, buf, BUFSIZE )) > 0 ) {
    nbytes += (double)n;
  }
  elapsed_time = time(0) - start_time;
  if( n < 0 )
    printf( "Bad read\n");
  close(g);

  printf( "Received %10.0f bytes in %d seconds over unix socket =>",
          nbytes, (int)elapsed_time );
  /* time() has 1-second resolution: a zero interval gives no usable rate */
  if( elapsed_time > 0 )
    printf( " %4.0f Mbytes/second \n",
            nbytes / (0x100000 * (double)elapsed_time) );
  else
    printf( " too fast to measure with a 1-second clock \n");

out:
  close(s);
  unlink( PORT_NAME );
  free( buf );
}

/*
 * Client side of the unix-socket benchmark.
 *
 * Connects to the stream socket at PORT_NAME, writes up to NBLOCKS blocks
 * of BUFSIZE bytes and prints how many blocks were sent, the elapsed
 * wall-clock seconds and the resulting throughput.  A short write is
 * continued until the block is complete; a failed write ends the run and
 * only fully written blocks are counted.
 *
 * Failures are reported on stdout and the function returns early after
 * releasing whatever it had acquired so far.
 */
void client()
{
  struct sockaddr_un sin;
  const size_t block = BUFSIZE;
  int s;
  char *buf;
  time_t start_time, elapsed_time;
  int i;

  buf = malloc( block );
  if( buf == NULL ){
    printf( "Bad malloc\n");
    return;
  }
  /* Give the payload defined contents instead of sending indeterminate heap */
  memset( buf, 0x5a, block );

  if( (s=socket( PF_UNIX, SOCK_STREAM, 0 )) < 0 ){
    printf( "Bad socket\n");
    free( buf );
    return;
  }
  /* Zero the whole address: connect() is given sizeof(sin) bytes of it */
  memset( &sin, 0, sizeof(sin) );
  sin.sun_family = AF_UNIX;
  snprintf( sin.sun_path, sizeof(sin.sun_path), "%s", PORT_NAME );
  if( connect( s, (struct sockaddr *)&sin, sizeof(sin)) < 0 ){
    printf("Bad connect\n");
    close(s);
    free( buf );
    return;
  }

  start_time = time(0);
  for( i = 0; i < NBLOCKS; i++ ) {
    size_t sent = 0;

    /* write() may accept fewer bytes than requested; finish the block */
    while( sent < block ) {
      ssize_t w = write( s, buf + sent, block - sent );
      if( w <= 0 )
        break;
      sent += (size_t)w;
    }
    if( sent < block )
      break;
  }
  elapsed_time = time(0) - start_time;
  close(s);
  free( buf );

  printf( "Sent %d blocks of size %d in %d seconds over unix socket =>",
          i, BUFSIZE, (int)elapsed_time );
  /*
   * Use the number of blocks actually sent (i), not NBLOCKS, and avoid the
   * integer division by zero that a sub-second run would otherwise cause.
   */
  if( elapsed_time > 0 )
    printf( " %lld Mbytes/second \n",
            ((long long)i * BUFSIZE) / (0x100000LL * (long long)elapsed_time) );
  else
    printf( " too fast to measure with a 1-second clock \n");

}

/*
 * Times NBLOCKS memcpy() calls of BUFSIZE bytes between two heap buffers
 * and prints the block count, block size, elapsed wall-clock seconds and
 * the resulting throughput.
 *
 * Both buffers are filled before the clock starts so that the copy reads
 * initialized data and first-touch page faults are not part of the timing.
 */
void memcpy_benchmark()
{
  char *src, *dst;
  volatile char sink;
  time_t start_time, elapsed_time;
  int i;

  src = malloc ( BUFSIZE );
  dst = malloc ( BUFSIZE );
  if( src == NULL || dst == NULL ){
    printf( "Bad malloc\n");
    free( src );
    free( dst );
    return;
  }
  memset( src, 0x5a, BUFSIZE );
  memset( dst, 0, BUFSIZE );

  start_time = time(0);
  for( i=0; i< NBLOCKS; i++ )
    memcpy( dst, src, BUFSIZE );
  elapsed_time = time(0) - start_time;

  /* Consume the result so the copies are not dead stores to the optimizer */
  sink = dst[BUFSIZE - 1];
  (void)sink;

  printf( "Memcpy'ed %d blocks of size %d in %d seconds =>",
          NBLOCKS, BUFSIZE, (int)elapsed_time );
  /* time() has 1-second resolution; a zero interval must not be divided by */
  if( elapsed_time > 0 )
    printf( " %lld Mbytes/second\n",
            ((long long)NBLOCKS * BUFSIZE) / (0x100000LL * (long long)elapsed_time) );
  else
    printf( " too fast to measure with a 1-second clock\n");

  free( src );
  free( dst );
}

/*
 * Runs the unix-socket benchmark: forks a child that executes server()
 * while the parent executes client(), then reaps the child.
 *
 * The child terminates itself once server() returns; it must not fall
 * through into the parent's code path and return to the caller.
 */
void socket_benchmark()
{
  int status;
  pid_t pid;

  /*
   * Flush pending output before forking: when stdout is a file or pipe,
   * buffered text would otherwise be copied into the child and printed twice.
   */
  fflush( stdout );

  pid = fork();
  if ( pid < 0 ) {
    printf( "Bad fork\n");
    return;
  }
  if ( pid == 0 ) {
    server();
    fflush( stdout );
    _exit(0);
  }

  sleep(1); /* Dirty, but ensures client runs after server is ready */
  client();
  wait(&status);
}

/*
 * Program entry point: runs the in-memory copy benchmark first, then the
 * unix-socket transfer benchmark, and exits with status 0.
 */
int main()
{
  /* Baseline: plain buffer-to-buffer copy */
  memcpy_benchmark();

  /* Same volume of data pushed through a unix stream socket */
  socket_benchmark();

  return 0;
}

Reply via email to