Eureka!
Nadav Har'El wrote:
> On Mon, Mar 18, 2002, Malcolm Kavalsky wrote about "Re: pthreads
> question":
>
>> I asked one of the top Unix hackers that I know, and he said:
>>
>> "I would guess that if you do large af_unix transfers that are page
>> aligned then the system doesn't have to actually copy the data rather it
>> can share the page and do a copy on write. This preserves the socket
>> semantics and can be faster than memcpy. This was done many years ago in
>> Solaris."
>>
>> I wonder if digging deep enough in the kernel sources will reveal
>> this ...
>
>
> You can try to check if this is the case, by following each send or
> memcpy
> by a memset() of the buffer. If the memcpy method suddenly becomes
> quicker,
> this explanation might be true.
> Strange though - how come malloc() returns page-aligned buffers? Does the
> Linux code really check for this rare and rather esoteric case (if you
> write to the buffer after sending it, and the kernel can't know you're
> writing whole pages, it will have to do a copy-on-write and do the copy
> anyway)?
>
This is exactly what happened! I added a memset after the memcpy, and also
after sending
the buffer. The results are:
Memcpy'ed and memsetted 1000 blocks of size 1048576 in 18 seconds => 55
Mbytes/second
Started receiving at Mon Mar 18 13:41:13 2002
Received 1048576000 bytes in 17 seconds over unix socket => 59
Mbytes/second
Started sending at Mon Mar 18 13:41:13 2002
Sent and memsetted 1000 blocks of size 1048576 in 17 seconds over unix
socket => 58 Mbytes/second
(Note that I also added printing of the exact time at which sending and
receiving started, to ensure there is no
delay between the two.)
I also attach the source file for reference.
Malcolm
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/wait.h>
/* Size in bytes of every buffer the benchmarks allocate, copy, send or read. */
#define BUFSIZE 0x100000 /* 1 Megabyte */
/* Number of BUFSIZE-sized blocks the copy loop and the sending loop process. */
#define NBLOCKS 1000
/* Filesystem path used as the address of the unix-domain socket. */
#define PORT_NAME "/tmp/foo"
/*
 * Receiving side of the unix-socket benchmark.
 *
 * Binds a stream socket to PORT_NAME, accepts a single connection and reads
 * BUFSIZE-sized chunks until read() reports end-of-file or an error.  It then
 * prints the time reception started, the number of bytes received and the
 * resulting throughput in Mbytes/second.
 *
 * The first read() is issued before the clock is started, so the time spent
 * waiting for the peer's first data is not measured; the bytes it returns are
 * still counted in the total.
 *
 * Takes no parameters and returns nothing.  On any failure a short
 * "Bad ..." message is printed to stdout and the function returns after
 * releasing the buffer, the descriptors and (once bound) the socket file.
 */
void server()
{
    struct sockaddr_un sin, from;
    socklen_t len;
    int s = -1, g = -1;
    int bound = 0;
    ssize_t n;
    char *buf;
    double nbytes = 0.0; /* double: a float cannot count ~1e9 bytes exactly */
    double secs;
    time_t start_time, elapsed_time;

    buf = malloc( BUFSIZE );
    if( buf == NULL ){
        printf( "Bad malloc\n" );
        return;
    }

    /* Create an unbound socket */
    if( (s = socket( PF_UNIX, SOCK_STREAM, 0 )) < 0 ){
        printf( "Bad socket\n" );
        goto out;
    }

    /* Zero the whole address so no indeterminate bytes are handed to bind(). */
    memset( &sin, 0, sizeof(sin) );
    sin.sun_family = AF_UNIX;
    snprintf( sin.sun_path, sizeof(sin.sun_path), "%s", PORT_NAME );

    /* A socket file left behind by an earlier run would make bind() fail. */
    unlink( PORT_NAME );
    if( bind( s, (struct sockaddr *)&sin, sizeof(sin) ) < 0 ){
        printf( "Bad bind\n" );
        goto out;
    }
    bound = 1;

    if( listen( s, 5 ) < 0 ){
        printf( "Bad listen\n" );
        goto out;
    }

    len = sizeof(from);
    if( (g = accept( s, (struct sockaddr *)&from, &len )) < 0 ){
        printf( "Bad accept\n" );
        goto out;
    }

    /* Untimed first read; only a positive count contributes to the total. */
    n = read( g, buf, BUFSIZE );
    if( n > 0 ){
        nbytes = (double)n;
    }

    start_time = time(0);
    while( (n = read( g, buf, BUFSIZE )) > 0 ){
        nbytes += (double)n;
    }
    elapsed_time = time(0) - start_time;

    /*
     * time() counts whole seconds, so a quick run can measure 0 seconds.
     * Use one second for the rate in that case instead of dividing by zero.
     */
    secs = elapsed_time > 0 ? (double)elapsed_time : 1.0;

    printf("\nStarted receiving at %s", ctime( &start_time ));
    printf( "Received %10.0f bytes in %d seconds over unix socket =>",
            nbytes, (int)elapsed_time );
    printf( " %4.0f Mbytes/second \n", nbytes / (0x100000 * secs) );

out:
    if( g >= 0 ){
        close(g);
    }
    if( s >= 0 ){
        close(s);
    }
    if( bound ){
        unlink( PORT_NAME );
    }
    free( buf );
}
/*
 * Sending side of the unix-socket benchmark.
 *
 * Connects a stream socket to PORT_NAME and writes up to NBLOCKS blocks of
 * BUFSIZE bytes, calling memset() on the buffer after every block that was
 * written completely.  It then prints the time sending started, the number
 * of blocks actually sent, the elapsed seconds and the throughput in
 * Mbytes/second.
 *
 * Takes no parameters and returns nothing.  On failure to allocate, create
 * the socket or connect, a short "Bad ..." message is printed to stdout and
 * the function returns.  A write that fails or makes no progress ends the
 * sending loop early; the figures printed then cover only the blocks sent.
 */
void client()
{
    struct sockaddr_un sin;
    const size_t block = BUFSIZE;
    int s;
    char *buf;
    time_t start_time, elapsed_time;
    long long secs;
    int i;

    buf = malloc( BUFSIZE );
    if( buf == NULL ){
        printf( "Bad malloc\n" );
        return;
    }
    /* Give the first block defined contents before it is written out. */
    memset( buf, 'A', BUFSIZE );

    if( (s = socket( PF_UNIX, SOCK_STREAM, 0 )) < 0 ){
        printf( "Bad socket\n");
        free( buf );
        return;
    }

    /* sizeof(sin) is passed to connect(), so the whole struct must be set. */
    memset( &sin, 0, sizeof(sin) );
    sin.sun_family = AF_UNIX;
    snprintf( sin.sun_path, sizeof(sin.sun_path), "%s", PORT_NAME );

    if( connect( s, (struct sockaddr *)&sin, sizeof(sin)) < 0 ){
        printf("Bad connect\n");
        close(s);
        free( buf );
        return;
    }

    start_time = time(0);
    for( i = 0; i < NBLOCKS; i++ ){
        size_t off = 0;

        /* write() may accept fewer bytes than asked; continue with the rest. */
        while( off < block ){
            ssize_t w = write( s, buf + off, block - off );
            if( w <= 0 ){
                break;
            }
            off += (size_t)w;
        }
        if( off < block ){
            break;
        }
        memset( buf, 'A', BUFSIZE );
    }
    elapsed_time = time(0) - start_time;
    close(s);

    /*
     * time() counts whole seconds, so a quick run can measure 0 seconds.
     * Use one second for the rate in that case instead of dividing by zero.
     */
    secs = elapsed_time > 0 ? (long long)elapsed_time : 1;

    printf("\nStarted sending at %s", ctime( &start_time ));
    printf( "Sent and memsetted %d blocks of size %d in %d seconds over unix socket =>",
            i, BUFSIZE, (int)elapsed_time );
    /* Rate is based on the blocks actually sent (i), not on NBLOCKS. */
    printf( " %d Mbytes/second \n",
            (int)(((long long)i * BUFSIZE) / (0x100000LL * secs)) );

    free( buf );
}
/*
 * In-memory baseline for the socket benchmark.
 *
 * Allocates two BUFSIZE buffers and, NBLOCKS times, copies one into the
 * other with memcpy() and then overwrites the destination with memset().
 * Prints the block count, block size, elapsed seconds and the throughput in
 * Mbytes/second.
 *
 * Takes no parameters and returns nothing.  If either allocation fails,
 * "Bad malloc" is printed to stdout and nothing is measured.
 */
void memcpy_benchmark()
{
    char *src, *dst;
    time_t start_time, elapsed_time;
    long long secs;
    int i;

    src = malloc ( BUFSIZE );
    dst = malloc ( BUFSIZE );
    if( src == NULL || dst == NULL ){
        printf( "Bad malloc\n" );
        free( src );
        free( dst );
        return;
    }
    /* Give the source defined contents before it is copied. */
    memset( src, 'A', BUFSIZE );

    /*
     * NOTE(review): dst is never read back, so an optimising compiler may be
     * allowed to drop some of these stores - confirm with the build flags in
     * use before trusting the figure.
     */
    start_time = time(0);
    for( i = 0; i < NBLOCKS; i++ ){
        memcpy( dst, src, BUFSIZE );
        memset( dst, 'A', BUFSIZE );
    }
    elapsed_time = time(0) - start_time;

    /*
     * time() counts whole seconds, so a quick run can measure 0 seconds.
     * Use one second for the rate in that case instead of dividing by zero.
     */
    secs = elapsed_time > 0 ? (long long)elapsed_time : 1;

    printf( "Memcpy'ed and memsetted %d blocks of size %d in %d seconds =>",
            NBLOCKS, BUFSIZE, (int)elapsed_time );
    printf( " %d Mbytes/second\n",
            (int)(((long long)NBLOCKS * BUFSIZE) / (0x100000LL * secs)) );

    free( src );
    free( dst );
}
/*
 * Runs the unix-socket benchmark in two processes.
 *
 * Forks; the child runs server() and then exits, the parent sleeps one
 * second, runs client() and finally waits for the child to terminate.
 *
 * Takes no parameters and returns nothing.  If fork() fails, "Bad fork" is
 * printed to stdout and neither side is run.
 */
void socket_benchmark()
{
    int status;
    pid_t pid;

    /* Flush first so output still buffered here is not written by both processes. */
    fflush( stdout );

    pid = fork();
    if( pid < 0 ){
        printf( "Bad fork\n" );
        return;
    }

    if( pid == 0 ){
        server();
        /* Leave here so the child does not go on to run the parent's code. */
        fflush( stdout );
        _exit(0);
    }

    sleep(1); /* Dirty, but ensures client runs after server is ready */
    client();
    waitpid( pid, &status, 0 );
}
/*
 * Program entry point: runs the in-memory copy benchmark, then the
 * unix-socket benchmark, and always reports success to the caller.
 */
int main()
{
    /* In-memory figure first, socket figure second. */
    memcpy_benchmark();

    socket_benchmark();

    return 0;
}