> >Results are *very* interesting.
>
> I would be interested to see what you think of them.
I attach the latest version of ctx.c, with stack pollution and shared-area
pollution.
My more or less obvious considerations follow:
- the cache has the most dramatic effect
- context switch time has relatively little impact (note that in theory it
has a greater effect on low-end machines, but having many components there
is not possible anyway, for other reasons)
- multiple threads/processes cannot share the stack, and this leads to
extra cache use (although this can be limited with a bit of care).
IMO all efforts need to be directed at reducing the cache footprint:
- using shared libraries and sharing all the shareable code is a
priority (one more argument for "one API")
- sharing temporary areas
Quick summary:
1) Share everything
2) Keep it small
3) Buy more CPUs and use multiprocess model ;-)
About approaches: I think that the API needs to be designed with both
approaches in mind. It has to be designed so that the approach is left as an
implementation detail. I know that this may be more difficult, but
failing here would compromise near-future possibilities.
--
Abramo Bagnara mailto:[EMAIL PROTECTED]
Opera Unica Phone: +39.546.656023
Via Emilia Interna, 140
48014 Castel Bolognese (RA) - Italy
ALSA project http://www.alsa-project.org
It sounds good!
/*
* Audio component approach cost meter
* Copyright (c) 2001 by Abramo Bagnara <[EMAIL PROTECTED]>
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Library General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
*/
#include <limits.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <sys/shm.h>
#include <sys/wait.h>
#include <asm/msr.h>
/* System V shared-memory key passed to shmget() by the multi-process test. */
#define KEY 0xabce
/* Period length; together with CHANNELS it sizes the sample buffer, and
 * main() uses it with RATE to turn cycles per period into a CPU percentage. */
#define PERIOD_SIZE 64
/* Rate value used by main() when converting a period into a time span. */
#define RATE 48000
/* Channel count; multiplied by PERIOD_SIZE to size the sample buffer. */
#define CHANNELS 26
/* Number of float samples every worker increments once per period. */
#define BUFSIZE (PERIOD_SIZE*CHANNELS)
/* Bytes of on-stack scratch each worker fills per period.  The workers guard
 * this with "#if STACK_POLLUTION_SIZE > 0", so 0 disables the stack fill. */
#define STACK_POLLUTION_SIZE (4*1024)
//#define STACK_POLLUTION_SIZE 0
/* Bytes of scratch inside the shared area that each worker fills per period.
 * The workers guard this with "#if SHARED_POLLUTION_SIZE > 0", so 0 disables
 * the shared fill. */
#define SHARED_POLLUTION_SIZE (64*1024)
//#define SHARED_POLLUTION_SIZE 0
/*
 * Working set touched by every worker on every period.
 *
 * samples   - BUFSIZE floats; each worker adds 1 to every element per period,
 *             which lets the callers verify the total amount of work done.
 * pollution - SHARED_POLLUTION_SIZE bytes that each worker overwrites per
 *             period to enlarge the shared cache footprint.
 */
struct space {
    float samples[BUFSIZE];
    char pollution[SHARED_POLLUTION_SIZE];
};

/*
 * Current working set.  It is pointed at heap memory by singlethread() and
 * multithread(), and at a System V shared-memory attachment by process() and
 * multiprocess().
 *
 * The pointee is deliberately NOT volatile-qualified: every user in this file
 * immediately converts it to a plain pointer (float *, or the void * taken by
 * memset/free/shmdt), so the qualifier never applied to any actual access and
 * only produced "discards qualifier" diagnostics.  Ordering between workers
 * is provided by the pipe reads/writes, not by this declaration.
 */
struct space *area;
/*
 * Perform one worker's share of a period in the calling thread: optionally
 * fill a stack scratch buffer, optionally fill the shared pollution area,
 * then add 1 to every sample in the global area.
 *
 * n - worker index supplied by the caller; it is not used by the body.
 */
void proc(unsigned int n)
{
    float *buf = area->samples;
    unsigned int i = 0;

#if STACK_POLLUTION_SIZE > 0
    {
        /* Touch a block of stack so it competes for cache lines. */
        char scratch[STACK_POLLUTION_SIZE];
        memset(scratch, 0xaa, sizeof(scratch));
    }
#endif
#if SHARED_POLLUTION_SIZE > 0
    /* Touch the shared scratch region as well. */
    memset(area->pollution, 0xaa, sizeof(area->pollution));
#endif
    while (i < BUFSIZE) {
        buf[i] += 1;
        i++;
    }
}
/*
 * Baseline measurement: run every worker's job sequentially in the calling
 * thread by calling proc() `workers` times per period, for `periods` periods.
 *
 * workers - number of proc() calls per period.
 * periods - number of periods to run.
 *
 * Returns the difference between the two rdtscll() readings taken
 * immediately before and after the work loop.
 *
 * The working set is heap-allocated for the duration of the call; the
 * process exits with a diagnostic if that allocation fails.  After the run,
 * every sample is asserted to equal periods * workers.
 */
unsigned long long singlethread(unsigned int workers, unsigned int periods)
{
    unsigned long long begin, end;
    unsigned int k;
    unsigned int n;
    unsigned int p;
    float *samples;

    area = malloc(sizeof(*area));
    if (area == NULL) {
        /* Without the working set there is nothing to measure. */
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    samples = area->samples;
    for (k = 0; k < BUFSIZE; k++)
        samples[k] = 0.0;

    p = periods;
    rdtscll(begin);
    while (p-- > 0) {
        /* Same unsigned type as `workers`: no signed/unsigned comparison. */
        for (n = 0; n < workers; ++n)
            proc(n);
    }
    rdtscll(end);

    /* Each proc() call added exactly 1 to every sample. */
    for (k = 0; k < BUFSIZE; k++)
        assert(samples[k] == periods * workers);

    free(area);
    area = NULL; /* do not leave the global pointing at freed memory */
    return end - begin;
}
/*
 * Per-worker bookkeeping for the multi-thread test.
 *
 * thread - handle filled in by pthread_create() and later joined.
 * in     - file descriptor the worker reads its token byte from.
 * out    - file descriptor the worker writes the token byte to.
 *
 * The table holds 100 entries.
 */
struct thr {
    pthread_t thread;
    int in;
    int out;
} threads[100];
/*
 * Worker thread body for the multi-thread test.
 *
 * data - pointer to this worker's struct thr (its in/out descriptors).
 *
 * Loop: block until one byte arrives on `in`.  A zero byte is the shutdown
 * token: it is forwarded to `out` and the thread returns.  Any other byte
 * triggers one unit of work (optional stack fill, optional shared fill, add
 * 1 to every sample) after which the byte is forwarded to `out`.
 *
 * Always returns a null pointer.
 */
void *thread(void *data)
{
    struct thr *self = data;
    float *buf = area->samples;
    char token[1];
    int rc;

    for (;;) {
        unsigned int i;

        rc = read(self->in, token, 1);
        assert(rc == 1);

        if (token[0] == 0) {
            /* Pass the shutdown token down the chain, then stop. */
            rc = write(self->out, token, 1);
            assert(rc == 1);
            return NULL;
        }

#if STACK_POLLUTION_SIZE > 0
        {
            char scratch[STACK_POLLUTION_SIZE];
            memset(scratch, 0xaa, sizeof(scratch));
        }
#endif
#if SHARED_POLLUTION_SIZE > 0
        memset(area->pollution, 0xaa, sizeof(area->pollution));
#endif
        for (i = 0; i < BUFSIZE; i++)
            buf[i] += 1;

        /* Hand the token to the next stage. */
        rc = write(self->out, token, 1);
        assert(rc == 1);
    }
}
/*
 * Multi-thread measurement: `workers` threads are chained together with
 * workers + 1 pipes.  The caller writes a one-byte token into the first
 * pipe; worker k reads it from pipe k, does its work and writes it into pipe
 * k + 1; the caller reads it back from the last pipe.  One such round trip
 * is one period.
 *
 * workers - number of worker threads; must not exceed the capacity of the
 *           global threads[] table (the process exits with a diagnostic
 *           otherwise).
 * periods - number of timed round trips.
 *
 * Returns the difference between the two rdtscll() readings taken
 * immediately before and after the timed round trips.
 *
 * One untimed round trip is done first so that every thread is known to be
 * running; the samples are zeroed after it.  After the timed run a zero
 * token shuts the chain down, the threads are joined, the pipes are closed,
 * and every sample is asserted to equal periods * workers.
 */
unsigned long long multithread(unsigned int workers, unsigned int periods)
{
    unsigned long long begin, end;
    int err;
    unsigned int k;
    char buf[1] = { 1 };
    float *samples;
    unsigned int p;
    int in, out;

    /* threads[] is a fixed-size table: refuse counts that would overrun it. */
    if (workers > sizeof(threads) / sizeof(threads[0])) {
        fprintf(stderr, "multithread: at most %lu workers are supported\n",
                (unsigned long) (sizeof(threads) / sizeof(threads[0])));
        exit(EXIT_FAILURE);
    }

    /* Declared after the guard so the VLA size is known to be small. */
    int fds[workers + 1][2];

    area = malloc(sizeof(*area));
    if (area == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    samples = area->samples;

    for (k = 0; k <= workers; ++k) {
        err = pipe(fds[k]);
        assert(err == 0);
    }
    /* The caller feeds the first pipe and drains the last one. */
    out = fds[0][1];
    in = fds[workers][0];
    for (k = 0; k < workers; ++k) {
        threads[k].in = fds[k][0];
        threads[k].out = fds[k + 1][1];
        err = pthread_create(&threads[k].thread, NULL, thread, &threads[k]);
        assert(err == 0);
    }

    /* Ensure all is started */
    err = write(out, buf, 1);
    assert(err == 1);
    err = read(in, buf, 1);
    assert(err == 1);

    /* The warm-up round incremented the samples; start counting from zero. */
    for (k = 0; k < BUFSIZE; k++)
        samples[k] = 0.0;

    p = periods;
    rdtscll(begin);
    while (p-- > 0) {
        err = write(out, buf, 1);
        assert(err == 1);
        err = read(in, buf, 1);
        assert(err == 1);
    }
    rdtscll(end);

    /* A zero byte tells each worker to forward it and terminate. */
    buf[0] = 0;
    err = write(out, buf, 1);
    assert(err == 1);
    for (k = 0; k < workers; ++k) {
        err = pthread_join(threads[k].thread, 0);
        assert(err == 0);
    }
    for (k = 0; k <= workers; ++k) {
        close(fds[k][0]);
        close(fds[k][1]);
    }

    for (k = 0; k < BUFSIZE; k++)
        assert(samples[k] == periods * workers);

    free(area);
    area = NULL; /* do not leave the global pointing at freed memory */
    return end - begin;
}
/*
 * Per-worker bookkeeping for the multi-process test.
 *
 * pid - process id of the forked worker, later passed to waitpid().
 * in  - file descriptor the worker reads its token byte from.
 * out - file descriptor the worker writes the token byte to.
 *
 * The table holds 100 entries.
 */
struct pro {
    pid_t pid;
    int in;
    int out;
} processes[100];
/*
 * Worker body for the multi-process test; runs in a forked child and never
 * returns to the caller.
 *
 * p - this worker's struct pro (its in/out descriptors).
 *
 * The child looks up the shared-memory segment by KEY, attaches it and
 * points the global `area` at that attachment.  It then loops: block until
 * one byte arrives on `in`.  A zero byte is the shutdown token: it is
 * forwarded to `out` and the child terminates.  Any other byte triggers one
 * unit of work (optional stack fill, optional shared fill, add 1 to every
 * sample) after which the byte is forwarded to `out`.
 */
void process(struct pro *p)
{
    char buf[1];
    int err;
    float *samples;
    int shmid = shmget(KEY, sizeof(*area), 0666);

    assert(shmid >= 0);
    area = shmat(shmid, NULL, 0);
    assert(area != (void *) -1);
    samples = area->samples;

    while (1) {
        unsigned int k;

        err = read(p->in, buf, 1);
        assert(err == 1);
        if (!buf[0]) {
            /* Pass the shutdown token down the chain, then stop. */
            err = write(p->out, buf, 1);
            assert(err == 1);
            break;
        }
#if STACK_POLLUTION_SIZE > 0
        {
            char pollution[STACK_POLLUTION_SIZE];
            memset(pollution, 0xaa, STACK_POLLUTION_SIZE);
        }
#endif
#if SHARED_POLLUTION_SIZE > 0
        memset(area->pollution, 0xaa, SHARED_POLLUTION_SIZE);
#endif
        for (k = 0; k < BUFSIZE; k++)
            samples[k]++;
        err = write(p->out, buf, 1);
        assert(err == 1);
    }

    /*
     * Use _exit(), not exit(): this is a forked child that inherited the
     * parent's stdio buffers.  exit() would flush any output the parent had
     * buffered but not yet written, so it would appear once per child in
     * addition to the parent's own copy when stdout is a file or a pipe.
     */
    _exit(0);
}
/*
 * Multi-process measurement: `workers` forked children are chained together
 * with workers + 1 pipes, and the working set lives in a System V
 * shared-memory segment identified by KEY.  The caller writes a one-byte
 * token into the first pipe; worker k reads it from pipe k, does its work
 * and writes it into pipe k + 1; the caller reads it back from the last
 * pipe.  One such round trip is one period.
 *
 * workers - number of worker processes; must not exceed the capacity of the
 *           global processes[] table (the process exits with a diagnostic
 *           otherwise).
 * periods - number of timed round trips.
 *
 * Returns the difference between the two rdtscll() readings taken
 * immediately before and after the timed round trips.
 *
 * One untimed round trip is done first so that every child is known to be
 * running; the samples are zeroed after it.  After the timed run a zero
 * token shuts the chain down, the children are reaped, the pipes are closed,
 * every sample is asserted to equal periods * workers, and the segment is
 * detached and removed.
 */
unsigned long long multiprocess(unsigned int workers, unsigned int periods)
{
    unsigned long long begin, end;
    int err;
    unsigned int k;
    char buf[1] = { 1 };
    float *samples;
    int in, out;
    int shmid;
    unsigned int p;

    /* processes[] is a fixed-size table: refuse counts that would overrun it. */
    if (workers > sizeof(processes) / sizeof(processes[0])) {
        fprintf(stderr, "multiprocess: at most %lu workers are supported\n",
                (unsigned long) (sizeof(processes) / sizeof(processes[0])));
        exit(EXIT_FAILURE);
    }

    /* Declared after the guard so the VLA size is known to be small. */
    int fds[workers + 1][2];

    shmid = shmget(KEY, sizeof(*area), IPC_CREAT | 0666);
    assert(shmid >= 0);
    area = shmat(shmid, NULL, 0);
    assert(area != (void *) -1);
    samples = area->samples;

    for (k = 0; k <= workers; ++k) {
        err = pipe(fds[k]);
        assert(err == 0);
    }
    /* The caller feeds the first pipe and drains the last one. */
    out = fds[0][1];
    in = fds[workers][0];

    /*
     * Flush pending stdio output before forking so that no child starts
     * life with a copy of unwritten parent output in its buffers.
     */
    fflush(NULL);

    for (k = 0; k < workers; ++k) {
        pid_t pid;

        processes[k].in = fds[k][0];
        processes[k].out = fds[k + 1][1];
        pid = fork();
        assert(pid >= 0);
        if (pid == 0)
            process(&processes[k]); /* child: never returns */
        processes[k].pid = pid;
    }

    /* Ensure all is started */
    err = write(out, buf, 1);
    assert(err == 1);
    err = read(in, buf, 1);
    assert(err == 1);

    /* The warm-up round incremented the samples; start counting from zero. */
    for (k = 0; k < BUFSIZE; k++)
        samples[k] = 0.0;

    p = periods;
    rdtscll(begin);
    while (p-- > 0) {
        err = write(out, buf, 1);
        assert(err == 1);
        err = read(in, buf, 1);
        assert(err == 1);
    }
    rdtscll(end);

    /* A zero byte tells each worker to forward it and terminate. */
    buf[0] = 0;
    err = write(out, buf, 1);
    assert(err == 1);
    for (k = 0; k < workers; ++k) {
        err = waitpid(processes[k].pid, NULL, 0);
        assert(err >= 0);
    }
    for (k = 0; k <= workers; ++k) {
        close(fds[k][0]);
        close(fds[k][1]);
    }

    for (k = 0; k < BUFSIZE; k++)
        assert(samples[k] == periods * workers);

    err = shmdt(area);
    assert(err >= 0);
    area = NULL; /* do not leave the global pointing at a detached mapping */
    err = shmctl(shmid, IPC_RMID, 0);
    assert(err >= 0);
    return end - begin;
}
/*
 * Try to switch the calling process to the SCHED_RR policy at that policy's
 * maximum priority, and report the outcome on stdout.
 *
 * Failure is not fatal: a message is printed and the function returns, both
 * when the current parameters cannot be read and when the policy change is
 * refused.
 */
void setscheduler(void)
{
    struct sched_param param;

    if (sched_getparam(0, &param) < 0) {
        printf("Scheduler getparam failed\n");
        return;
    }

    param.sched_priority = sched_get_priority_max(SCHED_RR);

    if (sched_setscheduler(0, SCHED_RR, &param) != 0) {
        printf("Scheduler set to Round Robin with priority %i failed\n",
               param.sched_priority);
        return;
    }

    printf("Scheduler set to Round Robin with priority %i...\n",
           param.sched_priority);
}
/* Values of the "how" command-line argument, as dispatched by main(). */
/* Selects singlethread(). */
#define ST 0
/* Selects multithread(). */
#define MT 1
/* Selects multiprocess(). */
#define MP 2
/*
 * Usage: ctx workers periods how mhz
 *
 *   workers - number of workers per period (>= 0)
 *   periods - number of timed periods per run (> 0)
 *   how     - 0 (ST), 1 (MT) or 2 (MP): which measurement to run
 *   mhz     - CPU clock in MHz, used to convert cycles into a CPU percentage
 *             (> 0)
 *
 * Requests the round-robin scheduler, then runs the selected measurement
 * five times, printing cycles per period and the derived CPU percentage
 * after each run.
 *
 * Returns 0 on success, or EXIT_FAILURE with a usage message on stderr when
 * the arguments are missing or out of range.
 */
int main(int argc, char **argv)
{
    unsigned long long t;
    double c;
    int k;

    /* argv[1..4] are all required; reading them unchecked would crash. */
    if (argc < 5) {
        fprintf(stderr, "Usage: %s workers periods how mhz\n",
                argc > 0 ? argv[0] : "ctx");
        return EXIT_FAILURE;
    }

    /* Parse as signed first so negative input is rejected, not wrapped. */
    int workers_arg = atoi(argv[1]);
    int periods_arg = atoi(argv[2]);
    int how_arg = atoi(argv[3]);
    double mhz = atof(argv[4]);

    /*
     * periods and mhz are both used as divisors below, so they must be
     * strictly positive; !(mhz > 0.0) also rejects NaN.
     */
    if (workers_arg < 0 || periods_arg <= 0 || !(mhz > 0.0) ||
        (how_arg != ST && how_arg != MT && how_arg != MP)) {
        fprintf(stderr, "Usage: %s workers periods how mhz\n", argv[0]);
        fprintf(stderr,
                "  workers >= 0, periods > 0, how in {%d,%d,%d}, mhz > 0\n",
                ST, MT, MP);
        return EXIT_FAILURE;
    }

    unsigned int workers = (unsigned int) workers_arg;
    unsigned int periods = (unsigned int) periods_arg;

    setscheduler();
    for (k = 0; k < 5; ++k) {
        switch (how_arg) {
        case ST:
            t = singlethread(workers, periods);
            break;
        case MT:
            t = multithread(workers, periods);
            break;
        case MP:
            t = multiprocess(workers, periods);
            break;
        default:
            /* Unreachable: how_arg was validated above. */
            return EXIT_FAILURE;
        }
        c = (double) t / periods;
        printf("Cycles per period: %f\n", c);
        /* c / mhz is microseconds per period; the divisor is the length of
         * one period in microseconds. */
        printf("%%CPU usage: %f\n", (100 * c / mhz) / (1000000.0 * PERIOD_SIZE /
                                                        RATE));
    }
    return 0;
}