> >Results are *very* interesting.
> 
> I would be interested to see what you think of them.

I attach the latest version of ctx.c, which adds stack pollution and
shared-area pollution.

My more or less obvious considerations follow:
- the cache has the most dramatic effect
- context switch time has relatively little impact (note that in theory it
has a greater effect on low-end machines, but having many components there
is not possible anyway, for other reasons)
- multiple threads/processes cannot share the stack, and this leads to
extra cache use (although this may be limited with a bit of care).

IMO all efforts need to be directed at reducing the cache footprint:
- using shared libraries and sharing all shareable code is a
priority (one more argument for "one API")
- sharing temporary areas

Quick summary:
1) Share everything
2) Keep it small
3) Buy more CPUs and use multiprocess model ;-)

About approaches: I think that the API needs to be designed with
both approaches in mind. It has to be designed so that the approach is an
implementation detail. I know that this may be more difficult, but
failing here means compromising near-future possibilities.

-- 
Abramo Bagnara                       mailto:[EMAIL PROTECTED]

Opera Unica                          Phone: +39.546.656023
Via Emilia Interna, 140
48014 Castel Bolognese (RA) - Italy

ALSA project               http://www.alsa-project.org
It sounds good!
/*
 *  Audio component approach cost meter
 *  Copyright (c) 2001 by Abramo Bagnara <[EMAIL PROTECTED]>
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Library General Public License as
 *   published by the Free Software Foundation; either version 2 of
 *   the License, or (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU Library General Public License for more details.
 *
 *   You should have received a copy of the GNU Library General Public
 *   License along with this library; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#include <limits.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <sys/shm.h>
#include <sys/wait.h>
#include <asm/msr.h>

/* System V IPC key passed to shmget() by the multiprocess variant. */
#define KEY 0xabce

/*
 * Period geometry.  main() uses PERIOD_SIZE / RATE as the duration of one
 * period when converting cycles per period into a CPU-usage percentage;
 * CHANNELS only scales the amount of sample data touched per period.
 */
#define PERIOD_SIZE 64
#define RATE 48000
#define CHANNELS 26

/* Number of float samples in the shared buffer that every worker increments. */
#define BUFSIZE (PERIOD_SIZE*CHANNELS)

/*
 * Bytes of stack scratch each worker memset()s per period.  The worker code
 * is guarded by "#if STACK_POLLUTION_SIZE > 0", so 0 disables it.
 */
#define STACK_POLLUTION_SIZE (4*1024)
//#define STACK_POLLUTION_SIZE 0

/*
 * Bytes of the shared area each worker memset()s per period.  The memset is
 * guarded by "#if SHARED_POLLUTION_SIZE > 0".
 * NOTE(review): struct space declares pollution[SHARED_POLLUTION_SIZE], so
 * the 0 setting yields a zero-length array, which is a compiler extension
 * rather than standard C -- confirm the target compiler accepts it.
 */
#define SHARED_POLLUTION_SIZE (64*1024)
//#define SHARED_POLLUTION_SIZE 0

/*
 * Working area touched by every worker on every period: the sample buffer
 * that each worker increments, followed by a block that is only ever
 * overwritten with memset() to enlarge the cache footprint.
 */
struct space {
        float samples[BUFSIZE];                 /* incremented once per worker per period */
        char pollution[SHARED_POLLUTION_SIZE];  /* filled with 0xaa by each worker */
};

/*
 * Current working area.  It is malloc()ed by singlethread()/multithread()
 * and attached with shmat() by multiprocess()/process().
 */
volatile struct space *area;

/*
 * One worker's share of a period, run directly in the caller's context
 * (used by the single-threaded variant).
 *
 * Optionally dirties a stack scratch buffer and the shared pollution block
 * (both controlled by the *_POLLUTION_SIZE macros), then adds 1 to every
 * sample of the shared buffer.  The worker index n is not used.
 */
void proc(unsigned int n)
{
        float *cur = area->samples;
        float *const stop = cur + BUFSIZE;

#if STACK_POLLUTION_SIZE > 0
        {
                char scratch[STACK_POLLUTION_SIZE];
                memset(scratch, 0xaa, sizeof(scratch));
        }
#endif
#if SHARED_POLLUTION_SIZE > 0
        memset(area->pollution, 0xaa, sizeof(area->pollution));
#endif
        /* Walk the buffer by pointer, bumping each sample by one. */
        while (cur != stop) {
                ++*cur;
                ++cur;
        }
}

/*
 * Baseline measurement: run every worker as a plain function call.
 *
 * Allocates the working area on the heap, zeroes the samples, then for each
 * of `periods` periods calls proc() once per worker.  Afterwards it asserts
 * that every sample equals periods * workers and releases the area.
 *
 * Returns the number of TSC cycles spent in the timed loop.
 * Exits the program if the working area cannot be allocated.
 *
 * NOTE(review): samples are floats, so the final check is only exact while
 * periods * workers stays within the range a float can count precisely.
 */
unsigned long long singlethread(unsigned int workers, unsigned int periods)
{
        unsigned long long begin, end;
        unsigned int k;
        unsigned int n;
        unsigned int p;
        float *samples;

        area = malloc(sizeof(*area));
        if (area == NULL) {
                /* Previously unchecked: a failed malloc was dereferenced below. */
                perror("malloc");
                exit(EXIT_FAILURE);
        }
        samples = (float *) area->samples;
        for (k = 0; k < BUFSIZE; k++)
                samples[k] = 0.0;

        p = periods;

        rdtscll(begin);
        while (p-- > 0) {
                /* n is unsigned to match workers (no signed/unsigned compare). */
                for (n = 0; n < workers; ++n)
                        proc(n);
        }
        rdtscll(end);

        for (k = 0; k < BUFSIZE; k++)
                assert(samples[k] == periods * workers);

        free((void *) area);
        area = NULL;    /* do not leave a dangling global behind */
        return end - begin;
}

/*
 * Per-worker state for the multithreaded variant: the pthread handle plus
 * the pipe descriptors the worker reads its token from (in) and forwards it
 * to (out).  The table is statically sized, so at most 100 workers fit.
 */
struct thr {
        pthread_t thread;
        int in, out;
} threads[100];

/*
 * Worker body for the multithreaded variant; `data` points at this worker's
 * struct thr.
 *
 * Each iteration blocks on a one-byte token from the worker's input pipe.
 * A non-zero token means "process one period": dirty the stack and shared
 * pollution areas (when enabled), add 1 to every shared sample, and pass
 * the token on.  A zero token is forwarded unchanged and ends the thread.
 *
 * Always returns 0 (a null pointer).
 */
void *thread(void *data)
{
        struct thr *self = data;
        float *buffer = area->samples;
        char token[1];
        int rc;

        for (;;) {
                unsigned int i;

                rc = read(self->in, token, 1);
                assert(rc == 1);
                if (token[0] == 0)
                        break;
#if STACK_POLLUTION_SIZE > 0
                {
                        char scratch[STACK_POLLUTION_SIZE];
                        memset(scratch, 0xaa, sizeof(scratch));
                }
#endif
#if SHARED_POLLUTION_SIZE > 0
                memset(area->pollution, 0xaa, sizeof(area->pollution));
#endif
                for (i = 0; i != BUFSIZE; i++)
                        buffer[i]++;
                rc = write(self->out, token, 1);
                assert(rc == 1);
        }

        /* Hand the termination token to the next stage before leaving. */
        rc = write(self->out, token, 1);
        assert(rc == 1);
        return 0;
}

/*
 * Measure the cost of running every worker in its own thread.
 *
 * The workers form a chain connected by pipes: this function writes a
 * one-byte token into the first pipe, each worker processes a period and
 * forwards the token, and this function reads it back from the last pipe.
 * One untimed round trip makes sure all threads are running, then `periods`
 * round trips are timed.  A zero token shuts the chain down; the threads are
 * joined, the pipes closed and the result (every sample equal to
 * periods * workers) is asserted.
 *
 * Returns the number of TSC cycles spent in the timed loop.
 * Exits the program if `workers` exceeds the capacity of threads[] or if the
 * working area cannot be allocated.
 */
unsigned long long multithread(unsigned int workers, unsigned int periods)
{
        unsigned long long begin, end;
        int err;
        unsigned int k;
        char buf[1] = { 1 };
        float *samples;
        unsigned int p;
        int in, out;

        /*
         * threads[] is a fixed-size table; reject larger requests instead of
         * writing past its end.  This must happen before the pipe array is
         * declared so that workers + 1 cannot wrap to a zero-length VLA.
         */
        if (workers > sizeof(threads) / sizeof(threads[0])) {
                fprintf(stderr, "multithread: at most %u workers are supported\n",
                        (unsigned int) (sizeof(threads) / sizeof(threads[0])));
                exit(EXIT_FAILURE);
        }

        /* Pipe k feeds worker k; the extra last pipe carries the token back. */
        int fds[workers + 1][2];

        area = malloc(sizeof(*area));
        if (area == NULL) {
                perror("malloc");
                exit(EXIT_FAILURE);
        }

        samples = (float *) area->samples;

        for (k = 0; k <= workers; ++k) {
                err = pipe(fds[k]);
                assert(err == 0);
        }

        out = fds[0][1];
        in = fds[workers][0];
        for (k = 0; k < workers; ++k) {
                threads[k].in = fds[k][0];
                threads[k].out = fds[k + 1][1];
                err = pthread_create(&threads[k].thread, NULL, thread, &threads[k]);
                assert(err == 0);
        }

        /* Ensure all is started */
        err = write(out, buf, 1);
        assert(err == 1);
        err = read(in, buf, 1);
        assert(err == 1);

        /* The warm-up round incremented the samples; reset before timing. */
        for (k = 0; k < BUFSIZE; k++)
                samples[k] = 0.0;
        p = periods;

        rdtscll(begin);
        while (p-- > 0) {
                err = write(out, buf, 1);
                assert(err == 1);
                err = read(in, buf, 1);
                assert(err == 1);
        }
        rdtscll(end);

        /* A zero token tells every worker in the chain to terminate. */
        buf[0] = 0;
        err = write(out, buf, 1);
        assert(err == 1);

        for (k = 0; k < workers; ++k) {
                err = pthread_join(threads[k].thread, 0);
                assert(err == 0);
        }
        for (k = 0; k <= workers; ++k) {
                close(fds[k][0]);
                close(fds[k][1]);
        }

        for (k = 0; k < BUFSIZE; k++)
                assert(samples[k] == periods * workers);

        free((void *) area);
        area = NULL;    /* do not leave a dangling global behind */
        return end - begin;
}

/*
 * Per-worker state for the multiprocess variant: the child's pid plus the
 * pipe descriptors the worker reads its token from (in) and forwards it to
 * (out).  The table is statically sized, so at most 100 workers fit.
 */
struct pro {
        pid_t pid;
        int in, out;
} processes[100];

/*
 * Worker body for the multiprocess variant; runs in a forked child and never
 * returns.
 *
 * Looks up and attaches the shared-memory segment identified by KEY, then
 * loops: block on a one-byte token from p->in; on a non-zero token dirty the
 * stack and shared pollution areas (when enabled), add 1 to every shared
 * sample and forward the token to p->out; on a zero token forward it and
 * leave the loop.
 *
 * The child terminates with _exit() rather than exit(): exit() would flush
 * the stdio buffers inherited from the parent at fork() time, so any output
 * the parent had buffered (e.g. when stdout is redirected to a file or pipe)
 * would be written again by every worker.
 */
void process(struct pro *p)
{
        char buf[1];
        int err;
        float *samples;
        int shmid = shmget(KEY, sizeof(*area), 0666);
        assert(shmid >= 0);
        area = shmat(shmid, NULL, 0);
        assert(area != (void *) -1);
        samples = (float *) area->samples;
        while (1) {
                unsigned int k;
                err = read(p->in, buf, 1);
                assert(err == 1);
                if (!buf[0]) {
                        /* Pass the termination token down the chain, then stop. */
                        err = write(p->out, buf, 1);
                        assert(err == 1);
                        break;
                }
#if STACK_POLLUTION_SIZE > 0
                {
                        char pollution[STACK_POLLUTION_SIZE];
                        memset(pollution, 0xaa, STACK_POLLUTION_SIZE);
                }
#endif
#if SHARED_POLLUTION_SIZE > 0
                memset((void *) area->pollution, 0xaa, SHARED_POLLUTION_SIZE);
#endif
                for (k = 0; k < BUFSIZE; k++)
                        samples[k]++;
                err = write(p->out, buf, 1);
                assert(err == 1);
        }
        /* Leave without flushing the stdio buffers copied from the parent. */
        _exit(0);
}

/*
 * Measure the cost of running every worker in its own process.
 *
 * Creates (or opens) the shared-memory segment identified by KEY and
 * attaches it as the working area, then forks one child per worker; each
 * child runs process().  The workers form a chain connected by pipes: this
 * function writes a one-byte token into the first pipe and reads it back
 * from the last one.  One untimed round trip makes sure all children are
 * running, then `periods` round trips are timed.  A zero token shuts the
 * chain down; the children are reaped, the pipes closed, the result (every
 * sample equal to periods * workers) asserted, and the segment detached and
 * removed.
 *
 * Returns the number of TSC cycles spent in the timed loop.
 * Exits the program if `workers` exceeds the capacity of processes[].
 */
unsigned long long multiprocess(unsigned int workers, unsigned int periods)
{
        unsigned long long begin, end;
        int err;
        unsigned int k;
        char buf[1] = { 1 };
        float *samples;
        int in, out;
        int shmid;
        unsigned int p;

        /*
         * processes[] is a fixed-size table; reject larger requests instead
         * of writing past its end.  This must happen before the pipe array
         * is declared so that workers + 1 cannot wrap to a zero-length VLA.
         */
        if (workers > sizeof(processes) / sizeof(processes[0])) {
                fprintf(stderr, "multiprocess: at most %u workers are supported\n",
                        (unsigned int) (sizeof(processes) / sizeof(processes[0])));
                exit(EXIT_FAILURE);
        }

        /* Pipe k feeds worker k; the extra last pipe carries the token back. */
        int fds[workers + 1][2];

        shmid = shmget(KEY, sizeof(*area), IPC_CREAT | 0666);
        assert(shmid >= 0);
        area = shmat(shmid, NULL, 0);
        assert(area != (void *) -1);

        samples = (float *) area->samples;

        for (k = 0; k <= workers; ++k) {
                err = pipe(fds[k]);
                assert(err == 0);
        }

        out = fds[0][1];
        in = fds[workers][0];
        for (k = 0; k < workers; ++k) {
                processes[k].in = fds[k][0];
                processes[k].out = fds[k + 1][1];
                err = fork();
                assert(err >= 0);
                if (err == 0)
                        process(&processes[k]); /* child: never returns */
                processes[k].pid = err;
        }

        /* Ensure all is started */
        err = write(out, buf, 1);
        assert(err == 1);
        err = read(in, buf, 1);
        assert(err == 1);

        /* The warm-up round incremented the samples; reset before timing. */
        for (k = 0; k < BUFSIZE; k++)
                samples[k] = 0.0;
        p = periods;

        rdtscll(begin);
        while (p-- > 0) {
                err = write(out, buf, 1);
                assert(err == 1);
                err = read(in, buf, 1);
                assert(err == 1);
        }
        rdtscll(end);

        /* A zero token tells every worker in the chain to terminate. */
        buf[0] = 0;
        err = write(out, buf, 1);
        assert(err == 1);

        for (k = 0; k < workers; ++k) {
                err = waitpid(processes[k].pid, NULL, 0);
                assert(err >= 0);
        }
        for (k = 0; k <= workers; ++k) {
                close(fds[k][0]);
                close(fds[k][1]);
        }

        for (k = 0; k < BUFSIZE; k++)
                assert(samples[k] == periods * workers);

        err = shmdt((const void *) area);
        assert(err >= 0);
        err = shmctl(shmid, IPC_RMID, 0);
        assert(err >= 0);

        return end - begin;
}

/*
 * Try to switch the calling process to the SCHED_RR policy at the highest
 * priority that policy allows, and report the outcome on stdout.
 *
 * Failure is not fatal: the function only prints a message and returns.
 */
void setscheduler(void)
{
        struct sched_param param;

        if (sched_getparam(0, &param) < 0) {
                fputs("Scheduler getparam failed\n", stdout);
                return;
        }

        param.sched_priority = sched_get_priority_max(SCHED_RR);

        if (sched_setscheduler(0, SCHED_RR, &param) == 0) {
                printf("Scheduler set to Round Robin with priority %i...\n",
                       param.sched_priority);
        } else {
                printf("Scheduler set to Round Robin with priority %i failed\n",
                       param.sched_priority);
        }
}


#define ST 0
#define MT 1
#define MP 2

/* Usage: ctx workers periods how mhz */

/*
 * Entry point.  Usage: ctx workers periods how mhz
 *
 *   workers  number of workers run per period
 *   periods  number of timed periods per measurement (must be > 0)
 *   how      0 = single thread, 1 = one thread per worker,
 *            2 = one process per worker
 *   mhz      CPU clock in MHz, used to turn cycles into time (must be > 0)
 *
 * Tries to raise the scheduling class, then runs the selected variant five
 * times, printing the cycles per period and the resulting CPU usage after
 * each run.
 *
 * Returns 0 on success and EXIT_FAILURE when the arguments are missing or
 * invalid.
 */
int main(int argc, char **argv)
{
        unsigned long long t;
        unsigned int workers, periods, how;
        double mhz;
        double c;
        int k;

        /* argv[1..4] were previously read without checking that they exist. */
        if (argc < 5) {
                fprintf(stderr, "Usage: %s workers periods how mhz\n",
                        argc > 0 && argv[0] ? argv[0] : "ctx");
                return EXIT_FAILURE;
        }

        workers = atoi(argv[1]);
        periods = atoi(argv[2]);
        how = atoi(argv[3]);
        mhz = atof(argv[4]);

        /* Both values are used as divisors in the report below. */
        if (periods == 0 || !(mhz > 0.0)) {
                fprintf(stderr, "ctx: periods and mhz must be greater than zero\n");
                return EXIT_FAILURE;
        }
        /* Reject an unknown mode up front rather than relying on assert(). */
        if (how != ST && how != MT && how != MP) {
                fprintf(stderr, "ctx: how must be %d, %d or %d\n", ST, MT, MP);
                return EXIT_FAILURE;
        }

        setscheduler();

        for (k = 0; k < 5; ++k) {
                switch (how) {
                case ST:
                        t = singlethread(workers, periods);
                        break;
                case MT:
                        t = multithread(workers, periods);
                        break;
                case MP:
                        t = multiprocess(workers, periods);
                        break;
                default:
                        /* Unreachable: how was validated above. */
                        return EXIT_FAILURE;
                }

                c = (double) t / periods;
                printf("Cycles per period: %f\n", c);
                /* c / mhz is the time used per period, in microseconds. */
                printf("%%CPU usage: %f\n",
                       (100 * c / mhz) / (1000000.0 * PERIOD_SIZE / RATE));
        }
        return 0;
}

Reply via email to