Steve Harris wrote:
> 
> Ooops think I forgot to attach it.
> 

I attach what I obtained on an 800 MHz Pentium III in single-user
mode.

I also attach the tools used (there was a bug in Steve's script, and the
value used was the greater one).

The multi-process approach always costs less than 2% per worker.

-- 
Abramo Bagnara                       mailto:[EMAIL PROTECTED]

Opera Unica                          Phone: +39.546.656023
Via Emilia Interna, 140
48014 Castel Bolognese (RA) - Italy

ALSA project               http://www.alsa-project.org
It sounds good!

PNG image

/*
 *  Audio component approach cost meter
 *  Copyright (c) 2001 by Abramo Bagnara <[EMAIL PROTECTED]>
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Library General Public License as
 *   published by the Free Software Foundation; either version 2 of
 *   the License, or (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU Library General Public License for more details.
 *
 *   You should have received a copy of the GNU Library General Public
 *   License along with this library; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 */

#include <limits.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <assert.h>
#include <sys/shm.h>
#include <sys/wait.h>
#include <asm/msr.h>

/* Upper bound on the number of workers; sizes the per-worker tables. */
#define MAX_WORKERS 16

/* System V IPC key of the shared memory segment used by multiprocess(). */
#define KEY 0xabce

/*
 * Geometry of the simulated audio buffer.  main() turns PERIOD_SIZE / RATE
 * into the duration of one period when it computes the CPU usage.
 */
#define PERIOD_SIZE 64
#define RATE 48000
#define CHANNELS 26

/* Number of float samples every worker increments once per period. */
#define BUFSIZE (PERIOD_SIZE*CHANNELS)

/*
 * Number of bytes each worker dirties per period, in three different
 * places: a scratch array on its stack, its private heap buffer and the
 * area shared by all workers.  These are defaults; main() overrides them
 * from the optional command line arguments.
 */
size_t STACK_FOOTPRINT = 4 * 1024;
size_t WORKER_FOOTPRINT = 32 * 1024;
size_t SHARED_FOOTPRINT = 32 * 1024;

/*
 * Memory shared by all workers.  It is always allocated as
 * sizeof(struct space) + SHARED_FOOTPRINT, so shared_area is a C99
 * flexible array member that names the SHARED_FOOTPRINT trailing bytes.
 */
struct space {
        float samples[BUFSIZE];
        char shared_area[];
};

/* Private buffer of each worker, WORKER_FOOTPRINT bytes once allocated. */
char *worker_area[MAX_WORKERS];

/* The shared area currently in use (heap or attached shared memory). */
struct space *area;

/*
 * Perform the work of worker `n` for one period: dirty STACK_FOOTPRINT
 * bytes of stack, SHARED_FOOTPRINT bytes of the shared area and
 * WORKER_FOOTPRINT bytes of worker_area[n], then add one to every sample.
 */
void proc(unsigned int n)
{
        float *cur = area->samples;
        float *const stop = cur + BUFSIZE;

        {
                /* Own scope: the scratch array only lives while it is filled. */
                char scratch[STACK_FOOTPRINT];
                memset(scratch, 0xaa, STACK_FOOTPRINT);
        }
        memset(area->shared_area, 0xaa, SHARED_FOOTPRINT);
        memset(worker_area[n], 0xaa, WORKER_FOOTPRINT);

        while (cur != stop) {
                (*cur)++;
                cur++;
        }
}

unsigned long long singlethread(unsigned int workers, unsigned int periods)
{
        unsigned long long begin, end;
        unsigned int k;
        float *samples;
        unsigned int p;
        area = malloc(sizeof(*area) + SHARED_FOOTPRINT);
        samples = area->samples;
        for (k = 0; k < BUFSIZE; k++)
                samples[k] = 0.0;

        for (k = 0; k < workers; k++)
                worker_area[k] = malloc(WORKER_FOOTPRINT);

        p = periods;

        rdtscll(begin);
        while (p-- > 0) {
                int n;
                for (n = 0; n < workers; ++n) {
                        proc(n);
                }
        }
        rdtscll(end);

        for (k = 0; k < BUFSIZE; k++)
                assert(samples[k] == periods * workers);
        
        free(area);
        return end - begin;
}

/* Bookkeeping for one worker thread of multithread(). */
struct thr {
        int worker;             /* index into worker_area[] */
        pthread_t thread;       /* handle filled in by pthread_create() */
        int in;                 /* pipe end the worker reads its token from */
        int out;                /* pipe end the worker writes the token to */
} threads[MAX_WORKERS];

/*
 * Entry point of a worker thread; `data` points to its struct thr.
 *
 * The thread blocks reading one byte from its input pipe.  A non-zero
 * byte means "process one period": it dirties its footprints, increments
 * every sample and forwards the byte.  A zero byte means "stop": it is
 * forwarded too and the thread returns.
 */
void *thread(void *data)
{
        struct thr *self = data;
        float *samples = area->samples;
        char token[1];
        int rc;

        for (;;) {
                unsigned int i;

                rc = read(self->in, token, 1);
                assert(rc == 1);
                if (token[0] == 0)
                        break;

                {
                        /* Own scope: the scratch array only lives while it is filled. */
                        char scratch[STACK_FOOTPRINT];
                        memset(scratch, 0xaa, STACK_FOOTPRINT);
                }
                memset(area->shared_area, 0xaa, SHARED_FOOTPRINT);
                memset(worker_area[self->worker], 0xaa, WORKER_FOOTPRINT);
                for (i = 0; i < BUFSIZE; i++)
                        samples[i]++;

                rc = write(self->out, token, 1);
                assert(rc == 1);
        }

        /* Pass the stop request on to whoever reads our output pipe. */
        rc = write(self->out, token, 1);
        assert(rc == 1);
        return 0;
}

unsigned long long multithread(unsigned int workers, unsigned int periods)
{
        unsigned long long begin, end;
        int err;
        unsigned int k;
        char buf[1] = { 1 };
        float *samples;
        unsigned int p;
        int fds[workers + 1][2];
        int in, out;
        area = malloc(sizeof(*area) + SHARED_FOOTPRINT);

        samples = area->samples;

        for (k = 0; k <= workers; ++k) {
                err = pipe(fds[k]);
                assert(err == 0);
                worker_area[k] = malloc(WORKER_FOOTPRINT);
        }

        out = fds[0][1];
        in = fds[workers][0];
        for (k = 0; k < workers; ++k) {
                threads[k].worker = k;
                threads[k].in = fds[k][0];
                threads[k].out = fds[k + 1][1];
                err = pthread_create(&threads[k].thread, NULL, thread, &threads[k]);
                assert(err == 0);
        }

        /* Ensure all is started */
        err = write(out, buf, 1);
        assert(err == 1);
        err = read(in, buf, 1);
        assert(err == 1);

        for (k = 0; k < BUFSIZE; k++)
                samples[k] = 0.0;
        p = periods;

        rdtscll(begin);
        while (p-- > 0) {
                err = write(out, buf, 1);
                assert(err == 1);
                err = read(in, buf, 1);
                assert(err == 1);
        }
        rdtscll(end);

        buf[0] = 0;
        err = write(out, buf, 1);
        assert(err == 1);

        for (k = 0; k < workers; ++k) {
                err = pthread_join(threads[k].thread, 0);
                assert(err == 0);
        }
        for (k = 0; k <= workers; ++k) {
                close(fds[k][0]);
                close(fds[k][1]);
        }

        for (k = 0; k < BUFSIZE; k++)
                assert(samples[k] == periods * workers);

        free(area);
        return end - begin;
}

/* Bookkeeping for one worker process of multiprocess(). */
struct pro {
        int worker;     /* position of the worker in the chain */
        pid_t pid;      /* child pid as returned by fork() */
        int in;         /* pipe end the worker reads its token from */
        int out;        /* pipe end the worker writes the token to */
} processes[MAX_WORKERS];

/*
 * Body of a worker process; runs in a forked child and never returns.
 *
 * The child attaches the shared memory segment identified by KEY and then
 * blocks reading one byte from p->in.  A non-zero byte means "process one
 * period": it dirties its footprints, increments every sample and
 * forwards the byte to p->out.  A zero byte means "stop": it is forwarded
 * too and the child terminates.
 *
 * Every child uses worker_area[0]: that is the only buffer the parent
 * allocates before forking.
 */
void process(struct pro *p)
{
        char buf[1];
        int err;
        float *samples;
        int shmid = shmget(KEY, sizeof(*area) + SHARED_FOOTPRINT, 0666);
        assert(shmid >= 0);
        area = shmat(shmid, NULL, 0);
        assert(area != (void *) -1);
        samples = area->samples;
        while (1) {
                unsigned int k;
                err = read(p->in, buf, 1);
                assert(err == 1);
                if (!buf[0]) {
                        err = write(p->out, buf, 1);
                        assert(err == 1);
                        break;
                }
                {
                        /* Own scope: the scratch array only lives while it is filled. */
                        char stack_area[STACK_FOOTPRINT];
                        memset(stack_area, 0xaa, STACK_FOOTPRINT);
                }
                memset(area->shared_area, 0xaa, SHARED_FOOTPRINT);
                memset(worker_area[0], 0xaa, WORKER_FOOTPRINT);
                for (k = 0; k < BUFSIZE; k++)
                        samples[k]++;
                err = write(p->out, buf, 1);
                assert(err == 1);
        }
        /*
         * _exit(), not exit(): the child holds a copy of whatever the
         * parent had buffered in stdio at fork() time.  exit() would flush
         * that copy, so results already printed by the parent would show
         * up again, once per child, when stdout is a file or a pipe.
         */
        _exit(0);
}

unsigned long long multiprocess(unsigned int workers, unsigned int periods)
{
        unsigned long long begin, end;
        int err;
        unsigned int k;
        char buf[1] = { 1 };
        float *samples;
        int fds[workers + 1][2];
        int in, out;
        int shmid = shmget(KEY, sizeof(*area) + SHARED_FOOTPRINT, IPC_CREAT | 0666);
        unsigned int p;
        assert(shmid >= 0);
        area = shmat(shmid, NULL, 0);
        assert(area != (void *) -1);

        samples = area->samples;

        worker_area[0] = malloc(WORKER_FOOTPRINT);

        for (k = 0; k <= workers; ++k) {
                err = pipe(fds[k]);
                assert(err == 0);
        }

        out = fds[0][1];
        in = fds[workers][0];
        for (k = 0; k < workers; ++k) {
                processes[k].worker = k;
                processes[k].in = fds[k][0];
                processes[k].out = fds[k + 1][1];
                err = fork();
                assert(err >= 0);
                if (err == 0)
                        process(&processes[k]);
                processes[k].pid = err;
        }

        /* Ensure all is started */
        err = write(out, buf, 1);
        assert(err == 1);
        err = read(in, buf, 1);
        assert(err == 1);
        
        for (k = 0; k < BUFSIZE; k++)
                samples[k] = 0.0;
        p = periods;

        rdtscll(begin);
        while (p-- > 0) {
                err = write(out, buf, 1);
                assert(err == 1);
                err = read(in, buf, 1);
                assert(err == 1);
        }
        rdtscll(end);

        buf[0] = 0;
        err = write(out, buf, 1);
        assert(err == 1);

        for (k = 0; k < workers; ++k) {
                err = waitpid(processes[k].pid, NULL, 0);
                assert(err >= 0);
        }
        for (k = 0; k <= workers; ++k) {
                close(fds[k][0]);
                close(fds[k][1]);
        }

        for (k = 0; k < BUFSIZE; k++)
                assert(samples[k] == periods * workers);

        err = shmdt(area);
        assert(err >= 0);
        err = shmctl(shmid, IPC_RMID, 0);
        assert(err >= 0);

        return end - begin;
}

/*
 * Try to switch the calling process to the SCHED_RR policy at the highest
 * priority that policy offers, and report the outcome on stdout.  A
 * failure is only reported; the program carries on with its current
 * scheduling.
 */
void setscheduler(void)
{
        struct sched_param param;

        if (sched_getparam(0, &param) < 0) {
                printf("Scheduler getparam failed\n");
                return;
        }

        param.sched_priority = sched_get_priority_max(SCHED_RR);

        if (sched_setscheduler(0, SCHED_RR, &param) != 0) {
                printf("Scheduler set to Round Robin with priority %i failed\n",
                       param.sched_priority);
                return;
        }

        printf("Scheduler set to Round Robin with priority %i...\n",
               param.sched_priority);
}


#define ST 0
#define MT 1
#define MP 2

/* Usage: ctx how workers periods mhz shared worker stack */

int main(int argc, char **argv)
{
        unsigned long long t;
        double c;
        int k;
        char *mode;
        unsigned int how = atoi(argv[1]);
        unsigned int workers = atoi(argv[2]);
        unsigned int periods = atoi(argv[3]);
        double mhz = atof(argv[4]);
        if (argc > 5)
                SHARED_FOOTPRINT = atoi(argv[5]) * 1024;
        if (argc > 6)
                WORKER_FOOTPRINT = atoi(argv[6]) * 1024;
        if (argc > 7)
                STACK_FOOTPRINT = atoi(argv[7]) * 1024;
        
        setscheduler();

        for (k = 0; k < 5; ++k) {
        switch (how) {
        case ST:
                mode = "Single thread";
                t = singlethread(workers, periods);
                break;
        case MT:
                mode = "Multi thread";
                t = multithread(workers, periods);
                break;
        case MP:
                mode = "Multi process";
                t = multiprocess(workers, periods);
                break;
        default:
                assert(0);
                break;
        }

        c = (double) t / periods;
        printf("%s: workers=%d periods=%d shared=%d worker=%d stack=%d\n",  mode, 
workers, periods, SHARED_FOOTPRINT, WORKER_FOOTPRINT, STACK_FOOTPRINT);
        printf("Cycles per period: %f\n", c);
        printf("%%CPU usage: %f\n", (100 * c / mhz) / (1000000.0 * PERIOD_SIZE / 
RATE));
        }
        return 0;
}

worker-diff-graph.pl

Reply via email to