On Sun, Apr 03, 2011 at 08:10:25PM +0200, Jakub Jelinek wrote: > On Sun, Apr 03, 2011 at 07:27:12PM +0900, Sho Nakatani wrote: > > Then, I'll compare the trees created by gcc and icc, and point out > > that the implementation of OpenMP Task uses Lazy Task Creation while > > gcc does not. > > Depends on what you mean by lazy task creation, gcc schedules > tasks lazily if they aren't if (0), some data structure if created > for them when encountering #pragma omp task directive, but I guess > any implementation will do something like that. > > What your testcase shows is not whether tasks are created lazily or not, but > how good/poor #pragma omp taskwait implementation is. And, for your testcase > libgomp/task.c (GOMP_taskwait) definitely could be improved. Currently it > only > tries to schedule in children that will be awaited by the current tasks and if > there are no such children, goes to sleep, waiting for them to complete. > Scheduling in random unrelated tasks is problematic, because the unrelated > task might take too long to complete and delay the taskwait for way too long > (note, gcc doesn't have untied tasks, all tasks are tied once they are > scheduled > onto some particular tasks - setcontext/swapcontext is quite fragile thing to > do). > But it is true it could very well schedule tasks that are taskwaited by tasks > taskwaited by current task, and transitively further. Plus, be able to > temporarily > awake such a sleeping thread if there are tasks it can transitively taskwait > for, as if those don't complete, the current taskwait won't return.
Just FYI, I've tried to implement something like that as a quick hack, but it unfortunately slowed things down, at least on the attached fib testcase with arguments 40 25. Guess partly the problem is that after a task waiting in taskwait_sem is awaken it now needs to take task_lock lock to unqueue itself from the new in_taskwait_list, and partly because the search for grand-grand children etc. is more expensive, the FIFO isn't a good data structure for that. Jakub
--- libgomp/team.c.jj 2011-04-04 18:14:58.000000000 +0200 +++ libgomp/team.c 2011-04-04 20:00:45.000000000 +0200 @@ -166,6 +166,7 @@ gomp_new_team (unsigned nthreads) gomp_mutex_init (&team->task_lock); team->task_queue = NULL; + team->in_taskwait_list = NULL; team->task_count = 0; team->task_running_count = 0; --- libgomp/libgomp.h.jj 2011-04-04 18:19:46.000000000 +0200 +++ libgomp/libgomp.h 2011-04-04 20:00:45.000000000 +0200 @@ -311,6 +311,7 @@ struct gomp_team gomp_mutex_t task_lock; struct gomp_task *task_queue; + struct gomp_task *in_taskwait_list; int task_count; int task_running_count; --- libgomp/task.c.jj 2009-04-14 16:33:07.000000000 +0200 +++ libgomp/task.c 2011-04-04 20:02:18.000000000 +0200 @@ -176,6 +176,26 @@ GOMP_task (void (*fn) (void *), void *da gomp_team_barrier_set_task_pending (&team->barrier); do_wake = team->task_running_count + !parent->in_tied_task < team->nthreads; + if (!do_wake && team->in_taskwait_list) + { + struct gomp_task *t = team->in_taskwait_list; + do + { + struct gomp_task *p = parent; + int i; + + for (i = 0; i < 10 && p; i++, p = p->parent) + if (p == t || p->kind == GOMP_TASK_IMPLICIT) + break; + if (p == t) + { + gomp_sem_post (&t->taskwait_sem); + break; + } + t = t->next_queue; + } + while (t != team->in_taskwait_list); + } gomp_mutex_unlock (&team->task_lock); if (do_wake) gomp_team_barrier_wake (&team->barrier, 1); @@ -301,10 +321,35 @@ GOMP_taskwait (void) } return; } - if (task->children->kind == GOMP_TASK_WAITING) + child_task = task->children; + if (child_task->kind != GOMP_TASK_WAITING && team->task_queue) + { + /* Try harder, look for grandchildren etc. */ + for (child_task = team->task_queue;; + child_task = child_task->next_queue) + { + if (child_task->kind == GOMP_TASK_WAITING) + { + struct gomp_task *p = child_task->parent; + int i; + + for (i = 0; i < 10 && p; i++, p = p->parent) + if (p == task || p->kind == GOMP_TASK_IMPLICIT) + break; + if (p == task) + break; + } + if (child_task->next_queue == team->task_queue) + { + child_task = task->children; + break; + } + } + } + if (child_task->kind == GOMP_TASK_WAITING) { - child_task = task->children; - task->children = child_task->next_child; + if (child_task->parent->children == child_task) + child_task->parent->children = child_task->next_child; child_task->prev_queue->next_queue = child_task->next_queue; child_task->next_queue->prev_queue = child_task->prev_queue; if (team->task_queue == child_task) @@ -320,9 +365,25 @@ GOMP_taskwait (void) gomp_team_barrier_clear_task_pending (&team->barrier); } else - /* All tasks we are waiting for are already running - in other threads. Wait for them. */ - task->in_taskwait = true; + { + child_task = NULL; + /* All tasks we are waiting for are already running + in other threads. Wait for them. */ + task->in_taskwait = true; + if (team->in_taskwait_list) + { + task->next_queue = team->in_taskwait_list; + task->prev_queue = team->in_taskwait_list->prev_queue; + task->next_queue->prev_queue = task; + task->prev_queue->next_queue = task; + } + else + { + task->next_queue = task; + task->prev_queue = task; + team->in_taskwait_list = task; + } + } gomp_mutex_unlock (&team->task_lock); if (to_free) { @@ -337,22 +398,23 @@ GOMP_taskwait (void) thr->task = task; } else - { - gomp_sem_wait (&task->taskwait_sem); - task->in_taskwait = false; - return; - } + gomp_sem_wait (&task->taskwait_sem); gomp_mutex_lock (&team->task_lock); if (child_task) { + struct gomp_task *parent = child_task->parent; child_task->prev_child->next_child = child_task->next_child; child_task->next_child->prev_child = child_task->prev_child; - if (task->children == child_task) + if (parent->children == child_task) { if (child_task->next_child != child_task) - task->children = child_task->next_child; + parent->children = child_task->next_child; else - task->children = NULL; + { + parent->children = NULL; + if (parent->in_taskwait) + gomp_sem_post (&parent->taskwait_sem); + } } gomp_clear_parent (child_task->children); to_free = child_task; @@ -360,5 +422,14 @@ GOMP_taskwait (void) team->task_count--; team->task_running_count--; } + else + { + task->in_taskwait = false; + task->prev_queue->next_queue = task->next_queue; + task->next_queue->prev_queue = task->prev_queue; + if (team->in_taskwait_list == task) + team->in_taskwait_list + = task->next_queue == task ? NULL : task->next_queue; + } } }
#include <omp.h> #include <stdio.h> long fib (long n, long l) { long i, j; if (n < 2) return n; else if (l) { #pragma omp task shared(i) firstprivate(n, l) i = fib (n - 1, l - 1); #pragma omp task shared(j) firstprivate(n, l) j = fib (n - 2, l - 1); #pragma omp taskwait } else { i = fib (n - 1, 0); j = fib (n - 2, 0); } return i + j; } int main (int argc, char *argv[]) { long n = argv[1] ? atoi (argv[1]) : 50; long l = argv[1] && argv[2] ? atoi (argv[2]) : 10; double t1, t2; long result; omp_set_dynamic (0); #pragma omp parallel shared(n) #pragma omp single { t1 = omp_get_wtime (); result = fib (n, l); t2 = omp_get_wtime (); printf ("fib (%ld, %ld) %5f %ld\n", n, l, t2 - t1, result); } return 0; }