Hi Jeff, I have a minimal MPI program to test the TM interface, and strangely I seem to get errors during the tm_init call. Could you explain what could be wrong? Have you seen anything similar? Here is the MPI code:
#include <stdio.h> #include <tm.h> #include <mpi.h> extern char **environ; void do_check(int val, char *msg) { if (TM_SUCCESS != val) { printf("ret is %d instead of %d: %s\n", val, TM_SUCCESS, msg); exit(1); } } main (int argc, char *argv[]) { int size, rank, ret, err, numnodes, local_err; MPI_Status status; char **input; input[0] = "/bin/echo"; input[1] = "Hello There"; struct tm_roots task_root; tm_node_id *nodelist; tm_event_t event; tm_task_id task_id; char hostname[64]; char buf[]="11000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"; gethostname(hostname, 64); ret = MPI_Init (&argc, &argv); if (ret) { printf ("Error: %d\n", ret); return (1); } ret = MPI_Comm_size (MPI_COMM_WORLD, &size); if (ret) { printf("Error: %d\n", ret); return (1); } ret = MPI_Comm_rank (MPI_COMM_WORLD, &rank); if (ret) { printf("Error: %d\n", ret); return (1); } printf ("First Hostname: %s node %d out of %d\n", hostname, rank, size); if (size%2 && rank==size-1) printf("Sitting out\n"); else { if (rank%2==0) MPI_Send(buf, strlen(buf), MPI_BYTE, rank+1, 11, MPI_COMM_WORLD); else MPI_Recv(buf, sizeof(buf), MPI_BYTE, rank-1, 11, MPI_COMM_WORLD, &status); } printf ("Second Hostname: %s node %d out of %d\n", hostname, rank, size); if (rank == 1) { ret = tm_init(NULL, &task_root); do_check(ret, "tm_init failed"); printf ("Special Hostname: %s node %d out of %d\n", hostname, rank, size); task_id = 0xabcdef; event = 0xabcdef; printf("%s\t%s", input[0], input[1]); tm_finalize(); } MPI_Finalize (); return (0); } The error I am getting is: First Hostname: wins05 node 0 out of 4 First Hostname: wins03 node 1 out of 4 First Hostname: wins02 node 2 out of 4 First Hostname: wins01 node 3 out of 4 Second Hostname: wins05 node 0 out of 4 Second Hostname: wins02 node 2 out of 4 Second Hostname: wins03 node 1 out of 4 Second Hostname: wins01 node 3 out of 4 tm_poll: protocol number dis error 11 ret is 17002 instead of 0: tm_init failed 3 
processes killed (possibly by Open MPI)

I am using Torque-2.0.0p7 and Open MPI-1.0.1.

Thanks,
Prakash