Hello all.

I'm having a problem when I try to access data->procs->addr[vpid] when
the vpid belongs to a recently killed process. I'm including a piece of my
code here. The problem is that execution always enters the last if
clause, maybe because the information about the dead process is no longer
available, or maybe I'm doing something wrong when accessing it.

Any help will be appreciated.

                                        *command =
ORTE_DAEMON_REPORT_JOB_INFO_CMD;*
*                                        buffer = OBJ_NEW(opal_buffer_t);*
*                                        if (ORTE_SUCCESS != (rc =
opal_dss.pack(buffer, &command, 1, ORTE_DAEMON_CMD))) {*
*                                            ORTE_ERROR_LOG(rc);*
*                                            OBJ_RELEASE(buffer);*
*                                            return rc;*
*                                        }*
*                                        if (ORTE_SUCCESS != (rc =
opal_dss.pack(buffer, &proc->jobid, 1, ORTE_JOBID))) {*
*                                            ORTE_ERROR_LOG(rc);*
*                                            OBJ_RELEASE(buffer);*
*                                            return rc;*
*                                        }*
*                                        /* do the send */*
*                                        if (0 > (rc =
orte_rml.send_buffer(ORTE_PROC_MY_HNP, buffer, ORTE_RML_TAG_DAEMON, 0))) {*
*                                            ORTE_ERROR_LOG(rc);*
*                                            OBJ_RELEASE(buffer);*
*                                            return rc;*
*                                        }*
*                                        OBJ_RELEASE(buffer);*
*                                        buffer = OBJ_NEW(opal_buffer_t);*
*
*
*
 orte_rml.recv_buffer(ORTE_NAME_WILDCARD, buffer, ORTE_RML_TAG_TOOL, 0);*
*                                        *
*                                        opal_dss.unpack(buffer, &response,
&n, OPAL_INT32);*
*
*
*                                        if(response==0){*
*                                            OPAL_OUTPUT_VERBOSE((5,
orte_errmgr_base.output,"NO ESCRIBÍ AL HNP\n "));*
*                                        }else{*
*                                            opal_dss.unpack(buffer, &jdata,
&n, ORTE_JOB);*
*                                        }*
*
*
*                                        procs =
(orte_proc_t**)jdata->procs->addr;*
*                                        if(procs==NULL){*
*                                            OPAL_OUTPUT_VERBOSE((5,
orte_errmgr_base.output, "grave: procs==null"));*
*                                        }*
*
*
*                                        command =
ORTE_DAEMON_UPDATE_STATE_CMD;*
*
*
*                                        OBJ_RELEASE(buffer);*
*                                        buffer = OBJ_NEW(opal_buffer_t);*
*                                        *
*                                        if (ORTE_SUCCESS != (rc =
opal_dss.pack(buffer, &command, 1, ORTE_DAEMON_CMD))) {*
*                                            ORTE_ERROR_LOG(rc);*
*                                            OBJ_RELEASE(buffer);*
*                                            goto CLEANUP;*
*                                        }*
*
*
*                                        orte_proc_state_t state =
ORTE_PROC_STATE_FAULT;*
*                                        /* Pack the faulty vpid */*
*                                        if (ORTE_SUCCESS != (rc =
opal_dss.pack(buffer, &proc, 1, ORTE_NAME))) {*
*                                            ORTE_ERROR_LOG(rc);*
*                                            goto CLEANUP;*
*                                        }*
*
*
*                                        /* Pack the state */*
*                                        if (ORTE_SUCCESS != (rc =
opal_dss.pack(buffer, &state, 1, OPAL_UINT16))) {*
*                                            ORTE_ERROR_LOG(rc);*
*                                            goto CLEANUP;*
*                                        }*
*
*
*                                        if (NULL == procs[proc->vpid] ||
NULL == procs[proc->vpid]->node) {*
*                                            OPAL_OUTPUT_VERBOSE((5,
orte_errmgr_base.output, "PROBLEM: procs[proc.vpid]==null"));*
*                                        }*
*
*
Thanks a lot.

Hugo Meyer

Reply via email to