Hello all. I'm having a problem when I try to access data->procs->addr[vpid] when the vpid belongs to a recently killed process. I'm including a piece of my code here. The problem is that execution always enters the last if clause, maybe because the information about the dead process is no longer available, or maybe because I'm doing something wrong when accessing it.
Any help will be appreciated. *command = ORTE_DAEMON_REPORT_JOB_INFO_CMD;* * buffer = OBJ_NEW(opal_buffer_t);* * if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &command, 1, ORTE_DAEMON_CMD))) {* * ORTE_ERROR_LOG(rc);* * OBJ_RELEASE(buffer);* * return rc;* * }* * if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &proc->jobid, 1, ORTE_JOBID))) {* * ORTE_ERROR_LOG(rc);* * OBJ_RELEASE(buffer);* * return rc;* * }* * /* do the send */* * if (0 > (rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, buffer, ORTE_RML_TAG_DAEMON, 0))) {* * ORTE_ERROR_LOG(rc);* * OBJ_RELEASE(buffer);* * return rc;* * }* * OBJ_RELEASE(buffer);* * buffer = OBJ_NEW(opal_buffer_t);* * * * orte_rml.recv_buffer(ORTE_NAME_WILDCARD, buffer, ORTE_RML_TAG_TOOL, 0);* * * * opal_dss.unpack(buffer, &response, &n, OPAL_INT32);* * * * if(response==0){* * OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base.output,"NO ESCRIBĂ AL HNP\n "));* * }else{* * opal_dss.unpack(buffer, &jdata, &n, ORTE_JOB);* * }* * * * procs = (orte_proc_t**)jdata->procs->addr;* * if(procs==NULL){* * OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base.output, "grave: procs==null"));* * }* * * * command = ORTE_DAEMON_UPDATE_STATE_CMD;* * * * OBJ_RELEASE(buffer);* * buffer = OBJ_NEW(opal_buffer_t);* * * * if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &command, 1, ORTE_DAEMON_CMD))) {* * ORTE_ERROR_LOG(rc);* * OBJ_RELEASE(buffer);* * goto CLEANUP;* * }* * * * orte_proc_state_t state = ORTE_PROC_STATE_FAULT;* * /* Pack the faulty vpid */* * if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &proc, 1, ORTE_NAME))) {* * ORTE_ERROR_LOG(rc);* * goto CLEANUP;* * }* * * * /* Pack the state */* * if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &state, 1, OPAL_UINT16))) {* * ORTE_ERROR_LOG(rc);* * goto CLEANUP;* * }* * * * if (NULL == procs[proc->vpid] || NULL == procs[proc->vpid]->node) {* * OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base.output, "PROBLEM: procs[proc.vpid]==null"));* * }* * * Thanks a lot. Hugo Meyer