Github user linwen commented on a diff in the pull request:
https://github.com/apache/incubator-hawq/pull/58#discussion_r43363199
--- Diff: depends/libyarn/src/libyarnclient/LibYarnClient.cpp ---
@@ -214,6 +214,60 @@ int LibYarnClient::createJob(string &jobName, string
&queue,string &jobId) {
}
}
+int LibYarnClient::forceKillJob(string &jobId) {
+
+#ifndef MOCKTEST
+ if ( keepRun ) {
+ keepRun=false;
+ void *thrc = NULL;
+ int rc = pthread_join(heartbeatThread, &thrc);
+ if ( rc != 0 ) {
+ LOG(INFO, "LibYarnClient::foreceKillJob, fail to join
heart-beat thread. "
+ "error code %d", rc);
+ return FR_FAILED;
+ }
+ }
+#endif
+
+ try{
+ if (jobId != clientJobId) {
+ throw std::invalid_argument("The jobId is wrong, please check
the jobId argument");
+ }
+
+ for (map<int,Container*>::iterator it = jobIdContainers.begin();
it != jobIdContainers. end(); it++) {
+ ostringstream key;
+ Container *container = it->second;
+ key << container->getNodeId().getHost() << ":" <<
container->getNodeId().getPort();
+ Token nmToken = nmTokenCache[key.str()];
+ ((ContainerManagement*)nmClient)->stopContainer((*container),
nmToken);
+ LOG(INFO,"LibYarnClient::foreceKillJob, container:%d are
stopped",container->getId().getId());
+ }
+
+ ((ApplicationClient*)
appClient)->forceKillApplication(clientAppId);
+ LOG(INFO, "LibYarnClient::foreceKillJob, forceKillApplication");
+
+ for (map<int,Container*>::iterator it = jobIdContainers.begin();
it != jobIdContainers.end(); it++) {
+ LOG(INFO,"LibYarnClient::foreceKillJob, container:%d in
jobIdContainers are delete",it->second->getId().getId());
+ delete it->second;
+ it->second = NULL;
+ }
+ jobIdContainers.clear();
+ activeFailContainerIds.clear();
+ return FR_SUCCEEDED;
+ } catch(std::exception& e){
+ stringstream errorMsg;
+ errorMsg << "LibYarnClient::forceKillJob, Catch the Exception:" <<
e.what();
+ setErrorMessage(errorMsg.str());
+ return FR_FAILED;
+ } catch (...) {
+ stringstream errorMsg;
+ errorMsg << "LibYarnClient::forceKillJob, catch unexpected
exception.";
+ setErrorMessage(errorMsg.str());
+ return FR_FAILED;
+ }
+}
+
--- End diff --
Yes. In this case no containers are returned to HAWQ. What I am thinking is
that, in some other cases in the future, forceKillApplication() may be called
while HAWQ is working normally, and then the containers should be returned.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---