Github user jiny2 commented on a diff in the pull request:
https://github.com/apache/incubator-hawq/pull/58#discussion_r43362489
--- Diff: depends/libyarn/src/libyarnclient/LibYarnClient.cpp ---
@@ -214,6 +214,60 @@ int LibYarnClient::createJob(string &jobName, string
&queue,string &jobId) {
}
}
+int LibYarnClient::forceKillJob(string &jobId) {
+
+#ifndef MOCKTEST
+ if ( keepRun ) {
+ keepRun=false;
+ void *thrc = NULL;
+ int rc = pthread_join(heartbeatThread, &thrc);
+ if ( rc != 0 ) {
+ LOG(INFO, "LibYarnClient::foreceKillJob, fail to join
heart-beat thread. "
+ "error code %d", rc);
+ return FR_FAILED;
+ }
+ }
+#endif
+
+ try{
+ if (jobId != clientJobId) {
+ throw std::invalid_argument("The jobId is wrong, please check
the jobId argument");
+ }
+
+ for (map<int,Container*>::iterator it = jobIdContainers.begin();
it != jobIdContainers. end(); it++) {
+ ostringstream key;
+ Container *container = it->second;
+ key << container->getNodeId().getHost() << ":" <<
container->getNodeId().getPort();
+ Token nmToken = nmTokenCache[key.str()];
+ ((ContainerManagement*)nmClient)->stopContainer((*container),
nmToken);
+ LOG(INFO,"LibYarnClient::foreceKillJob, container:%d are
stopped",container->getId().getId());
+ }
+
+ ((ApplicationClient*)
appClient)->forceKillApplication(clientAppId);
+ LOG(INFO, "LibYarnClient::foreceKillJob, forceKillApplication");
+
+ for (map<int,Container*>::iterator it = jobIdContainers.begin();
it != jobIdContainers.end(); it++) {
+ LOG(INFO,"LibYarnClient::foreceKillJob, container:%d in
jobIdContainers are delete",it->second->getId().getId());
+ delete it->second;
+ it->second = NULL;
+ }
+ jobIdContainers.clear();
+ activeFailContainerIds.clear();
+ return FR_SUCCEEDED;
+ } catch(std::exception& e){
+ stringstream errorMsg;
+ errorMsg << "LibYarnClient::forceKillJob, Catch the Exception:" <<
e.what();
+ setErrorMessage(errorMsg.str());
+ return FR_FAILED;
+ } catch (...) {
+ stringstream errorMsg;
+ errorMsg << "LibYarnClient::forceKillJob, catch unexpected
exception.";
+ setErrorMessage(errorMsg.str());
+ return FR_FAILED;
+ }
+}
+
--- End diff --
Curious whether we have to stop the containers one by one. If an exception
is raised while creating the application, can we not expect YARN to
automatically stop all the containers belonging to this app?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---