Repository: trafodion Updated Branches: refs/heads/master 45727da59 -> f38ade8de
[TRAFODION-3180] At times establishing a JDBC/ODBC connection takes observably long time. Analysis revealed that the mxosrvr process in the connecting state was attempting to open the ssmp process on the node for a non-unique query as part of establishing the connection. The ssmp process has many ports in the CLOSE_WAIT state. It looks like the client happens to hit a port that is in the CLOSE_WAIT state. The port transitions to the ESTABLISHED state after some time; hence the connection was taking a longer time. The mxssmp process keeps the port in CLOSE_WAIT because the socket wasn't closed on the server side when the client exits, whether gracefully or abruptly. The seabed layer in Trafodion doesn't correctly handle more than one open to a process. I have changed the IPC infrastructure in SQL to ensure that the ssmp process is opened only once in the mxosrvr process. The API msg_get_phandle opens the process with the given name to obtain the handle. This API is now replaced with XFILENAME_TO_PROCESSHANDLE_. Project: http://git-wip-us.apache.org/repos/asf/trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/trafodion/commit/02b9a0ea Tree: http://git-wip-us.apache.org/repos/asf/trafodion/tree/02b9a0ea Diff: http://git-wip-us.apache.org/repos/asf/trafodion/diff/02b9a0ea Branch: refs/heads/master Commit: 02b9a0eac55840325a869102c5bbf28aaa516a3a Parents: 0d30493 Author: selvaganesang <selva.govindara...@esgyn.com> Authored: Tue Aug 7 18:03:04 2018 +0000 Committer: selvaganesang <selva.govindara...@esgyn.com> Committed: Tue Aug 7 18:03:04 2018 +0000 ---------------------------------------------------------------------- core/sql/common/ComRtUtils.cpp | 11 ++------ core/sql/common/ComRtUtils.h | 2 +- core/sql/common/IpcGuardian.cpp | 29 ++++++-------------- .../main/java/org/trafodion/sql/HDFSClient.java | 5 ++-- .../main/java/org/trafodion/sql/HdfsScan.java | 11 ++++++-- 5 files changed, 24 insertions(+), 34 deletions(-) 
---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafodion/blob/02b9a0ea/core/sql/common/ComRtUtils.cpp ---------------------------------------------------------------------- diff --git a/core/sql/common/ComRtUtils.cpp b/core/sql/common/ComRtUtils.cpp index 764b8ee..4c11b00 100644 --- a/core/sql/common/ComRtUtils.cpp +++ b/core/sql/common/ComRtUtils.cpp @@ -1075,11 +1075,10 @@ Int16 getBDRClusterName(char *bdrClusterName) return error; } -SB_Phandle_Type *get_phandle_with_retry(char *pname, short *fserr) +int get_phandle_with_retry(char *pname, SB_Phandle_Type *phandle) { Int32 retrys = 0; int lv_fserr = FEOK; - SB_Phandle_Type *phandle = NULL; const Int32 NumRetries = 10; timespec retryintervals[NumRetries] = { { 0, 10*1000*1000 } // 10 ms @@ -1096,7 +1095,7 @@ SB_Phandle_Type *get_phandle_with_retry(char *pname, short *fserr) for (;;) { - phandle = msg_get_phandle (pname, &lv_fserr); + lv_fserr = XFILENAME_TO_PROCESSHANDLE_(pname, strlen(pname), phandle); if (retrys >= NumRetries) break; if ((lv_fserr == FEPATHDOWN) || @@ -1105,11 +1104,7 @@ SB_Phandle_Type *get_phandle_with_retry(char *pname, short *fserr) else break; } - - if (fserr) - *fserr = (short) lv_fserr; - - return phandle; + return lv_fserr; } // A function to return the string "UNKNOWN (<val>)" which can be http://git-wip-us.apache.org/repos/asf/trafodion/blob/02b9a0ea/core/sql/common/ComRtUtils.h ---------------------------------------------------------------------- diff --git a/core/sql/common/ComRtUtils.h b/core/sql/common/ComRtUtils.h index d26e52e..a1bf2cf 100644 --- a/core/sql/common/ComRtUtils.h +++ b/core/sql/common/ComRtUtils.h @@ -286,7 +286,7 @@ void dumpTrafStack(LIST(TrafAddrStack *) *la, const char *header, bool toFile = Int16 getBDRClusterName(char *bdrClusterName); -SB_Phandle_Type *get_phandle_with_retry(char *pname, short *fserr = NULL); +int get_phandle_with_retry(char *pname, SB_Phandle_Type *phandle); pid_t 
ComRtGetConfiguredPidMax(); http://git-wip-us.apache.org/repos/asf/trafodion/blob/02b9a0ea/core/sql/common/IpcGuardian.cpp ---------------------------------------------------------------------- diff --git a/core/sql/common/IpcGuardian.cpp b/core/sql/common/IpcGuardian.cpp index c8ebcf8..f223d7b 100644 --- a/core/sql/common/IpcGuardian.cpp +++ b/core/sql/common/IpcGuardian.cpp @@ -212,14 +212,10 @@ NABoolean GuaProcessHandle::compare(const GuaProcessHandle &other) const NABoolean GuaProcessHandle::fromAscii(const char *ascii) { - SB_Phandle_Type *tempPhandle; - tempPhandle = get_phandle_with_retry((char *)ascii); - - if (!tempPhandle) + int retcode = get_phandle_with_retry((char *)ascii, &phandle_); + if (retcode != FEOK) return FALSE; - - memcpy ((void *)&phandle_, (void *)tempPhandle, sizeof(SB_Phandle_Type)); return TRUE; } @@ -4110,7 +4106,7 @@ void IpcGuardianServer::spawnProcess(ComDiagsArea **diags, void IpcGuardianServer::useProcess(ComDiagsArea **diags, CollHeap *diagsHeap) { - NSK_PORT_HANDLE *procHandle; + SB_Phandle_Type procHandle; NSK_PORT_HANDLE procHandleCopy; short usedlength; char processName[50]; @@ -4135,13 +4131,9 @@ void IpcGuardianServer::useProcess(ComDiagsArea **diags, short i = 0; while (i < 3) { - short gprc = 0; - procHandle = get_phandle_with_retry(tmpProcessName, &gprc); - if (procHandle != NULL) - rc = 0; - else - rc = gprc; - if ((rc != 0) || (procHandle == NULL)) + int gprc = 0; + gprc = get_phandle_with_retry(tmpProcessName, &procHandle); + if (rc != FEOK) { serverState_ = ERROR_STATE; guardianError_ = rc; @@ -4157,7 +4149,7 @@ void IpcGuardianServer::useProcess(ComDiagsArea **diags, else { //Phandle wrapper in porting layer - NAProcessHandle phandle(procHandle); + NAProcessHandle phandle(&procHandle); rc = phandle.decompose(); if (rc != 0) @@ -4174,9 +4166,7 @@ void IpcGuardianServer::useProcess(ComDiagsArea **diags, } } - memcpy(&procHandleCopy, procHandle, sizeof(NSK_PORT_HANDLE)); - IpcProcessId serverProcId( - (const 
GuaProcessHandle &)procHandleCopy); + IpcProcessId serverProcId((const GuaProcessHandle &)procHandle); controlConnection_ = new(getServerClass()->getEnv()->getHeap()) GuaConnectionToServer(getServerClass()->getEnv(), @@ -4190,9 +4180,6 @@ void IpcGuardianServer::useProcess(ComDiagsArea **diags, castToGuaConnectionToServer()->getGuardianError(); delete controlConnection_; controlConnection_ = NULL; - // clear phandle cache -- ALM CR8248 - msg_set_phandle((char *)processName_, NULL); - msg_mon_close_process(procHandle); DELAY(10); } else http://git-wip-us.apache.org/repos/asf/trafodion/blob/02b9a0ea/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java ---------------------------------------------------------------------- diff --git a/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java b/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java index c04c256..1995851 100644 --- a/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java +++ b/core/sql/src/main/java/org/trafodion/sql/HDFSClient.java @@ -310,7 +310,8 @@ public class HDFSClient // If the range has a length more than the buffer length, the range is chunked // in HdfsScan public HDFSClient(int bufNo, int ioByteArraySizeInKB, int rangeNo, String filename, ByteBuffer buffer, long position, - int length, short compressionType, boolean sequenceFile, byte recDelimiter, CompressionInputStream inStream) throws IOException + int length, short compressionType, boolean sequenceFile, byte recDelimiter, CompressionInputStream inStream) + throws IOException, EOFException { bufNo_ = bufNo; rangeNo_ = rangeNo; @@ -363,7 +364,7 @@ public class HDFSClient while reading the rows. The columns in the value is delimited by column delimiter 001(octal). 
*/ - public void initSequenceFileRead() throws IOException + public void initSequenceFileRead() throws IOException, EOFException { SequenceFile.Reader.Option seqPos = SequenceFile.Reader.start(pos_); SequenceFile.Reader.Option seqLen = SequenceFile.Reader.length(lenRemain_); http://git-wip-us.apache.org/repos/asf/trafodion/blob/02b9a0ea/core/sql/src/main/java/org/trafodion/sql/HdfsScan.java ---------------------------------------------------------------------- diff --git a/core/sql/src/main/java/org/trafodion/sql/HdfsScan.java b/core/sql/src/main/java/org/trafodion/sql/HdfsScan.java index 3ef1be7..4d7b90d 100644 --- a/core/sql/src/main/java/org/trafodion/sql/HdfsScan.java +++ b/core/sql/src/main/java/org/trafodion/sql/HdfsScan.java @@ -29,6 +29,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.conf.Configuration; import java.nio.ByteBuffer; import java.io.IOException; +import java.io.EOFException; import java.util.concurrent.Callable; import java.util.concurrent.Future; import java.util.concurrent.Executors; @@ -168,11 +169,17 @@ public class HdfsScan if (! scanCompleted_) { if (logger_.isDebugEnabled()) logger_.debug(" CurrentRange " + hdfsScanRanges_[currRange_].tdbRangeNum_ + " LenRemain " + currRangeLenRemain_ + " BufNo " + bufNo); - hdfsClient_[bufNo] = new HDFSClient(bufNo, ioByteArraySizeInKB_, hdfsScanRanges_[currRange_].tdbRangeNum_, + try { + hdfsClient_[bufNo] = new HDFSClient(bufNo, ioByteArraySizeInKB_, hdfsScanRanges_[currRange_].tdbRangeNum_, hdfsScanRanges_[currRange_].filename_, buf_[bufNo], currRangePos_, readLength, hdfsScanRanges_[currRange_].compressionType_, sequenceFile_, recDelimiter_, currInStream_); - + } catch (EOFException e) + { + // Skip this range + currRange_++; + scheduleHdfsScanRange(bufNo, 0); + } } }