hive data modification detection: commit #2
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/1820da1c Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/1820da1c Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/1820da1c Branch: refs/heads/master Commit: 1820da1cf3776a318951c241814e92a4980ccd34 Parents: 772b4a3 Author: Anoop Sharma <[email protected]> Authored: Sat May 28 01:10:23 2016 +0000 Committer: Anoop Sharma <[email protected]> Committed: Sat May 28 01:10:23 2016 +0000 ---------------------------------------------------------------------- core/sql/cli/SessionDefaults.cpp | 3 +- core/sql/comexe/ComTdbHdfsScan.cpp | 36 ++++++--- core/sql/comexe/ComTdbHdfsScan.h | 15 ++-- core/sql/executor/ExHdfsScan.cpp | 11 ++- core/sql/exp/ExpLOBaccess.cpp | 134 +++++++++++++++++++------------- core/sql/exp/ExpLOBaccess.h | 7 +- core/sql/exp/ExpLOBinterface.cpp | 6 +- core/sql/exp/ExpLOBinterface.h | 2 +- core/sql/generator/GenRelScan.cpp | 24 ++++-- core/sql/optimizer/HDFSHook.cpp | 49 ++++++++---- core/sql/optimizer/HDFSHook.h | 3 + core/sql/optimizer/NATable.cpp | 10 ++- core/sql/regress/hive/EXPECTED005 | 133 +++++++++++++++++++++++-------- core/sql/regress/hive/TEST005 | 33 +++++++- 14 files changed, 328 insertions(+), 138 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/cli/SessionDefaults.cpp ---------------------------------------------------------------------- diff --git a/core/sql/cli/SessionDefaults.cpp b/core/sql/cli/SessionDefaults.cpp index 593804c..024754b 100644 --- a/core/sql/cli/SessionDefaults.cpp +++ b/core/sql/cli/SessionDefaults.cpp @@ -750,6 +750,7 @@ static const QueryString cqdInfo[] = , {"transform_to_sidetree_insert"}, {"OFF"} , {"METADATA_CACHE_SIZE"}, {"0"} , {"QUERY_CACHE"}, {"0"} +, {"TRAF_RELOAD_NATABLE_CACHE"}, {"ON"} }; static const AQRInfo::AQRErrorMap aqrErrorMap[] = @@ -781,7 +782,7 @@ static const AQRInfo::AQRErrorMap aqrErrorMap[] = // parallel purgedata failed AQREntry( 8022, 0, 3, 60, 0, 0, "", 0, 1), - AQREntry( 8436, 0, 1, 0, 0, 1, "04", 0, 0), + AQREntry( 8436, 0, 1, 0, 0, 2, "04:05", 0, 0), // FS memory errors AQREntry( 8550, 30, 1, 60, 0, 0, "", 0, 0), http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/comexe/ComTdbHdfsScan.cpp ---------------------------------------------------------------------- diff --git a/core/sql/comexe/ComTdbHdfsScan.cpp b/core/sql/comexe/ComTdbHdfsScan.cpp index a6aac21..91d7468 100755 --- a/core/sql/comexe/ComTdbHdfsScan.cpp +++ b/core/sql/comexe/ComTdbHdfsScan.cpp @@ -69,9 +69,10 @@ ComTdbHdfsScan::ComTdbHdfsScan( char * loggingLocation, char * errCountId, - char * hdfsFilesDir, + char * hdfsRootDir, Int64 modTSforDir, - Lng32 numFilesInDir + Lng32 numOfPartCols, + Queue * hdfsDirsToCheck ) : ComTdb( ComTdb::ex_HDFS_SCAN, @@ -113,9 +114,10 @@ ComTdbHdfsScan::ComTdbHdfsScan( errCountTable_(errCountTable), loggingLocation_(loggingLocation), errCountRowId_(errCountId), - hdfsFilesDir_(hdfsFilesDir), + hdfsRootDir_(hdfsRootDir), modTSforDir_(modTSforDir), - numFilesInDir_(numFilesInDir) + numOfPartCols_(numOfPartCols), + hdfsDirsToCheck_(hdfsDirsToCheck) {}; ComTdbHdfsScan::~ComTdbHdfsScan() @@ -151,7 +153,8 @@ Long ComTdbHdfsScan::pack(void * space) loggingLocation_.pack(space); errCountRowId_.pack(space); - hdfsFilesDir_.pack(space); + hdfsRootDir_.pack(space); + hdfsDirsToCheck_.pack(space); return ComTdb::pack(space); } @@ -185,7 +188,8 @@ Lng32 ComTdbHdfsScan::unpack(void * base, void * reallocator) if (loggingLocation_.unpack(base)) return -1; if (errCountRowId_.unpack(base)) return -1; - if (hdfsFilesDir_.unpack(base)) return -1; + if (hdfsRootDir_.unpack(base)) return -1; + if (hdfsDirsToCheck_.unpack(base, reallocator)) return -1; return ComTdb::unpack(base, reallocator); } @@ -434,14 +438,26 @@ void ComTdbHdfsScan::displayContents(Space * space,ULng32 flag) } } - if (hdfsFilesDir_) + if (hdfsRootDir_) { - str_sprintf(buf, "hdfsDir: %s", hdfsFilesDir_); + str_sprintf(buf, "hdfsRootDir: %s", hdfsRootDir_); space->allocateAndCopyToAlignedSpace(buf, str_len(buf), sizeof(short)); - str_sprintf(buf, "modTSforDir_ = %Ld, numFilesInDir_ = %d", - modTSforDir_, numFilesInDir_); + str_sprintf(buf, "modTSforDir_ = %Ld, numOfPartCols_ = %d", + modTSforDir_, numOfPartCols_); space->allocateAndCopyToAlignedSpace(buf, str_len(buf), sizeof(short)); + + if (hdfsDirsToCheck()) + { + hdfsDirsToCheck()->position(); + char * dir = NULL; + while ((dir = (char*)hdfsDirsToCheck()->getNext()) != NULL) + { + str_sprintf(buf, "Dir Name: %s", dir); + space->allocateAndCopyToAlignedSpace(buf, str_len(buf), sizeof(short)); + } + } + } } http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/comexe/ComTdbHdfsScan.h ---------------------------------------------------------------------- diff --git a/core/sql/comexe/ComTdbHdfsScan.h b/core/sql/comexe/ComTdbHdfsScan.h index 0b17947..c60b192 100755 --- a/core/sql/comexe/ComTdbHdfsScan.h +++ b/core/sql/comexe/ComTdbHdfsScan.h @@ -136,11 +136,12 @@ class ComTdbHdfsScan : public ComTdb // next 3 params used to check if data under hdfsFileDir // was modified after query was compiled. - NABasicPtr hdfsFilesDir_; // 192 - 199 + NABasicPtr hdfsRootDir_; // 192 - 199 Int64 modTSforDir_; // 200 - 207 - Lng32 numFilesInDir_; // 208 - 211 + Lng32 numOfPartCols_; // 208 - 211 + QueuePtr hdfsDirsToCheck_; // 212 - 219 - char fillersComTdbHdfsScan2_[12]; // 212 - 223 + char fillersComTdbHdfsScan2_[4]; // 220 - 223 public: enum HDFSFileType @@ -195,9 +196,10 @@ public: // next 3 params used to check if data under hdfsFileDir // was modified after query was compiled. - char * hdfsFilesDir = NULL, + char * hdfsRootDir = NULL, Int64 modTSforDir = -1, - Lng32 numFilesInDir = -1 + Lng32 numOfPartCols = -1, + Queue * hdfsDirsToCheck = NULL ); ~ComTdbHdfsScan(); @@ -329,7 +331,8 @@ public: { return workCriDesc_->getTupleDescriptor(moveExprColsTuppIndex_); } - + + Queue * hdfsDirsToCheck() { return hdfsDirsToCheck_; } }; inline ComTdb * ComTdbHdfsScan::getChildTdb() http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/executor/ExHdfsScan.cpp ---------------------------------------------------------------------- diff --git a/core/sql/executor/ExHdfsScan.cpp b/core/sql/executor/ExHdfsScan.cpp index dbb5e7c..9010c83 100644 --- a/core/sql/executor/ExHdfsScan.cpp +++ b/core/sql/executor/ExHdfsScan.cpp @@ -423,14 +423,19 @@ ExWorkProcRetcode ExHdfsScanTcb::work() case CHECK_FOR_DATA_MOD: case CHECK_FOR_DATA_MOD_AND_DONE: { - char * dirPath = hdfsScanTdb().hdfsFilesDir_; + char * dirPath = hdfsScanTdb().hdfsRootDir_; if (! dirPath) dataModCheckDone_ = TRUE; if (NOT dataModCheckDone_) { Int64 modTS = hdfsScanTdb().modTSforDir_; - Lng32 numFilesInDir = hdfsScanTdb().numFilesInDir_; + Lng32 numOfPartLevels = hdfsScanTdb().numOfPartCols_; + + if (hdfsScanTdb().hdfsDirsToCheck()) + { + // TBD + } retcode = ExpLOBinterfaceDataModCheck (lobGlob_, @@ -438,7 +443,7 @@ ExWorkProcRetcode ExHdfsScanTcb::work() hdfsScanTdb().hostName_, hdfsScanTdb().port_, modTS, - numFilesInDir); + numOfPartLevels); if (retcode < 0) { http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/exp/ExpLOBaccess.cpp ---------------------------------------------------------------------- diff --git a/core/sql/exp/ExpLOBaccess.cpp b/core/sql/exp/ExpLOBaccess.cpp index 5c1d2fa..3ac537b 100644 --- a/core/sql/exp/ExpLOBaccess.cpp +++ b/core/sql/exp/ExpLOBaccess.cpp @@ -110,13 +110,16 @@ ExLob::~ExLob() } +__thread hdfsFS *globalFS = NULL; + Ex_Lob_Error ExLob::initialize(char *lobFile, Ex_Lob_Mode mode, char *dir, LobsStorage storage, char *hdfsServer, Int64 hdfsPort, char *lobLocation, int bufferSize , short replication , - int blockSize, Int64 lobMaxSize, ExLobGlobals *lobGlobals) + int blockSize, Int64 lobMaxSize, + ExLobGlobals *lobGlobals) { int openFlags; mode_t filePerms = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH; @@ -141,34 +144,41 @@ Ex_Lob_Error ExLob::initialize(char *lobFile, Ex_Lob_Mode mode, } - if (storage_ != Lob_Invalid_Storage) + hdfsServer_ = hdfsServer; + hdfsPort_ = hdfsPort; + + if (globalFS == NULL) + { + globalFS = new hdfsFS; + *globalFS = NULL; + } + + if (*globalFS == NULL) + { + *globalFS = hdfsConnect(hdfsServer_, hdfsPort_); + if (*globalFS == NULL) + return LOB_HDFS_CONNECT_ERROR; + } + + fs_ = *globalFS; + if (lobGlobals) + lobGlobals->setHdfsFs(fs_); + + if (storage_ != Lob_Invalid_Storage) { return LOB_INIT_ERROR; - } else + } + else { storage_ = storage; } stats_.init(); - hdfsServer_ = hdfsServer; - hdfsPort_ = hdfsPort; if (lobLocation) lobLocation_ = lobLocation; clock_gettime(CLOCK_MONOTONIC, &startTime); - if (lobGlobals->getHdfsFs() == NULL) - { - fs_ = hdfsConnect(hdfsServer_, hdfsPort_); - if (fs_ == NULL) - return LOB_HDFS_CONNECT_ERROR; - lobGlobals->setHdfsFs(fs_); - } - else - { - fs_ = lobGlobals->getHdfsFs(); - } - clock_gettime(CLOCK_MONOTONIC, &endTime); secs = endTime.tv_sec - startTime.tv_sec; @@ -379,28 +389,17 @@ Ex_Lob_Error ExLob::writeDataSimple(char *data, Int64 size, LobsSubOper subOpera return LOB_OPER_OK; } -Ex_Lob_Error ExLob::dataModCheck( +Ex_Lob_Error ExLob::dataModCheck2( char * dirPath, Int64 inputModTS, - Lng32 inputNumFilesInDir, - Lng32 &numFilesInDir) + Lng32 numOfPartLevels) { - // find mod time of dir - hdfsFileInfo *fileInfos = hdfsGetPathInfo(fs_, dirPath); - if (fileInfos == NULL) - { - return LOB_DATA_FILE_NOT_FOUND_ERROR; - } - - Int64 currModTS = fileInfos[0].mLastMod; - hdfsFreeFileInfo(fileInfos, 1); - if ((inputModTS > 0) && - (currModTS > inputModTS)) - return LOB_DATA_MOD_CHECK_ERROR; + if (numOfPartLevels == 0) + return LOB_OPER_OK; - // find number of files in dirPath. Lng32 currNumFilesInDir = 0; - fileInfos = hdfsListDirectory(fs_, dirPath, &currNumFilesInDir); + hdfsFileInfo * fileInfos = + hdfsListDirectory(fs_, dirPath, &currNumFilesInDir); if ((currNumFilesInDir > 0) && (fileInfos == NULL)) { return LOB_DATA_FILE_NOT_FOUND_ERROR; @@ -412,17 +411,9 @@ Ex_Lob_Error ExLob::dataModCheck( hdfsFileInfo &fileInfo = fileInfos[i]; if (fileInfo.mKind == kObjectKindDirectory) { - if (dataModCheck(fileInfo.mName, inputModTS, - inputNumFilesInDir, numFilesInDir) == - LOB_DATA_MOD_CHECK_ERROR) - { - failed = TRUE; - } - } - else if (fileInfo.mKind == kObjectKindFile) - { - numFilesInDir++; - if (numFilesInDir > inputNumFilesInDir) + Int64 currModTS = fileInfo.mLastMod; + if ((inputModTS > 0) && + (currModTS > inputModTS)) failed = TRUE; } } @@ -431,6 +422,47 @@ Ex_Lob_Error ExLob::dataModCheck( if (failed) return LOB_DATA_MOD_CHECK_ERROR; + numOfPartLevels--; + Ex_Lob_Error err = LOB_OPER_OK; + if (numOfPartLevels > 0) + { + for (Lng32 i = 0; ((NOT failed) && (i < currNumFilesInDir)); i++) + { + hdfsFileInfo &fileInfo = fileInfos[i]; + err = dataModCheck2(fileInfo.mName, inputModTS, numOfPartLevels); + if (err != LOB_OPER_OK) + return err; + } + } + + return LOB_OPER_OK; +} + +// numOfPartLevels: 0, if not partitioned +// N, number of partitioning cols +Ex_Lob_Error ExLob::dataModCheck( + char * dirPath, + Int64 inputModTS, + Lng32 numOfPartLevels) +{ + // find mod time of root dir + hdfsFileInfo *fileInfos = hdfsGetPathInfo(fs_, dirPath); + if (fileInfos == NULL) + { + return LOB_DATA_FILE_NOT_FOUND_ERROR; + } + + Int64 currModTS = fileInfos[0].mLastMod; + hdfsFreeFileInfo(fileInfos, 1); + if ((inputModTS > 0) && + (currModTS > inputModTS)) + return LOB_DATA_MOD_CHECK_ERROR; + + if (numOfPartLevels > 0) + { + return dataModCheck2(dirPath, inputModTS, numOfPartLevels); + } + return LOB_OPER_OK; } @@ -2387,17 +2419,13 @@ Ex_Lob_Error ExLobsOper ( { lobPtr->initialize(NULL, EX_LOB_RW, NULL, storage, hdfsServer, hdfsPort, NULL, - bufferSize, replication, blockSize); + bufferSize, replication, blockSize, lobMaxSize, + lobGlobals); Int64 inputModTS = *(Int64*)blackBox; - Int32 inputNumFilesInDir = + Int32 inputNumOfPartLevels = *(Lng32*)&((char*)blackBox)[sizeof(inputModTS)]; - Int32 numFilesInDir = 0; - err = lobPtr->dataModCheck(dir, inputModTS, - inputNumFilesInDir, numFilesInDir); - if ((err == LOB_OPER_OK) && - (numFilesInDir != inputNumFilesInDir)) - err = LOB_DATA_MOD_CHECK_ERROR; + err = lobPtr->dataModCheck(dir, inputModTS, inputNumOfPartLevels); } break; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/exp/ExpLOBaccess.h ---------------------------------------------------------------------- diff --git a/core/sql/exp/ExpLOBaccess.h b/core/sql/exp/ExpLOBaccess.h index 138116c..518fbd7 100644 --- a/core/sql/exp/ExpLOBaccess.h +++ b/core/sql/exp/ExpLOBaccess.h @@ -489,8 +489,11 @@ class ExLob Ex_Lob_Error dataModCheck( char * dirPath, Int64 modTS, - Lng32 inputNumFilesInDir, - Lng32 &numFilesInDir); + Lng32 numOfPartLevels); + Ex_Lob_Error dataModCheck2( + char * dirPath, + Int64 modTS, + Lng32 numOfPartLevels); Ex_Lob_Error emptyDirectory(); ExLobStats *getStats() { return &stats_; } http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/exp/ExpLOBinterface.cpp ---------------------------------------------------------------------- diff --git a/core/sql/exp/ExpLOBinterface.cpp b/core/sql/exp/ExpLOBinterface.cpp index a984635..6fe6fa9 100644 --- a/core/sql/exp/ExpLOBinterface.cpp +++ b/core/sql/exp/ExpLOBinterface.cpp @@ -236,7 +236,7 @@ Lng32 ExpLOBinterfaceDataModCheck(void * lobGlob, char * lobHdfsServer, Lng32 lobHdfsPort, Int64 modTS, - Lng32 numFilesInDir) + Lng32 numOfPartLevels) { Ex_Lob_Error err; @@ -247,8 +247,8 @@ Lng32 ExpLOBinterfaceDataModCheck(void * lobGlob, char dirInfoBuf[100]; *(Int64*)dirInfoBuf = modTS; - *(Lng32*)&dirInfoBuf[sizeof(modTS)] = numFilesInDir; - Lng32 dirInfoBufLen = sizeof(modTS) + sizeof(numFilesInDir); + *(Lng32*)&dirInfoBuf[sizeof(modTS)] = numOfPartLevels; + Lng32 dirInfoBufLen = sizeof(modTS) + sizeof(numOfPartLevels); err = ExLobsOper((char*)"", NULL, 0, lobHdfsServer, lobHdfsPort, http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/exp/ExpLOBinterface.h ---------------------------------------------------------------------- diff --git a/core/sql/exp/ExpLOBinterface.h b/core/sql/exp/ExpLOBinterface.h index a9b7597..689e422 100644 --- a/core/sql/exp/ExpLOBinterface.h +++ b/core/sql/exp/ExpLOBinterface.h @@ -303,7 +303,7 @@ Lng32 ExpLOBinterfaceDataModCheck(void * lobGlob, char * lobHdfsServer, Lng32 lobHdfsPort, Int64 modTS, - Lng32 numFilesInDir); + Lng32 numOfPartLevels); Lng32 ExpLOBinterfaceEmptyDirectory(void * lobGlob, char * lobName, http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/generator/GenRelScan.cpp ---------------------------------------------------------------------- diff --git a/core/sql/generator/GenRelScan.cpp b/core/sql/generator/GenRelScan.cpp index a781815..5474d94 100644 --- a/core/sql/generator/GenRelScan.cpp +++ b/core/sql/generator/GenRelScan.cpp @@ -1147,19 +1147,29 @@ if (hTabStats->isOrcFile()) space->AllocateAndCopyToAlignedSpace(GenGetQualifiedName(getIndexDesc()->getNAFileSet()->getFileSetName()), 0); // info needed to validate hdfs file structs - // const HHDFSTableStats* hTabStats = - // getIndexDesc()->getNAFileSet()->getHHDFSTableStats(); - char * hdfsDir = NULL; + char * hdfsRootDir = NULL; Int64 modTS = -1; - Lng32 numFilesInDir = -1; + Lng32 numOfPartLevels = -1; + Queue * hdfsDirsToCheck = NULL; if (CmpCommon::getDefault(HIVE_DATA_MOD_CHECK) == DF_ON) { - hdfsDir = + hdfsRootDir = space->allocateAndCopyToAlignedSpace(hTabStats->tableDir().data(), hTabStats->tableDir().length(), 0); modTS = hTabStats->getModificationTS(); - numFilesInDir = hTabStats->getNumFiles(); + numOfPartLevels = hTabStats->numOfPartCols(); + + // if specific directories are to checked based on the query struct + // (for example, when certain partitions are explicitly specified), + // add them to hdfsDirsToCheck. + // At runtime, only these dirs will be checked for data modification. + // ** TBD ** + + // Right now, timestamp info is not being generated correctly for + // partitioned files. Skip data mod check for them. + if (numOfPartLevels > 0) + hdfsRootDir = NULL; } // create hdfsscan_tdb @@ -1202,7 +1212,7 @@ if (hTabStats->isOrcFile()) logLocation, errCountRowId, - hdfsDir, modTS, numFilesInDir + hdfsRootDir, modTS, numOfPartLevels, hdfsDirsToCheck ); generator->initTdbFields(hdfsscan_tdb); http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/optimizer/HDFSHook.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/HDFSHook.cpp b/core/sql/optimizer/HDFSHook.cpp index 90df234..a165b51 100644 --- a/core/sql/optimizer/HDFSHook.cpp +++ b/core/sql/optimizer/HDFSHook.cpp @@ -1069,41 +1069,58 @@ void HHDFSTableStats::print(FILE *ofd) fprintf(ofd,"====================================================================\n"); } +extern __thread hdfsFS *globalFS; + NABoolean HHDFSTableStats::connectHDFS(const NAString &host, Int32 port) { NABoolean result = TRUE; // establish connection to HDFS if needed - if (fs_ == NULL || + if (globalFS == NULL || + *globalFS == NULL || currHdfsHost_ != host || currHdfsPort_ != port) { - if (fs_) + if (globalFS && *globalFS) + disconnectHDFS(); + + if (globalFS == NULL) { - hdfsDisconnect(fs_); - fs_ = NULL; + globalFS = new hdfsFS; + *globalFS = NULL; } - fs_ = hdfsConnect(host, port); - - if (fs_ == NULL) + + if (*globalFS == NULL) { - NAString errMsg("hdfsConnect to "); - errMsg += host; - errMsg += ":"; - errMsg += port; - errMsg += " failed"; - diags_.recordError(errMsg, "HHDFSTableStats::connectHDFS"); - result = FALSE; + *globalFS = hdfsConnect(host, port); } + currHdfsHost_ = host; currHdfsPort_ = port; } + + fs_ = *globalFS; + if (fs_ == NULL) + { + NAString errMsg("hdfsConnect to "); + errMsg += host; + errMsg += ":"; + errMsg += port; + errMsg += " failed"; + diags_.recordError(errMsg, "HHDFSTableStats::connectHDFS"); + result = FALSE; + } + return result; } void HHDFSTableStats::disconnectHDFS() { - if (fs_) - hdfsDisconnect(fs_); + if (globalFS && *globalFS) + { + hdfsDisconnect(*globalFS); + *globalFS = NULL; + } + fs_ = NULL; } http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/optimizer/HDFSHook.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/HDFSHook.h b/core/sql/optimizer/HDFSHook.h index 1ab474c..4f80904 100644 --- a/core/sql/optimizer/HDFSHook.h +++ b/core/sql/optimizer/HDFSHook.h @@ -330,6 +330,9 @@ public: const NAString &tableDir() const { return tableDir_; } + const Lng32 numOfPartCols() const { return numOfPartCols_; } + const Lng32 totalNumPartitions() const { return totalNumPartitions_; } + private: enum FileType { http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/optimizer/NATable.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/NATable.cpp b/core/sql/optimizer/NATable.cpp index 9f46209..81eb5b0 100644 --- a/core/sql/optimizer/NATable.cpp +++ b/core/sql/optimizer/NATable.cpp @@ -7308,6 +7308,13 @@ NATable * NATableDB::get(const ExtendedQualName* key, BindWA* bindWA, NABoolean } } + if ((cachedNATable->isHiveTable()) && + (CmpCommon::getDefault(HIVE_DATA_MOD_CHECK) == DF_ON) && + (CmpCommon::getDefault(TRAF_RELOAD_NATABLE_CACHE) == DF_ON)) + { + removeEntry = TRUE; + } + //Found in cache. If that's all the caller wanted, return now. if ( !removeEntry && findInCacheOnly ) return cachedNATable; @@ -7896,7 +7903,8 @@ NATable * NATableDB::get(CorrName& corrName, BindWA * bindWA, table = NULL; } - if (table && ((table->isHbaseTable() || table->isSeabaseTable()) && !(table->isSeabaseMDTable()))) + if (table && ((table->isHbaseTable() || table->isSeabaseTable()) && + !(table->isSeabaseMDTable()))) { if ((CmpCommon::getDefault(TRAF_RELOAD_NATABLE_CACHE) == DF_ON)) { http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/regress/hive/EXPECTED005 ---------------------------------------------------------------------- diff --git a/core/sql/regress/hive/EXPECTED005 b/core/sql/regress/hive/EXPECTED005 index eff0c42..6118229 100644 --- a/core/sql/regress/hive/EXPECTED005 +++ b/core/sql/regress/hive/EXPECTED005 @@ -4,7 +4,7 @@ --- SQL operation complete. >>set terminal_charset utf8; >> ->>cqd AUTO_QUERY_RETRY 'OFF'; +>>cqd AUTO_QUERY_RETRY_WARNINGS 'ON'; --- SQL operation complete. >>cqd HIVE_MAX_STRING_LENGTH '25' ; @@ -13,9 +13,6 @@ >>cqd mode_seahive 'ON'; --- SQL operation complete. ->>cqd CALL_EMBEDDED_ARKCMP 'OFF'; - ---- SQL operation complete. >>cqd HIST_ROWCOUNT_REQUIRING_STATS '50000'; --- SQL operation complete. @@ -72,6 +69,10 @@ +> order by 1 +> ; +*** WARNING[8597] Statement was automatically retried 1 time(s). Delay before each retry was 0 seconds. See next entry for the error that caused this retry. + +*** WARNING[8436] Mismatch detected between compiletime and runtime hive table definitions. + C_PREFERRED_CUST_FLAG (EXPR) ------------------------- -------------------- @@ -218,12 +219,16 @@ xyz >>-- s1 should still return 0 rows - for now >>execute s2; +*** WARNING[8597] Statement was automatically retried 1 time(s). Delay before each retry was 0 seconds. See next entry for the error that caused this retry. + +*** WARNING[8436] Mismatch detected between compiletime and runtime hive table definitions. + C_PREFERRED_CUST_FLAG (EXPR) ------------------------- -------------------- -N 9789 -Y 9525 -? 685 +N 19631 +Y 18984 +? 1384 --- 3 row(s) selected. >>execute s3; @@ -270,6 +275,10 @@ Y 9525 --- 1 row(s) inserted. >>select a,b from newtable; +*** WARNING[8597] Statement was automatically retried 1 time(s). Delay before each retry was 0 seconds. See next entry for the error that caused this retry. + +*** WARNING[8436] Mismatch detected between compiletime and runtime hive table definitions. + A B ----------- ------------------------- @@ -307,9 +316,9 @@ A B C_PREFERRED_CUST_FLAG (EXPR) ------------------------- -------------------- -N 9789 -Y 9525 -? 685 +N 19631 +Y 18984 +? 1384 --- 3 row(s) selected. >>execute s4; @@ -453,21 +462,21 @@ TINT SM I BIG STR F --- SQL operation complete. >>select c1, CONVERTTOHEX(c2) from tbl_gbk; -C1 (EXPR) +C1 (EXPR) ----------- -------------------------------------------------- - 3 EC8B90EC978E - 5 EC8B90EC978E - 2 EC8B90EC978E - 4 EC8B90EC978E - 6 EC8B90EC978E - 7 EC8B90EC978E - 8 EC8B90EC978E - 3 ECBB93EB9F8FECAB97EB9B91 - 2 ECBB93EB9F8FECAB97EB9B91 - 6 ECBB93EB9F8FECAB97EB9B91 - 19 ECBB93EB9F8FECAB97EB9B91 - 8 ECBB93EB9F8FECAB97EB9B91 + 3 EC8B90EC978E + 5 EC8B90EC978E + 2 EC8B90EC978E + 4 EC8B90EC978E + 6 EC8B90EC978E + 7 EC8B90EC978E + 8 EC8B90EC978E + 3 ECBB93EB9F8FECAB97EB9B91 + 2 ECBB93EB9F8FECAB97EB9B91 + 6 ECBB93EB9F8FECAB97EB9B91 + 19 ECBB93EB9F8FECAB97EB9B91 + 8 ECBB93EB9F8FECAB97EB9B91 --- 12 row(s) selected. >>cqd HIVE_FILE_CHARSET reset; @@ -516,7 +525,7 @@ C1 C2 C3 C4 --- 10 row(s) loaded. >>select * from trafodion.seabase.tbl_dos_num; -C1 C2 +C1 C2 ----------- ----------- 0 39478 @@ -544,16 +553,16 @@ C1 C2 --- SQL operation complete. >>select * from tbl_bad; -C1 C2 C3 C4 C5 C6 C7 C8 ------------ -------------------- ------------------------- --------------- ------ -------------------------- ------------------------- ------ +C1 C2 C3 C4 C5 C6 C7 C8 +----------- -------------------- ------------------------- --------------- ------ -------------------------- ------------------------- ------ - ? ? c ? ? ? ? ? - ? ? c ? ? 2017-01-01 10:10:10.000000 1.01000000000000000E+000 1 - ? ? ? ? ? ? ? - 1 1 averylongstring -1.0000000E+000 0 2017-01-01 10:10:10.000000 1.00010000000000000E+002 1 - 2 2 good 1.1000000E+000 2 2017-01-01 10:10:10.000000 2.00000000000000000E+002 1000 - 3 3 good 1.0000000E+000 2 2017-01-01 10:10:10.000000 2.10000000000000000E+002 10 - ? 4294967295 good 3.3999999E+038 ? 2017-01-01 10:10:10.000000 1.69999999999999968E+308 10 + ? ? c ? ? ? ? ? + ? ? c ? ? 2017-01-01 10:10:10.000000 1.01000000000000000E+000 1 + ? ? ? ? ? ? ? + 1 1 averylongstring -1.0000000E+000 0 2017-01-01 10:10:10.000000 1.00010000000000000E+002 1 + 2 2 good 1.1000000E+000 2 2017-01-01 10:10:10.000000 2.00000000000000000E+002 1000 + 3 3 good 1.0000000E+000 2 2017-01-01 10:10:10.000000 2.10000000000000000E+002 10 + ? 4294967295 good 3.3999999E+038 ? 2017-01-01 10:10:10.000000 1.69999999999999968E+308 10 0 9999999999 bad ? ? ? ? ? --- 8 row(s) selected. @@ -631,4 +640,62 @@ C1 C2 C3 C4 C >>cqd HIVE_SCAN_SPECIAL_MODE reset; --- SQL operation complete. +>> +>>-- tests for hive timestamp mismatch check +>>cqd auto_query_retry_warnings 'ON'; + +--- SQL operation complete. +>> +>>sh echo "drop table thive;" > TEST005_junk; +>>sh regrhive.ksh -f TEST005_junk; +>> +>>sh echo "create table thive(a int);" > TEST005_junk; +>>sh regrhive.ksh -f TEST005_junk; +>> +>>select * from hive.hive.thive; + +--- 0 row(s) selected. +>> +>>sh echo "insert into thive values (1);" > TEST005_junk; +>>sh regrhive.ksh -f TEST005_junk; +>> +>>select * from hive.hive.thive; + +A +----------- + + 1 + +--- 1 row(s) selected. +>>insert into hive.hive.thive values (2); + +--- 1 row(s) inserted. +>>select * from hive.hive.thive; + +A +----------- + + 1 + 2 + +--- 2 row(s) selected. +>> +>>sh echo "drop table thive;" > TEST005_junk; +>>sh regrhive.ksh -f TEST005_junk; +>> +>>sh echo "create table thive(a int, b int);" > TEST005_junk; +>>sh regrhive.ksh -f TEST005_junk; +>> +>>sh echo "insert into thive values (1,2);" > TEST005_junk; +>>sh regrhive.ksh -f TEST005_junk; +>> +>>select * from hive.hive.thive; + +A B +----------- ----------- + + 1 2 + +--- 1 row(s) selected. +>> >>log; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/1820da1c/core/sql/regress/hive/TEST005 ---------------------------------------------------------------------- diff --git a/core/sql/regress/hive/TEST005 b/core/sql/regress/hive/TEST005 index ad7cf0e..e44b633 100644 --- a/core/sql/regress/hive/TEST005 +++ b/core/sql/regress/hive/TEST005 @@ -58,10 +58,9 @@ log LOG005 clear; set schema hive.hive; set terminal_charset utf8; -cqd AUTO_QUERY_RETRY 'OFF'; +cqd AUTO_QUERY_RETRY_WARNINGS 'ON'; cqd HIVE_MAX_STRING_LENGTH '25' ; cqd mode_seahive 'ON'; -cqd CALL_EMBEDDED_ARKCMP 'OFF'; cqd HIST_ROWCOUNT_REQUIRING_STATS '50000'; ------------------------------------------------------------ -- Testing query plan invalidation in the compiler, but @@ -283,4 +282,34 @@ c8 smallint cqd HIVE_SCAN_SPECIAL_MODE '2'; insert into trafodion.seabase.traf_tbl_bad select * from tbl_bad; cqd HIVE_SCAN_SPECIAL_MODE reset; + +-- tests for hive timestamp mismatch check +cqd auto_query_retry_warnings 'ON'; + +sh echo "drop table thive;" > TEST005_junk; +sh regrhive.ksh -f TEST005_junk; + +sh echo "create table thive(a int);" > TEST005_junk; +sh regrhive.ksh -f TEST005_junk; + +select * from hive.hive.thive; + +sh echo "insert into thive values (1);" > TEST005_junk; +sh regrhive.ksh -f TEST005_junk; + +select * from hive.hive.thive; +insert into hive.hive.thive values (2); +select * from hive.hive.thive; + +sh echo "drop table thive;" > TEST005_junk; +sh regrhive.ksh -f TEST005_junk; + +sh echo "create table thive(a int, b int);" > TEST005_junk; +sh regrhive.ksh -f TEST005_junk; + +sh echo "insert into thive values (1,2);" > TEST005_junk; +sh regrhive.ksh -f TEST005_junk; + +select * from hive.hive.thive; + log;
