[
https://issues.apache.org/jira/browse/TRAFODION-2263?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15552379#comment-15552379
]
ASF GitHub Bot commented on TRAFODION-2263:
-------------------------------------------
Github user DaveBirdsall commented on a diff in the pull request:
https://github.com/apache/incubator-trafodion/pull/746#discussion_r82220981
--- Diff: core/sql/cli/Statement.cpp ---
@@ -2102,775 +2071,109 @@ Statement *
Statement::getCurrentOfCursorStatement(char * cursorName)
}
-///////////////////////////////////////////////////////////////////////
-// RETURN: doSimCheck: if true, do similarity check.
-// doFixup: if true and doSimCheck is false, do fixup again.
-// Note: fixup is always done after sim check so if
-// doSimCheck is returned as true, then doFixup
-// is ignored.
-///////////////////////////////////////////////////////////////////////
-RETCODE Statement::resolveNames(LateNameInfoList * lnil,
- Descriptor * inputDesc,
- ComDiagsArea &diagsArea,
- NABoolean &doSimCheck,
- NABoolean &doFixup)
+RETCODE Statement::doHiveTableSimCheck(TrafSimilarityTableInfo *si,
+ void * lobGlob,
+ NABoolean &simCheckFailed,
+ ComDiagsArea &diagsArea)
{
- doSimCheck = FALSE;
- doFixup = FALSE;
- Lng32 curAnsiNameLen = 0;
- char * curAnsiName = 0;
- NABoolean contextChanged = FALSE;
- AnsiOrNskName *curName = NULL;
- Int16 retCode;
- char * parts[4];
- Lng32 numParts;
- bool isNskName = FALSE;
- char * fullyQualifiedSchemaName = schemaName_;
-
- if (lnil->definePresent())
- {
- // if this is the first time names are being resolved, then
- // do it even if the define context has not changed.
- unsigned short defcon = context_->getCurrentDefineContext();
- if ((NOT firstResolveDone()) ||
- (defcon != defineContext()))
- {
- contextChanged = TRUE;
- }
- defineContext() = defcon;
- }
-
- for (Int32 l = 0; l < (Int32) (lnil->getNumEntries()); l++)
- {
- LateNameInfo * lni = &(lnil->getLateNameInfo(l));
-
- lni->setAnsiNameChange(0);
-
-
- if (lni->isVariable())
- {
-
- if (lni->isDefine())
- {
- }
- else if (lni->isEnvVar())
- {
- // get name from env var.
- char envName[ComMAX_3_PART_EXTERNAL_UTF8_NAME_LEN_IN_BYTES+1];
- curAnsiName = cliGlobals_->getEnv(lni->variableName());
-
- if ((! curAnsiName)||
- ((curAnsiNameLen = str_len(curAnsiName)) >
- ComMAX_3_PART_EXTERNAL_UTF8_NAME_LEN_IN_BYTES))
- {
-
- diagsArea << DgSqlCode(-EXE_INVALID_DEFINE_OR_ENVVAR)
- <<DgString0(lni->variableName());
- return ERROR;
- }
- str_cpy_all(envName,curAnsiName,str_len(curAnsiName));
- envName[curAnsiNameLen] = '\0';
-
- curName = new (&heap_) AnsiOrNskName(envName);
- if (curName->convertAnsiOrNskName())
- {
- diagsArea <<
DgSqlCode(-CLI_INVALID_SQL_ID)<<DgString0(envName);
- delete curName;
- return ERROR;
- }
- } //endif env var
-
- else if (lni->isCachedParam())
- {
- // cached params are only resolved on the first execute after
- // prepare. These values are set from constants and
- // cannot change for multiple executions of the same stmt.
- if (firstResolveDone())
- continue; // Go back to the for loop to continue with the
next lni
-
- curAnsiName =
- &root_tdb->getParameterBuffer()
- [lni->getCachedParamOffset()];
- curName = new (&heap_) AnsiOrNskName(curAnsiName);
- if (curName->convertAnsiOrNskName())
- {
- diagsArea <<
DgSqlCode(-CLI_INVALID_SQL_ID)<<DgString0(curAnsiName);
- delete curName;
- return ERROR;
- }
-
- } //endif cached param
- else // hvar or param
- {
- // Here we need to validate what the hvar contains
- // We need to make sure it is a string hvar and we call
- // convDoIt so that we only get the length of the
- // string that we need.
-
- char *source= 0;
- Lng32 sourceLen = 0;
- Lng32 sourceType = 0;
- Lng32 sourceCharset = CharInfo::UnknownCharSet;
- void *var_ptr = 0;
-
- inputDesc->getDescItem(lni->getInputListIndex(),
- SQLDESC_VAR_PTR, &var_ptr,
- 0, 0, 0, 0);
- inputDesc->getDescItem(lni->getInputListIndex(),
- SQLDESC_LENGTH, &sourceLen,
- 0,0,0,0);
-
- inputDesc->getDescItem(lni->getInputListIndex(),
- SQLDESC_TYPE_FS, &sourceType,
- 0,0,0,0);
-
- // Check if input is a string type
-
- if ((sourceType>=REC_MIN_CHARACTER) &&
- (sourceType <= REC_MAX_CHARACTER))
- {
- inputDesc->getDescItem(lni->getInputListIndex(),
- SQLDESC_CHAR_SET, &sourceCharset,
- 0,0,0,0);
-
- if (sourceCharset == CharInfo::UnknownCharSet &&
- (sourceType == REC_BYTE_F_ASCII ||
- sourceType == REC_BYTE_V_ASCII ||
- sourceType == REC_BYTE_V_ANSI))
- {
- // some clients aren't accustomed yet to setting the
- // charset of the SQL statement, treat those as
ISO88591
- // for single-byte based data types
- sourceCharset = CharInfo::ISO88591;
- }
-
- if (stmt_type == STATIC_STMT &&
- sourceCharset == CharInfo::ISO88591)
- {
- // our convention is to pass UTF-8 or UCS2 for
- // names in embedded programs. However, embedded
- // SQL does not support UTF-8 at this time.
- // Temporary fix: Set input charset to UTF-8
- // here until UTF-8 host varables are supported
- // in embedded programs (if that ever happens)
- sourceCharset = CharInfo::UTF8;
- }
- }
- else
- {
- diagsArea << DgSqlCode(-CLI_INVALID_OBJECTNAME);
- return ERROR;
- }
- source = (char *)var_ptr;
-
- if (DFS2REC::isSQLVarChar(sourceType))
- {
- // the first 2 bytes of data are actually the variable
- // length indicator
- short VCLen;
- str_cpy_all((char *) &VCLen, source, sizeof(short));
- sourceLen = (Lng32) VCLen;
- source = &source[sizeof(short)];
- }
-
- ComDiagsArea *diagsPtr = NULL;
- char targetName[ComMAX_3_PART_EXTERNAL_UTF8_NAME_LEN_IN_BYTES +
1];
- short retcode = convDoIt(source,
- sourceLen,
- (short) sourceType,
- 0,
- sourceCharset, // passed in as scale
- targetName,
- sizeof(targetName) - 1,
- REC_BYTE_V_ANSI, // short targetType
- 0, // Lng32 targetPrecision
- (Lng32) SQLCHARSETCODE_UTF8, // Lng32
targetScale - also used as targetCharSet for CharType
- 0,
- 0,
- &heap_,
- &diagsPtr);
- if (diagsPtr)
- {
- diagsArea.mergeAfter(*diagsPtr);
- diagsPtr->decrRefCount();
- diagsPtr = NULL;
- }
- if (retcode != ex_expr::EXPR_OK)
- {
- diagsArea << DgSqlCode(-EXE_CONVERT_STRING_ERROR);
- char hexstr[MAX_OFFENDING_SOURCE_DATA_DISPLAY_LEN];
- memset(hexstr, 0 , sizeof(hexstr) );
- diagsArea << DgString0(stringToHex(hexstr,
sizeof(hexstr), source, sourceLen ));
-
- return ERROR;
- }
-
- if (! fullyQualifiedSchemaName)
- fullyQualifiedSchemaName = lni->compileTimeAnsiName();
-
- curName = new (&heap_) AnsiOrNskName(targetName);
- if (curName->convertAnsiOrNskName())
- {
- diagsArea << DgSqlCode(-CLI_INVALID_SQL_ID)
- <<DgString0(targetName);
- delete curName;
- return ERROR;
-
- }
- } // end if hvar
-
- if (NOT lni->isMPalias())
- {
- if (lni->isAvoidSimCheck()) // Host Variable in SET TABLE TIMEOUT
command without
- // Prototype clause will have the NAME_ONLY and VARIABLE bit is
set
- // So,we will need at least 3 parts
- // If nsk name, system name will be filled in
- {
- if (curName->extractParts(numParts, parts))
- {
- diagsArea << DgSqlCode(-CLI_INVALID_SQL_ID)
- << DgString0(curName->getExternalName());
- delete curName;
- return ERROR;
- }
- if (numParts < 3)
- {
- diagsArea << DgSqlCode(-CLI_INVALID_SQL_ID)
- << DgString0(curName->getExternalName());
- delete curName;
- return ERROR;
- }
- isNskName = curName->isNskName(); // will be set to False
- }
- else
- isNskName = TRUE;
- if (isNskName)
- {
- curAnsiName = curName->getInternalName();
- curAnsiNameLen = str_len(curAnsiName);
- if (curAnsiName[0] != '\\')
- {
-
- }
- }
- }
-
- if (lni->isMPalias() && NOT (lni->isAnsiPhySame())) {
- // if MPAlias flag is set, then we compiled for an MX table, or an
MP table with
- // a MPAlias defined on it See comment in GenRelMisc.cpp
PartitionAccess::codeGen
- // method where the mpalias flag is set. If that flag is set only
for an MPAlias
- // and not for MX tables then this else statement condition will
have to be modified.
- // The code below should be executed for MX tables and for MP
tables access through
- // their MPAlias. The code below is also not needed for accessing
resource forks whose
- // ansi name is same as their physical name.
-
- if (curName->extractParts(numParts, parts))
- {
- diagsArea << DgSqlCode(-CLI_INVALID_SQL_ID)
- << DgString0(curName->getExternalName());
- delete curName;
- return ERROR;
- }
- if (numParts != 3) {
- if (curName->fillInMissingParts(fullyQualifiedSchemaName) == -1)
- {
- diagsArea << DgSqlCode(-CLI_INVALID_SQL_ID)
- << DgString0(curName->getExternalName());
- delete curName;
- return ERROR;
- }
- } // end if numParts != 3
-
- } // end isMPAlias && isAnsiPhySame
-
- // if current ansi name is different than the last ansi name,
- // resolve the current ansi name. Remember this in lni.
- retCode = lni->getLastUsedName(&heap_)->equals(curName);
- if (retCode == -1)
- {
- diagsArea << DgSqlCode(-CLI_INTERNAL_ERROR);
- delete curName;
- return ERROR;
- }
- else
- if (retCode == 0)
- {
- lni->setAnsiNameChange(1);
- lni->setLastUsedName(curName);
- // If similarity check is to be avoided: Map the physical name
- // right here (otherwise done during the similarity check.)
- if ( lni->isAvoidSimCheck() )
- {
- if (! isNskName)
- {
- if (mapAnsiToGuaName(lni, diagsArea) == ERROR)
- return ERROR;
- }
- else
- strcpy(lni->resolvedPhyName(),
curName->getInternalName());
- doFixup = TRUE; // fixup needs to be done again to
- // be able to use the new name.
- }
- else
- if (NOT doSimCheck)
- {
- doSimCheck = TRUE;
-
- // if this is a view name that has changed, mark it
- // in the latenameinfo struct. This will be used later
- // when similarity check is done in
doQuerySimilarityCheck.
- if (lni->isView())
- {
- lni->setViewNameChange(1);
- }
- }
- }
- else
- {
- if (curName)
- {
- delete curName;
- curName = NULL;
- }
-
-
- }
-
- } // variable
-
- } // for
-
- if (NOT firstResolveDone())
- {
- setFirstResolveDone(TRUE);
- }
-
- return SUCCESS;
-}
-
-
-////////////////////////////////////////////////////////////////////
-// This method performs similarity check between the information
-// generated at compile time (si) and the information retrieved at
-// runtime for table specified via tableName.
-// RETURNS: ERROR: if an error occured.
-// SUCCESS, otherwise.
-// if sim check fails, then this is indicated by
-// the return param, simCheckFailed, set to TRUE.
-/////////////////////////////////////////////////////////////////////
-RETCODE Statement::doSimilarityCheck(SimilarityInfo * si,
- LateNameInfo * lni,
- char * tableName,
- NABoolean &simCheckFailed,
- ComDiagsArea &diagsArea
- )
-{
- return SUCCESS;
-}
+ simCheckFailed = FALSE;
+ Lng32 retcode = 0;
-RETCODE Statement::doIUDSimilarityCheck(SimilarityInfo * si,
- LateNameInfo * lni,
- char * tableName,
- Queue * indexInfoList,
- NABoolean &simCheckFailed,
- ComDiagsArea &diagsArea)
-{
- return SUCCESS;
-}
+ if ((si->hdfsRootDir() == NULL) || (si->modTS() == -1))
+ return SUCCESS;
-RETCODE Statement::getMatchingIndex(Queue * indexInfoList,
- char * indexAnsiName,
- char * indexPhyName,
- ComDiagsArea &diagsArea)
-{
- indexInfoList->position();
- for (Int32 i = 0; i < indexInfoList->numEntries(); i++)
+ Int64 failedModTS = -1;
+ Lng32 failedLocBufLen = 1000;
+ char failedLocBuf[failedLocBufLen];
+ retcode = ExpLOBinterfaceDataModCheck
+ (lobGlob,
+ si->hdfsRootDir(),
+ si->hdfsHostName(),
+ si->hdfsPort(),
+ si->modTS(),
+ si->numPartnLevels(),
+ failedModTS,
+ failedLocBuf, failedLocBufLen);
+ if (retcode < 0)
{
- IndexInfo * ii = (IndexInfo*)(indexInfoList->getNext());
-
- if (str_cmp(indexAnsiName, ii->indexAnsiName(),
- str_len(indexAnsiName)) == 0)
- {
- strcpy(indexPhyName, ii->indexPhyName());
- return SUCCESS;
- }
+ Lng32 intParam1 = -retcode;
+ diagsArea << DgSqlCode(-EXE_ERROR_FROM_LOB_INTERFACE)
+ << DgString0("HDFS")
+ << DgString1("ExpLOBInterfaceDataModCheck")
+ << DgString2(getLobErrStr(intParam1))
+ << DgInt0(intParam1)
+ << DgInt1(0);
+ return ERROR;
}
- return SUCCESS;
-}
-
-RETCODE Statement::mapAnsiToGuaName(LateNameInfo * lni,
- ComDiagsArea &diagsArea)
-{
- return ERROR;
-}
-
-// this method resolves all table names that are used in this query.
-RETCODE Statement::forceMapAllNames(LateNameInfoList * lnil,
- ComDiagsArea &diagsArea)
-{
- for (Int32 i = 0; i < (Int32) (lnil->getNumEntries()); i++)
+ if (retcode == 1) // check failed
{
- LateNameInfo * lni = &(lnil->getLateNameInfo(i));
-
- lni->setAnsiNameChange(1);
-
- char *lastUsedAnsiName = lni->lastUsedAnsiName();
-
- // NA_NSK_REL1 - need to change for ansi names
- NABoolean isGuardianName =
- (lastUsedAnsiName && lastUsedAnsiName[0] == '\\') ? TRUE : FALSE;
-
- if (lni->isAnsiPhySame() || isGuardianName)
- {
-
- // If ansi name is really a phys name, copy it to resolvedPhyName.
- // For the first release on NSK, all internal table names, such
as
- // histograms, histints, etc, will have resolved physical names
as
- // their ANSI name.
- str_cpy(lni->resolvedPhyName(), lni->lastUsedAnsiName(),
- str_len(lni->lastUsedAnsiName()) + 1, '\0');
- }
- else
- {
- // VO, Metadata indexes
- // If all of the following is true:
- // - the lni represents an index
- // - the query is from a system module,
- // THEN the generator has set the last used ansi name to the
- // compile time name of the base table. We will use that to find
- // the index runtime ANSI name.
- LateNameInfo * baseTableLni = NULL;
- if ( (lni->getNameSpace() == COM_INDEX_NAME) &&
- (systemModuleStmt())
- )
- {
- if (lni->isVariable())
- {
- // resolve names has put the runtime name of the table in
this lni
- baseTableLni = lni;
- }
- else
- {
- // iterate over the lnil to find the entry for the index'
base table
- for (Int32 ix = 0; ix < (Int32) (lnil->getNumEntries());
ix++)
- {
- LateNameInfo * lni2 = &(lnil->getLateNameInfo(ix));
- if (!strcmp (lni2->compileTimeAnsiName(),
lni->lastUsedAnsiName()))
- {
- baseTableLni = lni2;
- break;
- }
- }
- }
-
- if (baseTableLni == NULL)
- {
- // didn't find the base table in the lnil - quit!
- diagsArea << DgSqlCode(-CLI_INTERNAL_ERROR);
- return ERROR;
- }
-
- // Construct the runtime index name from
- // - the runtime catalog and schema name parts of the base
table
- // - the compile time object name part of the index
- char * indexParts[4];
- char * tableParts[4];
- Lng32 numIndexParts = -1, numTableParts = -1;
- AnsiOrNskName * indexCompileTimeAnsiName = new (&heap_)
AnsiOrNskName(lni->compileTimeAnsiName());
- baseTableLni->getLastUsedName(&heap_)->extractParts
(numTableParts, tableParts);
- indexCompileTimeAnsiName->extractParts (numIndexParts,
indexParts);
-
- if ( (numIndexParts != 3) || (numTableParts != 3))
- {
- // Something rotten here ...
- delete indexCompileTimeAnsiName;
- diagsArea << DgSqlCode(-CLI_INTERNAL_ERROR);
- return ERROR;
- }
-
- char
indexExtName[ComMAX_3_PART_EXTERNAL_UTF8_NAME_LEN_IN_BYTES + 1];
- ComBuildANSIName (tableParts[0], tableParts[1], indexParts[2],
indexExtName);
- delete indexCompileTimeAnsiName;
-
- AnsiOrNskName * indexName = new (&heap_)
AnsiOrNskName(indexExtName);
- lni->setLastUsedName (indexName);
- }
+ char errStr[200];
--- End diff --
200 won't be enough. Directory lengths can be up to 1K (well, there is a
bug in the LOB code that limits them to 256, but even at that, 200 isn't
enough).
> Move Similarity check for hive tables to master executor root operator
> ----------------------------------------------------------------------
>
> Key: TRAFODION-2263
> URL: https://issues.apache.org/jira/browse/TRAFODION-2263
> Project: Apache Trafodion
> Issue Type: Improvement
> Reporter: Anoop Sharma
> Assignee: Anoop Sharma
> Priority: Minor
>
> Query plans for Hive tables include the location of the data directories/files,
> and the plan depends on that location.
> If data changes in a Hive file/dir after a query has been compiled but
> before it is executed, that change is detected at runtime by
> comparing the compile-time and runtime dir timestamps.
> A mismatch causes the query to be recompiled using AQR, and a new
> plan is generated with the latest dir location.
> This detection is currently done at the leaf operators (scan, insert) when
> the operator is executed.
> This JIRA is to move that check (called Similarity Check) to the root
> operator, before query execution starts. That way, plan consistency
> checks will be done before running the query and will not depend
> on which operator is actually being executed. It will also mean the checks are
> no longer done in multiple processes and operators, such as an ESP process.
> All checks will be done in one place in the master executor, and the code will not
> be spread across multiple operators as it is now.
>
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)