[ 
https://issues.apache.org/jira/browse/TRAFODION-2263?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15552379#comment-15552379
 ] 

ASF GitHub Bot commented on TRAFODION-2263:
-------------------------------------------

Github user DaveBirdsall commented on a diff in the pull request:

    https://github.com/apache/incubator-trafodion/pull/746#discussion_r82220981
  
    --- Diff: core/sql/cli/Statement.cpp ---
    @@ -2102,775 +2071,109 @@ Statement * 
Statement::getCurrentOfCursorStatement(char * cursorName)
     
     }
     
    -///////////////////////////////////////////////////////////////////////
    -// RETURN: doSimCheck: if true, do similarity check.
    -//         doFixup:    if true and doSimCheck is false, do fixup again.
    -//                     Note: fixup is always done after sim check so if
    -//                           doSimCheck is returned as true, then doFixup
    -//                           is ignored.
    -///////////////////////////////////////////////////////////////////////
    -RETCODE Statement::resolveNames(LateNameInfoList * lnil,
    -                                Descriptor * inputDesc, 
    -                                ComDiagsArea &diagsArea,
    -                                NABoolean &doSimCheck,
    -                           NABoolean &doFixup)
    +RETCODE Statement::doHiveTableSimCheck(TrafSimilarityTableInfo *si,
    +                                       void * lobGlob,
    +                                       NABoolean &simCheckFailed,
    +                                       ComDiagsArea &diagsArea)
     {
    -  doSimCheck = FALSE;
    -  doFixup    = FALSE;
    -  Lng32 curAnsiNameLen = 0;
    -  char * curAnsiName = 0;
    -  NABoolean contextChanged = FALSE;
    -  AnsiOrNskName *curName = NULL;
    -  Int16 retCode;
    -  char * parts[4];
    -  Lng32 numParts;
    -  bool isNskName = FALSE;
    -  char * fullyQualifiedSchemaName = schemaName_;
    -
    -  if (lnil->definePresent())
    -    {
    -      // if this is the first time names are being resolved, then
    -      // do it even if the define context has not changed.
    -      unsigned short defcon = context_->getCurrentDefineContext();
    -      if ((NOT firstResolveDone()) ||
    -     (defcon != defineContext()))
    -   {
    -     contextChanged = TRUE;
    -   }
    -      defineContext() = defcon;
    -    }
    -  
    -  for (Int32 l = 0; l < (Int32) (lnil->getNumEntries()); l++)
    -    {
    -      LateNameInfo * lni = &(lnil->getLateNameInfo(l));
    -      
    -      lni->setAnsiNameChange(0);
    -      
    -      
    -      if (lni->isVariable())
    -   {
    -     
    -     if (lni->isDefine())
    -       {
    -       }
    -     else if (lni->isEnvVar())
    -       {
    -         // get name from env var.
    -         char envName[ComMAX_3_PART_EXTERNAL_UTF8_NAME_LEN_IN_BYTES+1];
    -         curAnsiName = cliGlobals_->getEnv(lni->variableName());
    -         
    -         if ((! curAnsiName)||
    -             ((curAnsiNameLen = str_len(curAnsiName)) >
    -              ComMAX_3_PART_EXTERNAL_UTF8_NAME_LEN_IN_BYTES))
    -           {
    -             
    -             diagsArea << DgSqlCode(-EXE_INVALID_DEFINE_OR_ENVVAR)
    -                       <<DgString0(lni->variableName());
    -             return ERROR;
    -           }
    -         str_cpy_all(envName,curAnsiName,str_len(curAnsiName));
    -         envName[curAnsiNameLen] = '\0';
    -         
    -         curName = new (&heap_) AnsiOrNskName(envName);
    -         if (curName->convertAnsiOrNskName())
    -           {
    -             diagsArea << 
DgSqlCode(-CLI_INVALID_SQL_ID)<<DgString0(envName);
    -             delete curName;
    -             return ERROR;
    -           }
    -       } //endif env var
    -     
    -     else if (lni->isCachedParam())
    -       {
    -         // cached params are only resolved on the first execute after
    -         // prepare. These values are set from constants and 
    -         // cannot change for multiple executions of the same stmt.
    -         if (firstResolveDone())
    -                continue; // Go back to the for loop to continue with the 
next lni
    -   
    -         curAnsiName = 
    -           &root_tdb->getParameterBuffer()
    -           [lni->getCachedParamOffset()];
    -         curName = new (&heap_) AnsiOrNskName(curAnsiName);
    -         if (curName->convertAnsiOrNskName())
    -           {
    -             diagsArea << 
DgSqlCode(-CLI_INVALID_SQL_ID)<<DgString0(curAnsiName);
    -             delete curName;
    -             return ERROR;
    -           }
    -
    -       } //endif cached param
    -     else // hvar or param
    -       {
    -         // Here we need to validate what the hvar contains
    -         // We need to make sure it is a string hvar and we call
    -         // convDoIt so that we only get the length of the
    -         // string that we need.
    -         
    -         char *source= 0;
    -         Lng32 sourceLen = 0;
    -         Lng32 sourceType = 0;
    -              Lng32 sourceCharset = CharInfo::UnknownCharSet;
    -         void *var_ptr = 0;
    -
    -         inputDesc->getDescItem(lni->getInputListIndex(), 
    -                                SQLDESC_VAR_PTR, &var_ptr, 
    -                                0, 0, 0, 0);
    -         inputDesc->getDescItem(lni->getInputListIndex(),
    -                                SQLDESC_LENGTH, &sourceLen,
    -                                0,0,0,0);
    -         
    -         inputDesc->getDescItem(lni->getInputListIndex(),
    -                                SQLDESC_TYPE_FS, &sourceType,
    -                                0,0,0,0);
    -         
    -         // Check if input is a string type
    -         
    -         if ((sourceType>=REC_MIN_CHARACTER) &&
    -                  (sourceType <= REC_MAX_CHARACTER))
    -                {
    -                  inputDesc->getDescItem(lni->getInputListIndex(),
    -                                         SQLDESC_CHAR_SET, &sourceCharset,
    -                                         0,0,0,0);
    -
    -                  if (sourceCharset == CharInfo::UnknownCharSet &&
    -                      (sourceType == REC_BYTE_F_ASCII ||
    -                       sourceType == REC_BYTE_V_ASCII ||
    -                       sourceType == REC_BYTE_V_ANSI))
    -                    {
    -                      // some clients aren't accustomed yet to setting the
    -                      // charset of the SQL statement, treat those as 
ISO88591
    -                      // for single-byte based data types
    -                      sourceCharset = CharInfo::ISO88591;
    -                    }
    -
    -                  if (stmt_type == STATIC_STMT &&
    -                      sourceCharset == CharInfo::ISO88591)
    -                    {
    -                      // our convention is to pass UTF-8 or UCS2 for
    -                      // names in embedded programs. However, embedded
    -                      // SQL does not support UTF-8 at this time.
    -                      // Temporary fix: Set input charset to UTF-8
    -                      // here until UTF-8 host varables are supported
    -                      // in embedded programs (if that ever happens)
    -                      sourceCharset = CharInfo::UTF8;
    -                    }
    -                }
    -              else
    -           {
    -             diagsArea << DgSqlCode(-CLI_INVALID_OBJECTNAME);
    -             return ERROR; 
    -           }
    -         source = (char *)var_ptr;
    -         
    -         if (DFS2REC::isSQLVarChar(sourceType)) 
    -           {
    -             // the first 2 bytes of data are actually the variable 
    -             // length indicator
    -             short VCLen;
    -             str_cpy_all((char *) &VCLen, source, sizeof(short));
    -             sourceLen = (Lng32) VCLen;
    -             source = &source[sizeof(short)];
    -           }
    -         
    -         ComDiagsArea *diagsPtr = NULL;
    -         char targetName[ComMAX_3_PART_EXTERNAL_UTF8_NAME_LEN_IN_BYTES + 
1];
    -         short retcode = convDoIt(source,
    -                                  sourceLen,
    -                                  (short) sourceType,
    -                                  0,
    -                                  sourceCharset,   // passed in as scale
    -                                  targetName,
    -                                  sizeof(targetName) - 1,
    -                                  REC_BYTE_V_ANSI, // short targetType
    -                                  0,               // Lng32 targetPrecision
    -                                  (Lng32) SQLCHARSETCODE_UTF8, // Lng32 
targetScale - also used as targetCharSet for CharType
    -                                  0,
    -                                  0,
    -                                  &heap_,
    -                                  &diagsPtr);
    -         if (diagsPtr)
    -           {
    -             diagsArea.mergeAfter(*diagsPtr);
    -             diagsPtr->decrRefCount();
    -                  diagsPtr = NULL;
    -           }
    -         if (retcode != ex_expr::EXPR_OK)
    -           {
    -             diagsArea << DgSqlCode(-EXE_CONVERT_STRING_ERROR);
    -                  char hexstr[MAX_OFFENDING_SOURCE_DATA_DISPLAY_LEN];
    -                  memset(hexstr, 0 , sizeof(hexstr) );
    -                  diagsArea << DgString0(stringToHex(hexstr, 
sizeof(hexstr), source, sourceLen ));
    -
    -             return ERROR;  
    -           }
    -
    -         if (! fullyQualifiedSchemaName)
    -           fullyQualifiedSchemaName = lni->compileTimeAnsiName();
    -
    -         curName = new (&heap_) AnsiOrNskName(targetName);
    -         if (curName->convertAnsiOrNskName())
    -           {
    -             diagsArea << DgSqlCode(-CLI_INVALID_SQL_ID)
    -                       <<DgString0(targetName);
    -             delete curName;
    -             return ERROR;
    -             
    -           }
    -       } // end if hvar 
    -
    -             if (NOT lni->isMPalias())
    -       {
    -         if (lni->isAvoidSimCheck()) // Host Variable in SET TABLE TIMEOUT 
command without
    -           // Prototype clause will have the NAME_ONLY and VARIABLE bit is 
set
    -           // So,we will need at least 3 parts
    -           // If nsk name, system name will be filled in
    -           {
    -             if (curName->extractParts(numParts, parts))
    -               {
    -                 diagsArea << DgSqlCode(-CLI_INVALID_SQL_ID)
    -                           << DgString0(curName->getExternalName());
    -                 delete curName;
    -                 return ERROR;
    -               }
    -             if (numParts < 3)
    -               {
    -                 diagsArea << DgSqlCode(-CLI_INVALID_SQL_ID)
    -                           << DgString0(curName->getExternalName());
    -                 delete curName;
    -                 return ERROR;
    -               }
    -             isNskName = curName->isNskName(); // will be set to False
    -           }
    -         else
    -           isNskName = TRUE;
    -         if (isNskName)
    -           {
    -             curAnsiName = curName->getInternalName();
    -             curAnsiNameLen = str_len(curAnsiName);
    -             if (curAnsiName[0] != '\\')
    -               {
    -                 
    -               }
    -           }
    -       }
    -
    -     if (lni->isMPalias() &&  NOT (lni->isAnsiPhySame())) {
    -       // if MPAlias flag is set,  then we compiled for an MX table, or an 
MP table with 
    -       // a MPAlias defined on it See comment in GenRelMisc.cpp 
PartitionAccess::codeGen 
    -       // method where the mpalias flag is set. If that flag is set only 
for an MPAlias
    -       // and not for MX tables then this else statement condition will 
have to be modified.
    -       // The code below should be executed for MX tables and for MP 
tables access through
    -       // their MPAlias. The code below is also not needed for accessing 
resource forks whose
    -       // ansi name is same as their physical name.
    -       
    -       if (curName->extractParts(numParts, parts))
    -         {
    -           diagsArea << DgSqlCode(-CLI_INVALID_SQL_ID)
    -                     << DgString0(curName->getExternalName());
    -           delete curName;
    -           return ERROR;
    -         }
    -       if (numParts != 3) {
    -         if (curName->fillInMissingParts(fullyQualifiedSchemaName) == -1)
    -           {
    -             diagsArea << DgSqlCode(-CLI_INVALID_SQL_ID)
    -                       << DgString0(curName->getExternalName());
    -             delete curName;
    -             return ERROR;
    -           }
    -       } // end if numParts != 3
    -       
    -     } // end isMPAlias && isAnsiPhySame
    -     
    -     // if current ansi name is different than the last ansi name, 
    -     // resolve the current ansi name. Remember this in lni.
    -     retCode = lni->getLastUsedName(&heap_)->equals(curName);
    -     if (retCode == -1)
    -       {
    -         diagsArea << DgSqlCode(-CLI_INTERNAL_ERROR);
    -         delete curName;
    -         return ERROR;
    -       }
    -     else
    -       if (retCode == 0)
    -         {
    -           lni->setAnsiNameChange(1);
    -           lni->setLastUsedName(curName);
    -           // If similarity check is to be avoided: Map the physical name
    -           // right here (otherwise done during the similarity check.)
    -           if ( lni->isAvoidSimCheck() ) 
    -             {
    -               if (! isNskName)
    -                 {
    -                   if (mapAnsiToGuaName(lni, diagsArea) == ERROR)
    -                     return ERROR;
    -                 }
    -               else
    -                 strcpy(lni->resolvedPhyName(), 
curName->getInternalName());
    -               doFixup = TRUE; // fixup needs to be done again to 
    -               // be able to use the new name.
    -             }
    -           else
    -             if (NOT doSimCheck)
    -               {
    -                 doSimCheck = TRUE;
    -                 
    -                 // if this is a view name that has changed, mark it
    -                 // in the latenameinfo struct. This will be used later
    -                 // when similarity check is done in 
doQuerySimilarityCheck.
    -                 if (lni->isView())
    -                   {
    -                     lni->setViewNameChange(1);
    -                   }
    -               }
    -         }
    -       else
    -         {
    -           if (curName)
    -             {
    -               delete curName;
    -               curName = NULL;
    -             }
    -
    -
    -         }
    -     
    -   } // variable
    -      
    -    } // for
    -  
    -  if (NOT firstResolveDone())
    -    {
    -      setFirstResolveDone(TRUE);
    -    }
    -  
    -  return SUCCESS;
    -}
    -
    -
    -////////////////////////////////////////////////////////////////////
    -// This method performs similarity check between the information
    -// generated at compile time (si) and the information retrieved at
    -// runtime for table specified via tableName.
    -// RETURNS: ERROR:   if an error occured.
    -//          SUCCESS, otherwise.
    -//                   if sim check fails, then this is indicated by
    -//                   the return param, simCheckFailed, set to TRUE.      
    -/////////////////////////////////////////////////////////////////////
    -RETCODE Statement::doSimilarityCheck(SimilarityInfo * si,
    -                                LateNameInfo * lni,
    -                                char * tableName,
    -                                NABoolean &simCheckFailed,
    -                                ComDiagsArea &diagsArea
    -                                )
    -{
    -  return SUCCESS;
    -}
    +  simCheckFailed = FALSE;
    +  Lng32 retcode = 0;
     
    -RETCODE Statement::doIUDSimilarityCheck(SimilarityInfo * si,
    -                                   LateNameInfo * lni,
    -                                   char * tableName,
    -                                   Queue * indexInfoList,
    -                                   NABoolean &simCheckFailed,  
    -                                   ComDiagsArea &diagsArea)
    -{
    -  return SUCCESS;
    -}
    +  if ((si->hdfsRootDir() == NULL) || (si->modTS() == -1))
    +    return SUCCESS;
     
    -RETCODE Statement::getMatchingIndex(Queue * indexInfoList,
    -                               char * indexAnsiName,
    -                               char * indexPhyName,
    -                               ComDiagsArea &diagsArea)
    -{
    -  indexInfoList->position();
    -  for (Int32 i = 0; i < indexInfoList->numEntries(); i++)
    +  Int64 failedModTS = -1;
    +  Lng32 failedLocBufLen = 1000;
    +  char failedLocBuf[failedLocBufLen];
    +  retcode = ExpLOBinterfaceDataModCheck
    +    (lobGlob,
    +     si->hdfsRootDir(),
    +     si->hdfsHostName(),
    +     si->hdfsPort(),
    +     si->modTS(),
    +     si->numPartnLevels(),
    +     failedModTS,
    +     failedLocBuf, failedLocBufLen);
    +  if (retcode < 0)
         {
    -      IndexInfo * ii = (IndexInfo*)(indexInfoList->getNext());
    -
    -      if (str_cmp(indexAnsiName, ii->indexAnsiName(),
    -             str_len(indexAnsiName)) == 0)
    -   {
    -     strcpy(indexPhyName, ii->indexPhyName());
    -     return SUCCESS;
    -   }
    +      Lng32 intParam1 = -retcode;
    +      diagsArea << DgSqlCode(-EXE_ERROR_FROM_LOB_INTERFACE)
    +                << DgString0("HDFS")
    +                << DgString1("ExpLOBInterfaceDataModCheck")
    +                << DgString2(getLobErrStr(intParam1))
    +                << DgInt0(intParam1)
    +                << DgInt1(0);
    +      return ERROR;
         }
     
    -  return SUCCESS;
    -}
    -  
    -RETCODE Statement::mapAnsiToGuaName(LateNameInfo * lni,
    -                               ComDiagsArea &diagsArea)
    -{
    -  return ERROR;
    -}
    -
    -// this method resolves all table names that are used in this query.
    -RETCODE Statement::forceMapAllNames(LateNameInfoList * lnil,
    -                               ComDiagsArea &diagsArea)
    -{
    -  for (Int32 i = 0; i < (Int32) (lnil->getNumEntries()); i++)
    +  if (retcode == 1) // check failed
         {
    -      LateNameInfo * lni = &(lnil->getLateNameInfo(i));
    -
    -      lni->setAnsiNameChange(1);
    -
    -      char *lastUsedAnsiName = lni->lastUsedAnsiName();
    -
    -      // NA_NSK_REL1 -  need to change for ansi names 
    -      NABoolean isGuardianName =
    -         (lastUsedAnsiName && lastUsedAnsiName[0] == '\\') ? TRUE : FALSE;
    -
    -      if (lni->isAnsiPhySame() || isGuardianName)
    -        {
    -
    -     // If ansi name is really a phys name, copy it to resolvedPhyName.
    -          // For the first release on NSK, all internal table names, such 
as
    -          // histograms, histints, etc, will have resolved physical names 
as
    -          // their ANSI name.
    -     str_cpy(lni->resolvedPhyName(), lni->lastUsedAnsiName(),
    -             str_len(lni->lastUsedAnsiName()) + 1, '\0');
    -   }
    -      else 
    -   {
    -          // VO, Metadata indexes
    -          // If all of the following is true:
    -          // - the lni represents an index
    -          // - the query is from a system module,
    -          // THEN the generator has set the last used ansi name to the 
    -          // compile time name of the base table. We will use that to find
    -          // the index runtime ANSI name.
    -          LateNameInfo * baseTableLni = NULL;
    -          if ( (lni->getNameSpace() == COM_INDEX_NAME)      &&
    -               (systemModuleStmt())
    -             )
    -          {
    -            if (lni->isVariable())
    -            {
    -              // resolve names has put the runtime name of the table in 
this lni
    -              baseTableLni = lni;
    -            }
    -            else
    -            {
    -              // iterate over the lnil to find the entry for the index' 
base table 
    -              for (Int32 ix = 0; ix < (Int32) (lnil->getNumEntries()); 
ix++)
    -              {
    -                LateNameInfo * lni2 = &(lnil->getLateNameInfo(ix));
    -                if (!strcmp (lni2->compileTimeAnsiName(), 
lni->lastUsedAnsiName()))
    -                {              
    -                    baseTableLni = lni2;
    -                    break;
    -                }
    -              }
    -            }
    -
    -            if (baseTableLni == NULL)
    -            {
    -              // didn't find the base table in the lnil - quit!
    -              diagsArea << DgSqlCode(-CLI_INTERNAL_ERROR);
    -              return ERROR;
    -            }
    -
    -            // Construct the runtime index name from 
    -            // - the runtime catalog and schema name parts of the base 
table
    -            // - the compile time object name part of the index
    -            char * indexParts[4];
    -            char * tableParts[4];
    -            Lng32 numIndexParts = -1, numTableParts = -1;
    -            AnsiOrNskName * indexCompileTimeAnsiName = new (&heap_) 
AnsiOrNskName(lni->compileTimeAnsiName());
    -            baseTableLni->getLastUsedName(&heap_)->extractParts 
(numTableParts, tableParts);
    -            indexCompileTimeAnsiName->extractParts (numIndexParts, 
indexParts);
    -
    -            if ( (numIndexParts != 3) || (numTableParts != 3))
    -            {
    -              // Something rotten here ...
    -              delete indexCompileTimeAnsiName;
    -              diagsArea << DgSqlCode(-CLI_INTERNAL_ERROR);
    -              return ERROR;
    -            }
    -
    -            char  
indexExtName[ComMAX_3_PART_EXTERNAL_UTF8_NAME_LEN_IN_BYTES + 1]; 
    -            ComBuildANSIName (tableParts[0], tableParts[1], indexParts[2], 
indexExtName);
    -            delete indexCompileTimeAnsiName;
    -
    -            AnsiOrNskName * indexName = new (&heap_) 
AnsiOrNskName(indexExtName);
    -            lni->setLastUsedName (indexName);
    -          }
    +      char errStr[200];
    --- End diff --
    
    200 won't be enough. Directory lengths can be up to 1K (well, there is a 
bug in the LOB code that limits them to 256, but even at that, 200 isn't 
enough).


> Move Similarity check for hive tables to master executor root operator
> ----------------------------------------------------------------------
>
>                 Key: TRAFODION-2263
>                 URL: https://issues.apache.org/jira/browse/TRAFODION-2263
>             Project: Apache Trafodion
>          Issue Type: Improvement
>            Reporter: Anoop Sharma
>            Assignee: Anoop Sharma
>            Priority: Minor
>
> Query plans for hive tables include location of data directories/files
> and the plan depends on it.
> If data changes in a hive file/dir after a query has been compiled and
> before it is executed, then that change is detected at runtime by
> comparing compile time and runtime dir timestamps.
> A mismatch causes that query to recompile using AQR, and a new
> plan is generated with the latest dir location.
> This detection is currently being done at leaf operators (scan, insert) when
> those operators are executed.
> This jira is to move that check (called Similarity Check) to the root 
> operator before starting query execution. That way plan consistency
> checks will be done before running the query and will not depend
> on the actual operator being executed. It also means the checks will no
> longer be done in various processes and operators, such as an ESP process.
> All checks will be done at one place in master executor and code will not
> be spread across multiple operators as it is now.
>  



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to