Fix more issues according to JIRA 1720 review comments
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/82b256c0 Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/82b256c0 Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/82b256c0 Branch: refs/heads/master Commit: 82b256c00859ae80fb83351c940bddb12ce1e15b Parents: 3efa731 Author: Cloud User <[email protected]> Authored: Wed Jan 13 13:54:21 2016 +0000 Committer: Cloud User <[email protected]> Committed: Wed Jan 13 13:54:21 2016 +0000 ---------------------------------------------------------------------- core/sql/common/csconvert.cpp | 12 +++--------- core/sql/exp/exp_conv.cpp | 17 ++++++----------- core/sql/optimizer/SynthType.cpp | 10 ++++++++-- core/sql/sqlcomp/nadefaults.cpp | 2 +- 4 files changed, 18 insertions(+), 23 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/82b256c0/core/sql/common/csconvert.cpp ---------------------------------------------------------------------- diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp index a18b1c3..438ee8b 100644 --- a/core/sql/common/csconvert.cpp +++ b/core/sql/common/csconvert.cpp @@ -1279,8 +1279,9 @@ char * findStartOfChar( char *someByteInChar, char *startOfBuffer ) rtnv-- ; return rtnv ; } + /* A method to do character set conversion , using Glibc iconv */ -int code_convert(const char *from_charset,const char *to_charset,char *inbuf, size_t inlen, char *outbuf,size_t outlen) +static int code_convert(const char *from_charset,const char *to_charset,char *inbuf, size_t inlen, char *outbuf,size_t outlen) { iconv_t cd; int rc; @@ -1298,23 +1299,16 @@ int code_convert(const char *from_charset,const char *to_charset,char *inbuf, si iconv_close(cd); return outlen; } -/* from gbk to utf8 */ -int gbk2utf8(char *inbuf,size_t inlen,char *outbuf,size_t outlen) -{ - return 
code_convert("gbk","utf-8",inbuf,inlen,outbuf,outlen); -} /* convert gbk string into UTF8 */ int gbkToUtf8(char* gbkString, size_t gbklen, char* result ,size_t outlen, bool addNullAtEnd) { int originalOutlen = outlen; - int finalLength = gbk2utf8 ( gbkString, gbklen, result, outlen); + int finalLength = code_convert( "gbk","utf-8", gbkString, gbklen, result, outlen); if (finalLength == -1 ) return 0; - //the result is allocated with lenght originalOutlen + 1 - //so no overrun is possible if ( addNullAtEnd ) { if(originalOutlen >= finalLength ) http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/82b256c0/core/sql/exp/exp_conv.cpp ---------------------------------------------------------------------- diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp index 319727f..b2a11cd 100644 --- a/core/sql/exp/exp_conv.cpp +++ b/core/sql/exp/exp_conv.cpp @@ -9322,18 +9322,18 @@ convDoIt(char * source, break; // gb2312 -> utf8 +// JIRA 1720 case CONV_GBK_F_UTF8_V: { - char * targetbuf = new char[sourceLen*4+1]; - size_t sl = sourceLen; - int convLen = gbkToUtf8( source, sl, targetbuf, sl*4); int copyLen = 0; + int convLen = gbkToUtf8( source, sourceLen, target, targetLen); if (convLen > 0) { - copyLen = (convLen< targetLen) ? 
convLen: targetLen; - str_cpy_all(target, targetbuf, copyLen); + copyLen = convLen; //if the target length is not enough, instead of truncate, raise a SQL Error if (convLen > targetLen) - ExRaiseSqlError(heap, diagsArea, EXE_STRING_OVERFLOW); + ExRaiseSqlError(heap, diagsArea, EXE_STRING_OVERFLOW); + if ( varCharLen ) + setVCLength(varCharLen, varCharLenSize, copyLen); } else { // LCOV_EXCL_START @@ -9341,11 +9341,6 @@ convDoIt(char * source, copyLen = 0; // LCOV_EXCL_STOP } - - if ( varCharLen ) - setVCLength(varCharLen, varCharLenSize, copyLen); - delete targetbuf; - }; break; // 5/10/98: sjis -> unicode http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/82b256c0/core/sql/optimizer/SynthType.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/SynthType.cpp b/core/sql/optimizer/SynthType.cpp index 56e341f..7d1c8a9 100644 --- a/core/sql/optimizer/SynthType.cpp +++ b/core/sql/optimizer/SynthType.cpp @@ -5208,11 +5208,17 @@ const NAType *Translate::synthesizeType() break; case GBK_TO_UTF8: - if (translateSource->getCharSet() == CharInfo::GBK || translateSource->getCharSet() == CharInfo::UnknownCharSet ) + if (translateSource->getCharSet() == CharInfo::GBK ) charsetTarget = CharInfo::UTF8; else { - if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) ) + /* this is a solution to support GBK before Trafodion can support GBK in total + * see jira 1720 for more details + * the logic here is: + * when HIVE_FILE_CHARSET is not empty, it means the real charset in Hive table is not same as HIVE_DEFAULT_CHARSET + * in this case, allow the converting , ignoring the source charset checking above + */ + if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == "" ) //CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) ) err4106arg = SQLCHARSETCODE_GB2312; else charsetTarget = CharInfo::UTF8; 
http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/82b256c0/core/sql/sqlcomp/nadefaults.cpp ---------------------------------------------------------------------- diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp index cfe2cf3..20152d6 100644 --- a/core/sql/sqlcomp/nadefaults.cpp +++ b/core/sql/sqlcomp/nadefaults.cpp @@ -1958,7 +1958,7 @@ SDDkwd__(EXE_DIAGNOSTIC_EVENTS, "OFF"), DDkwd__(HIVE_DEFAULT_CHARSET, (char *)SQLCHARSETSTRING_UTF8), DD_____(HIVE_DEFAULT_SCHEMA, "HIVE"), - DD_____(HIVE_FILE_CHARSET, (char *)SQLCHARSETSTRING_UTF8), + DD_____(HIVE_FILE_CHARSET, ""), DD_____(HIVE_FILE_NAME, "/hive/tpcds/customer/customer.dat" ), DD_____(HIVE_HDFS_STATS_LOG_FILE, ""), DDint__(HIVE_LIB_HDFS_PORT_OVERRIDE, "-1"),
