Repository: incubator-trafodion Updated Branches: refs/heads/master aa3deffb1 -> 9cc1e835f
for jira 1720, add support to convert gbk into utf8 Project: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/commit/d51d2016 Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/tree/d51d2016 Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/diff/d51d2016 Branch: refs/heads/master Commit: d51d2016d29725e3d863b351b3a548ee1b325c0c Parents: 141f354 Author: Cloud User <[email protected]> Authored: Sat Jan 9 11:43:21 2016 +0000 Committer: Cloud User <[email protected]> Committed: Sat Jan 9 11:43:21 2016 +0000 ---------------------------------------------------------------------- core/sql/common/csconvert.cpp | 42 ++++++++++++++++++++++++++++++++ core/sql/common/csconvert.h | 4 +++ core/sql/exp/exp_clause_derived.h | 4 ++- core/sql/exp/exp_conv.cpp | 26 ++++++++++++++++++++ core/sql/generator/GenItemFunc.cpp | 3 +++ core/sql/optimizer/ItemExpr.cpp | 2 ++ core/sql/optimizer/ItemFunc.h | 1 + core/sql/optimizer/SynthType.cpp | 12 +++++++++ core/sql/sqlcomp/DefaultConstants.h | 3 +++ core/sql/sqlcomp/nadefaults.cpp | 2 ++ 10 files changed, 98 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/common/csconvert.cpp ---------------------------------------------------------------------- diff --git a/core/sql/common/csconvert.cpp b/core/sql/common/csconvert.cpp index ffc5370..b0d21af 100644 --- a/core/sql/common/csconvert.cpp +++ b/core/sql/common/csconvert.cpp @@ -30,6 +30,10 @@ // but also used by the ODBC build and maybe others. #include <limits.h> +#include <iconv.h> +#include <stdio.h> +#include <stdlib.h> + #include "multi-byte.h" #include "fcconv.h" #include "csconvert.h" @@ -1275,3 +1279,41 @@ char * findStartOfChar( char *someByteInChar, char *startOfBuffer ) rtnv-- ; return rtnv ; } +/* A method to do character set conversion , using Glibc iconv */ +int code_convert(const char *from_charset,const char *to_charset,char *inbuf, size_t inlen, char *outbuf,size_t outlen) +{ + iconv_t cd; + int rc; + char **pin = &inbuf; + char **pout = &outbuf; + + cd = iconv_open(to_charset,from_charset); + if (cd==0) return -1; + memset(outbuf,0,outlen); + if (iconv(cd,pin,(size_t*)&inlen,pout,(size_t *)&outlen)==-1) + { + iconv_close(cd); + return -1; + } + iconv_close(cd); + return outlen; +} +/* from gbk to utf8 */ +int gbk2utf8(char *inbuf,size_t inlen,char *outbuf,size_t outlen) +{ + return code_convert("gbk","utf-8",inbuf,inlen,outbuf,outlen); +} + +int gbkToUtf8(char* gbkString, size_t gbklen, + char* result ,size_t outlen, int addNullAtEnd) +{ + + int finalLength = gbk2utf8 ( gbkString, gbklen, result, outlen); + + if (finalLength == -1 ) return 0; + + if ( addNullAtEnd > 0 ) + result[finalLength] = 0; + + return finalLength; +} http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/common/csconvert.h ---------------------------------------------------------------------- diff --git a/core/sql/common/csconvert.h b/core/sql/common/csconvert.h index 56c9603..57fec71 100644 --- a/core/sql/common/csconvert.h +++ b/core/sql/common/csconvert.h @@ -106,6 +106,10 @@ int UTF16ToLocale( const enum cnv_version version, unsigned int * translated_char_cnt_p = NULL , const char *substitution_char = NULL ); +NA_EIDPROC +int gbkToUtf8(char* gbkString, size_t gbklen, + char* result ,size_t outlen, int addNullAtEnd=FALSE); + /* * LocaleCharToUCS4() converts the FIRST char in the input string to its * UCS4 value. Returns the UCS4 value at location specified AND the http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/exp/exp_clause_derived.h ---------------------------------------------------------------------- diff --git a/core/sql/exp/exp_clause_derived.h b/core/sql/exp/exp_clause_derived.h index fc6ecfe..4558431 100644 --- a/core/sql/exp/exp_clause_derived.h +++ b/core/sql/exp/exp_clause_derived.h @@ -1542,7 +1542,9 @@ enum conv_case_index { CONV_UTF8_F_UCS2_V =248, CONV_BLOB_BLOB =249, - CONV_BLOB_ASCII_F =250 + CONV_BLOB_ASCII_F =250, + + CONV_GBK_F_UTF8_V =251 }; class SQLEXP_LIB_FUNC ex_conv_clause : public ex_clause { http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/exp/exp_conv.cpp ---------------------------------------------------------------------- diff --git a/core/sql/exp/exp_conv.cpp b/core/sql/exp/exp_conv.cpp index 8c31efc..015ec0f 100644 --- a/core/sql/exp/exp_conv.cpp +++ b/core/sql/exp/exp_conv.cpp @@ -9321,6 +9321,32 @@ convDoIt(char * source, }; break; +// gb2312 -> utf8 + case CONV_GBK_F_UTF8_V: + { + char * targetbuf = new char[sourceLen*4+1]; + size_t sl = sourceLen; + int convLen = gbkToUtf8( source, sl, targetbuf, sl*4); + int copyLen = 0; + if (convLen > 0) { + copyLen = (convLen< targetLen) ? convLen: targetLen; + str_cpy_all(target, targetbuf, copyLen); + // if (convLen > targetLen) + + } + else { + // LCOV_EXCL_START + convLen = 0; + copyLen = 0; + // LCOV_EXCL_STOP + } + + if ( varCharLen ) + setVCLength(varCharLen, varCharLenSize, copyLen); + delete targetbuf; + + }; + break; // 5/10/98: sjis -> unicode case CONV_SJIS_F_UNICODE_F: case CONV_SJIS_F_UNICODE_V: http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/generator/GenItemFunc.cpp ---------------------------------------------------------------------- diff --git a/core/sql/generator/GenItemFunc.cpp b/core/sql/generator/GenItemFunc.cpp index c7e6748..8c8f7e6 100644 --- a/core/sql/generator/GenItemFunc.cpp +++ b/core/sql/generator/GenItemFunc.cpp @@ -1737,6 +1737,9 @@ short Translate::codeGen(Generator * generator) case UCS2_TO_UTF8: convType = CONV_UCS2_F_UTF8_V; break; + case GBK_TO_UTF8: + convType = CONV_GBK_F_UTF8_V; + break; case UNICODE_TO_ISO88591: convType = CONV_UNICODE_F_ASCII_V; break; http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/optimizer/ItemExpr.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/ItemExpr.cpp b/core/sql/optimizer/ItemExpr.cpp index 41b6b76..7df1a7e 100644 --- a/core/sql/optimizer/ItemExpr.cpp +++ b/core/sql/optimizer/ItemExpr.cpp @@ -13408,6 +13408,8 @@ Translate::Translate(ItemExpr *valPtr, NAString* map_table_name) map_table_id_ = Translate::SJIS_TO_UTF8; else if ( _strcmpi(map_table_name->data(), "UTF8TOSJIS") == 0 ) map_table_id_ = Translate::UTF8_TO_SJIS; + else if ( _strcmpi(map_table_name->data(), "GBKTOUTF8") == 0 ) + map_table_id_ = Translate::GBK_TO_UTF8; else if ( _strcmpi(map_table_name->data(), "KANJITOISO88591") == 0 ) http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/optimizer/ItemFunc.h ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/ItemFunc.h b/core/sql/optimizer/ItemFunc.h index 3bb7238..d794a3d 100644 --- a/core/sql/optimizer/ItemFunc.h +++ b/core/sql/optimizer/ItemFunc.h @@ -2112,6 +2112,7 @@ public: UTF8_TO_SJIS, SJIS_TO_UTF8, UTF8_TO_ISO88591, ISO88591_TO_UTF8, KANJI_MP_TO_ISO88591, KSC5601_MP_TO_ISO88591, + GBK_TO_UTF8, UNKNOWN_TRANSLATION}; Translate(ItemExpr *valPtr, NAString* map_table_name); http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/optimizer/SynthType.cpp ---------------------------------------------------------------------- diff --git a/core/sql/optimizer/SynthType.cpp b/core/sql/optimizer/SynthType.cpp index b9d5518..56e341f 100644 --- a/core/sql/optimizer/SynthType.cpp +++ b/core/sql/optimizer/SynthType.cpp @@ -5207,6 +5207,18 @@ const NAType *Translate::synthesizeType() err4106arg = SQLCHARSETSTRING_UTF8; break; + case GBK_TO_UTF8: + if (translateSource->getCharSet() == CharInfo::GBK || translateSource->getCharSet() == CharInfo::UnknownCharSet ) + charsetTarget = CharInfo::UTF8; + else + { + if( CmpCommon::getDefaultString(HIVE_FILE_CHARSET) == CmpCommon::getDefaultString(HIVE_DEFAULT_CHARSET) ) + err4106arg = SQLCHARSETCODE_GB2312; + else + charsetTarget = CharInfo::UTF8; + } + break; + case ISO88591_TO_UTF8: if (translateSource->getCharSet() == CharInfo::ISO88591) { http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/sqlcomp/DefaultConstants.h ---------------------------------------------------------------------- diff --git a/core/sql/sqlcomp/DefaultConstants.h b/core/sql/sqlcomp/DefaultConstants.h index 2778197..d10acc8 100644 --- a/core/sql/sqlcomp/DefaultConstants.h +++ b/core/sql/sqlcomp/DefaultConstants.h @@ -3785,6 +3785,9 @@ enum DefaultConstants // set to ON to aggressively allocate ESP per core AGGRESSIVE_ESP_ALLOCATION_PER_CORE, + // real charset in the HIVE table + HIVE_FILE_CHARSET, + // This enum constant must be the LAST one in the list; it's a count, // not an Attribute (it's not IN DefaultDefaults; it's the SIZE of it)! __NUM_DEFAULT_ATTRIBUTES http://git-wip-us.apache.org/repos/asf/incubator-trafodion/blob/d51d2016/core/sql/sqlcomp/nadefaults.cpp ---------------------------------------------------------------------- diff --git a/core/sql/sqlcomp/nadefaults.cpp b/core/sql/sqlcomp/nadefaults.cpp index f6d5604..cfe2cf3 100644 --- a/core/sql/sqlcomp/nadefaults.cpp +++ b/core/sql/sqlcomp/nadefaults.cpp @@ -1958,6 +1958,7 @@ SDDkwd__(EXE_DIAGNOSTIC_EVENTS, "OFF"), DDkwd__(HIVE_DEFAULT_CHARSET, (char *)SQLCHARSETSTRING_UTF8), DD_____(HIVE_DEFAULT_SCHEMA, "HIVE"), + DD_____(HIVE_FILE_CHARSET, (char *)SQLCHARSETSTRING_UTF8), DD_____(HIVE_FILE_NAME, "/hive/tpcds/customer/customer.dat" ), DD_____(HIVE_HDFS_STATS_LOG_FILE, ""), DDint__(HIVE_LIB_HDFS_PORT_OVERRIDE, "-1"), @@ -6390,6 +6391,7 @@ DefaultToken NADefaults::token(Int32 attrEnum, else { if ((attrEnum == TERMINAL_CHARSET) || (attrEnum == USE_HIVE_SOURCE) || + (attrEnum == HIVE_FILE_CHARSET) || (attrEnum == HBASE_DATA_BLOCK_ENCODING_OPTION) || (attrEnum == HBASE_COMPRESSION_OPTION)) return DF_USER;
