Hi Robert, Here is a tiny submission that allows the user to convert an extended 8-bit ASCII std::string to a UTF-8 std::string, and vice versa. Why? Because of the FBX plugin I'll submit in a few hours (I hope).
BTW, should this header be moved into osg instead of osgDB? Conversions between ASCII, UTF-8 and UTF-16 may actually be used from osgDB, osgText and so on. Well, just my two cents. Another remark: I saw that the conversions from/to UTF-16 are Windows-only. This could be addressed... Cheers, Sukender PVLE - Lightweight cross-platform game engine - http://pvle.sourceforge.net/
/* -*-c++-*- OpenSceneGraph - Copyright (C) 2008 Robert Osfield
*
* This library is open source and may be redistributed and/or modified under
* the terms of the OpenSceneGraph Public License (OSGPL) version 0.0 or
* (at your option) any later version. The full license is in LICENSE file
* included with this distribution, and on the openscenegraph.org website.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* OpenSceneGraph Public License for more details.
*/
#include <osgDB/ConvertUTF>
#include <osg/Notify>
#include <string.h>
#include <wchar.h>
#if defined(WIN32) && !defined(__CYGWIN__)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
namespace osgDB
{
// Convenience overload: hands the wide string's character buffer and its
// character count to the (pointer, length) overload of convertUTF16toUTF8.
std::string convertUTF16toUTF8(const std::wstring& s)
{
    const unsigned charCount = static_cast<unsigned>(s.length());
    return convertUTF16toUTF8(s.c_str(), charCount);
}
// Convenience overload for NUL-terminated wide strings: the length is
// measured with wcslen() and forwarded to the (pointer, length) overload.
std::string convertUTF16toUTF8(const wchar_t* s)
{
    const unsigned charCount = static_cast<unsigned>(wcslen(s));
    return convertUTF16toUTF8(s, charCount);
}
// Convenience overload: hands the string's byte buffer and its byte count
// to the (pointer, length) overload of convertUTF8toUTF16.
std::wstring convertUTF8toUTF16(const std::string& s)
{
    const unsigned byteCount = static_cast<unsigned>(s.length());
    return convertUTF8toUTF16(s.c_str(), byteCount);
}
// Convenience overload for NUL-terminated strings: the length is measured
// with strlen() and forwarded to the (pointer, length) overload.
std::wstring convertUTF8toUTF16(const char* s)
{
    const unsigned byteCount = static_cast<unsigned>(strlen(s));
    return convertUTF8toUTF16(s, byteCount);
}
// Convenience overload: hands the string's byte buffer and its byte count
// to the (pointer, length) overload of convertASCIItoUTF8.
std::string convertASCIItoUTF8(const std::string& s)
{
    const unsigned byteCount = static_cast<unsigned>(s.length());
    return convertASCIItoUTF8(s.c_str(), byteCount);
}
// Convenience overload for NUL-terminated strings: the length is measured
// with strlen() and forwarded to the (pointer, length) overload.
std::string convertASCIItoUTF8(const char* s)
{
    const unsigned byteCount = static_cast<unsigned>(strlen(s));
    return convertASCIItoUTF8(s, byteCount);
}
// Convenience overload: hands the string's byte buffer and its byte count
// to the (pointer, length) overload of convertUTF8toASCII.
std::string convertUTF8toASCII(const std::string& s)
{
    const unsigned byteCount = static_cast<unsigned>(s.length());
    return convertUTF8toASCII(s.c_str(), byteCount);
}
// Convenience overload for NUL-terminated strings: the length is measured
// with strlen() and forwarded to the (pointer, length) overload.
std::string convertUTF8toASCII(const char* s)
{
    const unsigned byteCount = static_cast<unsigned>(strlen(s));
    return convertUTF8toASCII(s, byteCount);
}
// Converts a buffer of wide characters to a UTF-8 encoded std::string.
//
// On native Windows builds (WIN32 defined and not Cygwin) the work is done by
// WideCharToMultiByte(CP_UTF8, ...), called twice: once with no output buffer
// to obtain the required byte count, then again to fill the result.
// On every other platform the conversion is not implemented: a warning is
// logged and an empty string is returned.
//
// source        wide characters to convert.
// sourceLength  number of wide characters to read from source.
//
// Returns the UTF-8 bytes, or an empty string when sourceLength is 0, when
// either API call reports failure (a warning is logged), or when the
// platform is unsupported.
std::string convertUTF16toUTF8(const wchar_t* source, unsigned sourceLength)
{
#if defined(WIN32) && !defined(__CYGWIN__)
    if (sourceLength == 0) return std::string();

    // Sizing pass: no destination buffer, the return value is the byte count needed.
    const int bytesNeeded = WideCharToMultiByte(CP_UTF8, 0, source, sourceLength, 0, 0, 0, 0);
    if (bytesNeeded > 0)
    {
        // Conversion pass: write straight into the string's storage.
        std::string utf8(bytesNeeded, '\0');
        const int bytesWritten = WideCharToMultiByte(CP_UTF8, 0, source, sourceLength, &utf8[0], bytesNeeded, 0, 0);
        if (bytesWritten > 0) return utf8;
    }

    // Either pass failed.
    osg::notify(osg::WARN) << "Cannot convert UTF-16 string to UTF-8." << std::endl;
    return std::string();
#else
    //TODO: Implement for other platforms
    osg::notify(osg::WARN) << "ConvertUTF16toUTF8 not implemented." << std::endl;
    return std::string();
#endif
}
// Converts a UTF-8 encoded byte buffer to a std::wstring.
//
// On native Windows builds (WIN32 defined and not Cygwin) the work is done by
// MultiByteToWideChar(CP_UTF8, ...), called twice: once with no output buffer
// to obtain the required number of wide characters, then again to fill the
// result. On every other platform the conversion is not implemented: a
// warning is logged and an empty wide string is returned.
//
// source        UTF-8 bytes to convert.
// sourceLength  number of bytes to read from source.
//
// Returns the wide string, or an empty one when sourceLength is 0, when
// either API call reports failure (a warning is logged), or when the
// platform is unsupported.
std::wstring convertUTF8toUTF16(const char* source, unsigned sourceLength)
{
#if defined(WIN32) && !defined(__CYGWIN__)
    if (sourceLength == 0) return std::wstring();

    // Sizing pass: no destination buffer, the return value is the character count needed.
    const int charsNeeded = MultiByteToWideChar(CP_UTF8, 0, source, sourceLength, 0, 0);
    if (charsNeeded > 0)
    {
        // Conversion pass: write straight into the string's storage.
        std::wstring wide(charsNeeded, L'\0');
        const int charsWritten = MultiByteToWideChar(CP_UTF8, 0, source, sourceLength, &wide[0], charsNeeded);
        if (charsWritten > 0) return wide;
    }

    // Either pass failed.
    osg::notify(osg::WARN) << "Cannot convert UTF-8 string to UTF-16." << std::endl;
    return std::wstring();
#else
    //TODO: Implement for other platforms
    osg::notify(osg::WARN) << "ConvertUTF8toUTF16 not implemented." << std::endl;
    return std::wstring();
#endif
}
// Re-encodes an 8-bit string (one byte per character) as UTF-8.
// Adapted from osgText::String.
//
// Each input byte is treated as a code point in the range 0x00..0xFF:
//   - values below 0x80 are copied unchanged;
//   - values 0x80..0xFF become the 2-byte sequence 110xxxxx 10xxxxxx.
// Because a single byte can never exceed 0xFF, no 3-byte form is needed.
//
// source        bytes to convert.
// sourceLength  maximum number of bytes to read from source.
//
// Conversion stops early at the first NUL byte, even if sourceLength has not
// been reached. Returns the UTF-8 encoded result.
std::string convertASCIItoUTF8(const char* source, unsigned sourceLength)
{
    std::string encoded;

    unsigned pos = 0;
    while (pos < sourceLength)
    {
        const unsigned char byte = static_cast<unsigned char>(source[pos]);
        if (byte == 0) break;   // a NUL byte ends the input

        if (byte & 0x80)
        {
            // High bit set: top 2 bits go in the lead byte, low 6 bits in the trail byte.
            encoded.push_back(static_cast<char>(0xc0 | (byte >> 6)));
            encoded.push_back(static_cast<char>(0x80 | (byte & 0x3f)));
        }
        else
        {
            encoded.push_back(static_cast<char>(byte));
        }

        ++pos;
    }

    return encoded;
}
// Converts a UTF-8 encoded buffer to an 8-bit string (one byte per
// character); this is the reverse of convertASCIItoUTF8().
//
// Only code points 0x00..0xFF fit in the result, so only single-byte
// characters and 2-byte sequences (110xxxxx 10xxxxxx) are accepted.
//
// source        UTF-8 bytes to convert.
// sourceLength  maximum number of bytes to read from source.
//
// Conversion stops early at the first NUL byte, even if sourceLength has not
// been reached. Returns the decoded 8-bit string.
//
// Throws std::bad_cast when the input cannot be represented or is malformed:
//   - a continuation byte (10xxxxxx) without a preceding lead byte;
//   - a lead byte that is not followed by a continuation byte (including a
//     lead byte at the very end of the input);
//   - an overlong 2-byte sequence, i.e. a lead byte of 0xC0 or 0xC1, which
//     would encode a value below 0x80 that must be written as a single byte;
//   - a 2-byte sequence that decodes to a value above 0xFF;
//   - any 3- or 4-byte sequence, or any other byte value.
std::string convertUTF8toASCII(const char* source, unsigned sourceLength)
{
    std::string asciiString;
    // 2-byte characters are encoded as 110xxxxx 10xxxxxx.
    // Pending lead byte; 0 means "none pending" (a lead byte is never 0).
    unsigned int first2ByteChar = 0;
    for(unsigned i=0; i<sourceLength && source[i]; ++i)
    {
        unsigned int currentChar = static_cast<unsigned char>(source[i]);
        if ((currentChar & 0x80)==0)
        {
            // Plain 7-bit character: not allowed in the middle of a 2-byte sequence.
            if (first2ByteChar!=0) throw std::bad_cast();
            asciiString+=static_cast<char>(currentChar);
        }
        else if ((currentChar >> 5)==0x6)
        {
            // Lead byte of a 2-byte sequence.
            if (first2ByteChar!=0) throw std::bad_cast();
            // 0xC0 and 0xC1 can only start overlong encodings of values below
            // 0x80; those are not well-formed UTF-8 and must not be decoded.
            if (currentChar < 0xC2) throw std::bad_cast();
            first2ByteChar = currentChar;
        }
        else if ((currentChar >> 6)==0x2)
        {
            // Continuation byte: must complete a pending lead byte.
            if (first2ByteChar==0) throw std::bad_cast();
            unsigned int finalChar = ((first2ByteChar & 0x1F)<<6) | (currentChar & 0x3F);
            // Anything above 0xFF does not fit in one output byte.
            if (finalChar > 0xFF) throw std::bad_cast();
            first2ByteChar = 0;
            asciiString+=static_cast<char>(finalChar);
        }
        else throw std::bad_cast(); // 3- or 4-byte sequence (or invalid byte)
    }
    // Input ended (or hit a NUL) in the middle of a 2-byte sequence.
    if (first2ByteChar != 0) throw std::bad_cast();
    return asciiString;
}
//std::string s("aeiouéèê");
//std::string s2( osgDB::convertASCIItoUTF8(s) );
//std::string s3( osgDB::convertUTF8toASCII(s2) );
//assert(s == s3);
}
ConvertUTF
Description: Binary data
_______________________________________________ osg-submissions mailing list [email protected] http://lists.openscenegraph.org/listinfo.cgi/osg-submissions-openscenegraph.org
