Hi Robert,

Here is a tiny submission that allows the user to convert an extended 8-bit 
ASCII std::string to a UTF-8 std::string, and vice versa. Why? Because of the 
FBX plugin I'll submit in a few hours (I hope).

BTW, should this header be moved into osg instead of osgDB? Conversions 
between ASCII/UTF-8/UTF-16 may be used from osgDB, osgText and so on. Well, 
just my two cents.

Another remark: I saw conversions from/to UTF16 are Windows only. This may be 
addressed...
Cheers,

Sukender
PVLE - Lightweight cross-platform game engine - http://pvle.sourceforge.net/
/* -*-c++-*- OpenSceneGraph - Copyright (C) 2008 Robert Osfield 
 *
 * This library is open source and may be redistributed and/or modified under  
 * the terms of the OpenSceneGraph Public License (OSGPL) version 0.0 or 
 * (at your option) any later version.  The full license is in LICENSE file
 * included with this distribution, and on the openscenegraph.org website.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 * OpenSceneGraph Public License for more details.
*/

#include <osgDB/ConvertUTF>
#include <osg/Notify>

#include <string.h>
#include <wchar.h>

#if defined(WIN32) && !defined(__CYGWIN__)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif

namespace osgDB
{

std::string convertUTF16toUTF8(const std::wstring& s){return 
convertUTF16toUTF8(s.c_str(), s.length());}
std::string convertUTF16toUTF8(const wchar_t* s){return convertUTF16toUTF8(s, 
wcslen(s));}

std::wstring convertUTF8toUTF16(const std::string& s){return 
convertUTF8toUTF16(s.c_str(), s.length());}
std::wstring convertUTF8toUTF16(const char* s){return convertUTF8toUTF16(s, 
strlen(s));}

std::string convertASCIItoUTF8(const std::string& s){return 
convertASCIItoUTF8(s.c_str(), s.length());}
std::string convertASCIItoUTF8(const char* s){return convertASCIItoUTF8(s, 
strlen(s));}

std::string convertUTF8toASCII(const std::string& s){return 
convertUTF8toASCII(s.c_str(), s.length());}
std::string convertUTF8toASCII(const char* s){return convertUTF8toASCII(s, 
strlen(s));}

/// Converts sourceLength wide characters starting at source to a UTF-8
/// std::string.
///
/// On native Windows builds (WIN32 defined, not Cygwin) the conversion is done
/// with WideCharToMultiByte(CP_UTF8, ...). On every other platform the
/// function is not implemented: it logs a warning and returns an empty string.
///
/// Returns an empty string for empty input and whenever the conversion fails
/// (a warning is emitted through osg::notify in the failure case).
std::string convertUTF16toUTF8(const wchar_t* source, unsigned sourceLength)
{
#if defined(WIN32) && !defined(__CYGWIN__)
    // Empty input yields an empty result without touching the Win32 API.
    if (sourceLength == 0) return std::string();

    // First call passes no output buffer; its return value is used as the
    // size of the destination string.
    const int requiredBytes = WideCharToMultiByte(CP_UTF8, 0, source, sourceLength, 0, 0, 0, 0);
    if (requiredBytes > 0)
    {
        // Second call writes the converted text into the pre-sized string.
        std::string utf8(requiredBytes, '\0');
        const int writtenBytes = WideCharToMultiByte(CP_UTF8, 0, source, sourceLength, &utf8[0], requiredBytes, 0, 0);
        if (writtenBytes > 0) return utf8;
    }

    // Either call reported failure (non-positive result).
    osg::notify(osg::WARN) << "Cannot convert UTF-16 string to UTF-8." << std::endl;
    return std::string();
#else
    //TODO: Implement for other platforms
    osg::notify(osg::WARN) << "ConvertUTF16toUTF8 not implemented." << std::endl;
    return std::string();
#endif
}

/// Converts sourceLength bytes of UTF-8 starting at source to a std::wstring.
///
/// On native Windows builds (WIN32 defined, not Cygwin) the conversion is done
/// with MultiByteToWideChar(CP_UTF8, ...). On every other platform the
/// function is not implemented: it logs a warning and returns an empty string.
///
/// Returns an empty string for empty input and whenever the conversion fails
/// (a warning is emitted through osg::notify in the failure case).
std::wstring convertUTF8toUTF16(const char* source, unsigned sourceLength)
{
#if defined(WIN32) && !defined(__CYGWIN__)
    // Empty input yields an empty result without touching the Win32 API.
    if (sourceLength == 0) return std::wstring();

    // First call passes no output buffer; its return value is used as the
    // size of the destination string.
    const int requiredChars = MultiByteToWideChar(CP_UTF8, 0, source, sourceLength, 0, 0);
    if (requiredChars > 0)
    {
        // Second call writes the converted text into the pre-sized string.
        std::wstring wide(requiredChars, L'\0');
        const int writtenChars = MultiByteToWideChar(CP_UTF8, 0, source, sourceLength, &wide[0], requiredChars);
        if (writtenChars > 0) return wide;
    }

    // Either call reported failure (non-positive result).
    osg::notify(osg::WARN) << "Cannot convert UTF-8 string to UTF-16." << std::endl;
    return std::wstring();
#else
    //TODO: Implement for other platforms
    osg::notify(osg::WARN) << "ConvertUTF8toUTF16 not implemented." << std::endl;
    return std::wstring();
#endif
}


/// Converts an extended 8-bit string to UTF-8.
///
/// Each input byte is treated as a code point in the range 0x00..0xFF:
/// bytes below 0x80 are copied unchanged, bytes 0x80..0xFF are expanded to
/// the two-byte form 110xxxxx 10xxxxxx. Conversion stops after sourceLength
/// bytes or at the first NUL byte, whichever comes first.
///
/// Code borrowed from osgText::String and modified.
std::string convertASCIItoUTF8(const char* source, unsigned sourceLength) {
    std::string encoded;
    unsigned pos = 0;
    while (pos < sourceLength && source[pos] != '\0')
    {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended.
        const unsigned int codePoint = static_cast<unsigned char>(source[pos]);
        ++pos;

        if (codePoint >= 0x80)
        {
            // An 8-bit value never needs more than two UTF-8 bytes.
            encoded.push_back(static_cast<char>(0xc0 | (codePoint >> 6)));
            encoded.push_back(static_cast<char>(0x80 | (codePoint & 0x3f)));
            continue;
        }

        // 7-bit value: identical in both encodings.
        encoded.push_back(static_cast<char>(codePoint));
    }
    return encoded;
}


/**
 * Converts UTF-8 text to an extended 8-bit string.
 *
 * Only code points that fit in one byte (0x00..0xFF) can be represented:
 * single-byte UTF-8 characters are copied unchanged and two-byte sequences
 * (110xxxxx 10xxxxxx) are collapsed to the byte holding the decoded value.
 * Conversion stops after sourceLength bytes or at the first NUL byte,
 * whichever comes first.
 *
 * @param source        Pointer to the UTF-8 bytes to convert.
 * @param sourceLength  Maximum number of bytes to read from source.
 * @return The converted 8-bit string.
 * @throws std::bad_cast if the input is malformed (stray continuation byte,
 *         truncated or interrupted two-byte sequence, overlong two-byte
 *         encoding of a value below 0x80) or contains a character that does
 *         not fit in 8 bits (decoded value above 0xFF, or any 3/4-byte
 *         sequence).
 */
std::string convertUTF8toASCII(const char* source, unsigned sourceLength) {
    std::string asciiString;
    // 2-bytes chars are 110xxxxx 10xxxxxx
    // Lead byte of a two-byte sequence still waiting for its continuation
    // byte. Lead bytes always have the top bits set, so 0 is a safe
    // "no value" marker.
    unsigned int first2ByteChar = 0;
    for(unsigned i=0; i<sourceLength && source[i]; ++i)
    {
        // Go through unsigned char so bytes >= 0x80 are not sign-extended.
        const unsigned int currentChar = static_cast<unsigned char>(source[i]);
        if ((currentChar & 0x80)==0)
        {
            // Plain 7-bit character; it must not interrupt a pending sequence.
            if (first2ByteChar!=0) throw std::bad_cast();
            asciiString+=static_cast<char>(currentChar);
        }
        else if ((currentChar >> 5)==0x6)
        {
            // First byte of a 2-bytes char
            if (first2ByteChar!=0) throw std::bad_cast();
            first2ByteChar = currentChar;
        }
        else if ((currentChar >> 6)==0x2)
        {
            // Second byte of a 2-bytes char
            if (first2ByteChar==0) throw std::bad_cast();
            const unsigned int finalChar = ((first2ByteChar & 0x1F)<<6) | (currentChar & 0x3F);
            // Values below 0x80 must use the one-byte form: a two-byte
            // encoding of them is overlong and therefore invalid UTF-8.
            // Values above 0xFF do not fit in the 8-bit output.
            if (finalChar < 0x80 || finalChar > 0xFF) throw std::bad_cast();
            first2ByteChar = 0;
            asciiString+=static_cast<char>(finalChar);
        }
        else throw std::bad_cast();             // 3 or 4-bytes char
    }
    // Input ended (or hit NUL) in the middle of a two-byte sequence.
    if (first2ByteChar != 0) throw std::bad_cast();
    return asciiString;
}

//std::string s("aeiouéèê");
//std::string s2( osgDB::convertASCIItoUTF8(s) );
//std::string s3( osgDB::convertUTF8toASCII(s2) );
//assert(s == s3);


}

Attachment: ConvertUTF
Description: Binary data

_______________________________________________
osg-submissions mailing list
[email protected]
http://lists.openscenegraph.org/listinfo.cgi/osg-submissions-openscenegraph.org

Reply via email to