C++ 有没有单文件库可以将utf8（char*）转换为wchar_t？_C++

C++ 有没有单文件库可以将utf8（char*）转换为wchar_t？

c++

C++ 一个文件库将utf8（字符*）转换为wchar\u t？,c++,C++,我用的是非常棒的。我唯一的问题是需要将utf8字符串（char*）转换为宽字符字符串（wchar\u t*）。我在谷歌上搜索并尝试了3种不同的lib，但都失败了（因为缺少标题）我不需要任何花哨的东西。只是单向转换。如何做到这一点？如果您使用的是windows（考虑到您对wchar的需要，您很可能是这样），请使用MultiByteToWideChar函数（在windows.h中声明），如下所示：或者，如果您想要的只是UTF8的文本多字节表示（这是不可能的，但也是可能的），请使用以下命令（std

我用的是非常棒的。我唯一的问题是需要将utf8字符串（

char*

）转换为宽字符字符串（

wchar_t*

）。我在谷歌上搜索并尝试了3种不同的库，但都失败了（因为缺少头文件）

我不需要任何花哨的东西。只是单向转换。如何做到这一点？

如果您使用的是windows（考虑到您对wchar的需要，您很可能是这样），请使用MultiByteToWideChar函数（在windows.h中声明），如下所示：

或者，如果您想要的只是UTF8的字面多字节表示（这不太可能，但也有可能），请使用以下函数（在stdlib.h中声明）：

希望这有帮助。

这是我写的一段代码。它似乎工作得很好。遇到utf8错误或值>FFFF（wchar_t无法容纳）时返回0。

#包括
使用名称空间std；
wchar_t*utf8_到wchar（const char*utf8）{
wstring-sz；
wchar_t c；
自动p=utf8；
而（*p！=0）{
自动v=（*p）；
如果（v>=0）{
c=v；
sz+=c；
++p；
继续；
}
int shiftCount=0；
如果（（v&0xE0）=0xC0）{
shiftCount=1；
c=v&0x1F；
}
else if（（v&0xF0）==0xE0）{
shiftCount=2；
c=v&0xF；
}
其他的
返回0；
++p；
while（shiftCount）{
v=*p；
++p；
如果（（v&0xC0）！=0x80）返回0；
c以下（未测试）代码显示如何将当前区域设置中的多字节字符串转换为宽字符串。因此，如果当前区域设置为UTF-8，则这将满足您的需要
// Converts a multibyte string to a wide string with mbstowcs.
// NOTE(review): per the surrounding text, mbstowcs decodes according to the
// current locale, so this only decodes UTF-8 when the current locale is a
// UTF-8 one — confirm the locale is set before relying on it.
const char * inputStr = ... // your UTF-8 input
// The output buffer gets one wchar_t per input byte, plus one extra element.
size_t maxSize = strlen(inputStr) + 1;
// NOTE(review): this snippet never releases the array with delete[].
wchar_t * outputWStr = new wchar_t[maxSize];
size_t result = mbstowcs(outputWStr, inputStr, maxSize);
// result is a size_t, so the -1 below is converted to the largest size_t value
// before the comparison; presumably that is mbstowcs's error return.
if (result == -1) {
    cerr << "Invalid multibyte characters in input";
}

const char*inputStr=…//您的UTF-8输入
size\u t maxSize=strlen（inputStr）+1；
wchar_t*outputWStr=新的wchar_t[maxSize]；
大小\u t结果=mbstowcs（输出WSTR、输入STR、最大大小）；
如果（结果==-1）{
cerr下面的代码成功地使CreateDirectoryW（）能够写入C:\Users\ПаСССааааСааааааааа
std::wstring utf16_from_utf8(const std::string & utf8)
{
    // Special case of empty input string
if (utf8.empty())
    return std::wstring();

// Шаг 1, Get length (in wchar_t's) of resulting UTF-16 string
const int utf16_length = ::MultiByteToWideChar(
    CP_UTF8,            // convert from UTF-8
    0,                  // default flags
    utf8.data(),        // source UTF-8 string
    utf8.length(),      // length (in chars) of source UTF-8 string
    NULL,               // unused - no conversion done in this step
    0                   // request size of destination buffer, in wchar_t's
    );
if (utf16_length == 0)
{
    // Error
    DWORD error = ::GetLastError();
    throw ;
}


// // Шаг 2, Allocate properly sized destination buffer for UTF-16 string
std::wstring utf16;
utf16.resize(utf16_length);

// // Шаг 3, Do the actual conversion from UTF-8 to UTF-16
if ( ! ::MultiByteToWideChar(
    CP_UTF8,            // convert from UTF-8
    0,                  // default flags
    utf8.data(),        // source UTF-8 string
    utf8.length(),      // length (in chars) of source UTF-8 string
    &utf16[0],          // destination buffer
    utf16.length()      // size of destination buffer, in wchar_t's
    ) )
{
    // не работает сука ... 
    DWORD error = ::GetLastError();
    throw;
}

return utf16; // ура!
}

@Mehrdad:mbstowcs是一个CRT函数，在设置参数后，它最终会归结为Win32 API函数MultiByteToWideChar。@约翰：我的意思是你忘记了mbstowcs中的第一个s
。你的第二个代码示例严重损坏，通过比较你对mbstowcs（）的使用可以看出
带有该函数的手册页。您是否打算使用另一个函数名而不是mbstowcs（）
？是的，不是。那是我脑子里想不出来的。我设法混淆了函数参数的顺序。这就是为什么这些事情不应该让我脑子里想不起来的原因。@Mehrdad:那只是一个修辞问题。改为编辑答案。我可以看到两个问题。首先，返回sz.c_str（）
正在返回一个悬空指针，因为sz
变量是基于堆栈的，并且在函数返回时将被销毁。其次，wchar_t的大小取决于平台。它在Windows和OS X上是16位宽，但在许多UNIX平台上是32位宽。因此，您关于wchar_t无法保存大于0xFFFF的值的说法并不正确。即使在Windows或OS X上，使用代理项对也可以提供对UTF-16而不是（仅）UCS-2的支持。事实上，在Mac OS X上，wchar_t也是32位宽。类似的问题
#include <limits>
#include <stdexcept>
#include <string>
using namespace std;
/// Decodes a NUL-terminated UTF-8 string into a NUL-terminated wide string.
///
/// Only one-, two- and three-byte sequences (code points up to U+FFFF) are
/// accepted, so every decoded value fits in a single wchar_t even where
/// wchar_t is 16 bits wide.
///
/// @param utf8  NUL-terminated UTF-8 bytes. A null pointer is treated as an error.
/// @return Pointer to the decoded string, or 0 when the input is null, contains
///         a malformed, truncated or overlong sequence, or encodes a value
///         above U+FFFF. The result lives in a per-thread buffer owned by this
///         function: it stays valid until the next call on the same thread and
///         must not be freed by the caller.
wchar_t* utf8_to_wchar(const char*utf8){
    // The result must outlive this call; a function-local wstring would be
    // destroyed on return and leave the caller with a dangling pointer.
    thread_local std::wstring buffer;
    buffer.clear();

    if (utf8 == nullptr)
        return 0;

    const char* p = utf8;
    while (*p != 0) {
        // Inspect bytes as unsigned so the tests below do not depend on
        // whether plain char is signed on this platform.
        const unsigned char lead = static_cast<unsigned char>(*p);
        ++p;

        unsigned int codePoint = 0;
        unsigned int smallestAllowed = 0;  // lowest value this sequence length may encode
        int continuationBytes = 0;

        if (lead < 0x80) {
            codePoint = lead;
        }
        else if ((lead & 0xE0) == 0xC0) {
            continuationBytes = 1;
            smallestAllowed = 0x80;
            codePoint = lead & 0x1F;
        }
        else if ((lead & 0xF0) == 0xE0) {
            continuationBytes = 2;
            smallestAllowed = 0x800;
            codePoint = lead & 0x0F;
        }
        else {
            // Stray continuation byte, or the lead byte of a sequence that
            // encodes a value above U+FFFF.
            return 0;
        }

        for (; continuationBytes > 0; --continuationBytes) {
            const unsigned char next = static_cast<unsigned char>(*p);
            // Also catches the terminating NUL of a truncated sequence, so the
            // read position never moves past the end of the input.
            if ((next & 0xC0) != 0x80)
                return 0;
            ++p;
            codePoint = (codePoint << 6) | (next & 0x3Fu);
        }

        // Reject overlong forms (a value encoded with more bytes than needed).
        if (codePoint < smallestAllowed)
            return 0;

        buffer += static_cast<wchar_t>(codePoint);
    }
    return &buffer[0];
}

// Converts a multibyte string to a wide string with mbstowcs.
// NOTE(review): per the surrounding text, mbstowcs decodes according to the
// current locale, so this only decodes UTF-8 when the current locale is a
// UTF-8 one — confirm the locale is set before relying on it.
const char * inputStr = ... // your UTF-8 input
// The output buffer gets one wchar_t per input byte, plus one extra element.
size_t maxSize = strlen(inputStr) + 1;
// NOTE(review): this snippet never releases the array with delete[].
wchar_t * outputWStr = new wchar_t[maxSize];
size_t result = mbstowcs(outputWStr, inputStr, maxSize);
// result is a size_t, so the -1 below is converted to the largest size_t value
// before the comparison; presumably that is mbstowcs's error return.
if (result == -1) {
    cerr << "Invalid multibyte characters in input";
}

std::wstring utf16_from_utf8(const std::string & utf8)
{
    // Special case of empty input string
if (utf8.empty())
    return std::wstring();

// Шаг 1, Get length (in wchar_t's) of resulting UTF-16 string
const int utf16_length = ::MultiByteToWideChar(
    CP_UTF8,            // convert from UTF-8
    0,                  // default flags
    utf8.data(),        // source UTF-8 string
    utf8.length(),      // length (in chars) of source UTF-8 string
    NULL,               // unused - no conversion done in this step
    0                   // request size of destination buffer, in wchar_t's
    );
if (utf16_length == 0)
{
    // Error
    DWORD error = ::GetLastError();
    throw ;
}


// // Шаг 2, Allocate properly sized destination buffer for UTF-16 string
std::wstring utf16;
utf16.resize(utf16_length);

// // Шаг 3, Do the actual conversion from UTF-8 to UTF-16
if ( ! ::MultiByteToWideChar(
    CP_UTF8,            // convert from UTF-8
    0,                  // default flags
    utf8.data(),        // source UTF-8 string
    utf8.length(),      // length (in chars) of source UTF-8 string
    &utf16[0],          // destination buffer
    utf16.length()      // size of destination buffer, in wchar_t's
    ) )
{
    // не работает сука ... 
    DWORD error = ::GetLastError();
    throw;
}

return utf16; // ура!
}