C++ 在C++中从UTF-8转换为ISO 8859-15_C++_String_Encoding_Utf 8_Iso 8859 15

C++ 在C++中从UTF-8转换为ISO 8859-15

c++ string encoding utf-8

C++ 在C++中从UTF-8转换为ISO 8859-15,c++,string,encoding,utf-8,iso-8859-15,C++,String,Encoding,Utf 8,Iso 8859 15,我想在C/C++中完成从UTF-8到ISO 8859-15的转换，且不引入额外的库。我怎样才能做到这一点？我发现以下代码适用于ISO 8859-1，但我不确定如何处理ISO 8859-15与ISO 8859-1之间的差异： std::string UTF8toISO8859_1(const char * in) { std::string out; if (in == NULL) return out; unsigned int codepoint; while (*in != 0) { unsigned char ch = static_cast<unsigned char>(*in); if (ch …（代码在此处被截断） 我喜欢这段代码。它惊人地短。大部分代码只处理将多字节序列……

我想在C/C++中完成从UTF-8到ISO 8859-15的转换，且不引入额外的库。

我怎样才能做到这一点？

我发现以下代码适用于ISO 8859-1，但我不确定如何处理ISO 8859-15与ISO 8859-1之间的差异：

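std::string UTF8toISO8859_1(const char * in) {
    std::string out;
    if (in == NULL)
        return out;
    unsigned int codepoint;
    while (*in != 0) {
        unsigned char ch = static_cast<unsigned char>(*in);
        if (ch …（原文代码在此处被截断）

我喜欢这段代码。它惊人地短。大部分代码只处理将多字节序列解码为码点。一旦一个码点被解码，转换为ISO-8859-1非常简单：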

如果码点小于或等于255，它也是一个有效的ISO-8859-1字符：out.append(1, static_cast<char>(codepoint));
如果不是，则它无法在ISO-8859-1中表示，并用问号替换：out.append("?");

因此，为了使其适用于ISO-8859-15，需要更多的代码来处理ISO-8859-15引入时被替换掉的那些字符。不幸的是，这大大增加了代码量。
下面的代码应该很容易理解。如果性能是主要考虑，可以对其进行进一步优化：
// Maps a single decoded Unicode code point to its ISO-8859-15 byte.
// Returns '?' for every code point that ISO-8859-15 cannot represent.
static char codepointToISO8859_15(unsigned int codepoint) {
    switch (codepoint) {
        // Characters that ISO-8859-15 introduced in place of eight Latin-1
        // positions; they live above U+00FF in Unicode.
        case 0x20AC: return static_cast<char>(0xa4); // EURO SIGN
        case 0x0160: return static_cast<char>(0xa6); // S WITH CARON
        case 0x0161: return static_cast<char>(0xa8); // s WITH CARON
        case 0x017d: return static_cast<char>(0xb4); // Z WITH CARON
        case 0x017e: return static_cast<char>(0xb8); // z WITH CARON
        case 0x0152: return static_cast<char>(0xbc); // LIGATURE OE
        case 0x0153: return static_cast<char>(0xbd); // LIGATURE oe
        case 0x0178: return static_cast<char>(0xbe); // Y WITH DIAERESIS

        // The Latin-1 characters that used to occupy those eight positions
        // no longer exist in ISO-8859-15.
        case 0xa4: case 0xa6: case 0xa8: case 0xb4:
        case 0xb8: case 0xbc: case 0xbd: case 0xbe:
            return '?';

        default:
            // Every other code point up to 255 has the same value in both
            // character sets; anything above 255 is not representable.
            return codepoint <= 0xff ? static_cast<char>(codepoint) : '?';
    }
}

// Converts a NUL-terminated UTF-8 string to ISO-8859-15 (despite the
// historical "_1" in the name, which is kept for existing callers).
//
// in:      UTF-8 input; may be NULL, in which case an empty string is returned.
// returns: one output byte per decoded code point; code points that have no
//          ISO-8859-15 representation are replaced by '?'.
//
// Malformed UTF-8 is not rejected: bytes are folded into the running code
// point as they come, and a value is emitted whenever the next byte is not a
// continuation byte. Decoded values above 0x10FFFF are silently dropped.
std::string UTF8toISO8859_1(const char * in) {
    std::string out;
    if (in == NULL)
        return out;

    // Must start at a defined value: if the input begins with a stray
    // continuation byte, the shift below reads it before any lead byte has
    // assigned it.
    unsigned int codepoint = 0;
    while (*in != 0) {
        const unsigned char ch = static_cast<unsigned char>(*in);
        if (ch <= 0x7f)
            codepoint = ch;                               // single-byte (ASCII)
        else if (ch <= 0xbf)
            codepoint = (codepoint << 6) | (ch & 0x3f);   // continuation byte
        else if (ch <= 0xdf)
            codepoint = ch & 0x1f;                        // lead byte, 2-byte sequence
        else if (ch <= 0xef)
            codepoint = ch & 0x0f;                        // lead byte, 3-byte sequence
        else
            codepoint = ch & 0x07;                        // lead byte, 4-byte sequence
        ++in;

        // The sequence is complete once the following byte is not a
        // continuation byte (the terminating NUL counts as "not a continuation").
        if (((*in & 0xc0) != 0x80) && (codepoint <= 0x10ffff)) {
            out.append(1, codepointToISO8859_15(codepoint));
        }
    }
    return out;
}

std::string UTF8toISO8859_1(const char * in) {
    std::string out;
    if (in == NULL)
        return out;
    unsigned int codepoint;
    while (*in != 0) {
        unsigned char ch = static_cast<unsigned char>(*in);
        if (ch …（原文代码在此处被截断）

这可能有帮助吗？
// Converts a NUL-terminated UTF-8 string to ISO-8859-15 (the "_1" in the
// name is historical and kept so existing callers keep working).
//
// in:      UTF-8 input; NULL yields an empty string.
// returns: one byte per decoded code point, with '?' substituted for code
//          points that ISO-8859-15 cannot represent.
//
// Malformed UTF-8 is decoded leniently rather than rejected: a value is
// emitted whenever the byte after the current one is not a continuation
// byte, and decoded values above 0x10FFFF produce no output.
std::string UTF8toISO8859_1(const char * in) {
    std::string out;
    if (in == NULL)
        return out;

    // Initialised on purpose: a continuation byte at the very start of the
    // input is shifted into this variable before any lead byte has set it.
    unsigned int codepoint = 0;
    while (*in != 0) {
        const unsigned char ch = static_cast<unsigned char>(*in);
        if (ch <= 0x7f)
            codepoint = ch;                               // ASCII
        else if (ch <= 0xbf)
            codepoint = (codepoint << 6) | (ch & 0x3f);   // continuation byte
        else if (ch <= 0xdf)
            codepoint = ch & 0x1f;                        // starts a 2-byte sequence
        else if (ch <= 0xef)
            codepoint = ch & 0x0f;                        // starts a 3-byte sequence
        else
            codepoint = ch & 0x07;                        // starts a 4-byte sequence
        ++in;

        // Wait until the next byte is no longer a continuation byte, then
        // the code point is complete.
        if ((*in & 0xc0) == 0x80 || codepoint > 0x10ffff)
            continue;

        // A code point has been decoded; convert it to ISO-8859-15.
        char outc;
        switch (codepoint) {
            // Code points above 255 that ISO-8859-15 can represent.
            case 0x20AC: outc = static_cast<char>(0xa4); break; // EURO SIGN
            case 0x0160: outc = static_cast<char>(0xa6); break; // S WITH CARON
            case 0x0161: outc = static_cast<char>(0xa8); break; // s WITH CARON
            case 0x017d: outc = static_cast<char>(0xb4); break; // Z WITH CARON
            case 0x017e: outc = static_cast<char>(0xb8); break; // z WITH CARON
            case 0x0152: outc = static_cast<char>(0xbc); break; // LIGATURE OE
            case 0x0153: outc = static_cast<char>(0xbd); break; // LIGATURE oe
            case 0x0178: outc = static_cast<char>(0xbe); break; // Y WITH DIAERESIS

            // Latin-1 characters whose positions were reassigned to the
            // characters above; they cannot be represented any more.
            case 0xa4: case 0xa6: case 0xa8: case 0xb4:
            case 0xb8: case 0xbc: case 0xbd: case 0xbe:
                outc = '?';
                break;

            default:
                // Code points up to 255 can be converted directly with the
                // exceptions handled above; everything else is unrepresentable.
                outc = (codepoint <= 255) ? static_cast<char>(codepoint) : '?';
                break;
        }
        out.append(1, outc);
    }
    return out;
}