C 如何将字符串\\u6d4e\\u5357\\u5e02转换为utf-8数据_C

C 如何将字符串\\u6d4e\\u5357\\u5e02转换为utf-8数据

C 如何将字符串\\u6d4e\\u5357\\u5e02转换为utf-8数据,c,C,gcc将把\u6d4e\u5357\u5e02格式化为utf-8编码数据。但如何将ucs2字符串格式化为utf-8编码数据 char str[] = "\\u6d4e\\u5357\\u5e02"; 您需要一个转换器功能 /* Input: a Unicode code point, "ucs2". Output: UTF-8 characters in buffer "utf8". Return value: the number of bytes written

gcc将把

\u6d4e\u5357\u5e02

这样的转义序列在编译期直接编码为 UTF-8 数据。但如果字符串中保存的是转义序列的字面文本（如下所示），该如何在运行时把这些 UCS-2 码点转换为 UTF-8 编码数据？

    char str[] = "\\u6d4e\\u5357\\u5e02";

您需要一个转换函数，把每个 Unicode 码点编码为 UTF-8 字节：

/* Error codes returned by ucs2_to_utf8. They are defined here only when the
   including code has not already provided them; both are negative so that
   callers can detect failure with "result < 0". */
#ifndef UNICODE_BAD_INPUT
#define UNICODE_BAD_INPUT (-1)
#endif
#ifndef UNICODE_SURROGATE_PAIR
#define UNICODE_SURROGATE_PAIR (-2)
#endif

/* Encode one Unicode code point as UTF-8.

   Input: a Unicode code point, "ucs2", in the range 0 .. 0x10FFFF.

   Output: the UTF-8 bytes in buffer "utf8", followed by a zero byte.

   Return value: the number of UTF-8 bytes written into "utf8" (1 to 4,
   not counting the terminating zero byte), or a negative number if
   there was an error:
     UNICODE_SURROGATE_PAIR  "ucs2" is in 0xD800 .. 0xDFFF, which is
                             ill-formed as a standalone code point;
     UNICODE_BAD_INPUT       "ucs2" is negative or above 0x10FFFF.
   Nothing is written to "utf8" when an error is returned.

   The buffer "utf8" must have at least FIVE bytes of space: up to four
   encoded bytes plus the terminating zero byte. */

int ucs2_to_utf8 (int ucs2, unsigned char * utf8)
{
    /* Negative values are not code points; without this check they would
       fall into the one-byte branch and emit a truncated byte. */
    if (ucs2 < 0) {
        return UNICODE_BAD_INPUT;
    }
    if (ucs2 < 0x80) {
        utf8[0] = (unsigned char) ucs2;
        utf8[1] = '\0';
        return 1;
    }
    if (ucs2 < 0x800) {
        utf8[0] = (unsigned char) ((ucs2 >> 6)   | 0xC0);
        utf8[1] = (unsigned char) ((ucs2 & 0x3F) | 0x80);
        utf8[2] = '\0';
        return 2;
    }
    /* The three-byte range is 0x800 .. 0xFFFF inclusive. */
    if (ucs2 < 0x10000) {
        if (ucs2 >= 0xD800 && ucs2 <= 0xDFFF) {
            /* Ill-formed: surrogate halves are not encodable in UTF-8. */
            return UNICODE_SURROGATE_PAIR;
        }
        utf8[0] = (unsigned char) (((ucs2 >> 12)       ) | 0xE0);
        utf8[1] = (unsigned char) (((ucs2 >> 6 ) & 0x3F) | 0x80);
        utf8[2] = (unsigned char) (((ucs2      ) & 0x3F) | 0x80);
        utf8[3] = '\0';
        return 3;
    }
    /* The four-byte range is 0x10000 .. 0x10FFFF inclusive. */
    if (ucs2 <= 0x10FFFF) {
        /* http://tidy.sourceforge.net/cgi-bin/lxr/source/src/utf8.c#L380 */
        utf8[0] = (unsigned char) (0xF0 | (ucs2 >> 18));
        utf8[1] = (unsigned char) (0x80 | ((ucs2 >> 12) & 0x3F));
        utf8[2] = (unsigned char) (0x80 | ((ucs2 >> 6) & 0x3F));
        utf8[3] = (unsigned char) (0x80 | (ucs2 & 0x3F));
        utf8[4] = '\0';
        return 4;
    }
    return UNICODE_BAD_INPUT;
}

/* 输入：Unicode 代码点 "ucs2"。
   输出：写入缓冲区 "utf8" 的 UTF-8 字节。
   返回值：写入 "utf8" 的字节数；如果出现错误，则为负数。
   此函数会在字符串末尾添加一个零字节，并假设缓冲区 "utf8"
   至少有五个字节的可写空间（最多四个编码字节加一个零字节）。 */
int ucs2_to_utf8 (int ucs2, unsigned char * utf8)
{
    if (ucs2 < 0x80) {
        utf8[0] = ucs2;
        utf8[1] = '\0';
        return 1;
    }
    if (ucs2 >= 0x80  && ucs2 < 0x800) {
        utf8[0] = (ucs2 >> 6)   | 0xC0;
        utf8[1] = (ucs2 & 0x3F) | 0x80;
        utf8[2] = '\0';
        return 2;
    }
    if (ucs2 >= 0x800 && ucs2 < 0xFFFF) {
        if (ucs2 >= 0xD800 && ucs2 <= 0xDFFF) {
            /* Ill-formed. */
            return UNICODE_SURROGATE_PAIR;
        }
        utf8[0] = ((ucs2 >> 12)       ) | 0xE0;
        utf8[1] = ((ucs2 >> 6 ) & 0x3F) | 0x80;
        utf8[2] = ((ucs2      ) & 0x3F) | 0x80;
        utf8[3] = '\0';
        return 3;
    }
    if (ucs2 >= 0x10000 && ucs2 < 0x10FFFF) {
        /* http://tidy.sourceforge.net/cgi-bin/lxr/source/src/utf8.c#L380 */
        utf8[0] = 0xF0 | (ucs2 >> 18);
        utf8[1] = 0x80 | ((ucs2 >> 12) & 0x3F);
        utf8[2] = 0x80 | ((ucs2 >> 6) & 0x3F);
        utf8[3] = 0x80 | ((ucs2 & 0x3F));
        utf8[4] = '\0';
        return 4;
    }
    return UNICODE_BAD_INPUT;
}