C++ 如何从unicode值推断字符集?

C++ 如何从unicode值推断字符集?,c++,windows,visual-studio-2010,unicode,mfc,C++,Windows,Visual Studio 2010,Unicode,Mfc,现在我可以得到unicode值,我必须先从unicode值调用中得到它的字符集 HFONT CreateFont( int nHeight, // height of font int nWidth, // average character width int nEscapement, // angle of escapement int nOrientation, // base-

现在我已经能够拿到字符的 Unicode 值,但在调用下面这个函数之前,必须先根据该 Unicode 值推断出它所属的字符集:

// Signature of the font-creation call the question is about. It returns an
// HFONT; fdwCharSet is the argument whose value has to be derived from the
// character's Unicode value before the call can be made.
HFONT CreateFont(
  int nHeight,               // height of font
  int nWidth,                // average character width
  int nEscapement,           // angle of escapement
  int nOrientation,          // base-line orientation angle
  int fnWeight,              // font weight
  DWORD fdwItalic,           // italic attribute option
  DWORD fdwUnderline,        // underline attribute option
  DWORD fdwStrikeOut,        // strikeout attribute option
  DWORD fdwCharSet,          // character set identifier
  DWORD fdwOutputPrecision,  // output precision
  DWORD fdwClipPrecision,    // clipping precision
  DWORD fdwQuality,          // output quality
  DWORD fdwPitchAndFamily,   // pitch and family
  LPCTSTR lpszFace           // typeface name
);
我从MSDN得到以下信息:

fdwCharSet

[in] Specifies the character set. The following values are predefined: 
ANSI_CHARSET
BALTIC_CHARSET
CHINESEBIG5_CHARSET
DEFAULT_CHARSET
EASTEUROPE_CHARSET
GB2312_CHARSET
GREEK_CHARSET
HANGUL_CHARSET
MAC_CHARSET
OEM_CHARSET
RUSSIAN_CHARSET
SHIFTJIS_CHARSET
SYMBOL_CHARSET
TURKISH_CHARSET
VIETNAMESE_CHARSET

Korean language edition of Windows: 
JOHAB_CHARSET 
Middle East language edition of Windows: 
ARABIC_CHARSET
HEBREW_CHARSET 
Thai language edition of Windows: 
THAI_CHARSET 
The OEM_CHARSET value specifies a character set that is operating-system dependent. 

Windows 95/98/Me: You can use the DEFAULT_CHARSET value to allow the name and size of a font to fully describe the logical font. If the specified font name does not exist, a font from any character set can be substituted for the specified font, so you should use DEFAULT_CHARSET sparingly to avoid unexpected results. 
以下是我目前的实现:

// Normally supplied by <wingdi.h>; this guarded fallback (same value as the
// Windows SDK definition) only takes effect if that header is not in scope.
#ifndef CHINESEBIG5_CHARSET
#define CHINESEBIG5_CHARSET 136
#endif

// Guesses the GDI character-set identifier (a *_CHARSET value, as accepted by
// CreateFont's fdwCharSet argument) for a single Unicode value.
//
// Resolution order:
//   1. When the system ANSI code page is a CJK one (932, 936, 949, 950), code
//      points in the ranges shared by Chinese, Japanese and Korean text map
//      to the charset of that code page.
//   2. Otherwise, or for code points outside those shared ranges, the script
//      range of the code point decides. The checks run in a fixed order, so
//      earlier ranges win where ranges overlap.
//   3. Anything still unmatched falls back to GB2312_CHARSET.
//
// word     Unicode value to classify.
// returns  One of the *_CHARSET constants.
//
// NOTE(review): FX_WORD is presumably a 16-bit type; if so, the two
// supplementary-plane ranges (0x20000+) below can never match. Confirm against
// the typedef before relying on them.
FX_INT32 CharSetFromUnicode(FX_WORD word)
{
    // Charset native to the system ANSI code page, or -1 when that code page
    // is not one of the CJK code pages.
    int nNativeCharSet = -1;
    switch (GetACP())
    {
    case 932:   // Japanese (Shift-JIS)
        nNativeCharSet = SHIFTJIS_CHARSET;
        break;
    case 936:   // Simplified Chinese (GBK)
        nNativeCharSet = GB2312_CHARSET;
        break;
    case 950:   // Traditional Chinese (Big5) -- not GB2312
        nNativeCharSet = CHINESEBIG5_CHARSET;
        break;
    case 949:   // Korean
        nNativeCharSet = HANGUL_CHARSET;
        break;
    default:
        break;
    }

    // Han ideographs and CJK punctuation/compatibility ranges are shared by
    // all CJK scripts, so on a CJK system prefer the system's own charset.
    if (nNativeCharSet != -1 &&
        ((word >= 0x2E80 && word <= 0x2EFF) ||
         (word >= 0x3000 && word <= 0x303F) ||
         (word >= 0x3200 && word <= 0x32FF) ||
         (word >= 0x3300 && word <= 0x33FF) ||
         (word >= 0x3400 && word <= 0x4DB5) ||
         (word >= 0x4E00 && word <= 0x9FFF) ||
         (word >= 0xF900 && word <= 0xFAFF) ||
         (word >= 0xFE30 && word <= 0xFE4F) ||
         (word >= 0x20000 && word <= 0x2A6D6) ||
         (word >= 0x2F800 && word <= 0x2FA1F)))
    {
        return nNativeCharSet;
    }

    // From here on the code point's own range decides.
    if ((word >= 0x4E00 && word <= 0x9FA5) ||
        (word >= 0xE7C7 && word <= 0xE7F3) ||
        (word >= 0x3000 && word <= 0x303F) ||   // e.g. "《" "》" "。" "、"
        (word >= 0x2000 && word <= 0x206F))
    {
        return GB2312_CHARSET;
    }

    if ((word >= 0x3040 && word <= 0x309F) ||   // Hiragana
        (word >= 0x30A0 && word <= 0x30FF) ||   // Katakana
        (word >= 0x31F0 && word <= 0x31FF) ||
        (word >= 0xFF00 && word <= 0xFFEF))     // half-/full-width forms
    {
        return SHIFTJIS_CHARSET;
    }

    if ((word >= 0xAC00 && word <= 0xD7AF) ||   // Hangul syllables
        (word >= 0x1100 && word <= 0x11FF) ||   // Hangul Jamo
        (word >= 0x3130 && word <= 0x318F))     // Hangul compatibility Jamo
    {
        return HANGUL_CHARSET;
    }

    if (word >= 0x0E00 && word <= 0x0E7F)
    {
        return THAI_CHARSET;
    }

    if ((word >= 0x0370 && word <= 0x03FF) ||
        (word >= 0x1F00 && word <= 0x1FFF))
    {
        return GREEK_CHARSET;
    }

    // The presentation forms are tested as two separate ranges so that
    // 0xFE00-0xFE6F (which contains the CJK compatibility forms
    // 0xFE30-0xFE4F handled above) is not reported as Arabic.
    if ((word >= 0x0600 && word <= 0x06FF) ||
        (word >= 0xFB50 && word <= 0xFDFF) ||   // Arabic Presentation Forms-A
        (word >= 0xFE70 && word <= 0xFEFC))     // Arabic Presentation Forms-B
    {
        return ARABIC_CHARSET;
    }

    if (word >= 0x0590 && word <= 0x05FF)
    {
        return HEBREW_CHARSET;
    }

    if (word >= 0x0400 && word <= 0x04FF)
    {
        return RUSSIAN_CHARSET;
    }

    // Turkish-specific letters; must be tested before the Latin Extended
    // range below, which contains them.
    if (word == 0x11E || word == 0x11F || word == 0x130 ||
        word == 0x131 || word == 0x15E || word == 0x15F)
    {
        return TURKISH_CHARSET;
    }

    if (word >= 0x0100 && word <= 0x024F)
    {
        return EASTEUROPE_CHARSET;
    }

    if (word >= 0x1E00 && word <= 0x1EFF)
    {
        return VIETNAMESE_CHARSET;
    }

    // Everything else, including plain ASCII, falls back to GB2312.
    return GB2312_CHARSET;
}
(此处原为上方 CharSetFromUnicode 代码的重复副本,在机器翻译和提取过程中已损坏,请以上方的代码为准。)

一般来说,没有可靠的方法来猜测编码。

不过在实践中,还是可以进行猜测的。例如,Mozilla 开发了一个非常好的通用字符集检测库(universalchardet):


Firefox 用它来自动猜测你所访问的任意网页的字符集(网页并不总是声明了正确的编码,有时甚至根本没有声明),在实践中效果似乎相当不错。

如果你使用的是 Unicode 字符和函数,而不是“ANSI”字符和函数,我认为字符集参数应该不会有任何影响,对吗?还是说情况比这更复杂?