C++ 如何从unicode值推断字符集?
标签:c++, windows, visual-studio-2010, unicode, mfc
现在我已经可以得到 Unicode 值,但在调用下面的 CreateFont 之前,必须先根据这个 Unicode 值推断出它所属的字符集(fdwCharSet):
// Prototype of CreateFont as quoted from MSDN. The fdwCharSet parameter is the
// character-set identifier that the question needs to derive from a Unicode value.
HFONT CreateFont(
int nHeight, // height of font
int nWidth, // average character width
int nEscapement, // angle of escapement
int nOrientation, // base-line orientation angle
int fnWeight, // font weight
DWORD fdwItalic, // italic attribute option
DWORD fdwUnderline, // underline attribute option
DWORD fdwStrikeOut, // strikeout attribute option
DWORD fdwCharSet, // character set identifier
DWORD fdwOutputPrecision, // output precision
DWORD fdwClipPrecision, // clipping precision
DWORD fdwQuality, // output quality
DWORD fdwPitchAndFamily, // pitch and family
LPCTSTR lpszFace // typeface name
);
我从MSDN得到以下信息:
fdwCharSet
[in] Specifies the character set. The following values are predefined:
ANSI_CHARSET
BALTIC_CHARSET
CHINESEBIG5_CHARSET
DEFAULT_CHARSET
EASTEUROPE_CHARSET
GB2312_CHARSET
GREEK_CHARSET
HANGUL_CHARSET
MAC_CHARSET
OEM_CHARSET
RUSSIAN_CHARSET
SHIFTJIS_CHARSET
SYMBOL_CHARSET
TURKISH_CHARSET
VIETNAMESE_CHARSET
Korean language edition of Windows:
JOHAB_CHARSET
Middle East language edition of Windows:
ARABIC_CHARSET
HEBREW_CHARSET
Thai language edition of Windows:
THAI_CHARSET
The OEM_CHARSET value specifies a character set that is operating-system dependent.
Windows 95/98/Me: You can use the DEFAULT_CHARSET value to allow the name and size of a font to fully describe the logical font. If the specified font name does not exist, a font from any character set can be substituted for the specified font, so you should use DEFAULT_CHARSET sparingly to avoid unexpected results.
以下是我现在拥有的:
/**
 * Guess a GDI character-set identifier (one of the *_CHARSET constants, e.g.
 * for the fdwCharSet argument of CreateFont) from a single Unicode value.
 *
 * Strategy:
 *   1. If the value is in a CJK range that Chinese, Japanese and Korean share,
 *      the system ANSI code page (GetACP) decides which CJK charset is used.
 *   2. Otherwise the value is matched against fixed Unicode ranges, tested in
 *      the order written below (earlier ranges win when ranges overlap).
 *   3. Anything unmatched falls back to GB2312_CHARSET.
 *
 * @param word  Unicode value to classify.
 *              NOTE(review): FX_WORD is declared elsewhere; if it is a 16-bit
 *              type, only BMP values (<= 0xFFFF) can ever be passed - confirm.
 * @return      A *_CHARSET constant; never a failure code.
 */
FX_INT32 CharSetFromUnicode(FX_WORD word)
{
    // Ranges shared by the CJK scripts (Han ideographs and related symbols):
    // the code point alone cannot tell Chinese, Japanese and Korean apart.
    if ((word >= 0x2E80 && word <= 0x2EFF) ||   // CJK Radicals Supplement
        (word >= 0x3000 && word <= 0x303F) ||   // CJK Symbols and Punctuation
        (word >= 0x3200 && word <= 0x32FF) ||   // Enclosed CJK Letters and Months
        (word >= 0x3300 && word <= 0x33FF) ||   // CJK Compatibility
        (word >= 0x3400 && word <= 0x4DB5) ||   // CJK Unified Ideographs Extension A
        (word >= 0x4E00 && word <= 0x9FFF) ||   // CJK Unified Ideographs
        (word >= 0xF900 && word <= 0xFAFF) ||   // CJK Compatibility Ideographs
        (word >= 0xFE30 && word <= 0xFE4F) ||   // CJK Compatibility Forms
        // NOTE(review): the next two ranges are above 0xFFFF and can never
        // match if FX_WORD is 16 bits wide - kept as in the original; confirm.
        (word >= 0x20000 && word <= 0x2A6D6) || // CJK Unified Ideographs Extension B
        (word >= 0x2F800 && word <= 0x2FA1F))   // CJK Compatibility Ideographs Supplement
    {
        // Let the system ANSI code page pick the CJK flavour.
        switch (GetACP())
        {
        case 932:   // Japanese (Shift-JIS)
            return SHIFTJIS_CHARSET;
        case 936:   // Simplified Chinese (GBK)
            return GB2312_CHARSET;
        case 950:   // Traditional Chinese (Big5); previously mapped to GB2312_CHARSET
            return CHINESEBIG5_CHARSET;
        case 949:   // Korean
            return HANGUL_CHARSET;
        default:    // non-CJK code page: use the generic range tests below
            break;
        }
    }

    // Generic, code-page independent classification.
    if ((word >= 0x4E00 && word <= 0x9FA5) ||   // CJK Unified Ideographs
        (word >= 0xE7C7 && word <= 0xE7F3) ||   // Private Use Area sub-range
        (word >= 0x3000 && word <= 0x303F) ||   // CJK punctuation (ideographic comma, full stop, angle brackets)
        (word >= 0x2000 && word <= 0x206F))     // General Punctuation
    {
        return GB2312_CHARSET;
    }

    if ((word >= 0x3040 && word <= 0x309F) ||   // Hiragana
        (word >= 0x30A0 && word <= 0x30FF) ||   // Katakana
        (word >= 0x31F0 && word <= 0x31FF) ||   // Katakana Phonetic Extensions
        (word >= 0xFF00 && word <= 0xFFEF))     // Halfwidth and Fullwidth Forms
    {
        return SHIFTJIS_CHARSET;
    }

    if ((word >= 0xAC00 && word <= 0xD7AF) ||   // Hangul Syllables
        (word >= 0x1100 && word <= 0x11FF) ||   // Hangul Jamo
        (word >= 0x3130 && word <= 0x318F))     // Hangul Compatibility Jamo
    {
        return HANGUL_CHARSET;
    }

    if (word >= 0x0E00 && word <= 0x0E7F)       // Thai
    {
        return THAI_CHARSET;
    }

    if ((word >= 0x0370 && word <= 0x03FF) ||   // Greek and Coptic
        (word >= 0x1F00 && word <= 0x1FFF))     // Greek Extended
    {
        return GREEK_CHARSET;
    }

    if ((word >= 0x0600 && word <= 0x06FF) ||   // Arabic
        (word >= 0xFB50 && word <= 0xFEFC))     // Arabic Presentation Forms (and the blocks in between)
    {
        return ARABIC_CHARSET;
    }

    if (word >= 0x0590 && word <= 0x05FF)       // Hebrew
    {
        return HEBREW_CHARSET;
    }

    if (word >= 0x0400 && word <= 0x04FF)       // Cyrillic
    {
        return RUSSIAN_CHARSET;
    }

    // Turkish-specific letters; must be tested before the Latin Extended
    // range below, which would otherwise claim them for EASTEUROPE_CHARSET.
    if (word == 0x11E || word == 0x11F || word == 0x130 ||
        word == 0x131 || word == 0x15E || word == 0x15F)
    {
        return TURKISH_CHARSET;
    }

    if (word >= 0x0100 && word <= 0x024F)       // Latin Extended-A / Extended-B
    {
        return EASTEUROPE_CHARSET;
    }

    if (word >= 0x1E00 && word <= 0x1EFF)       // Latin Extended Additional
    {
        return VIETNAMESE_CHARSET;
    }

    // Fallback kept from the original: everything unmatched, ASCII included,
    // is reported as GB2312_CHARSET.
    // NOTE(review): ANSI_CHARSET or DEFAULT_CHARSET may suit non-Chinese
    // systems better - confirm the intended default with the callers.
    return GB2312_CHARSET;
}
FX_INT32 CharSetFromUnicode(FX_WORD word)
{
int nACP = GetACP();
switch (nACP)
{
case 932:
case 936:
case 950:
case 949:
// ……(其余部分与上文的完整代码相同)
}
一般来说,没有可靠的方法来猜测编码。
然而,在实践中是可以猜测的。例如,Mozilla 开发了一个非常好的通用字符集检测库(universalchardet)。
它在 Firefox 中用于自动猜测您所访问的任意网页的字符集(网页即使声明了编码,也不一定是正确的),并且在实践中似乎工作得很好。
如果您使用的是 Unicode 字符和函数,而不是"ANSI"字符和函数,我认为字符集参数不应该有任何区别,对吗?还是说情况比这更复杂?