Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/java/320.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
java无法正确地从拉丁语转换为utf8_Java_Mysql_Utf 8_Character Encoding_Codepages - Fatal编程技术网

java无法正确地从拉丁语转换为utf8

java无法正确地从拉丁语转换为utf8,java,mysql,utf-8,character-encoding,codepages,Java,Mysql,Utf 8,Character Encoding,Codepages,我从mysql中选择数据,数据库不是utf8(unicode字符另存为拉丁语,例如unicode字符串Đ)ỗ 钛ến(正确形式)另存为Äá»-Tiáº?n)。如果我使用PHP来回显html,我只需设置网页正确显示。 如果我没有设置meta标记,当Chrome打开时,windows-1258中的Chrome检测编码手动更改为Unicode(utf-8),网页将正确显示 问题是:当我使用jdbc从mysql选择数据时,我会这样转换: byte[] asciiBytes1 = "Äá»— ti

我从mysql中选择数据,数据库不是utf8(unicode字符另存为拉丁语,例如unicode字符串
Đỗ Tiến
(正确形式)另存为
Äá»-Tiáº?n
)。如果我使用PHP来输出html,我只需设置 meta 标记(charset=utf-8),网页就能正确显示。 如果我没有设置meta标记,Chrome打开时会把编码检测为windows-1258;手动把编码改为Unicode(utf-8)后,网页就能正确显示

问题是:当我使用jdbc从mysql选择数据时,我会这样转换:

    byte[] asciiBytes1 = "Äá»— tiến".getBytes("Cp1258");
    byte[] asciiBytes2 = "Äá»— tiến".getBytes("ISO-8859-1");
    String unicode1 = new String(asciiBytes1, "UTF-8");
    String unicode2 = new String(asciiBytes2, "UTF-8");
    System.out.println(unicode1);//�?ỗ tiến
    System.out.println(unicode2);//Đ�? tiến
因此,java无法正确转换,我尝试了许多编码,不仅是Cp1258和ISO-8859-1,而且都不起作用。 转换的2个简单方法是使用html文件和我前面提到的
Äá»-tiáºn
字符串,或者使用记事本++,设置编码ANSI,粘贴
Äá»-tiáºn
字符串,然后更改为utf-8,它将显示
Đỗ tiến
(是我想要的正确字符串)

试试这个

byte[] asciiBytes1 = "Äá»— tiến".getBytes("Cp1258");
byte[] asciiBytes2 = "Äá»— tiến".getBytes("ISO-8859-1");
String unicode1 = new String(asciiBytes1, "Cp1258");
String unicode2 = new String(asciiBytes2, "ISO-8859-1");
System.out.println(unicode1);//�?ỗ tiến
System.out.println(unicode2);//Đ�? tiến

这有点复杂:数据使用的是修改过的Windows-1252,其中通常未分配的0x81、0x8d、0x8f、0x90和0x9d被替换为相应的C1控制字符。Java在使用Windows-1252时,默认情况下似乎没有考虑到这一点

最简单的办法是修复数据库,并在所有地方都使用UTF-8

这是代码

/**
 * Encodes a string to bytes using the "modified" Windows-1252 table, i.e.
 * Windows-1252 in which the normally unassigned bytes 0x81, 0x8D, 0x8F, 0x90
 * and 0x9D are mapped to the C1 control code points of the same value.
 *
 * Every UTF-16 code unit of the input yields exactly one output byte, so the
 * result always has length {@code str.length()}. Code units that have no entry
 * in the table are encoded as '?' (0x3F).
 *
 * @param str the text to encode; must not be null
 * @return one byte per code unit of {@code str}
 */
public static byte[] getBytesModifiedW1252( String str ) {
    // Code points assigned to bytes 0x80..0x9F. Outside this range the table is
    // the identity mapping (byte value == code point), so no lookup is needed
    // there and no per-call map has to be built.
    final char[] c1Block = {
             0x20AC,0x0081,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021,0x02C6,0x2030,0x0160,0x2039,0x0152,0x008D,0x017D,0x008F
            ,0x0090,0x2018,0x2019,0x201C,0x201D,0x2022,0x2013,0x2014,0x02DC,0x2122,0x0161,0x203A,0x0153,0x009D,0x017E,0x0178
        };
    final byte replacement = (byte)0x003F; // '?'

    final int length = str.length();
    final byte[] ret = new byte[length];

    for( int i = 0; i < length; ++i ) {
        final char ch = str.charAt(i);

        // 0x00..0x7F and 0xA0..0xFF encode to the byte of the same value.
        if( ch < 0x80 || (ch >= 0xA0 && ch <= 0xFF) ) {
            ret[i] = (byte)ch;
            continue;
        }

        // Everything else is either one of the 32 special code points or unmappable.
        byte encoded = replacement;
        for( int j = 0; j < c1Block.length; ++j ) {
            if( c1Block[j] == ch ) {
                encoded = (byte)(0x80 + j);
                break;
            }
        }
        ret[i] = encoded;
    }

    return ret;
}

/**
 * Demo: repairs the mojibake form of "Đỗ tiến" by re-encoding it with the
 * modified Windows-1252 table and decoding the resulting bytes as UTF-8.
 */
public static void main(String args[]) throws UnsupportedEncodingException {
    // The mojibake text is spelled with Unicode escapes because its second
    // character is the invisible C1 control U+0090, which is easily dropped
    // when the literal is copied or the source file is re-encoded.
    final String mojibake = "\u00C4\u0090\u00E1\u00BB\u2014 ti\u00E1\u00BA\u00BFn";
    byte[] bytes = getBytesModifiedW1252( mojibake );
    System.out.println(new String(bytes, "UTF-8"));
    //Đỗ tiến
}
公共静态字节[]getBytesModifiedW1252(字符串str){
最终整数[]窗口1252={
0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000A、0x000B、0x000C、0x000D、0x000E、0x000F
、0x0010、0x0011、0x0012、0x0013、0x0014、0x0015、0x0016、0x0017、0x0018、0x0019、0x001A、0x001B、0x001C、0x001D、0x001E、0x001F
、0x0020、0x0021、0x0022、0x0023、0x0024、0x0025、0x0026、0x0027、0x0028、0x0029、0x002A、0x002B、0x002C、0x002D、0x002E、0x002F
,0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F
、0x0040、0x0041、0x0042、0x0043、0x0044、0x0045、0x0046、0x0047、0x0048、0x0049、0x004A、0x004B、0x004C、0x004D、0x004E、0x004F
、0x0050、0x0051、0x0052、0x0053、0x0054、0x0055、0x0056、0x0057、0x0058、0x0059、0x005A、0x005B、0x005C、0x005D、0x005E、0x005F
,0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F
、0x0070、0x0071、0x0072、0x0073、0x0074、0x0075、0x0076、0x0077、0x0078、0x0079、0x007A、0x007B、0x007C、0x007D、0x007E、0x007F
,0x20AC,0x0081,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021,0x02C6,0x2030,0x0160,0x2039,0x0152,0x008D,0x017D,0x008F
、0x0090、0x2018、0x2019、0x201C、0x201D、0x2022、0x2013、0x2014、0x02DC、0x2122、0x0161、0x203A、0x0153、0x009D、0x017E、0x0178
、0x00A0、0x00A1、0x00A2、0x00A3、0x00A4、0x00A5、0x00A6、0x00A7、0x00A8、0x00A9、0x00AA、0x00AB、0x00AC、0x00AD、0x00AE、0x00AF
,0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7,0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF
、0x00C0、0x00C1、0x00C2、0x00C3、0x00C4、0x00C5、0x00C6、0x00C7、0x00C8、0x00C9、0x00CA、0x00CB、0x00CC、0x00CD、0x00CE、0x00CF
,0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF
,0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF
,0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7,0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x00FF
};
Map Map=newhashmap();
对于(int i=0;i
相反的是:

/**
 * Decodes bytes to a string using the "modified" Windows-1252 table, i.e.
 * Windows-1252 in which bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D decode to the
 * C1 control code points of the same value.
 *
 * Each input byte produces exactly one char in the result.
 *
 * @param bytes the bytes to decode; must not be null
 * @return the decoded text, with length {@code bytes.length}
 */
public static String getStringModifiedW1252( byte[] bytes ) {

    // Code points for bytes 0x80..0x9F; every other byte value decodes to the
    // code point equal to its unsigned value.
    final char[] c1Block = {
             0x20AC,0x0081,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021,0x02C6,0x2030,0x0160,0x2039,0x0152,0x008D,0x017D,0x008F
            ,0x0090,0x2018,0x2019,0x201C,0x201D,0x2022,0x2013,0x2014,0x02DC,0x2122,0x0161,0x203A,0x0153,0x009D,0x017E,0x0178
        };

    final char[] decoded = new char[bytes.length];
    int pos = 0;

    for( byte raw : bytes ) {
        // Java bytes are signed; mask to get the table index 0..255.
        final int unsigned = raw & 0xFF;
        if( unsigned >= 0x80 && unsigned <= 0x9F ) {
            decoded[pos++] = c1Block[unsigned - 0x80];
        } else {
            decoded[pos++] = (char)unsigned;
        }
    }

    return new String(decoded);

}

/**
 * Demo: shows how correctly encoded UTF-8 text appears once its bytes are
 * misread through the modified Windows-1252 table.
 */
public static void main(String args[]) throws UnsupportedEncodingException {
    final byte[] utf8Bytes = "Đỗ tiến".getBytes("UTF-8");
    System.out.println(getStringModifiedW1252( utf8Bytes ));
    // Prints the mojibake form: Äá»— tiến (with an invisible U+0090 after the Ä)
}
公共静态字符串getStringModifiedW1252(字节[]字节){
最终整数[]窗口1252={
0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000A、0x000B、0x000C、0x000D、0x000E、0x000F
、0x0010、0x0011、0x0012、0x0013、0x0014、0x0015、0x0016、0x0017、0x0018、0x0019、0x001A、0x001B、0x001C、0x001D、0x001E、0x001F
、0x0020、0x0021、0x0022、0x0023、0x0024、0x0025、0x0026、0x0027、0x0028、0x0029、0x002A、0x002B、0x002C、0x002D、0x002E、0x002F
,0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F
、0x0040、0x0041、0x0042、0x0043、0x0044、0x0045、0x0046、0x0047、0x0048、0x0049、0x004A、0x004B、0x004C、0x004D、0x004E、0x004F
、0x0050、0x0051、0x0052、0x0053、0x0054、0x0055、0x0056、0x0057、0x0058、0x0059、0x005A、0x005B、0x005C、0x005D、0x005E、0x005F
,0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F
、0x0070、0x0071、0x0072、0x0073、0x0074、0x0075、0x0076、0x0077、0x0078、0x0079、0x007A、0x007B、0x007C、0x007D、0x007E、0x007F
,0x20AC,0x0081,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021,0x02C6,0x2030,0x0160,0x2039,0x0152,0x008D,0x017D,0x008F
、0x0090、0x2018、0x2019、0x201C、0x201D、0x2022、0x2013、0x2014、0x02DC、0x2122、0x0161、0x203A、0x0153、0x009D、0x017E、0x0178
、0x00A0、0x00A1、0x00A2、0x00A3、0x00A4、0x00A5、0x00A6、0x00A7、0x00A8、0x00A9、0x00AA、0x00AB、0x00AC、0x00AD、0x00AE、0x00AF
,0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7,0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF
、0x00C0、0x00C1、0x00C2、0x00C3、0x00C4、0x00C5、0x00C6、0x00C7、0x00C8、0x00C9、0x00CA、0x00CB、0x00CC、0x00CD、0x00CE、0x00CF
,0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,0x00D8,0x00D9,0