Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/java/360.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
从ANSI到UTF8的Java文件编码转换_Java_File_Encoding - Fatal编程技术网

从ANSI到UTF8的Java文件编码转换

从ANSI到UTF8的Java文件编码转换,java,file,encoding,Java,File,Encoding,我需要将文件的编码从ANSI(windows-1252)更改为UTF8。我写了下面的程序来通过java实现它。这个程序将字符转换为UTF8,但是当我在notepad++中打开文件时,编码类型显示为ANSI UTF8。这使我在access db中导入此文件时出错。只需要UTF8编码的文件。此外,还需要转换文件,而无需在任何编辑器中打开它 public class ConvertFromAnsiToUtf8 { private static final char BYTE_ORDER_MA

我需要将文件的编码从ANSI(windows-1252)更改为UTF8。我写了下面的程序来通过java实现它。这个程序将字符转换为UTF8,但是当我在notepad++中打开文件时,编码类型显示为“ANSI as UTF-8”。这使我在access db中导入此文件时出错。我需要的是纯UTF8编码的文件。此外,还需要在不用任何编辑器打开文件的情况下完成转换

public class ConvertFromAnsiToUtf8 {

    private static final char BYTE_ORDER_MARK = '\uFEFF';
    private static final String ANSI_CODE = "windows-1252";
    private static final String UTF_CODE = "UTF8";
    private static final Charset ANSI_CHARSET = Charset.forName(ANSI_CODE);

    public static void main(String[] args) {

        List<File> fileList;
        File inputFolder = new File(args[0]);
        if (!inputFolder.isDirectory()) {
            return;
        }
        File parentDir = new File(inputFolder.getParent() + "\\"
                    + inputFolder.getName() + "_converted");

        if (parentDir.exists()) {
            return;
        }
        if (parentDir.mkdir()) {

        } else {
            return;
        }

        fileList = new ArrayList<File>();
        for (final File fileEntry : inputFolder.listFiles()) {
            fileList.add(fileEntry);
        }

        InputStream in;

        Reader reader = null;
        Writer writer = null;
        try {
            for (File file : fileList) {
                in = new FileInputStream(file.getAbsoluteFile());
                reader = new InputStreamReader(in, ANSI_CHARSET);

                OutputStream out = new FileOutputStream(
                            parentDir.getAbsoluteFile() + "\\"
                                            + file.getName());
                writer = new OutputStreamWriter(out, UTF_CODE);
                writer.write(BYTE_ORDER_MARK);
                char[] buffer = new char[10];
                int read;
                while ((read = reader.read(buffer)) != -1) {
                    System.out.println(read);
                    writer.write(buffer, 0, read);
                }
            }
            reader.close();
            writer.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
public class ConvertFromAnsiToUtf8 {

    private static final char BYTE_ORDER_MARK = '\uFEFF';
    private static final String ANSI_CODE = "windows-1252";
    private static final String UTF_CODE = "UTF8";
    private static final Charset ANSI_CHARSET = Charset.forName(ANSI_CODE);

    public static void main(String[] args) {

        List<File> fileList;
        File inputFolder = new File(args[0]);
        if (!inputFolder.isDirectory()) {
            return;
        }
        File parentDir = new File(inputFolder.getParent() + "\\"
                    + inputFolder.getName() + "_converted");

        if (parentDir.exists()) {
            return;
        }
        if (parentDir.mkdir()) {

        } else {
            return;
        }

        fileList = new ArrayList<File>();
        for (final File fileEntry : inputFolder.listFiles()) {
            fileList.add(fileEntry);
        }

        InputStream in;

        Reader reader = null;
        Writer writer = null;
        try {
            for (File file : fileList) {
                in = new FileInputStream(file.getAbsoluteFile());
                reader = new InputStreamReader(in, ANSI_CHARSET);

                OutputStream out = new FileOutputStream(
                            parentDir.getAbsoluteFile() + "\\"
                                            + file.getName());
                writer = new OutputStreamWriter(out, UTF_CODE);
                writer.write(BYTE_ORDER_MARK);
                char[] buffer = new char[10];
                int read;
                while ((read = reader.read(buffer)) != -1) {
                    System.out.println(read);
                    writer.write(buffer, 0, read);
                }
            }
            reader.close();
            writer.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
任何提示都会有帮助

谢谢,
Ashish

发布的代码正确地从windows-1252转码到UTF-8

Notepad++消息令人困惑,因为“ANSI as UTF-8”没有明显的含义;它似乎是Notepad++中一个尚未修复的缺陷。我相信Notepad++指的是不带BOM的UTF-8(参见“编码”菜单)

Microsoft Access作为一个Windows程序,可能希望UTF-8文件以字节顺序标记(BOM)开头

通过在文件开头写入代码点U+FEFF,可以将BOM插入到文档中:

import java.io.*;
import java.nio.charset.*;

/**
 * Command-line tool that re-encodes a single windows-1252 file as UTF-8 and
 * prefixes the output with a byte order mark.
 *
 * <p>Usage: {@code java Ansi1252ToUtf8 <source-file> <target-file>}
 */
public class Ansi1252ToUtf8 {
  /** U+FEFF, written as the very first character of the output. */
  private static final char BOM = '\uFEFF';

  /**
   * Reads {@code args[0]} as windows-1252 and writes it to {@code args[1]} as
   * UTF-8, BOM first.
   *
   * @param args source path followed by target path
   * @throws IOException if either file cannot be opened, read or written
   */
  public static void main(String[] args) throws IOException {
    Charset sourceCharset = Charset.forName("windows-1252");
    // Resources are opened in declaration order and closed in reverse.
    try (
        InputStream rawIn = new FileInputStream(args[0]);
        Reader decoded = new InputStreamReader(rawIn, sourceCharset);
        OutputStream rawOut = new FileOutputStream(args[1]);
        Writer encoded = new OutputStreamWriter(rawOut, StandardCharsets.UTF_8)
    ) {
      encoded.write(BOM);
      pump(decoded, encoded);
    }
  }

  /** Copies every character from {@code from} to {@code to} until end of input. */
  private static void pump(Reader from, Writer to) throws IOException {
    char[] chunk = new char[1024];
    for (int count = from.read(chunk); count != -1; count = from.read(chunk)) {
      to.write(chunk, 0, count);
    }
  }
}
在运行Java8的Windows7(64位)上,我必须关闭每个文件。否则,文件将被截断为4KB的倍数。仅仅关闭最后一组文件是不够的,我必须关闭每个文件才能得到想要的结果。发布添加错误消息的我的修改版本:

import java.io.*;
import java.nio.charset.*;
import java.util.ArrayList;

public class ConvertFromAnsiToUtf8 {

    private static final char BYTE_ORDER_MARK = '\uFEFF';
    private static final String ANSI_CODE = "windows-1252";
    private static final String UTF_CODE = "UTF8";
    private static final Charset ANSI_CHARSET = Charset.forName(ANSI_CODE);
    private static final String PATH_SEP = "\\";
    private static final boolean WRITE_BOM = false;

    public static void main(String[] args) 
    {
        if (args.length != 2) {
            System.out.println("Please name a source and a target directory");
            return;
        }

        File inputFolder = new File(args[0]);
        if (!inputFolder.isDirectory()) {
            System.out.println("Input folder " + inputFolder + " does not exist");
            return;
        }
        File outputFolder = new File(args[1]);

        if (outputFolder.exists()) {
            System.out.println("Folder " + outputFolder + " exists - aborting");
            return;
        }
        if (outputFolder.mkdir()) {
            System.out.println("Placing converted files in " + outputFolder);
        } else {
            System.out.println("Output folder " + outputFolder + " exists - aborting");
            return;
        }

        ArrayList<File> fileList = new ArrayList<File>();
        for (final File fileEntry : inputFolder.listFiles()) {
            fileList.add(fileEntry);
        }

        InputStream in;
        Reader reader = null;
        Writer writer = null;
        int converted = 0;

        try {
            for (File file : fileList) {
                try {
                    in = new FileInputStream(file.getAbsoluteFile());
                    reader = new InputStreamReader(in, ANSI_CHARSET);

                    OutputStream out = new FileOutputStream(outputFolder.getAbsoluteFile() + PATH_SEP + file.getName());
                    writer = new OutputStreamWriter(out, UTF_CODE);

                    if (WRITE_BOM)
                        writer.write(BYTE_ORDER_MARK);
                    char[] buffer = new char[1024];
                    int read;
                    while ((read = reader.read(buffer)) != -1) {
                        writer.write(buffer, 0, read);
                    }
                    ++converted;
                } finally {
                    reader.close();
                    writer.close();
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

        System.out.println(converted + " files converted");
    }

}
import java.io.*;
导入java.nio.charset.*;
导入java.util.ArrayList;
公共类ConvertFromAnsiToUtf8{
私有静态最终字符字节\顺序\标记='\uFEFF';
私有静态最终字符串ANSI_CODE=“windows-1252”;
私有静态最终字符串UTF_CODE=“UTF8”;
私有静态最终字符集ANSI_Charset=Charset.forName(ANSI_代码);
私有静态最终字符串路径\u SEP=“\\”;
私有静态最终布尔写_BOM=false;
公共静态void main(字符串[]args)
{
如果(参数长度!=2){
System.out.println(“请指定源目录和目标目录”);
返回;
}
File inputFolder=新文件(args[0]);
如果(!inputFolder.isDirectory()){
System.out.println(“输入文件夹”+输入文件夹+”不存在);
返回;
}
File outputFolder=新文件(args[1]);
if(outputFolder.exists()){
System.out.println(“文件夹”+outputFolder+“存在-正在中止”);
返回;
}
if(outputFolder.mkdir()){
System.out.println(“将转换的文件放入”+outputFolder);
}否则{
System.out.println(“输出文件夹”+outputFolder+“存在-中止”);
返回;
}
ArrayList fileList=新建ArrayList();
对于(最终文件条目:inputFolder.listFiles()){
fileList.add(fileEntry);
}
输入流输入;
Reader=null;
Writer=null;
int=0;
试一试{
用于(文件:文件列表){
试一试{
in=新文件输入流(file.getAbsoluteFile());
reader=新的InputStreamReader(in,ANSI_字符集);
OutputStream out=新文件OutputStream(outputFolder.getAbsoluteFile()+路径+文件.getName());
writer=新的OutputStreamWriter(输出,UTF_代码);
如果(写入物料清单)
writer.write(字节顺序标记);
char[]buffer=新字符[1024];
int-read;
while((read=reader.read(buffer))!=-1){
writer.write(缓冲区,0,读取);
}
++转化的;
}最后{
reader.close();
writer.close();
}
}
}catch(filenotfounde异常){
e、 printStackTrace();
}捕获(不支持的编码异常e){
e、 printStackTrace();
}捕获(IOE异常){