从ANSI到UTF8的Java文件编码转换
我需要将文件的编码从ANSI(windows-1252)转换为UTF-8,并编写了下面的Java程序来实现。该程序确实把字符转换成了UTF-8,但在Notepad++中打开转换后的文件时,编码类型显示为“ANSI as UTF-8”,这导致我把该文件导入Access数据库时出错。我需要的是纯UTF-8编码的文件,并且转换过程不能依赖在任何编辑器中打开文件。
标签:java, file, encoding
public class ConvertFromAnsiToUtf8 {
private static final char BYTE_ORDER_MARK = '\uFEFF';
private static final String ANSI_CODE = "windows-1252";
private static final String UTF_CODE = "UTF8";
private static final Charset ANSI_CHARSET = Charset.forName(ANSI_CODE);
public static void main(String[] args) {
List<File> fileList;
File inputFolder = new File(args[0]);
if (!inputFolder.isDirectory()) {
return;
}
File parentDir = new File(inputFolder.getParent() + "\\"
+ inputFolder.getName() + "_converted");
if (parentDir.exists()) {
return;
}
if (parentDir.mkdir()) {
} else {
return;
}
fileList = new ArrayList<File>();
for (final File fileEntry : inputFolder.listFiles()) {
fileList.add(fileEntry);
}
InputStream in;
Reader reader = null;
Writer writer = null;
try {
for (File file : fileList) {
in = new FileInputStream(file.getAbsoluteFile());
reader = new InputStreamReader(in, ANSI_CHARSET);
OutputStream out = new FileOutputStream(
parentDir.getAbsoluteFile() + "\\"
+ file.getName());
writer = new OutputStreamWriter(out, UTF_CODE);
writer.write(BYTE_ORDER_MARK);
char[] buffer = new char[10];
int read;
while ((read = reader.read(buffer)) != -1) {
System.out.println(read);
writer.write(buffer, 0, read);
}
}
reader.close();
writer.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
公共类ConvertFromAnsiToUtf8{
私有静态最终字符字节\顺序\标记='\uFEFF';
私有静态最终字符串ANSI_CODE=“windows-1252”;
私有静态最终字符串UTF_CODE=“UTF8”;
私有静态最终字符集ANSI_Charset=Charset.forName(ANSI_代码);
公共静态void main(字符串[]args){
列表文件列表;
File inputFolder=新文件(args[0]);
如果(!inputFolder.isDirectory()){
返回;
}
File parentDir=新文件(inputFolder.getParent()+“\\”
+inputFolder.getName()+“_转换”);
if(parentDir.exists()){
返回;
}
if(parentDir.mkdir()){
}否则{
返回;
}
fileList=newarraylist();
对于(最终文件条目:inputFolder.listFiles()){
fileList.add(fileEntry);
}
输入流输入;
Reader=null;
Writer=null;
试一试{
用于(文件:文件列表){
in=新文件输入流(file.getAbsoluteFile());
reader=新的InputStreamReader(in,ANSI_字符集);
OutputStream out=新文件OutputStream(
parentDir.getAbsoluteFile()+“\\”
+getName());
writer=新的OutputStreamWriter(输出,UTF_代码);
writer.write(字节顺序标记);
char[]buffer=新字符[10];
int-read;
while((read=reader.read(buffer))!=-1){
系统输出打印项次(读取);
writer.write(缓冲区,0,读取);
}
}
reader.close();
writer.close();
}catch(filenotfounde异常){
e、 printStackTrace();
}捕获(不支持的编码异常e){
e、 printStackTrace();
}捕获(IOE异常){
e、 printStackTrace();
}
}
}
任何提示都会有帮助
谢谢,
Ashish
发布的代码能够正确地将windows-1252转码为UTF-8。Notepad++的提示令人困惑,因为“ANSI as UTF-8”并没有明确的含义;这似乎是Notepad++中的一个缺陷。我认为Notepad++的意思是“不带BOM的UTF-8”(参见其“编码”菜单)。Microsoft Access作为Windows程序,可能要求UTF-8文件以字节顺序标记(BOM)开头。在文件开头写入码点U+FEFF,即可为文档加上BOM:
import java.io.*;
import java.nio.charset.*;
/**
 * Transcodes one file from windows-1252 to UTF-8, writing a byte-order mark first.
 *
 * <p>Usage: {@code java Ansi1252ToUtf8 <sourceFile> <targetFile>}.
 */
public class Ansi1252ToUtf8 {
    private static final char BYTE_ORDER_MARK = '\uFEFF';

    /**
     * Reads {@code args[0]} as windows-1252 and writes it to {@code args[1]} as UTF-8 with a
     * leading U+FEFF.
     *
     * @param args source path followed by target path
     * @throws IOException if either file cannot be opened, read or written
     */
    public static void main(String[] args) throws IOException {
        final Charset sourceCharset = Charset.forName("windows-1252");
        try (InputStream rawIn = new FileInputStream(args[0]);
                Reader decoded = new InputStreamReader(rawIn, sourceCharset);
                OutputStream rawOut = new FileOutputStream(args[1]);
                Writer encoded = new OutputStreamWriter(rawOut, StandardCharsets.UTF_8)) {
            encoded.write(BYTE_ORDER_MARK);
            pump(decoded, encoded);
        }
    }

    /** Copies all remaining characters from {@code source} to {@code sink} in 1024-char chunks. */
    private static void pump(Reader source, Writer sink) throws IOException {
        final char[] chunk = new char[1024];
        for (int count = source.read(chunk); count != -1; count = source.read(chunk)) {
            sink.write(chunk, 0, count);
        }
    }
}
在运行Java 8的Windows 7(64位)上,我必须逐个关闭每个文件,否则输出文件会被截断为4KB的整数倍。只关闭最后一个文件的读写流是不够的,必须在处理完每个文件后立即关闭,才能得到完整的结果。下面贴出我修改后的版本,其中增加了错误提示信息:
import java.io.*;
import java.nio.charset.*;
import java.util.ArrayList;
public class ConvertFromAnsiToUtf8 {
private static final char BYTE_ORDER_MARK = '\uFEFF';
private static final String ANSI_CODE = "windows-1252";
private static final String UTF_CODE = "UTF8";
private static final Charset ANSI_CHARSET = Charset.forName(ANSI_CODE);
private static final String PATH_SEP = "\\";
private static final boolean WRITE_BOM = false;
public static void main(String[] args)
{
if (args.length != 2) {
System.out.println("Please name a source and a target directory");
return;
}
File inputFolder = new File(args[0]);
if (!inputFolder.isDirectory()) {
System.out.println("Input folder " + inputFolder + " does not exist");
return;
}
File outputFolder = new File(args[1]);
if (outputFolder.exists()) {
System.out.println("Folder " + outputFolder + " exists - aborting");
return;
}
if (outputFolder.mkdir()) {
System.out.println("Placing converted files in " + outputFolder);
} else {
System.out.println("Output folder " + outputFolder + " exists - aborting");
return;
}
ArrayList<File> fileList = new ArrayList<File>();
for (final File fileEntry : inputFolder.listFiles()) {
fileList.add(fileEntry);
}
InputStream in;
Reader reader = null;
Writer writer = null;
int converted = 0;
try {
for (File file : fileList) {
try {
in = new FileInputStream(file.getAbsoluteFile());
reader = new InputStreamReader(in, ANSI_CHARSET);
OutputStream out = new FileOutputStream(outputFolder.getAbsoluteFile() + PATH_SEP + file.getName());
writer = new OutputStreamWriter(out, UTF_CODE);
if (WRITE_BOM)
writer.write(BYTE_ORDER_MARK);
char[] buffer = new char[1024];
int read;
while ((read = reader.read(buffer)) != -1) {
writer.write(buffer, 0, read);
}
++converted;
} finally {
reader.close();
writer.close();
}
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
System.out.println(converted + " files converted");
}
}
import java.io.*;
导入java.nio.charset.*;
导入java.util.ArrayList;
公共类ConvertFromAnsiToUtf8{
私有静态最终字符字节\顺序\标记='\uFEFF';
私有静态最终字符串ANSI_CODE=“windows-1252”;
私有静态最终字符串UTF_CODE=“UTF8”;
私有静态最终字符集ANSI_Charset=Charset.forName(ANSI_代码);
私有静态最终字符串路径\u SEP=“\\”;
私有静态最终布尔写_BOM=false;
公共静态void main(字符串[]args)
{
如果(参数长度!=2){
System.out.println(“请指定源目录和目标目录”);
返回;
}
File inputFolder=新文件(args[0]);
如果(!inputFolder.isDirectory()){
System.out.println(“输入文件夹”+输入文件夹+”不存在);
返回;
}
File outputFolder=新文件(args[1]);
if(outputFolder.exists()){
System.out.println(“文件夹”+outputFolder+“存在-正在中止”);
返回;
}
if(outputFolder.mkdir()){
System.out.println(“将转换的文件放入”+outputFolder);
}否则{
System.out.println(“输出文件夹”+outputFolder+“存在-中止”);
返回;
}
ArrayList fileList=新建ArrayList();
对于(最终文件条目:inputFolder.listFiles()){
fileList.add(fileEntry);
}
输入流输入;
Reader=null;
Writer=null;
int=0;
试一试{
用于(文件:文件列表){
试一试{
in=新文件输入流(file.getAbsoluteFile());
reader=新的InputStreamReader(in,ANSI_字符集);
OutputStream out=新文件OutputStream(outputFolder.getAbsoluteFile()+路径+文件.getName());
writer=新的OutputStreamWriter(输出,UTF_代码);
如果(写入物料清单)
writer.write(字节顺序标记);
char[]buffer=新字符[1024];
int-read;
while((read=reader.read(buffer))!=-1){
writer.write(缓冲区,0,读取);
}
++转化的;
}最后{
reader.close();
writer.close();
}
}
}catch(filenotfounde异常){
e、 printStackTrace();
}捕获(不支持的编码异常e){
e、 printStackTrace();
}捕获(IOE异常){