Java 从xml中读取和解析汉字
当我从xml文件中读取汉字时,我得到了一些非法或编码错误的字符。我甚至无法使用DOM/SAX解析xml文件。我试图指定编码“UTF-8”,但仍然没有得到正确的输出。有时我会得到问号(?)而不是汉字 我的要求是,我有一个带有中文字符的xml文件。我需要读取并解析文件中的汉字,然后我需要将其按原样放回另一个文件中。 请帮我解决这个问题。这是我的密码 TestMain.javaJava 从xml中读取和解析汉字,java,character-encoding,Java,Character Encoding,当我从xml文件中读取汉字时,我得到了一些非法或编码错误的字符。我甚至无法使用DOM/SAX解析xml文件。我试图指定编码“UTF-8”,但仍然没有得到正确的输出。有时我会得到问号(?)而不是汉字 我的要求是,我有一个带有中文字符的xml文件。我需要读取并解析文件中的汉字,然后我需要将其按原样放回另一个文件中。 请帮我解决这个问题。这是我的密码 TestMain.java import java.io.BufferedReader; import java.io.ByteArrayInputSt
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.StringWriter;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
public class TestMain {
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("C:\\temp\\myInputFile.txt")));
StringBuilder out = new StringBuilder();
String line;
while ((line = reader.readLine()) != null) {
out.append(line);
}
reader.close();
System.out.println(out.toString());
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(new ByteArrayInputStream(out.toString().getBytes("UTF-8")));
DOMSource domSource = new DOMSource(doc);
StringWriter writer = new StringWriter();
StreamResult result = new StreamResult(writer);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer = tf.newTransformer();
transformer.transform(domSource, result);
JAXBContext context = JAXBContext.newInstance(Sender.class);
Unmarshaller unmarshaller = context.createUnmarshaller();
Sender sender = (Sender) unmarshaller.unmarshal(new ByteArrayInputStream(writer.toString().getBytes("UTF-8")));
System.out.println(sender.toString());
FileOutputStream fos = new FileOutputStream("C:\\temp\\myOutputFile.txt");
fos.write(sender.toString().getBytes());
fos.flush();
fos.close();
}
}
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlType;
@XmlAccessorType(XmlAccessType.FIELD)
@XmlType(name = "", propOrder = { "name" })
@XmlRootElement(name = "sender")
public class Sender {
@XmlElement(required = true)
protected String name;
public String getName() {
return name;
}
public void setName(String value) {
this.name = value;
}
@Override
public String toString() {
// TODO Auto-generated method stub
return "<sender><name>"+this.name+"</name></sender>";
}
}
Sender.java
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.StringWriter;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
public class TestMain {
public static void main(String[] args) throws Exception {
// TODO Auto-generated method stub
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("C:\\temp\\myInputFile.txt")));
StringBuilder out = new StringBuilder();
String line;
while ((line = reader.readLine()) != null) {
out.append(line);
}
reader.close();
System.out.println(out.toString());
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(new ByteArrayInputStream(out.toString().getBytes("UTF-8")));
DOMSource domSource = new DOMSource(doc);
StringWriter writer = new StringWriter();
StreamResult result = new StreamResult(writer);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer = tf.newTransformer();
transformer.transform(domSource, result);
JAXBContext context = JAXBContext.newInstance(Sender.class);
Unmarshaller unmarshaller = context.createUnmarshaller();
Sender sender = (Sender) unmarshaller.unmarshal(new ByteArrayInputStream(writer.toString().getBytes("UTF-8")));
System.out.println(sender.toString());
FileOutputStream fos = new FileOutputStream("C:\\temp\\myOutputFile.txt");
fos.write(sender.toString().getBytes());
fos.flush();
fos.close();
}
}
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlType;
@XmlAccessorType(XmlAccessType.FIELD)
@XmlType(name = "", propOrder = { "name" })
@XmlRootElement(name = "sender")
public class Sender {
@XmlElement(required = true)
protected String name;
public String getName() {
return name;
}
public void setName(String value) {
this.name = value;
}
@Override
public String toString() {
// TODO Auto-generated method stub
return "<sender><name>"+this.name+"</name></sender>";
}
}
import javax.xml.bind.annotation.XmlAccessType;
导入javax.xml.bind.annotation.XmlAccessorType;
导入javax.xml.bind.annotation.xmlement;
导入javax.xml.bind.annotation.XmlRootElement;
导入javax.xml.bind.annotation.XmlType;
@XmlAccessorType(XmlAccessType.FIELD)
@XmlType(name=”“,proporter={“name”})
@XmlRootElement(name=“sender”)
公共类发送器{
@XmlElement(必需=true)
受保护的字符串名称;
公共字符串getName(){
返回名称;
}
公共void集合名(字符串值){
this.name=值;
}
@凌驾
公共字符串toString(){
//TODO自动生成的方法存根
返回“+this.name+”;
}
}
myInputFile.txt
<sender><name>奥迪普时装(深圳)有限公司</name></sender>
<sender><name>奥迪普时装(深圳)有陿公忸</name></sender>
奥迪普时装(深圳)有限公司
myOutputFile.txt
<sender><name>奥迪普时装(深圳)有限公司</name></sender>
<sender><name>奥迪普时装(深圳)有陿公忸</name></sender>
奥迪普时装(深圳)有陿公忸
在输出文件中,我们可以看到从右到左的第一个和第三个字符的差异。我已经找到了解决方案 我们需要在读取文件时对inputstream使用UTF-8字符集编码,并且需要使用PrintStream为outputstream设置UTF-8编码
//While reading the file
BufferedReader reader = new BufferedReader(new InputStreamReader(new
FileInputStream("C:\\temp\\myInputFile.txt"), "UTF-8"));
//While writing the file
PrintStream ps = new PrintStream(fos, true, "UTF-8");
ps.print(sender.toString());
ps.close();