Java 从xml中读取和解析汉字

Java 从xml中读取和解析汉字,java,character-encoding,Java,Character Encoding,当我从xml文件中读取汉字时,我得到了一些非法或编码错误的字符。我甚至无法使用DOM/SAX解析xml文件。我试图指定编码“UTF-8”,但仍然没有得到正确的输出。有时我会得到问号(?)而不是汉字 我的要求是,我有一个带有中文字符的xml文件。我需要读取并解析文件中的汉字,然后我需要将其按原样放回另一个文件中。 请帮我解决这个问题。这是我的密码 TestMain.java import java.io.BufferedReader; import java.io.ByteArrayInputSt

当我从xml文件中读取汉字时,我得到了一些非法或编码错误的字符。我甚至无法使用DOM/SAX解析xml文件。我试图指定编码“UTF-8”,但仍然没有得到正确的输出。有时我会得到问号(?)而不是汉字

我的要求是,我有一个带有中文字符的xml文件。我需要读取并解析文件中的汉字,然后我需要将其按原样放回另一个文件中。 请帮我解决这个问题。这是我的密码

TestMain.java

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.StringWriter;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;

public class TestMain {
    public static void main(String[] args) throws Exception {
        // TODO Auto-generated method stub
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("C:\\temp\\myInputFile.txt")));
        StringBuilder out = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            out.append(line);
        }
        reader.close();
        System.out.println(out.toString());
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse(new ByteArrayInputStream(out.toString().getBytes("UTF-8")));
        DOMSource domSource = new DOMSource(doc);
        StringWriter writer = new StringWriter();
        StreamResult result = new StreamResult(writer);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer transformer = tf.newTransformer();
        transformer.transform(domSource, result);
        JAXBContext context = JAXBContext.newInstance(Sender.class);
        Unmarshaller unmarshaller = context.createUnmarshaller();
        Sender sender = (Sender) unmarshaller.unmarshal(new ByteArrayInputStream(writer.toString().getBytes("UTF-8")));
        System.out.println(sender.toString());
        FileOutputStream fos = new FileOutputStream("C:\\temp\\myOutputFile.txt");
        fos.write(sender.toString().getBytes());
        fos.flush();
        fos.close();
    }
}
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlType;

@XmlAccessorType(XmlAccessType.FIELD)
@XmlType(name = "", propOrder = { "name" })
@XmlRootElement(name = "sender")
public class Sender {
    @XmlElement(required = true)
    protected String name;
    public String getName() {
        return name;
    }
    public void setName(String value) {
        this.name = value;
    }
    @Override
    public String toString() {
        // TODO Auto-generated method stub
        return "<sender><name>"+this.name+"</name></sender>";
    }
}
Sender.java

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.StringWriter;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.Unmarshaller;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;

public class TestMain {
    public static void main(String[] args) throws Exception {
        // TODO Auto-generated method stub
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("C:\\temp\\myInputFile.txt")));
        StringBuilder out = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            out.append(line);
        }
        reader.close();
        System.out.println(out.toString());
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse(new ByteArrayInputStream(out.toString().getBytes("UTF-8")));
        DOMSource domSource = new DOMSource(doc);
        StringWriter writer = new StringWriter();
        StreamResult result = new StreamResult(writer);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer transformer = tf.newTransformer();
        transformer.transform(domSource, result);
        JAXBContext context = JAXBContext.newInstance(Sender.class);
        Unmarshaller unmarshaller = context.createUnmarshaller();
        Sender sender = (Sender) unmarshaller.unmarshal(new ByteArrayInputStream(writer.toString().getBytes("UTF-8")));
        System.out.println(sender.toString());
        FileOutputStream fos = new FileOutputStream("C:\\temp\\myOutputFile.txt");
        fos.write(sender.toString().getBytes());
        fos.flush();
        fos.close();
    }
}
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlType;

@XmlAccessorType(XmlAccessType.FIELD)
@XmlType(name = "", propOrder = { "name" })
@XmlRootElement(name = "sender")
public class Sender {
    @XmlElement(required = true)
    protected String name;
    public String getName() {
        return name;
    }
    public void setName(String value) {
        this.name = value;
    }
    @Override
    public String toString() {
        // TODO Auto-generated method stub
        return "<sender><name>"+this.name+"</name></sender>";
    }
}
import javax.xml.bind.annotation.XmlAccessType;
导入javax.xml.bind.annotation.XmlAccessorType;
导入javax.xml.bind.annotation.xmlement;
导入javax.xml.bind.annotation.XmlRootElement;
导入javax.xml.bind.annotation.XmlType;
@XmlAccessorType(XmlAccessType.FIELD)
@XmlType(name=”“,proporter={“name”})
@XmlRootElement(name=“sender”)
公共类发送器{
@XmlElement(必需=true)
受保护的字符串名称;
公共字符串getName(){
返回名称;
}
公共void集合名(字符串值){
this.name=值;
}
@凌驾
公共字符串toString(){
//TODO自动生成的方法存根
返回“+this.name+”;
}
}
myInputFile.txt

<sender><name>奥迪普时装(深圳)有限公司</name></sender>
<sender><name>奥迪普时装(深圳)有陿公忸</name></sender>
奥迪普时装(深圳)有限公司
myOutputFile.txt

<sender><name>奥迪普时装(深圳)有限公司</name></sender>
<sender><name>奥迪普时装(深圳)有陿公忸</name></sender>
奥迪普时装(深圳)有陿公忸

在输出文件中,我们可以看到从右到左的第一个和第三个字符的差异。

我已经找到了解决方案

我们需要在读取文件时对inputstream使用UTF-8字符集编码,并且需要使用PrintStream为outputstream设置UTF-8编码

//While reading the file
BufferedReader reader = new BufferedReader(new InputStreamReader(new 
FileInputStream("C:\\temp\\myInputFile.txt"), "UTF-8"));

//While writing the file
PrintStream ps = new PrintStream(fos, true, "UTF-8");
ps.print(sender.toString());
ps.close();