Java:google-diff-match-patch 在某些文件上无法正常工作

Java:google-diff-match-patch 在某些文件上无法正常工作,java,Java,我正在使用 google-diff-match-patch 对文本文件进行比较。第一个输入是xml文件,第二个输入是xml文件,我首先对两个文件的文本做了规范化处理。它适用于许多文件,但在某些文件上工作不正常,结果显示这两个文件完全不同,但事实并非如此。我的代码是: package com.java.contentMatcher; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.u

我正在使用 google-diff-match-patch 对文本文件进行比较。第一个输入是xml文件,第二个输入是xml文件,我首先对两个文件的文本做了规范化处理。它适用于许多文件,但在某些文件上工作不正常,结果显示这两个文件完全不同,但事实并非如此。我的代码是:

package com.java.contentMatcher;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Properties;
import java.util.Scanner;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import com.java.diff.diff_match_patch;
import com.java.diff.diff_match_patch.Diff;
import com.java.diff.diff_match_patch.Operation;
/**
 * Command-line tool that pairs every {@code .html} file found under one directory with the
 * {@code .xml} file of the same base name found under another directory, strips markup from
 * both, diffs the remaining text with diff_match_patch and writes
 * <ul>
 *   <li>one pretty-printed HTML diff per matched pair into the output directory, and</li>
 *   <li>an {@code UnMatched.txt} file collecting all text that exists only in the HTML side.</li>
 * </ul>
 */
public class SGL_ContentMatcher {
    public static Properties CONFIG_PROPS;
    public static String input_path1, input_path2, compaired_file;
    /** Accumulated DELETE fragments of all compared pairs; written to {@code UnMatched.txt}. */
    public static String text1 = "";

    /**
     * Prompts for the HTML input directory, the XML input directory and the output directory,
     * then compares every HTML/XML pair that shares a base name.
     *
     * @param args ignored
     * @throws IOException if a file cannot be read or a result file cannot be written
     */
    public static void main(String[] args) throws IOException {
        // System.in is deliberately not closed: it is owned by the JVM, not by this method.
        Scanner scanIn = new Scanner(System.in);
        System.out.println("Enter  Html Input Directory : ");
        input_path1 = scanIn.nextLine();

        System.out.println("Enter  XML Input Directory : ");
        input_path2 = scanIn.nextLine();

        System.out.println("Enter  Output Directory : ");
        compaired_file = scanIn.nextLine();

        // listFiles is declared to return Collection<File>; casting it to LinkedList only works
        // by accident of the implementation, so iterate through the interface instead.
        Iterable<File> htmlCandidates = FileUtils.listFiles(new File(input_path1),
                TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE);
        Iterable<File> xmlCandidates = FileUtils.listFiles(new File(input_path2),
                TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE);

        StringBuilder unmatched = new StringBuilder(text1);

        for (File file : htmlCandidates) {
            if (!"html".equals(FilenameUtils.getExtension(file.getName()))) {
                continue;
            }
            String htmlBaseName = FilenameUtils.getBaseName(file.getName());

            for (File file2 : xmlCandidates) {
                if (!"xml".equals(FilenameUtils.getExtension(file2.getName()))
                        || !htmlBaseName.equals(FilenameUtils.getBaseName(file2.getName()))) {
                    continue;
                }
                System.out.println("file1" + file.getName() + "File2" + file2.getName());

                // NOTE(review): both files are decoded with the platform default charset, as
                // before; confirm the inputs really use that encoding.
                String xmlInput = normalize(FileUtils.readFileToString(file));
                String xmlInput2 = normalize(FileUtils.readFileToString(file2));

                diff_match_patch diff = new diff_match_patch();
                // With the default timeout (1 second) diff_main gives up on large inputs and
                // returns a coarse "delete everything / insert everything" result, which makes
                // similar files look completely different. 0 lets the diff run to completion.
                diff.Diff_Timeout = 0f;
                LinkedList<Diff> diffrences = diff.diff_main(xmlInput, xmlInput2);
                String str = diff.diff_prettyHtml(diffrences);

                FileUtils.writeStringToFile(new File(compaired_file + File.separator + file.getName()), str);

                // Only text missing from the XML side (DELETE) is reported.
                for (Diff d : diffrences) {
                    if (d.operation == Operation.DELETE) {
                        unmatched.append(' ').append(d.text);
                    }
                }
                unmatched.append(" ---->").append(file2.getName());
            }
        }

        text1 = unmatched.toString();
        FileUtils.writeStringToFile(new File(compaired_file, "UnMatched.txt"), text1);
    }

    /**
     * Reduces markup to plain text: line breaks and tabs become spaces, tags and character/entity
     * references are replaced by a space, and runs of whitespace collapse to a single space.
     */
    private static String normalize(String raw) {
        return raw.replaceAll("\\n+|\\r+|\\t+", " ").replaceAll("<.*?>", " ")
                .replaceAll("&#[a-zA-Z0-9]+;", " ").replaceAll("&[a-zA-Z0-9]+;", " ")
                .replaceAll("\\s+", " ");
    }
}
package com.java.contentMatcher;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Properties;
import java.util.Scanner;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import com.java.diff.diff_match_patch;
import com.java.diff.diff_match_patch.Diff;
import com.java.diff.diff_match_patch.Operation;
public class SGL_ContentMatcher {
public static Properties CONFIG_PROPS;
public static String input_path1, input_path2, compaired_file;
public static String text1 = "";
public static void main(String[] args) throws IOException {
    Scanner scanIn = new Scanner(System.in);
    System.out.println("Enter  Html Input Directory : ");
    input_path1 = scanIn.nextLine();
    System.out.println("Enter  XML Input Directory : ");
    input_path2 = scanIn.nextLine();
    System.out.println("Enter  Output Directory : ");
    compaired_file = scanIn.nextLine();
    LinkedList<File> input_list = (LinkedList<File>) FileUtils.listFiles(new File(input_path1),
            TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE);
    LinkedList<File> output_list = (LinkedList<File>) FileUtils.listFiles(new File(input_path2),
            TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE);
    for (Iterator iterator = input_list.iterator(); iterator.hasNext();) {
        File file = (File) iterator.next();
        for (Iterator iterator2 = output_list.iterator(); iterator2.hasNext();) {
            File file2 = (File) iterator2.next();
            String f1 = FilenameUtils.getExtension(file.getName());
            String f2 = FilenameUtils.getExtension(file2.getName());
            if (f1.equals("html") && f2.equals("xml")) {
                String filename1, filename2;
                int end = file.getName().lastIndexOf(FilenameUtils.getExtension(file.getName()));
                filename1 = file.getName().substring(0, end);
                end = file2.getName().lastIndexOf(FilenameUtils.getExtension(file2.getName()));
                filename2 = file2.getName().substring(0, end);
                System.out.println(filename1 + "---------------" + filename2);
                if (filename1.equals(filename2)) {
                    System.out.println("file1" + file.getName() + "File2" + file2.getName());
                    // first file
                    xmlInput = xmlInput.replaceAll("\\n+|\\r+|\\t+", " ").replaceAll("<.*?>", " ")
                            .replaceAll("&#[a-zA-Z0-9]+;", " ").replaceAll("&[a-zA-Z0-9]+;", " ")
                            .replaceAll("\\s+", " ");
                    String xmlInput2 = FileUtils.readFileToString(file2);
                    xmlInput2 = xmlInput2.replaceAll("\\n+|\\r+|\\t+", " ").replaceAll("<.*?>", " ")
                            .replaceAll("&#[a-zA-Z0-9]+;", " ").replaceAll("&[a-zA-Z0-9]+;", " ")
                            .replaceAll("\\s+", " ");
                    diff_match_patch diff = new diff_match_patch();
                    LinkedList<Diff> diffrences = diff.diff_main(xmlInput, xmlInput2);
                    String str = diff.diff_prettyHtml(diffrences);
                    FileUtils.writeStringToFile(new File(compaired_file + File.separator + file.getName()), str);
                    // String text1 = "";
                    String text2 = "";
                    String text3 = "";
                    for (Diff d : diffrences) {
                        if (d.operation == Operation.DELETE) {
                            text1 = text1 + " " + d.text;
                            System.err.println("text1" + text1 + "text2" + text2);
                        } else if (d.operation == Operation.INSERT) {
                            text2 = text2 + " " + d.text;
                            // System.err.println("text1"+text1+"text2"+text2);
                        } else if (d.operation == Operation.EQUAL) {
                            text3 = text3 + " " + d.text;
                        }
                    }
                   text1 = text1 + " ---->" + file2.getName();
                }
            }
        }
    }
    FileUtils.writeStringToFile(new File(compaired_file + "/UnMatched.txt"), text1);
}
}
google-diff-match-patch 库可从以下位置下载:
https://code.google.com/p/google-diff-match-patch/downloads/detail?name=diff_match_patch_20121119.zip
我的输入链接是:
https://drive.google.com/open?id=0B9sBq5p886c4VC1IOExLZnllcm8