Java:google-diff-match-patch 在某些文件上无法正常工作
我正在使用 google-diff-match-patch 比较两个文本文件。第一个输入是 HTML 文件,第二个输入是 XML 文件;比较之前,我先对两个文件的文本做了规范化处理。它对许多文件都能正常工作,但对某些文件却不能正常工作:结果显示这两个文件完全不同,而事实并非如此。我的代码是:
package com.java.contentMatcher;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Properties;
import java.util.Scanner;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.filefilter.TrueFileFilter;
import com.java.diff.diff_match_patch;
import com.java.diff.diff_match_patch.Diff;
import com.java.diff.diff_match_patch.Operation;
/**
 * Console tool that pairs every {@code .html} file found under one directory with the
 * {@code .xml} file of the same base name found under another directory, diffs their
 * tag-stripped text with diff-match-patch, and writes the results to an output directory.
 *
 * <p>For each matched pair an HTML rendering of the diff is written to
 * {@code <output>/<html file name>}. The text of all DELETE segments (present in the HTML
 * text but not in the XML text) is accumulated in {@link #text1} across all pairs and
 * written once, at the end, to {@code <output>/UnMatched.txt}.
 */
public class SGL_ContentMatcher {
    public static Properties CONFIG_PROPS;
    public static String input_path1, input_path2, compaired_file;
    /** Accumulated DELETE text of every compared pair; each pair's run ends with " ---->" + XML file name. */
    public static String text1 = "";

    /**
     * Prompts on standard input for the HTML directory, the XML directory and the output
     * directory, then compares every HTML/XML pair that shares a base name.
     *
     * @param args unused
     * @throws IOException if a file cannot be read or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        Scanner scanIn = new Scanner(System.in);
        System.out.println("Enter Html Input Directory : ");
        input_path1 = scanIn.nextLine();
        System.out.println("Enter XML Input Directory : ");
        input_path2 = scanIn.nextLine();
        System.out.println("Enter Output Directory : ");
        compaired_file = scanIn.nextLine();

        // listFiles is declared to return Collection<File>; copy it instead of downcasting,
        // so the code does not depend on which Collection implementation the library returns.
        LinkedList<File> input_list = new LinkedList<File>(
                FileUtils.listFiles(new File(input_path1), TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE));
        LinkedList<File> output_list = new LinkedList<File>(
                FileUtils.listFiles(new File(input_path2), TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE));

        for (File file : input_list) {
            // Only .html files on the first side take part; decide once per file, not once per pair.
            if (!"html".equals(FilenameUtils.getExtension(file.getName()))) {
                continue;
            }
            String filename1 = stripExtension(file.getName());

            for (File file2 : output_list) {
                if (!"xml".equals(FilenameUtils.getExtension(file2.getName()))) {
                    continue;
                }
                String filename2 = stripExtension(file2.getName());
                System.out.println(filename1 + "---------------" + filename2);
                if (!filename1.equals(filename2)) {
                    continue;
                }
                System.out.println("file1" + file.getName() + "File2" + file2.getName());
                comparePair(file, file2);
            }
        }
        FileUtils.writeStringToFile(new File(compaired_file + "/UnMatched.txt"), text1);
    }

    /**
     * Diffs the normalised text of one HTML/XML pair, writes the pretty-printed HTML diff
     * to the output directory and appends the pair's DELETE text to {@link #text1}.
     */
    private static void comparePair(File file, File file2) throws IOException {
        // first file (this read was missing in the original, leaving xmlInput undeclared)
        String xmlInput = normalize(FileUtils.readFileToString(file));
        // second file
        String xmlInput2 = normalize(FileUtils.readFileToString(file2));

        diff_match_patch diff = new diff_match_patch();
        LinkedList<Diff> diffrences = diff.diff_main(xmlInput, xmlInput2);
        String str = diff.diff_prettyHtml(diffrences);
        FileUtils.writeStringToFile(new File(compaired_file + File.separator + file.getName()), str);

        String text2 = "";
        for (Diff d : diffrences) {
            if (d.operation == Operation.DELETE) {
                text1 = text1 + " " + d.text;
                System.err.println("text1" + text1 + "text2" + text2);
            } else if (d.operation == Operation.INSERT) {
                text2 = text2 + " " + d.text;
            }
            // EQUAL segments are not reported anywhere.
        }
        text1 = text1 + " ---->" + file2.getName();
    }

    /**
     * Reduces markup to comparable plain text: line breaks and tabs become spaces, tags and
     * character/entity references are blanked out, and whitespace runs collapse to one space.
     */
    private static String normalize(String raw) {
        // Line breaks are replaced first so that "<.*?>" can also match tags that spanned lines.
        return raw.replaceAll("\\n+|\\r+|\\t+", " ").replaceAll("<.*?>", " ")
                .replaceAll("&#[a-zA-Z0-9]+;", " ").replaceAll("&[a-zA-Z0-9]+;", " ")
                .replaceAll("\\s+", " ");
    }

    /** Returns the file name with its extension removed; the separating dot is kept. */
    private static String stripExtension(String name) {
        return name.substring(0, name.length() - FilenameUtils.getExtension(name).length());
    }
}
package com.java.contentMatcher;
导入java.io.File;
导入java.io.FileInputStream;
导入java.io.IOException;
导入java.util.Iterator;
导入java.util.LinkedList;
导入java.util.Properties;
导入java.util.Scanner;
导入org.apache.commons.io.FileUtils;
导入org.apache.commons.io.FilenameUtils;
导入org.apache.commons.io.filefilter.TrueFileFilter;
导入com.java.diff.diff_匹配_补丁;
导入com.java.diff.diff_match_patch.diff;
导入com.java.diff.diff_match_patch.Operation;
公共类SGL_内容匹配器{
公共静态属性配置道具;
公共静态字符串输入路径1、输入路径2、比较文件;
公共静态字符串text1=“”;
公共静态void main(字符串[]args)引发IOException{
Scanner scanIn=新扫描仪(System.in);
System.out.println(“输入Html输入目录:”);
input_path1=scanIn.nextLine();
System.out.println(“输入XML输入目录:”);
input_path2=scanIn.nextLine();
System.out.println(“输入输出目录:”);
compaired_file=scanIn.nextLine();
LinkedList输入\列表=(LinkedList)FileUtils.listFiles(新文件(输入\路径1),
TrueFileFilter.INSTANCE,TrueFileFilter.INSTANCE);
LinkedList输出\列表=(LinkedList)FileUtils.listFiles(新文件(输入\路径2),
TrueFileFilter.INSTANCE,TrueFileFilter.INSTANCE);
for(Iterator Iterator=input_list.Iterator();Iterator.hasNext();){
File=(File)iterator.next();
for(迭代器迭代器2=output_list.Iterator();迭代器2.hasNext();){
File file2=(File)iterator2.next();
字符串f1=FilenameUtils.getExtension(file.getName());
字符串f2=FilenameUtils.getExtension(file2.getName());
if(f1.equals(“html”)和&f2.equals(“xml”)){
字符串filename1,filename2;
int end=file.getName().lastIndexOf(FilenameUtils.getExtension(file.getName());
filename1=file.getName().substring(0,end);
end=file2.getName().lastIndexOf(FilenameUtils.getExtension(file2.getName());
filename2=file2.getName().substring(0,end);
System.out.println(filename1+“--------------”+filename2);
if(filename1.equals(filename2)){
System.out.println(“file1”+file.getName()+“File2”+File2.getName());
//第一档
xmlInput=xmlInput.replaceAll(“\\n+|\\r+|\\t+”,”).replaceAll(“,”)
.replaceAll([a-zA-Z0-9]+;”).replaceAll(&[a-zA-Z0-9]+;”,“”)
.replaceAll(“\\s+”,”);
字符串xmlInput2=FileUtils.readFileToString(file2);
xmlInput2=xmlInput2.replaceAll(“\\n+\\r+\\t+”,”).replaceAll(“,”)
.replaceAll([a-zA-Z0-9]+;”).replaceAll(&[a-zA-Z0-9]+;”,“”)
.replaceAll(“\\s+”,”);
diff_match_patch diff=新的diff_match_patch();
LinkedList Differences=diff.diff_main(xmlInput,xmlInput2);
字符串str=diff.diff_prettyHtml(差异);
FileUtils.writeStringToFile(新文件(compaired_File+File.separator+File.getName()),str);
//字符串text1=“”;
字符串text2=“”;
字符串text3=“”;
用于(差异d:差异){
if(d.operation==operation.DELETE){
text1=text1+“”+d.text;
System.err.println(“text1”+text1+“text2”+text2);
}else if(d.operation==operation.INSERT){
text2=text2+“”+d.text;
//System.err.println(“text1”+text1+“text2”+text2);
}else if(d.operation==operation.EQUAL){
text3=text3+“”+d.text;
}
}
text1=text1+“--->”+file2.getName();
}
}
}
}
FileUtils.writeStringToFile(新文件(compaired_File+“/UnMatched.txt”),text1;
}
}
google-diff-match-patch 的 API 可从以下位置下载:
https://code.google.com/p/google-diff-match-patch/downloads/detail?name=diff_match_patch_20121119.zip
我的输入链接是:
https://drive.google.com/open?id=0B9sBq5p886c4VC1IOExLZnllcm8