如何使用 java-diff-utils 对 Java 中可能很大的单行文件进行差异比较(diff)?
我正在使用 java-diff-utils:它工作得很好,除非一个大文件的全部内容都在同一行中。我使用 BufferedReader 逐行读取文件,因此遇到了 Java 堆内存不足(OutOfMemoryError)异常。如何加载大型单行文件以用于 java-diff-utils?这是我的代码:如何使用 java-diff-utils 对 Java 中可能很大的单行文件进行差异比较(diff)?,java,diff,large-files,Java,Diff,Large Files,我正在使用 java-diff-utils:它工作得很好,除非一个大文件的全部内容都在同一行中。我使用 BufferedReader 逐行读取文件,因此遇到了 Java 堆内存不足(OutOfMemoryError)异常。如何加载大型单行文件以用于 java-diff-utils?这是我的代码: import difflib.*; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException;
import difflib.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.util.AbstractList;
import java.util.LinkedList;
import java.util.List;
/**
 * Small driver that diffs two classpath resources with java-diff-utils: first line by line,
 * then character by character inside each changed line, printing the position of every
 * character-level difference found in the original file.
 */
public class TestDiffUtils {

    public TestDiffUtils() {
    }

    /**
     * Reads a classpath resource fully into memory as a list of lines, the format
     * {@code DiffUtils.diff()} takes as input.
     *
     * <p>Every line is held in memory at once, so a file whose whole content is on a single
     * line is loaded as one {@code String}.
     *
     * @param filename resource name, resolved relative to this class via {@code getResource}
     * @return the lines read; lines read before an {@link IOException} are still returned
     *         (the exception is printed, as in the original best-effort behaviour)
     * @throws IllegalArgumentException if the resource cannot be found on the classpath
     */
    private static List<String> fileToLines(String filename) {
        URL path = TestDiffUtils.class.getResource(filename);
        if (path == null) {
            // getResource returns null for a missing resource; fail with a clear message
            // instead of a bare NullPointerException on path.getFile().
            throw new IllegalArgumentException("Resource not found on classpath: " + filename);
        }
        List<String> lines = new LinkedList<String>();
        try {
            BufferedReader in = new BufferedReader(new FileReader(new File(path.getFile())));
            try {
                String line;
                while ((line = in.readLine()) != null) {
                    lines.add(line);
                }
            } finally {
                // Always release the file handle, even if readLine() fails part-way.
                in.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return lines;
    }

    /**
     * Wraps a String as a read-only {@code List<Character>} view without copying it, so that
     * {@code DiffUtils.diff()} can be used to find character diffs within a line diff.
     *
     * @param string the text to expose as a list of characters
     * @return a list view whose element {@code i} is {@code string.charAt(i)}
     */
    public static List<Character> asList(final String string) {
        return new AbstractList<Character>() {
            @Override
            public int size() {
                return string.length();
            }

            @Override
            public Character get(int index) {
                return string.charAt(index);
            }
        };
    }

    /**
     * Runs one of the named test scenarios: picks the pair of files for {@code testName},
     * diffs them line by line and prints the character-level differences of each changed
     * line. Unknown (or null) test names are ignored.
     *
     * @param testName one of "large-file-test", "small-file-test" or
     *                 "large-file-test-single-line"
     */
    private static void performDiff(String testName) {
        final String origFileName;
        final String revisedFileName;
        if ("large-file-test".equals(testName)) {
            origFileName = "test_large_file.xml";
            revisedFileName = "test_large_file_revised.xml";
        } else if ("small-file-test".equals(testName)) {
            origFileName = "originalFile.txt";
            revisedFileName = "revisedFile.txt";
        } else if ("large-file-test-single-line".equals(testName)) {
            origFileName = "test_large_file_SingleLine.xml";
            revisedFileName = "test_large_file_revised_SingleLine.xml";
        } else {
            // Nothing to compare for an unrecognised scenario.
            return;
        }

        // Convert the orig and revised files to the List<String> format DiffUtils.diff() uses
        List<String> originalLines = fileToLines(origFileName);
        List<String> revisedLines = fileToLines(revisedFileName);

        // Get the line-by-line diffs
        Patch patch = DiffUtils.diff(originalLines, revisedLines);
        List<Delta> deltas = patch.getDeltas();
        for (Delta delta : deltas) {
            // The (1-based) line in the orig file that this current diff occurs at
            int lineOfDiffInOrig = delta.getOriginal().getPosition() + 1;

            // Only continue when both sides have at least one line; get(0) on an empty side
            // (a pure insert or delete) would throw IndexOutOfBoundsException.
            if (delta.getOriginal().getLines().size() > 0 && delta.getRevised().getLines().size() > 0) {
                // Only the first line of each side is compared character by character.
                String origLine = (String) delta.getOriginal().getLines().get(0);
                String revisedLine = (String) delta.getRevised().getLines().get(0);
                printCharDiffs(lineOfDiffInOrig, origLine, revisedLine);
            }
        }
    }

    /** Prints "Line<n> : [start,end]" for every character-level delta between two lines. */
    private static void printCharDiffs(int lineOfDiffInOrig, String origLine, String revisedLine) {
        // Get the character-by-character diffs
        Patch deltaPatch = DiffUtils.diff(asList(origLine), asList(revisedLine));
        List<Delta> strDeltas = deltaPatch.getDeltas();
        for (Delta strDelta : strDeltas) {
            int charPosOfDiffInOrig = strDelta.getOriginal().getPosition() + 1;
            // Start position plus chunk size: this is where the differing run ends,
            // not its length.
            int endPosOfDiffInOrig = charPosOfDiffInOrig + strDelta.getOriginal().size();
            System.out.println("Line" + lineOfDiffInOrig + " : [" + charPosOfDiffInOrig + "," + endPosOfDiffInOrig + "]");
        }
    }

    public static void main(String[] args) {
        performDiff("large-file-test-single-line");
    }
}
导入difflib.*;
导入java.io.BufferedReader;
导入java.io.File;
导入java.io.FileReader;
导入java.io.IOException;
导入java.net.URL;
导入java.util.AbstractList;
导入java.util.LinkedList;
导入java.util.List;
公共类TestDiffUtils{
公共TestDiffUtils(){
}
//Helper方法将要比较的文件读入内存,将它们转换为字符串列表,DiffUtils库可以使用这些字符串进行比较
私有静态列表文件线(字符串文件名){
列表行=新建LinkedList();
弦线;
试一试{
URL路径=TestDiffUtils.class.getResource(文件名);
文件f=新文件(path.getFile());
BufferedReader in=新的BufferedReader(新文件读取器(f));
而((line=in.readLine())!=null){
行。添加(行);
}
}捕获(IOE异常){
e、 printStackTrace();
}
回流线;
}
//将字符串转换为列表的Helper方法:在diff行中查找字符diff时与DiffUtils.diff()方法一起使用
公共静态列表asList(最终字符串){
返回新的AbstractList(){
public int size(){return string.length();}
公共字符get(int-index){return string.charAt(index);}
};
}
私有静态void performDiff(字符串testName){
字符串origFileName=null;
字符串revisedFileName=null;
Boolean continueTest=true;//除非我们不能继续,否则继续
if(testName.compareTo(“大文件测试”)==0)
{
origFileName=“test\u large\u file.xml”;
revisedFileName=“test\u large\u file\u revised.xml”;
}else if(testName.compareTo(“小文件测试”)==0){
origFileName=“originalFile.txt”;
revisedFileName=“revisedFile.txt”;
}else if(testName.compareTo(“大文件测试单行”)==0){
origFileName=“test\u large\u file\u SingleLine.xml”;
revisedFileName=“test\u large\u file\u revised\u SingleLine.xml”;
}否则{
continueTest=false;
}
if(continueTest&&origFileName!=null&&ReviedFileName!=null){
//将原始文件和修订后的文件转换为DiffUtils.diff()使用的列表格式
列表原始线=文件线(原始文件名);
List revisedLines=fileToLines(revisedFileName);
//获取一行一行的差异
Patch Patch=DiffUtils.diff(原始线、修订线);
List delta=patch.getdelta();
对于(三角洲:三角洲){
//原始文件中发生此当前差异的行
int lineOfDiffInOrig=delta.getOriginal().getPosition()+1;
//只有在修改后的行不是空的情况下才继续使用diff(如果不进行此检查,我们将得到indexootOfBoundsExceptions)
如果(delta.getOriginal().getLines().size()>0&&delta.GetRevied().getLines().size()>0){
//以列表格式获取原始行和修订行
List origChars=asList(((字符串)delta.getOriginal().getLines().get(0));
List revisedChars=asList(((字符串)delta.getRevised().getLines().get(0));
//获取每个字符的差异
Patch deltaPatch=DiffUtils.diff(origChars,revisedChars);
List strDeltas=deltaPatch.getDeltas();
用于(增量标准差:标准差){
int charPosOfDiffInOrig=strDelta.getOriginal().getPosition()+1;
int lengthOfDiffInOrig=charPosOfDiffInOrig+strDelta.getOriginal().size();
System.out.println(“Line”+lineOfDiffInOrig+”:[“+charPosOfDiffInOrig+”,“+lengthOfDiffInOrig+”]”;
}
}
}
}
}
公共静态void main(字符串[]args){
//performDiff(“大文件测试”);
//performDiff(“小文件测试”);
performDiff(“大文件测试单行”);
}
}
--
MyersDiff代码:
//
// Source code recreated from a .class file by IntelliJ IDEA
// (powered by Fernflower decompiler)
//
package difflib.myers;
import difflib.ChangeDelta;
import difflib.Chunk;
import difflib.DeleteDelta;
import difflib.Delta;
import difflib.DiffAlgorithm;
import difflib.InsertDelta;
import difflib.Patch;
import difflib.myers.DiffNode;
import difflib.myers.DifferentiationFailedException;
import difflib.myers.PathNode;
import difflib.myers.Snake;
import java.lang.reflect.Array;
import java.util.List;
/**
 * Diff algorithm implementation (decompiled from the java-diff-utils library) that computes a
 * {@link Patch} describing how to turn an original sequence into a revised one.
 *
 * <p>Memory note: {@link #buildPath} allocates a {@code PathNode[]} of
 * {@code 1 + 2 * (N + M + 1)} slots for sequences of length N and M, and every node keeps a
 * reference to its predecessor, so element-by-element diffs of very long sequences (for
 * example every character of a large single-line file) are memory hungry.
 */
public class MyersDiff implements DiffAlgorithm {

    public MyersDiff() {
    }

    /**
     * Diffs two lists by converting them to arrays and delegating to
     * {@link #diff(Object[], Object[])}.
     *
     * @param original the original sequence
     * @param revised the revised sequence
     * @return the patch between the two sequences
     */
    public Patch diff(List<?> original, List<?> revised) {
        return this.diff(original.toArray(), revised.toArray());
    }

    /**
     * Diffs two arrays.
     *
     * @param orig the original sequence
     * @param rev the revised sequence
     * @return the patch between the two sequences; an empty patch if no diff path was found
     *         (the failure is printed rather than propagated)
     */
    public Patch diff(Object[] orig, Object[] rev) {
        try {
            PathNode path = buildPath(orig, rev);
            return buildRevision(path, orig, rev);
        } catch (DifferentiationFailedException e) {
            e.printStackTrace();
            return new Patch();
        }
    }

    /**
     * Searches for a path of diff nodes and snakes (runs of equal elements) leading from the
     * start of both sequences to their ends.
     *
     * @param orig the original sequence
     * @param rev the revised sequence
     * @return the last node of the path found; follow {@code prev} to walk it backwards
     * @throws IllegalArgumentException if either sequence is null
     * @throws DifferentiationFailedException if no path reaching both ends is found
     */
    public static PathNode buildPath(Object[] orig, Object[] rev) throws DifferentiationFailedException {
        if (orig == null) {
            throw new IllegalArgumentException("original sequence is null");
        }
        if (rev == null) {
            throw new IllegalArgumentException("revised sequence is null");
        }

        final int N = orig.length;
        final int M = rev.length;
        final int MAX = N + M + 1;
        final int size = 1 + 2 * MAX;
        final int middle = size / 2;
        // One slot per diagonal k in [-MAX, MAX], stored at index middle + k.
        final PathNode[] diagonal = new PathNode[size];

        // Seed node on diagonal 1 so the first iteration (d = 0, k = 0) starts at i = 0, j = 0.
        diagonal[middle + 1] = new Snake(0, -1, (PathNode) null);
        for (int d = 0; d < MAX; ++d) {
            for (int k = -d; k <= d; k += 2) {
                final int kmiddle = middle + k;
                final int kplus = kmiddle + 1;
                final int kminus = kmiddle - 1;

                PathNode prev;
                int i;
                if (k == -d || (k != d && diagonal[kminus].i < diagonal[kplus].i)) {
                    // Continue from diagonal k + 1 (i stays the same).
                    i = diagonal[kplus].i;
                    prev = diagonal[kplus];
                } else {
                    // Continue from diagonal k - 1 (i advances by one).
                    i = diagonal[kminus].i + 1;
                    prev = diagonal[kminus];
                }
                diagonal[kminus] = null; // no longer needed by later iterations

                int j = i - k;
                PathNode node = new DiffNode(i, j, prev);

                // Follow the run of equal elements along this diagonal.
                while (i < N && j < M && sameElement(orig[i], rev[j])) {
                    ++i;
                    ++j;
                }
                if (i > node.i) {
                    node = new Snake(i, j, node);
                }
                diagonal[kmiddle] = node;

                if (i >= N && j >= M) {
                    return diagonal[kmiddle];
                }
            }
            diagonal[middle + d - 1] = null;
        }
        throw new DifferentiationFailedException("could not find a diff path");
    }

    /** Null-safe element comparison: two nulls are equal, a null never equals a non-null. */
    private static boolean sameElement(Object a, Object b) {
        return a == null ? b == null : a.equals(b);
    }

    /**
     * Walks a diff path backwards and converts each diff step into an insert, delete or
     * change delta.
     *
     * @param path the last node of the path, as returned by {@link #buildPath}
     * @param orig the original sequence
     * @param rev the revised sequence
     * @return a patch containing one delta per diff step on the path
     * @throws IllegalArgumentException if any argument is null
     * @throws IllegalStateException if a snake is found where a diff node is expected
     */
    public static Patch buildRevision(PathNode path, Object[] orig, Object[] rev) {
        if (path == null) {
            throw new IllegalArgumentException("path is null");
        }
        if (orig == null) {
            throw new IllegalArgumentException("original sequence is null");
        }
        if (rev == null) {
            throw new IllegalArgumentException("revised sequence is null");
        }

        Patch patch = new Patch();
        // A trailing snake is a run of equal elements: nothing to report for it.
        if (path.isSnake()) {
            path = path.prev;
        }
        while (path != null && path.prev != null && path.prev.j >= 0) {
            if (path.isSnake()) {
                throw new IllegalStateException("bad diffpath: found snake when looking for diff");
            }
            int i = path.i;
            int j = path.j;

            path = path.prev;
            int ianchor = path.i;
            int janchor = path.j;

            // Elements between the anchor and (i, j) are what differs on each side.
            Chunk original = new Chunk(ianchor, copyOfRange(orig, ianchor, i));
            Chunk revised = new Chunk(janchor, copyOfRange(rev, janchor, j));

            Delta delta;
            if (original.size() == 0 && revised.size() != 0) {
                delta = new InsertDelta(original, revised);
            } else if (original.size() > 0 && revised.size() == 0) {
                delta = new DeleteDelta(original, revised);
            } else {
                delta = new ChangeDelta(original, revised);
            }
            patch.addDelta(delta);

            if (path.isSnake()) {
                path = path.prev;
            }
        }
        return patch;
    }

    /**
     * Copies the range {@code [from, to)} of an array into a new array of the same runtime
     * type.
     *
     * @param original the array to copy from
     * @param from first index to copy (inclusive)
     * @param to last index to copy (exclusive)
     * @return a new array of length {@code to - from}
     * @throws IllegalArgumentException if {@code from > to}
     */
    @SuppressWarnings("unchecked") // the runtime class of a T[] is a Class<? extends T[]>
    public static <T> T[] copyOfRange(T[] original, int from, int to) {
        return copyOfRange(original, from, to, (Class<? extends T[]>) original.getClass());
    }

    /**
     * Copies the range {@code [from, to)} of an array into a new array of type
     * {@code newType}.
     *
     * @param original the array to copy from
     * @param from first index to copy (inclusive)
     * @param to last index to copy (exclusive)
     * @param newType the class of the array to create
     * @return a new array of length {@code to - from}; positions past the end of
     *         {@code original} are left at their default value
     * @throws IllegalArgumentException if {@code from > to}
     */
    @SuppressWarnings("unchecked") // copy is created with newType's component type
    public static <T, U> T[] copyOfRange(U[] original, int from, int to, Class<? extends T[]> newType) {
        int newLength = to - from;
        if (newLength < 0) {
            throw new IllegalArgumentException(from + " > " + to);
        }
        T[] copy = ((Object) newType == (Object) Object[].class)
                ? (T[]) new Object[newLength]
                : (T[]) Array.newInstance(newType.getComponentType(), newLength);
        System.arraycopy(original, from, copy, 0, Math.min(original.length - from, newLength));
        return copy;
    }
}
//
//IntelliJ IDEA从.class文件重新创建的源代码
//(由Fernflower反编译器提供动力)
//
包difflib.myers;
导入difflib.changedta;
导入difflib.Chunk;
导入difflib.DeleteDelta;
导入difflib.Delta;
导入difflib.DiffAlgorithm;
导入difflib.InsertDelta;
导入difflib.Patch;
导入difflib.myers.DiffNode;
导入difflib.myers.DifferenticationFailedException;
导入difflib.myers.PathNode;
导入difflib.myers.Snake;
导入java.lang.reflect.Array;
导入java.util.List;
公共类MyersDiff实现了Diff算法{
公共MyersDiff(){
}
公共补丁差异(列表原件,列表修订){
返回这个.diff(original.toArray(),revision.toArray());
}
公共补丁差异(对象[]原始,对象[]修订){
试一试{
PathNode路径=构建路径(源,版本);
返回buildRevision(路径、原点、版本);
}捕获(DifferenticationFailedException变量5){
var5.printStackTrace();
返回新补丁();
}
}
公共静态路径节点构建路径(对象[]原始,对象[]修订)引发DifferenticationFailedException{
if(orig==null){
抛出新的IllegalArgumentException(“原始序列为空”);
}否则如果(rev==null){
恶作剧