如何使用 java-diff-utils 对可能很大的单行文件进行差异比较(diff)?

如何使用java diff utils对java中可能较大的单行文件进行区分?,java,diff,large-files,Java,Diff,Large Files,我正在使用java diff-utils: 它工作得很好,除非一个大文件都在一行中 我使用一个BufferedReader来读取每一行,因此我得到了一个Java堆内存不足异常 如何处理加载大型单行文件以用于java diff-utils 这是我的密码: import difflib.*; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOExceptio

我正在使用java diff-utils

它工作得很好,除非一个大文件的全部内容都在同一行中

我使用一个BufferedReader来读取每一行,因此我得到了一个Java堆内存不足异常

应该如何加载大型单行文件,以便用于 java-diff-utils?

这是我的代码:

import difflib.*;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.util.AbstractList;
import java.util.LinkedList;
import java.util.List;

/**
 * Small command-line driver that diffs two classpath resources with java-diff-utils.
 *
 * <p>The files are first compared line by line; for every changed region the first
 * original line and the first revised line are then compared character by character,
 * and the character ranges that differ are printed to standard output.
 */
public class TestDiffUtils {

    public TestDiffUtils() {

    }

    /**
     * Reads a classpath resource fully into memory, one list element per line.
     *
     * <p>Note that a file consisting of a single very long line ends up as one
     * very large {@code String} in the returned list.
     *
     * @param filename resource name, resolved relative to this class
     * @return the lines read; if an {@link IOException} occurs it is printed and
     *         the lines read so far (possibly none) are returned
     * @throws IllegalArgumentException if the resource cannot be found
     */
    private static List<String> fileToLines(String filename) {
        URL path = TestDiffUtils.class.getResource(filename);
        if (path == null) {
            // getResource() returns null for a missing resource; fail with a clear message
            // instead of a bare NullPointerException.
            throw new IllegalArgumentException("resource not found on classpath: " + filename);
        }

        List<String> lines = new LinkedList<String>();
        BufferedReader in = null;
        try {
            in = new BufferedReader(new FileReader(new File(path.getFile())));
            String line;
            while ((line = in.readLine()) != null) {
                lines.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Always release the file handle, even when reading failed part-way.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done here; the lines already read are still returned.
                }
            }
        }

        return lines;
    }

    /**
     * Exposes a {@code String} as a read-only {@code List<Character>} view without copying,
     * so that it can be passed to {@code DiffUtils.diff()} for character-level diffs.
     *
     * @param string the text to view as a list of characters
     * @return a list whose size is {@code string.length()} and whose elements are its characters
     */
    public static List<Character> asList(final String string) {
        return new AbstractList<Character>() {
            @Override
            public int size() { return string.length(); }

            @Override
            public Character get(int index) { return string.charAt(index); }
        };
    }

    /**
     * Runs one of the predefined diff scenarios and prints the differing character ranges.
     *
     * @param testName one of {@code "large-file-test"}, {@code "small-file-test"} or
     *                 {@code "large-file-test-single-line"}; any other value does nothing
     */
    private static void performDiff(String testName) {
        String origFileName;
        String revisedFileName;

        if ("large-file-test".equals(testName)) {
            origFileName = "test_large_file.xml";
            revisedFileName = "test_large_file_revised.xml";
        } else if ("small-file-test".equals(testName)) {
            origFileName = "originalFile.txt";
            revisedFileName = "revisedFile.txt";
        } else if ("large-file-test-single-line".equals(testName)) {
            origFileName = "test_large_file_SingleLine.xml";
            revisedFileName = "test_large_file_revised_SingleLine.xml";
        } else {
            // Unknown scenario: nothing to compare.
            return;
        }

        // Convert the orig and revised files to the List<String> format that DiffUtils.diff() uses
        List<String> originalLines = fileToLines(origFileName);
        List<String> revisedLines = fileToLines(revisedFileName);

        // Get the line-by-line diffs
        Patch patch = DiffUtils.diff(originalLines, revisedLines);
        List<Delta> deltas = patch.getDeltas();
        for (Delta delta : deltas) {
            // The (1-based) line in the orig file that this current diff occurs at
            int lineOfDiffInOrig = delta.getOriginal().getPosition() + 1;

            // Only continue if both sides have at least one line; get(0) below would
            // otherwise throw an IndexOutOfBoundsException for pure inserts/deletes.
            if (delta.getOriginal().getLines().size() > 0 && delta.getRevised().getLines().size() > 0) {
                // Only the first line of each side is compared character by character.
                List<Character> origChars = asList(((String) delta.getOriginal().getLines().get(0)));
                List<Character> revisedChars = asList(((String) delta.getRevised().getLines().get(0)));

                // Get the character-by-character diffs.
                // NOTE(review): for a huge single-line file this diffs every character of the
                // file as a separate element, which is presumably where the heap is exhausted.
                Patch deltaPatch = DiffUtils.diff(origChars, revisedChars);
                List<Delta> strDeltas = deltaPatch.getDeltas();
                for (Delta strDelta : strDeltas) {
                    int charPosOfDiffInOrig = strDelta.getOriginal().getPosition() + 1;
                    // Start position plus the size of the changed chunk.
                    int endPosOfDiffInOrig = charPosOfDiffInOrig + strDelta.getOriginal().size();

                    System.out.println("Line" + lineOfDiffInOrig + " : [" + charPosOfDiffInOrig + "," + endPosOfDiffInOrig + "]");
                }
            }
        }
    }

    public static void main(String[] args) {
        //performDiff("large-file-test");
        //performDiff("small-file-test");
        performDiff("large-file-test-single-line");
    }
}
导入difflib.*;
导入java.io.BufferedReader;
导入java.io.File;
导入java.io.FileReader;
导入java.io.IOException;
导入java.net.URL;
导入java.util.AbstractList;
导入java.util.LinkedList;
导入java.util.List;
公共类TestDiffUtils{
公共TestDiffUtils(){
}
//Helper方法将要比较的文件读入内存,将它们转换为字符串列表,DiffUtils库可以使用这些字符串进行比较
私有静态列表文件线(字符串文件名){
列表行=新建LinkedList();
弦线;
试一试{
URL路径=TestDiffUtils.class.getResource(文件名);
文件f=新文件(path.getFile());
BufferedReader in=新的BufferedReader(新文件读取器(f));
而((line=in.readLine())!=null){
行。添加(行);
}
}捕获(IOE异常){
e、 printStackTrace();
}
回流线;
}
//将字符串转换为列表的Helper方法:在diff行中查找字符diff时与DiffUtils.diff()方法一起使用
公共静态列表asList(最终字符串){
返回新的AbstractList(){
public int size(){return string.length();}
公共字符get(int-index){return string.charAt(index);}
};
}
私有静态void performDiff(字符串testName){
字符串origFileName=null;
字符串revisedFileName=null;
Boolean continueTest=true;//除非我们不能继续,否则继续
if(testName.compareTo(“大文件测试”)==0)
{
origFileName=“test\u large\u file.xml”;
revisedFileName=“test\u large\u file\u revised.xml”;
}else if(testName.compareTo(“小文件测试”)==0){
origFileName=“originalFile.txt”;
revisedFileName=“revisedFile.txt”;
}else if(testName.compareTo(“大文件测试单行”)==0){
origFileName=“test\u large\u file\u SingleLine.xml”;
revisedFileName=“test\u large\u file\u revised\u SingleLine.xml”;
}否则{
continueTest=false;
}
if(continueTest&&origFileName!=null&&ReviedFileName!=null){
//将原始文件和修订后的文件转换为DiffUtils.diff()使用的列表格式
列表原始线=文件线(原始文件名);
List revisedLines=fileToLines(revisedFileName);
//获取一行一行的差异
Patch Patch=DiffUtils.diff(原始线、修订线);
List delta=patch.getdelta();
对于(三角洲:三角洲){
//原始文件中发生此当前差异的行
int lineOfDiffInOrig=delta.getOriginal().getPosition()+1;
//只有在修改后的行不是空的情况下才继续使用diff(如果不进行此检查,我们将得到indexootOfBoundsExceptions)
如果(delta.getOriginal().getLines().size()>0&&delta.GetRevied().getLines().size()>0){
//以列表格式获取原始行和修订行
List origChars=asList(((字符串)delta.getOriginal().getLines().get(0));
List revisedChars=asList(((字符串)delta.getRevised().getLines().get(0));
//获取每个字符的差异
Patch deltaPatch=DiffUtils.diff(origChars,revisedChars);
List strDeltas=deltaPatch.getDeltas();
用于(增量标准差:标准差){
int charPosOfDiffInOrig=strDelta.getOriginal().getPosition()+1;
int lengthOfDiffInOrig=charPosOfDiffInOrig+strDelta.getOriginal().size();
System.out.println(“Line”+lineOfDiffInOrig+”:[“+charPosOfDiffInOrig+”,“+lengthOfDiffInOrig+”]”;
}
}
}
}
}
公共静态void main(字符串[]args){
//performDiff(“大文件测试”);
//performDiff(“小文件测试”);
performDiff(“大文件测试单行”);
}
}
-- MyersDiff代码:

//
// Source code recreated from a .class file by IntelliJ IDEA
// (powered by Fernflower decompiler)
//

package difflib.myers;

import difflib.ChangeDelta;
import difflib.Chunk;
import difflib.DeleteDelta;
import difflib.Delta;
import difflib.DiffAlgorithm;
import difflib.InsertDelta;
import difflib.Patch;
import difflib.myers.DiffNode;
import difflib.myers.DifferentiationFailedException;
import difflib.myers.PathNode;
import difflib.myers.Snake;
import java.lang.reflect.Array;
import java.util.List;

/**
 * {@link DiffAlgorithm} implementation that searches for an edit path between two
 * sequences and converts that path into a {@link Patch}.
 *
 * <p>Elements are compared with {@code equals}; the sequences themselves must be non-null.
 */
public class MyersDiff implements DiffAlgorithm {
    public MyersDiff() {
    }

    /**
     * Computes the difference between two lists by delegating to the array-based overload.
     *
     * @param original the original sequence
     * @param revised  the revised sequence
     * @return the patch describing the changes from {@code original} to {@code revised}
     */
    public Patch diff(List<?> original, List<?> revised) {
        return diff(original.toArray(), revised.toArray());
    }

    /**
     * Computes the difference between two arrays.
     *
     * @param orig the original sequence
     * @param rev  the revised sequence
     * @return the patch describing the changes; if no diff path can be found the failure
     *         is printed and an empty patch is returned
     */
    public Patch diff(Object[] orig, Object[] rev) {
        try {
            PathNode path = buildPath(orig, rev);
            return buildRevision(path, orig, rev);
        } catch (DifferentiationFailedException e) {
            e.printStackTrace();
            return new Patch();
        }
    }

    /**
     * Searches for an edit path from the start to the end of both sequences.
     *
     * <p>A working array of {@code 1 + 2 * (N + M + 1)} path-node slots is allocated, where
     * {@code N} and {@code M} are the sequence lengths, so memory use grows with the
     * combined length of the inputs.
     *
     * @param orig the original sequence
     * @param rev  the revised sequence
     * @return the last node of the path found; earlier nodes are reachable through {@code prev}
     * @throws IllegalArgumentException       if either sequence is {@code null}
     * @throws DifferentiationFailedException if no path is found
     */
    public static PathNode buildPath(Object[] orig, Object[] rev) throws DifferentiationFailedException {
        if (orig == null) {
            throw new IllegalArgumentException("original sequence is null");
        }
        if (rev == null) {
            throw new IllegalArgumentException("revised sequence is null");
        }

        int N = orig.length;
        int M = rev.length;
        int MAX = N + M + 1;
        int size = 1 + 2 * MAX;
        int middle = size / 2;
        // One slot per diagonal k, stored at index middle + k.
        PathNode[] diagonal = new PathNode[size];
        // Sentinel start node so the first iteration (d = 0, k = 0) has a predecessor at k + 1.
        diagonal[middle + 1] = new Snake(0, -1, (PathNode) null);

        for (int d = 0; d < MAX; ++d) {
            for (int k = -d; k <= d; k += 2) {
                int kmiddle = middle + k;
                int kplus = kmiddle + 1;
                int kminus = kmiddle - 1;
                PathNode prev;
                int i;
                // Choose which neighbouring diagonal to extend from: k - 1 (advancing i by one)
                // or k + 1 (keeping i).
                if (k != -d && (k == d || diagonal[kminus].i >= diagonal[kplus].i)) {
                    i = diagonal[kminus].i + 1;
                    prev = diagonal[kminus];
                } else {
                    i = diagonal[kplus].i;
                    prev = diagonal[kplus];
                }

                // Drop this slot's reference; the chosen predecessor stays reachable via prev.
                diagonal[kminus] = null;
                int j = i - k;

                PathNode node = new DiffNode(i, j, prev);
                // Follow the run of equal elements along this diagonal.
                while (i < N && j < M && orig[i].equals(rev[j])) {
                    ++i;
                    ++j;
                }

                // If any elements matched, record the run as a snake on top of the diff node.
                if (i > node.i) {
                    node = new Snake(i, j, node);
                }

                diagonal[kmiddle] = node;
                if (i >= N && j >= M) {
                    // Both sequences are fully consumed: this node ends the path.
                    return diagonal[kmiddle];
                }
            }

            diagonal[middle + d - 1] = null;
        }

        throw new DifferentiationFailedException("could not find a diff path");
    }

    /**
     * Converts a path produced by {@link #buildPath(Object[], Object[])} into a patch.
     *
     * <p>The path is walked backwards from its last node; every non-snake node together
     * with its predecessor delimits one changed region, which is added to the patch as an
     * insert, delete or change delta.
     *
     * @param path the last node of the diff path
     * @param orig the original sequence
     * @param rev  the revised sequence
     * @return the patch containing one delta per changed region
     * @throws IllegalArgumentException if any argument is {@code null}
     * @throws IllegalStateException    if a snake is found where a diff node is expected
     */
    public static Patch buildRevision(PathNode path, Object[] orig, Object[] rev) {
        if (path == null) {
            throw new IllegalArgumentException("path is null");
        }
        if (orig == null) {
            throw new IllegalArgumentException("original sequence is null");
        }
        if (rev == null) {
            throw new IllegalArgumentException("revised sequence is null");
        }

        Patch patch = new Patch();
        if (path.isSnake()) {
            path = path.prev;
        }

        // Stop once the predecessor has j < 0 (buildPath seeds the path with such a node).
        while (path != null && path.prev != null && path.prev.j >= 0) {
            if (path.isSnake()) {
                throw new IllegalStateException("bad diffpath: found snake when looking for diff");
            }

            int i = path.i;
            int j = path.j;
            path = path.prev;
            int ianchor = path.i;
            int janchor = path.j;
            Chunk original = new Chunk(ianchor, copyOfRange(orig, ianchor, i));
            Chunk revised = new Chunk(janchor, copyOfRange(rev, janchor, j));
            Delta delta;
            if (original.size() == 0 && revised.size() != 0) {
                delta = new InsertDelta(original, revised);
            } else if (original.size() > 0 && revised.size() == 0) {
                delta = new DeleteDelta(original, revised);
            } else {
                delta = new ChangeDelta(original, revised);
            }

            patch.addDelta(delta);
            if (path.isSnake()) {
                path = path.prev;
            }
        }

        return patch;
    }

    /**
     * Copies the range {@code [from, to)} of an array into a new array of the same runtime type.
     *
     * @param original the array to copy from
     * @param from     index of the first element to copy, inclusive
     * @param to       index after the last element to copy, exclusive
     * @return a new array of length {@code to - from}
     * @throws IllegalArgumentException if {@code from > to}
     */
    @SuppressWarnings("unchecked") // getClass() of a T[] is a Class of T[] (or of a subtype's array)
    public static <T> T[] copyOfRange(T[] original, int from, int to) {
        return copyOfRange(original, from, to, (Class<? extends T[]>) original.getClass());
    }

    /**
     * Copies the range {@code [from, to)} of an array into a new array of the given array type.
     *
     * <p>If the range extends past the end of {@code original}, only the available elements
     * are copied and the remaining slots of the result stay {@code null}.
     *
     * @param original the array to copy from
     * @param from     index of the first element to copy, inclusive
     * @param to       index after the last element to copy, exclusive
     * @param newType  the array class of the result
     * @return a new array of length {@code to - from}
     * @throws IllegalArgumentException if {@code from > to}
     */
    @SuppressWarnings("unchecked") // the new array is created with newType's component type
    public static <T, U> T[] copyOfRange(U[] original, int from, int to, Class<? extends T[]> newType) {
        int newLength = to - from;
        if (newLength < 0) {
            throw new IllegalArgumentException(from + " > " + to);
        }

        // Avoid reflection for the common Object[] case.
        T[] copy = ((Object) newType == (Object) Object[].class)
                ? (T[]) new Object[newLength]
                : (T[]) Array.newInstance(newType.getComponentType(), newLength);
        System.arraycopy(original, from, copy, 0, Math.min(original.length - from, newLength));
        return copy;
    }
}
//
//IntelliJ IDEA从.class文件重新创建的源代码
//(由Fernflower反编译器提供动力)
//
包difflib.myers;
导入difflib.changedta;
导入difflib.Chunk;
导入difflib.DeleteDelta;
导入difflib.Delta;
导入difflib.DiffAlgorithm;
导入difflib.InsertDelta;
导入difflib.Patch;
导入difflib.myers.DiffNode;
导入difflib.myers.DifferenticationFailedException;
导入difflib.myers.PathNode;
导入difflib.myers.Snake;
导入java.lang.reflect.Array;
导入java.util.List;
公共类MyersDiff实现了Diff算法{
公共MyersDiff(){
}
公共补丁差异(列表原件,列表修订){
返回这个.diff(original.toArray(),revision.toArray());
}
公共补丁差异(对象[]原始,对象[]修订){
试一试{
PathNode路径=构建路径(源,版本);
返回buildRevision(路径、原点、版本);
}捕获(DifferenticationFailedException变量5){
var5.printStackTrace();
返回新补丁();
}
}
公共静态路径节点构建路径(对象[]原始,对象[]修订)引发DifferenticationFailedException{
if(orig==null){
抛出新的IllegalArgumentException(“原始序列为空”);
}否则如果(rev==null){
恶作剧