用Java在二进制文件中搜索字节序列_Java_Search_Byte_Binaryfiles

用Java在二进制文件中搜索字节序列

java search

用Java在二进制文件中搜索字节序列,java,search,byte,binaryfiles,Java,Search,Byte,Binaryfiles,我必须使用Java在一组二进制文件中搜索一个字节序列示例：我正在二进制文件中搜索字节序列DEADBEEF（十六进制）。我将如何在Java中实现这一点？是否有一个内置方法，如二进制文件的String.contains（）？否，没有内置方法可以做到这一点。但是，直接复制自（对原始代码应用了两个修复）： /** *模式匹配的Knuth-Morris-Pratt算法 */ 类KMPMatch{ /** *查找文本中模式的第一个匹配项。 */ 公共静态int indexOf（字节[]数据，字节[]模式

我需要使用 Java 在一组二进制文件中搜索一个字节序列。

示例：我要在二进制文件中搜索字节序列 DEADBEEF（十六进制）。

在 Java 中应该如何实现？是否有一个内置方法，类似于 String.contains()，但适用于二进制文件？

否，没有内置方法可以做到这一点。但是，直接复制自（对原始代码应用了两个修复）：

/**
*模式匹配的Knuth-Morris-Pratt算法
*/
类KMPMatch{
/**
*查找文本中模式的第一个匹配项。
*/
公共静态int indexOf（字节[]数据，字节[]模式）{
如果（data.length==0）返回-1；
int[]故障=计算故障（模式）；
int j=0；
对于（int i=0；i0&&pattern[j]！=data[i]）{
j=故障[j-1]；
}
if（pattern[j]==data[i]）{j++；}
if（j==模式长度）{
返回i-模式长度+1；
}
}
返回-1；
}
/**
*使用引导过程计算故障函数，
*模式与自身相匹配。
*/
私有静态int[]计算失败（字节[]模式）{
int[]失败=新的int[pattern.length]；
int j=0；
for（int i=1；i0&&pattern[j]！=pattern[i]）{
j=故障[j-1]；
}
if（模式[j]==模式[i]）{
j++；
}
失效[i]=j；
}
返回失败；
}
}

private int bytesIndexOf（字节[]源，字节[]搜索，int fromIndex）{
布尔查找=假；
int i；
for（i=fromIndex；i<（source.length-search.length）；i++）{
如果（源[i]==搜索[0]）{
发现=真；
for（int j=0；j

对于更喜欢使用现成库的人：Twitter 的 Elephant Bird 开源库（Apache 许可证）中有一个 Knuth-Morris-Pratt 算法的实现（源代码见下文）。

您可以在Github上的以下位置找到库：

package com.twitter.elephantbird.util；
导入java.io.IOException；
导入java.io.InputStream；
导入java.util.array；
/**
*基于Knuth-Morris-Pratt算法的高效流搜索类。
*有关算法工作原理的更多信息，请参阅：http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm.
*/
公共类流搜索器{
受保护字节[]模式；
受保护的int[]边界；
//用于搜索的模式长度上限。对较长的模式引发异常
公共静态最终int MAX_PATTERN_LENGTH=1024；
公共StreamSearcher（字节[]模式）{
设置模式（模式）；
}
/**
*设置此StreamSearcher要使用的新模式。
*@param模式
*StreamSearcher将在未来的搜索调用中寻找的模式（…）
*/
公共void setPattern（字节[]模式）{
如果（pattern.length>最大模式长度）{
抛出新的IllegalArgumentException（“最大图案长度为”+MAX_pattern_length）；
}
pattern=数组.copyOf（pattern，pattern.length）；
边框=新整数[图案长度+1]；
预处理（）；
}
/**
*从当前流位置开始搜索流中模式的下一个匹配项。注意
*流的位置已更改。如果找到匹配项，则流指向匹配项的结尾，即
*模式后的字节。否则，流将被完全消耗。后者是因为InputStream语义使得很难获得
*另一个合理的默认值，即保持流不变。
*
*@如果找到，返回消耗的字节数，-1，否则返回。
*@抛出异常
*/
公共长搜索（InputStream）引发IOException{
长字节读取=0；
int b；
int j=0；
而（（b=stream.read（））！=-1）{
bytesRead++；
而（j>=0&&（字节）b！=pattern_j]）{
j=边界_uj]；
}
//移动到模式中的下一个字符。
++j；
//如果我们匹配了整个图案长度，我们就找到了。返回，
//这将立即自动保存我们在输入流中的位置
//遵循模式匹配。
if（j==图案长度）{
返回字节读取；
}
}
//没有骰子，请注意，流现在已完全消耗。
返回-1；
}
/**
*为要查找的模式的每个前缀建立一个包含最长“边框”的表。此表存储在内部
*并帮助实现Knuth-Moore-Pratt字符串搜索。
*
*有关详细信息，请参阅：http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm.
*/
受保护的void预处理（）{
int i=0；
int j=-1；
边界i]=j；
while（i<图案长度）{
而（j>=0&&pattern[i]！=pattern[uj]）{
j=边界_uj]；
}
边界[++i]=++j；
}
}
}

您可以使用 bigdoc 库在千兆字节（GB）级别的大文件中查找字节序列。

Github上的Lib和示例位于：

package org.example；
导入java.io.File；
导入java.util.List；
导入org.riversisun.bigdoc.bin.bigfilesearch；
公开课范例{
公共静态void main（字符串[]args）引发异常{
byte[]searchBytes=“你好世界。”.getBytes（“UTF-8”）；
File File=新文件（“/var/tmp/yourBigfile.bin”）；
BigFileSearcher=新的BigFileSearcher（）；
List findList=searcher.searchBigFile（文件，searchBytes）；
/**
 * Knuth-Morris-Pratt algorithm for locating a byte pattern inside a byte array.
 */
class KMPMatch {
    /**
     * Finds the first occurrence of the pattern in the data.
     *
     * <p>Edge cases: empty {@code data} never contains a match (the result is
     * {@code -1}, even for an empty pattern); an empty pattern matches at index
     * {@code 0} of any non-empty data, mirroring {@code String.indexOf("")}.
     *
     * @param data    the bytes to search in
     * @param pattern the byte sequence to look for
     * @return the index in {@code data} where the first match starts, or
     *         {@code -1} if there is no match
     */
    public static int indexOf(byte[] data, byte[] pattern) {
        if (data.length == 0) return -1;
        // Without this guard the loop below reads pattern[0] on an empty array.
        if (pattern.length == 0) return 0;
        // A pattern longer than the data can never match; skip the table build.
        if (pattern.length > data.length) return -1;

        int[] failure = computeFailure(pattern);
        int j = 0; // number of pattern bytes matched so far

        for (int i = 0; i < data.length; i++) {
            // On mismatch, fall back to the longest shorter prefix that still matches.
            while (j > 0 && pattern[j] != data[i]) {
                j = failure[j - 1];
            }
            if (pattern[j] == data[i]) { j++; }
            if (j == pattern.length) {
                return i - pattern.length + 1;
            }
        }
        return -1;
    }

    /**
     * Computes the failure function using a boot-strapping process,
     * where the pattern is matched against itself.
     *
     * @param pattern the pattern to analyse
     * @return a table where entry {@code i} is the length of the longest proper
     *         prefix of {@code pattern[0..i]} that is also a suffix of it
     */
    private static int[] computeFailure(byte[] pattern) {
        int[] failure = new int[pattern.length];

        int j = 0;
        for (int i = 1; i < pattern.length; i++) {
            while (j > 0 && pattern[j] != pattern[i]) {
                j = failure[j - 1];
            }
            if (pattern[j] == pattern[i]) {
                j++;
            }
            failure[i] = j;
        }

        return failure;
    }
}

/**
 * Returns the index of the first occurrence of {@code search} in
 * {@code source}, scanning forward from {@code fromIndex} with a simple
 * byte-by-byte comparison.
 *
 * <p>A negative {@code fromIndex} is treated as {@code 0}. An empty
 * {@code search} matches at the (clamped) start index as long as that index
 * does not exceed {@code source.length}.
 *
 * @param source    the bytes to search in
 * @param search    the byte sequence to look for
 * @param fromIndex the index in {@code source} to start searching from
 * @return the index where the first match starts, or {@code -1} if none
 */
private int bytesIndexOf(byte[] source, byte[] search, int fromIndex) {
    // Last index at which a full copy of search still fits inside source.
    // The bound is inclusive so that a match ending exactly at the end of
    // source is found.
    int lastStart = source.length - search.length;
    for (int i = Math.max(fromIndex, 0); i <= lastStart; i++) {
        boolean find = true;
        for (int j = 0; j < search.length; j++) {
            if (source[i + j] != search[j]) {
                find = false;
                break; // no need to compare the rest of this candidate
            }
        }
        if (find) {
            return i;
        }
    }
    return -1;
}

package com.twitter.elephantbird.util;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;

/**
 * An efficient stream searching class based on the Knuth-Morris-Pratt algorithm.
 * For more on how the algorithm works see: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm.
 */
public class StreamSearcher {

  /** Private copy of the pattern being searched for. */
  protected byte[] pattern_;
  /** Border table built by {@link #preProcess()}; has {@code pattern_.length + 1} entries. */
  protected int[] borders_;

  // An upper bound on pattern length for searching. Throws exception on longer patterns
  public static final int MAX_PATTERN_LENGTH = 1024;

  /**
   * Creates a searcher for the given pattern.
   *
   * @param pattern the byte sequence to look for
   * @throws IllegalArgumentException if the pattern is longer than {@link #MAX_PATTERN_LENGTH}
   */
  public StreamSearcher(byte[] pattern) {
    setPattern(pattern);
  }

  /**
   * Sets a new pattern for this StreamSearcher to use.
   * @param pattern
   *          the pattern the StreamSearcher will look for in future calls to search(...)
   * @throws IllegalArgumentException if the pattern is longer than {@link #MAX_PATTERN_LENGTH}
   */
  public void setPattern(byte[] pattern) {
    if (pattern.length > MAX_PATTERN_LENGTH) {
      throw new IllegalArgumentException("The maximum pattern length is " + MAX_PATTERN_LENGTH);
    }

    // Copy so that later changes to the caller's array do not affect the search.
    pattern_ = Arrays.copyOf(pattern, pattern.length);
    borders_ = new int[pattern_.length + 1];
    preProcess();
  }

  /**
   * Searches for the next occurrence of the pattern in the stream, starting from the current stream position. Note
   * that the position of the stream is changed. If a match is found, the stream points to the end of the match -- i.e. the
   * byte AFTER the pattern. Else, the stream is entirely consumed. The latter is because InputStream semantics make it difficult to have
   * another reasonable default, i.e. leave the stream unchanged.
   * <p>
   * An empty pattern matches immediately: nothing is read from the stream and {@code 0} is returned.
   *
   * @param stream the stream to read from
   * @return bytes consumed if found, -1 otherwise.
   * @throws IOException if reading from the stream fails
   */
  public long search(InputStream stream) throws IOException {
    // Without this guard the loop below would index pattern_[0] on an empty array.
    if (pattern_.length == 0) {
      return 0;
    }

    long bytesRead = 0;

    int b;
    int j = 0; // number of pattern bytes currently matched

    while ((b = stream.read()) != -1) {
      bytesRead++;

      // On mismatch, fall back through the border table; j becomes -1 when
      // not even the first pattern byte matches.
      while (j >= 0 && (byte)b != pattern_[j]) {
        j = borders_[j];
      }
      // Move to the next character in the pattern.
      ++j;

      // If we've matched up to the full pattern length, we found it.  Return,
      // which will automatically save our position in the InputStream at the point immediately
      // following the pattern match.
      if (j == pattern_.length) {
        return bytesRead;
      }
    }

    // No dice, Note that the stream is now completely consumed.
    return -1;
  }

  /**
   * Builds up a table of longest "borders" for each prefix of the pattern to find. This table is stored internally
   * and aids in implementation of the Knuth-Morris-Pratt string search.
   * <p>
   * For more information, see: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm.
   */
  protected void preProcess() {
    int i = 0;
    int j = -1;
    // Entry 0 is the sentinel -1 that lets the fallback loops terminate.
    borders_[i] = j;
    while (i < pattern_.length) {
      while (j >= 0 && pattern_[i] != pattern_[j]) {
        j = borders_[j];
      }
      borders_[++i] = ++j;
    }
  }
}

package org.example;

import java.io.File;
import java.util.List;

import org.riversun.bigdoc.bin.BigFileSearcher;

public class Example {

    public static void main(String[] args) throws Exception {

        byte[] searchBytes = "hello world.".getBytes("UTF-8");

        File file = new File("/var/tmp/yourBigfile.bin");

        BigFileSearcher searcher = new BigFileSearcher();

        List<Long> findList = searcher.searchBigFile(file, searchBytes);

        System.out.println("positions = " + findList);
    }
}

 import java.util.List;

 import org.riversun.finbin.BigBinarySearcher;

 public class Example {

     /**
      * Demo entry point: looks for the bytes of "world" inside an in-memory
      * byte array via {@code BigBinarySearcher.searchBytes} and prints the
      * returned list.
      */
     public static void main(String[] args) throws Exception {
         final BigBinarySearcher finder = new BigBinarySearcher();

         final byte[] haystack = "Hello world.It's a small world.".getBytes("utf-8");
         final byte[] needle = "world".getBytes("utf-8");

         // searchBytes returns a List<Integer>; presumably the match indexes
         // in the source array -- confirm against the finbin documentation.
         final List<Integer> hits = finder.searchBytes(haystack, needle);
         System.out.println("indexList=" + hits);
     }
 }