Warning: file_get_contents(/data/phpspider/zhask/data//catemap/9/java/356.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
用Java在二进制文件中搜索字节序列_Java_Search_Byte_Binaryfiles - Fatal编程技术网

用Java在二进制文件中搜索字节序列

用Java在二进制文件中搜索字节序列,java,search,byte,binaryfiles,Java,Search,Byte,Binaryfiles,我必须使用Java在一组二进制文件中搜索一个字节序列 示例:我正在二进制文件中搜索字节序列DEADBEEF(十六进制)。 我将如何在Java中实现这一点?是否有一个内置方法,如二进制文件的String.contains()?否,没有内置方法可以做到这一点。但是,直接复制自(对原始代码应用了两个修复): /** *模式匹配的Knuth-Morris-Pratt算法 */ 类KMPMatch{ /** *查找文本中模式的第一个匹配项。 */ 公共静态int indexOf(字节[]数据,字节[]模式

我必须使用Java在一组二进制文件中搜索一个字节序列

示例:我正在二进制文件中搜索字节序列
DEADBEEF
(十六进制)。
我将如何在Java中实现这一点?是否有一个内置方法,如二进制文件的
String.contains()

不,没有内置方法可以做到这一点。不过,下面的代码直接复制自他处(并对原始代码做了两处修复):

/**
*模式匹配的Knuth-Morris-Pratt算法
*/
类KMPMatch{
/**
*查找文本中模式的第一个匹配项。
*/
公共静态int indexOf(字节[]数据,字节[]模式){
如果(data.length==0)返回-1;
int[]故障=计算故障(模式);
int j=0;
for(int i=0;i<data.length;i++){
while(j>0&&pattern[j]!=data[i]){
j=故障[j-1];
}
if(pattern[j]==data[i]){j++;}
if(j==模式长度){
返回i-模式长度+1;
}
}
返回-1;
}
/**
*使用引导过程计算故障函数,
*模式与自身相匹配。
*/
私有静态int[]计算失败(字节[]模式){
int[]失败=新的int[pattern.length];
int j=0;
for(int i=1;i<pattern.length;i++){
while(j>0&&pattern[j]!=pattern[i]){
j=故障[j-1];
}
if(模式[j]==模式[i]){
j++;
}
失效[i]=j;
}
返回失败;
}
}
private int bytesIndexOf(字节[]源,字节[]搜索,int fromIndex){
布尔查找=假;
int i;
for(i=fromIndex;i<(source.length-search.length);i++){
如果(源[i]==搜索[0]){
发现=真;
for(int j=0;j
如果你更喜欢使用现成的库,Twitter 的 Elephant Bird 开源库(Apache 许可证)中有一个 Knuth-Morris-Pratt 算法的实现(源代码见下文)。

您可以在Github上的以下位置找到库:

package com.twitter.elephantbird.util;
导入java.io.IOException;
导入java.io.InputStream;
导入java.util.array;
/**
*基于Knuth-Morris-Pratt算法的高效流搜索类。
*有关算法工作原理的更多信息,请参阅:http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm.
*/
公共类流搜索器{
受保护字节[]模式;
受保护的int[]边界;
//用于搜索的模式长度上限。对较长的模式引发异常
公共静态最终int MAX_PATTERN_LENGTH=1024;
公共StreamSearcher(字节[]模式){
设置模式(模式);
}
/**
*设置此StreamSearcher要使用的新模式。
*@param模式
*StreamSearcher将在未来的搜索调用中寻找的模式(…)
*/
公共void setPattern(字节[]模式){
如果(pattern.length>最大模式长度){
抛出新的IllegalArgumentException(“最大图案长度为”+MAX_pattern_length);
}
pattern=数组.copyOf(pattern,pattern.length);
边框=新整数[图案长度+1];
预处理();
}
/**
*从当前流位置开始搜索流中模式的下一个匹配项。注意
*流的位置已更改。如果找到匹配项,则流指向匹配项的结尾,即
*模式后的字节。否则,流将被完全消耗。后者是因为InputStream语义使得很难获得
*另一个合理的默认值,即保持流不变。
*
*@如果找到,返回消耗的字节数,-1,否则返回。
*@抛出异常
*/
公共长搜索(InputStream)引发IOException{
长字节读取=0;
int b;
int j=0;
而((b=stream.read())!=-1){
bytesRead++;
而(j>=0&&(字节)b!=pattern_j]){
j=边界_uj];
}
//移动到模式中的下一个字符。
++j;
//如果我们匹配了整个图案长度,我们就找到了。返回,
//这将立即自动保存我们在输入流中的位置
//遵循模式匹配。
if(j==图案长度){
返回字节读取;
}
}
//没有骰子,请注意,流现在已完全消耗。
返回-1;
}
/**
*为要查找的模式的每个前缀建立一个包含最长“边框”的表。此表存储在内部
*并帮助实现Knuth-Morris-Pratt字符串搜索。
*
*有关详细信息,请参阅:http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm.
*/
受保护的void预处理(){
int i=0;
int j=-1;
边界i]=j;
while(i<图案长度){
而(j>=0&&pattern[i]!=pattern[uj]){
j=边界_uj];
}
边界[++i]=++j;
}
}
}

您可以使用 bigdoc 在千兆字节(GB)级别的大文件中查找字节序列。

Github上的Lib和示例位于:

package org.example;
导入java.io.File;
导入java.util.List;
导入org.riversisun.bigdoc.bin.bigfilesearch;
公开课范例{
公共静态void main(字符串[]args)引发异常{
byte[]searchBytes=“你好世界。”.getBytes(“UTF-8”);
File File=新文件(“/var/tmp/yourBigfile.bin”);
BigFileSearcher=新的BigFileSearcher();
List findList=searcher.searchBigFile(文件,searchBytes);
/**
 * Knuth-Morris-Pratt algorithm for pattern matching over byte arrays.
 *
 * <p>The search runs in time linear in {@code data.length + pattern.length}
 * because the failure table lets the matcher resume after a mismatch without
 * re-reading bytes of {@code data}.
 */
class KMPMatch {
    /**
     * Finds the first occurrence of the pattern in the data.
     *
     * @param data    the bytes to search in
     * @param pattern the bytes to search for
     * @return the index in {@code data} at which the first occurrence of
     *         {@code pattern} starts, or -1 if there is none. Empty
     *         {@code data} always yields -1; an empty {@code pattern}
     *         matches non-empty {@code data} at index 0.
     */
    public static int indexOf(byte[] data, byte[] pattern) {
        if (data.length == 0) return -1;
        // An empty pattern matches trivially; without this guard the loop
        // below would read pattern[0] and throw ArrayIndexOutOfBoundsException.
        if (pattern.length == 0) return 0;
        // A pattern longer than the data can never match; skip the table build.
        if (pattern.length > data.length) return -1;

        int[] failure = computeFailure(pattern);
        // j = number of pattern bytes currently matched.
        int j = 0;

        for (int i = 0; i < data.length; i++) {
            // On mismatch, fall back to the longest shorter prefix that is
            // still a suffix of what has been matched so far.
            while (j > 0 && pattern[j] != data[i]) {
                j = failure[j - 1];
            }
            if (pattern[j] == data[i]) { j++; }
            if (j == pattern.length) {
                // data[i] is the last byte of the match.
                return i - pattern.length + 1;
            }
        }
        return -1;
    }

    /**
     * Computes the failure function using a boot-strapping process,
     * where the pattern is matched against itself.
     *
     * @param pattern the pattern to preprocess
     * @return a table where entry {@code i} is the length of the longest
     *         proper prefix of {@code pattern[0..i]} that is also a suffix
     *         of it
     */
    private static int[] computeFailure(byte[] pattern) {
        int[] failure = new int[pattern.length];

        int j = 0;
        // failure[0] stays 0: a single byte has no proper prefix.
        for (int i = 1; i < pattern.length; i++) {
            while (j > 0 && pattern[j] != pattern[i]) {
                j = failure[j - 1];
            }
            if (pattern[j] == pattern[i]) {
                j++;
            }
            failure[i] = j;
        }

        return failure;
    }
}
/**
 * Returns the index of the first occurrence of {@code search} in
 * {@code source}, starting the scan at {@code fromIndex}.
 *
 * <p>Edge cases follow {@link String#indexOf(String, int)}: a negative
 * {@code fromIndex} is treated as 0, and an empty {@code search} matches at
 * the start position, capped at {@code source.length}.
 *
 * @param source    the bytes to search in
 * @param search    the bytes to search for
 * @param fromIndex the index in {@code source} to start searching from
 * @return the index of the first match at or after {@code fromIndex},
 *         or -1 if there is none
 */
private int bytesIndexOf(byte[] source, byte[] search, int fromIndex) {
    // Clamp so a negative start cannot index before the array.
    int start = Math.max(fromIndex, 0);
    if (search.length == 0) {
        // Nothing to compare; reading search[0] would throw.
        return Math.min(start, source.length);
    }
    // Last index at which a full-length match can still begin. The bound is
    // inclusive so a match ending exactly at the end of source is found.
    int lastStart = source.length - search.length;
    for (int i = start; i <= lastStart; i++) {
        int j = 0;
        // Stop comparing at the first mismatching byte.
        while (j < search.length && source[i + j] == search[j]) {
            j++;
        }
        if (j == search.length) {
            return i;
        }
    }
    return -1;
}
package com.twitter.elephantbird.util;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;

/**
 * An efficient stream searching class based on the Knuth-Morris-Pratt algorithm.
 * For more on how the algorithm works see: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm.
 */
public class StreamSearcher {

  /** Private copy of the pattern being searched for. */
  protected byte[] pattern_;
  /** Border table built by {@link #preProcess()}; has {@code pattern_.length + 1} entries. */
  protected int[] borders_;

  // An upper bound on pattern length for searching. Throws exception on longer patterns
  public static final int MAX_PATTERN_LENGTH = 1024;

  /**
   * Creates a searcher for the given pattern.
   *
   * @param pattern the byte sequence to look for
   * @throws IllegalArgumentException if the pattern is longer than {@link #MAX_PATTERN_LENGTH}
   */
  public StreamSearcher(byte[] pattern) {
    setPattern(pattern);
  }

  /**
   * Sets a new pattern for this StreamSearcher to use.
   * @param pattern
   *          the pattern the StreamSearcher will look for in future calls to search(...)
   * @throws IllegalArgumentException if the pattern is longer than {@link #MAX_PATTERN_LENGTH}
   */
  public void setPattern(byte[] pattern) {
    if (pattern.length > MAX_PATTERN_LENGTH) {
      throw new IllegalArgumentException("The maximum pattern length is " + MAX_PATTERN_LENGTH);
    }

    // Copy so later changes to the caller's array cannot desynchronise the border table.
    pattern_ = Arrays.copyOf(pattern, pattern.length);
    borders_ = new int[pattern_.length + 1];
    preProcess();
  }

  /**
   * Searches for the next occurrence of the pattern in the stream, starting from the current stream position. Note
   * that the position of the stream is changed. If a match is found, the stream points to the end of the match -- i.e. the
   * byte AFTER the pattern. Else, the stream is entirely consumed. The latter is because InputStream semantics make it difficult to have
   * another reasonable default, i.e. leave the stream unchanged.
   * <p>
   * An empty pattern matches immediately: 0 is returned and nothing is read from the stream.
   *
   * @param stream the stream to read from, one byte at a time
   * @return bytes consumed if found, -1 otherwise.
   * @throws IOException if reading from the stream fails
   */
  public long search(InputStream stream) throws IOException {
    // Without this guard the first comparison below would read pattern_[0]
    // and throw ArrayIndexOutOfBoundsException for an empty pattern.
    if (pattern_.length == 0) {
      return 0;
    }

    long bytesRead = 0;

    int b;
    // j = number of pattern bytes currently matched.
    int j = 0;

    while ((b = stream.read()) != -1) {
      bytesRead++;

      // Fall back through the borders until the byte fits; j becomes -1
      // when even the first pattern byte does not match.
      while (j >= 0 && (byte)b != pattern_[j]) {
        j = borders_[j];
      }
      // Move to the next character in the pattern.
      ++j;

      // If we've matched up to the full pattern length, we found it.  Return,
      // which will automatically save our position in the InputStream at the point immediately
      // following the pattern match.
      if (j == pattern_.length) {
        return bytesRead;
      }
    }

    // No dice, Note that the stream is now completely consumed.
    return -1;
  }

  /**
   * Builds up a table of longest "borders" for each prefix of the pattern to find. This table is stored internally
   * and aids in implementation of the Knuth-Morris-Pratt string search.
   * <p>
   * For more information, see: http://www.inf.fh-flensburg.de/lang/algorithmen/pattern/kmpen.htm.
   */
  protected void preProcess() {
    int i = 0;
    int j = -1;
    // -1 marks "no border" for the empty prefix and ends the fallback loops.
    borders_[i] = j;
    while (i < pattern_.length) {
      while (j >= 0 && pattern_[i] != pattern_[j]) {
        j = borders_[j];
      }
      borders_[++i] = ++j;
    }
  }
}
package org.example;

import java.io.File;
import java.util.List;

import org.riversun.bigdoc.bin.BigFileSearcher;

public class Example {

    public static void main(String[] args) throws Exception {

        byte[] searchBytes = "hello world.".getBytes("UTF-8");

        File file = new File("/var/tmp/yourBigfile.bin");

        BigFileSearcher searcher = new BigFileSearcher();

        List<Long> findList = searcher.searchBigFile(file, searchBytes);

        System.out.println("positions = " + findList);
    }
}
 import java.util.List;

 import org.riversun.finbin.BigBinarySearcher;

 /**
  * Minimal usage example for {@code BigBinarySearcher}: looks for a byte
  * sequence inside an in-memory byte array and prints what the searcher returns.
  */
 public class Example {

     /**
      * Searches the UTF-8 bytes of a sample sentence for the UTF-8 bytes of
      * {@code "world"} and prints the resulting list.
      *
      * @param args unused
      * @throws Exception propagated from encoding lookup or the search itself
      */
     public static void main(String[] args) throws Exception {
         final BigBinarySearcher searcher = new BigBinarySearcher();

         final byte[] haystack = "Hello world.It's a small world.".getBytes("utf-8");
         final byte[] needle = "world".getBytes("utf-8");

         // NOTE(review): the returned values are presumably match indexes in haystack; confirm against the library docs.
         final List<Integer> matches = searcher.searchBytes(haystack, needle);

         System.out.println("indexList=" + matches);
     }
 }