Java 在InputStream中筛选（搜索和替换）字节数组_Java_Input_Bytearray

Java 在InputStream中筛选（搜索和替换）字节数组

java input

Java 在InputStream中筛选（搜索和替换）字节数组,java,input,bytearray,Java,Input,Bytearray,我有一个InputStream，它将html文件作为输入参数。我必须从输入流中获取字节我有一个字符串：“XYZ”。我想将这个字符串转换成字节格式，并检查我从InputStream获得的字节序列中的字符串是否匹配。如果有，我必须用其他字符串的bye序列替换匹配有人能帮我吗？我使用正则表达式查找和替换。然而，我不知道如何查找和替换字节流以前，我使用jsoup解析html并替换字符串，但是由于一些utf编码问题，当我这样做时，文件似乎已损坏 TL；博士：我的问题是：是一种在Java原始输入流中

我有一个InputStream，它将html文件作为输入参数。我必须从输入流中获取字节

我有一个字符串：

“XYZ”

。我想把这个字符串转换成字节序列，并检查从InputStream读到的字节中是否包含与之匹配的序列。如果包含，就必须用另一个字符串的字节序列替换匹配到的部分

有人能帮我吗？我使用正则表达式查找和替换。然而，我不知道如何查找和替换字节流

以前，我使用jsoup解析html并替换字符串，但是由于一些utf编码问题，当我这样做时，文件似乎已损坏

TL;DR：我的问题是：

有没有一种方法可以在Java的原始输入流中查找并替换以字节形式表示的字符串？

不确定您是否选择了解决问题的最佳方法

话虽如此，我不喜欢用“别这么做”来回答问题（我的原则也是不这样回答），所以下面给出一种做法

可以看一看FilterInputStream

文档中写道：

FilterInputStream包含一些其他输入流，它将其用作基本数据源，可能沿途转换数据或提供附加功能

写下来是一个有趣的练习。下面是一个完整的示例：

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedList;

/**
 * A {@link FilterInputStream} that replaces every occurrence of a byte sequence in the
 * wrapped stream with another byte sequence.
 *
 * <p>The stream keeps a look-ahead window as long as the search pattern. When the window
 * equals the pattern, the window is discarded and the replacement is queued for output;
 * otherwise the oldest byte of the window is passed through unchanged. Matches are found
 * left to right and do not overlap, and replacement output is never searched again.
 *
 * <p>Instances are not safe for use by multiple threads.
 */
class ReplacingInputStream extends FilterInputStream {

    /** Value returned by {@link InputStream#read()} at end of stream. */
    private static final int EOF = -1;

    /** Look-ahead window: unsigned byte values (0..255) already read from the source. */
    LinkedList<Integer> inQueue = new LinkedList<Integer>();

    /** Unsigned byte values that have been decided on and await delivery to the caller. */
    LinkedList<Integer> outQueue = new LinkedList<Integer>();

    final byte[] search, replacement;

    /** Set once the source has reported end of stream, so it is never read again. */
    private boolean sourceExhausted;

    /**
     * Creates a replacing stream.
     *
     * @param in          the stream to filter
     * @param search      the byte sequence to look for; must not be empty
     * @param replacement the bytes emitted in place of each match; {@code null} or an
     *                    empty array deletes the match
     * @throws NullPointerException     if {@code search} is {@code null}
     * @throws IllegalArgumentException if {@code search} is empty
     */
    protected ReplacingInputStream(InputStream in, byte[] search, byte[] replacement) {
        super(in);
        if (search == null) {
            throw new NullPointerException("search");
        }
        if (search.length == 0) {
            // An empty pattern would match without consuming input and never terminate.
            throw new IllegalArgumentException("search must not be empty");
        }
        this.search = search;
        this.replacement = replacement;
    }

    /** Returns whether the look-ahead window currently starts with the complete pattern. */
    private boolean isMatchFound() {
        Iterator<Integer> window = inQueue.iterator();
        for (byte expected : search) {
            // Queue entries are unsigned (0..255), so the signed pattern byte must be
            // masked before comparing; otherwise bytes >= 0x80 could never match.
            if (!window.hasNext() || (expected & 0xFF) != window.next()) {
                return false;
            }
        }
        return true;
    }

    /** Fills the look-ahead window up to the pattern length, or until the source ends. */
    private void readAhead() throws IOException {
        while (!sourceExhausted && inQueue.size() < search.length) {
            int next = super.read();
            if (next == EOF) {
                sourceExhausted = true;
            } else {
                inQueue.offer(next);
            }
        }
    }

    /**
     * Reads the next byte of the filtered stream.
     *
     * @return the next byte as a value in 0..255, or -1 at end of stream
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public int read() throws IOException {
        // Loop because a match with an empty replacement produces no output byte.
        while (outQueue.isEmpty()) {
            readAhead();
            if (isMatchFound()) {
                for (int i = 0; i < search.length; i++) {
                    inQueue.remove();
                }
                if (replacement != null) {
                    for (byte b : replacement) {
                        // Mask so 0xFF is delivered as 255 and not as the EOF marker -1.
                        outQueue.offer(b & 0xFF);
                    }
                }
            } else if (inQueue.isEmpty()) {
                return EOF;
            } else {
                outQueue.add(inQueue.remove());
            }
        }
        return outQueue.remove();
    }

    /**
     * Reads up to {@code len} filtered bytes into {@code b}. The inherited implementation
     * delegates straight to the wrapped stream and would bypass the replacement logic.
     *
     * @return the number of bytes stored, or -1 if the stream is already at its end
     * @throws NullPointerException      if {@code b} is {@code null}
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not fit in {@code b}
     * @throws IOException               if the wrapped stream fails
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException("b");
        }
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException();
        }
        if (len == 0) {
            return 0;
        }
        int count = 0;
        while (count < len) {
            int next = read();
            if (next == EOF) {
                break;
            }
            b[off + count] = (byte) next;
            count++;
        }
        return count == 0 ? EOF : count;
    }

    /** Skips filtered bytes by reading them, so that replacements are counted correctly. */
    @Override
    public long skip(long n) throws IOException {
        long skipped = 0;
        while (skipped < n && read() != EOF) {
            skipped++;
        }
        return skipped;
    }

    /** Mark/reset is not supported because the look-ahead state cannot be rewound. */
    @Override
    public boolean markSupported() {
        return false;
    }
}
给定字符串
“Hello xyz world”
的字节，它将打印：

Hello abc world

在字节流（
InputStream
）上没有任何用于搜索和替换的内置功能
而且，高效、正确地完成这项任务的方法目前还不明显。我已经为streams实现了Boyer-Moore算法，它工作得很好，但需要一些时间。如果没有这样的算法，你必须求助于蛮力方法，在那里你可能会很慢
即使您先把HTML解码为文本，用正则表达式做查找和替换也并不可靠，因为HTML不是“正则”语言

因此，尽管您遇到了一些困难，但我建议您继续使用原始方法将HTML解析为文档。当您在字符编码方面遇到问题时，从长远来看，修复正确的解决方案可能比修复错误的解决方案更容易。
以下方法可以奏效，但我不知道对性能的影响有多大

用
InputStreamReader
包装
InputStream

用替换字符串的
FilterReader
包装
InputStreamReader
，然后

用
ReaderInputStream
包装
FilterReader
选择适当的编码是至关重要的，否则流的内容将被破坏

如果您想使用正则表达式替换字符串，那么您可以使用我的一个工具，它是
FilterReader
的一个方便的替代品。您可以在Streamflyer的网页上找到字节流的示例。希望这能有所帮助。
我也需要类似的功能，于是决定自己实现一个解决方案，而不是使用上面@aioobe的示例。可以看一看这个实现。您可以从Maven Central获取该库，或者直接复制源代码
用法如下。在本例中，我嵌套使用了两个实例来替换两种模式，从而把DOS和Mac的行结束符都修正为UNIX的行结束符：

new ReplacingInputStream(new ReplacingInputStream(is, "\r\n", "\n"), "\r", "\n");
以下是完整的源代码：

/**
 * Simple {@link FilterInputStream} that replaces occurrences of a byte pattern with
 * something else.
 *
 * <p>Input is scanned left to right. Bytes that may belong to a match are held back until
 * the match either completes (the replacement is emitted instead) or fails (the first
 * held-back byte is emitted and the remaining ones are scanned again, so a match that
 * starts inside a failed partial match is still found). Replacement output is never
 * scanned again.
 *
 * <p>Instances are not safe for use by multiple threads.
 */
public class ReplacingInputStream extends FilterInputStream {

    // while matching, this is where the consumed input bytes go (unsigned values, or -1)
    int[] buf = null;
    // number of pattern bytes matched so far in the current attempt
    int matchedIndex = 0;
    // read position inside the pushed-back bytes that still have to be re-scanned
    int unbufferIndex = 0;
    // position of the next replacement byte to emit
    int replacedIndex = 0;

    // input handed back after a failed partial match; valid range [unbufferIndex, unbufferLimit)
    private final int[] unbuffer;
    private int unbufferLimit = 0;

    private final byte[] pattern;
    private final byte[] replacement;
    private State state = State.NOT_MATCHED;

    // simple state machine for keeping track of what we are doing
    private enum State {
        /** Scanning fresh input, nothing held back. */
        NOT_MATCHED,
        /** A prefix of the pattern has been matched. */
        MATCHING,
        /** A full match was found and its replacement is being emitted. */
        REPLACING,
        /** Re-scanning bytes that were handed back after a failed partial match. */
        UNBUFFER
    }

    /**
     * @param is input
     * @return nested replacing stream that replaces \r\n (DOS) and \r (MAC) line endings
     *         with UNIX ones "\n".
     */
    public static InputStream newLineNormalizingInputStream(InputStream is) {
        // The inner stream must collapse "\r\n" first; otherwise the outer stream would
        // turn the "\r" of a DOS line ending into a second newline.
        return new ReplacingInputStream(new ReplacingInputStream(is, "\r\n", "\n"), "\r", "\n");
    }

    /**
     * Replace occurrences of pattern in the input. Note: input is assumed to be UTF-8
     * encoded. If not the case use byte[] based pattern and replacement.
     *
     * @param in input
     * @param pattern pattern to replace.
     * @param replacement the replacement or null
     * @throws NullPointerException if {@code pattern} is null
     * @throws IllegalArgumentException if {@code pattern} is empty
     */
    public ReplacingInputStream(InputStream in, String pattern, String replacement) {
        this(in, pattern.getBytes(StandardCharsets.UTF_8),
                replacement == null ? null : replacement.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Replace occurrences of pattern in the input.
     *
     * @param in input
     * @param pattern pattern to replace
     * @param replacement the replacement or null; null or empty removes every match
     * @throws NullPointerException if {@code pattern} is null
     * @throws IllegalArgumentException if {@code pattern} is empty
     */
    public ReplacingInputStream(InputStream in, byte[] pattern, byte[] replacement) {
        super(in);
        if (pattern == null) {
            throw new NullPointerException("pattern");
        }
        if (pattern.length == 0) {
            throw new IllegalArgumentException("pattern length should be > 0");
        }
        // defensive copies: later changes to the caller's arrays must not affect matching
        this.pattern = pattern.clone();
        this.replacement = replacement == null ? null : replacement.clone();
        // we will never hold back more than the pattern length
        buf = new int[pattern.length];
        unbuffer = new int[pattern.length];
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        // copy of InputStream's logic; we need to call our own read() instead of
        // FilterInputStream's version, which delegates to the wrapped stream
        if (b == null) {
            throw new NullPointerException();
        } else if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException();
        } else if (len == 0) {
            return 0;
        }

        int c = read();
        if (c == -1) {
            return -1;
        }
        b[off] = (byte) c;

        int i = 1;
        try {
            for (; i < len; i++) {
                c = read();
                if (c == -1) {
                    break;
                }
                b[off + i] = (byte) c;
            }
        } catch (IOException ignored) {
            // Same contract as InputStream.read(byte[], int, int): once at least one byte
            // has been stored, a failure ends this call early and the bytes read so far
            // are returned.
        }
        return i;
    }

    @Override
    public int read(byte[] b) throws IOException {
        // call our own read
        return read(b, 0, b.length);
    }

    /**
     * Reads the next byte of the filtered stream.
     *
     * @return the next byte as a value in 0..255, or -1 at end of stream
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public int read() throws IOException {
        // Iterative on purpose: long runs of matches with an empty replacement must not
        // grow the call stack.
        while (true) {
            if (state == State.REPLACING) {
                // mask so that bytes >= 0x80 are returned as 128..255, never as -1
                int out = replacement[replacedIndex++] & 0xFF;
                if (replacedIndex == replacement.length) {
                    replacedIndex = 0;
                    state = idleState();
                }
                return out;
            }

            int next = nextInput();
            // the pattern byte is signed, the input value is unsigned: mask before comparing
            if (next == (pattern[matchedIndex] & 0xFF)) {
                buf[matchedIndex++] = next;
                if (matchedIndex < pattern.length) {
                    state = State.MATCHING;
                } else {
                    // we've found a full match!
                    matchedIndex = 0;
                    if (replacement == null || replacement.length == 0) {
                        // the replacement is empty, keep scanning
                        state = idleState();
                    } else {
                        replacedIndex = 0;
                        state = State.REPLACING;
                    }
                }
                continue;
            }

            if (matchedIndex == 0) {
                // plain byte (or end of stream) outside of any match
                state = idleState();
                return next;
            }

            // Mismatch after a partial match: only the first held-back byte is known not
            // to start a match. Emit it and scan everything after it again.
            int first = buf[0];
            buf[matchedIndex] = next;
            pushBack(matchedIndex + 1);
            matchedIndex = 0;
            state = idleState();
            return first;
        }
    }

    /** Skips filtered bytes by reading them, so that replacements are counted correctly. */
    @Override
    public long skip(long n) throws IOException {
        long skipped = 0;
        while (skipped < n && read() != -1) {
            skipped++;
        }
        return skipped;
    }

    /** Mark/reset is not supported because the matching state cannot be rewound. */
    @Override
    public boolean markSupported() {
        return false;
    }

    @Override
    public String toString() {
        return state.name() + " " + matchedIndex + " " + replacedIndex + " " + unbufferIndex;
    }

    /** Returns the next input value: pushed-back bytes first, then the wrapped stream. */
    private int nextInput() throws IOException {
        if (unbufferIndex < unbufferLimit) {
            return unbuffer[unbufferIndex++];
        }
        return super.read();
    }

    /**
     * Hands buf[1..consumed) back to the input, in front of any pushed-back bytes that
     * have not been re-read yet. The total never exceeds pattern.length - 1 entries.
     */
    private void pushBack(int consumed) {
        int unread = unbufferLimit - unbufferIndex;
        int rescan = consumed - 1;
        System.arraycopy(unbuffer, unbufferIndex, unbuffer, rescan, unread);
        System.arraycopy(buf, 1, unbuffer, 0, rescan);
        unbufferIndex = 0;
        unbufferLimit = rescan + unread;
    }

    /** State to report while no match or replacement is in progress. */
    private State idleState() {
        return unbufferIndex < unbufferLimit ? State.UNBUFFER : State.NOT_MATCHED;
    }
}

// NOTE: in the original listing this spot held a truncated machine-translated copy and a
// verbatim second copy of the ReplacingInputStream class that already appears earlier in
// this file. A second definition of the same public class cannot coexist with the first,
// so it is not repeated here.

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.Charset;

/**
 * An {@link InputStream} that replaces every occurrence of one byte sequence (the
 * "token") in a source stream with another byte sequence.
 *
 * <p>Input is scanned left to right. When a partial match fails, only the first byte of
 * the failed attempt is emitted and the following bytes are scanned again, so a token
 * that starts inside a failed partial match is still found. Replacement output is never
 * scanned again.
 *
 * <p>Instances are not safe for use by multiple threads.
 */
public class TokenReplacingStream extends InputStream {

    private final InputStream source;
    private final byte[] oldBytes;
    private final byte[] newBytes;

    /** Input handed back after a failed partial match; valid range [pushedBackPos, pushedBackEnd). */
    private final int[] pushedBack;
    private int pushedBackPos = 0;
    private int pushedBackEnd = 0;

    /** Number of token bytes matched so far in the current attempt. */
    private int tokenMatchIndex = 0;
    /** Position of the next replacement byte to emit while {@link #replacing} is set. */
    private int bytesIndex = 0;
    private boolean replacing;
    private int numberOfTokensReplaced = 0;

    /**
     * @param source   the stream to read from
     * @param oldBytes the token to look for; must not be empty
     * @param newBytes the bytes emitted in place of each token; {@code null} or an empty
     *                 array removes the token
     * @throws NullPointerException     if {@code oldBytes} is {@code null}
     * @throws IllegalArgumentException if {@code oldBytes} is empty
     */
    public TokenReplacingStream(InputStream source, byte[] oldBytes, byte[] newBytes) {
        if (oldBytes == null) {
            throw new NullPointerException("oldBytes");
        }
        if (oldBytes.length == 0) {
            // Checked explicitly: an assert is skipped unless the JVM runs with -ea.
            throw new IllegalArgumentException("oldBytes must not be empty");
        }
        this.source = source;
        this.oldBytes = oldBytes.clone();
        this.newBytes = newBytes == null ? new byte[0] : newBytes.clone();
        this.pushedBack = new int[oldBytes.length];
    }

    /**
     * Reads the next byte of the filtered stream.
     *
     * @return the next byte as a value in 0..255, or -1 at end of stream
     * @throws IOException if the source stream fails
     */
    @Override
    public int read() throws IOException {
        // Iterative on purpose: long runs of tokens with an empty replacement must not
        // grow the call stack.
        while (true) {
            if (replacing) {
                // mask so that bytes >= 0x80 are returned as 128..255, never as -1
                int out = newBytes[bytesIndex++] & 0xFF;
                if (bytesIndex == newBytes.length) {
                    bytesIndex = 0;
                    replacing = false;
                }
                return out;
            }

            int b = nextSourceByte();
            // the token byte is signed, the input value is unsigned: mask before comparing
            if (b == (oldBytes[tokenMatchIndex] & 0xFF)) {
                tokenMatchIndex++;
                if (tokenMatchIndex == oldBytes.length) {
                    tokenMatchIndex = 0;
                    numberOfTokensReplaced++;
                    replacing = newBytes.length > 0;
                }
                continue;
            }

            if (tokenMatchIndex == 0) {
                // plain byte (or end of stream) outside of any match
                return b;
            }

            // Mismatch after a partial match: the bytes consumed so far equal the start of
            // the token. Emit the first one and scan the rest, plus the mismatch, again.
            pushBack(tokenMatchIndex, b);
            tokenMatchIndex = 0;
            return oldBytes[0] & 0xFF;
        }
    }

    @Override
    public void close() throws IOException {
        source.close();
    }

    /**
     * @return how many tokens have been matched so far; a token is counted as soon as its
     *         last byte has been read from the source
     */
    public int getNumberOfTokensReplaced() {
        return numberOfTokensReplaced;
    }

    /** Returns the next input value: pushed-back bytes first, then the source stream. */
    private int nextSourceByte() throws IOException {
        if (pushedBackPos < pushedBackEnd) {
            return pushedBack[pushedBackPos++];
        }
        return source.read();
    }

    /**
     * Queues token bytes 1..matched-1 followed by the mismatching value for re-scanning,
     * in front of any pushed-back values that have not been re-read yet. The total never
     * exceeds oldBytes.length - 1 entries.
     */
    private void pushBack(int matched, int mismatch) {
        int unread = pushedBackEnd - pushedBackPos;
        System.arraycopy(pushedBack, pushedBackPos, pushedBack, matched, unread);
        for (int i = 1; i < matched; i++) {
            pushedBack[i - 1] = oldBytes[i] & 0xFF;
        }
        pushedBack[matched - 1] = mismatch;
        pushedBackPos = 0;
        pushedBackEnd = matched + unread;
    }
}

/**
 * Copy-with-replace helpers for streams and readers. The methods appeared without an
 * enclosing class in the original listing; Java has no top-level functions, so they are
 * hosted in this holder class.
 */
final class StreamReplacer {

    private StreamReplacer() {
        // static helpers only
    }

    /**
     * Copies {@code in} to {@code out}, replacing every occurrence of {@code search} with
     * {@code replace}. Bytes are decoded and encoded with the platform default charset;
     * use the overload taking a {@link Charset} to control the encoding.
     *
     * @throws IllegalArgumentException if {@code search} is empty
     * @throws IOException              if reading or writing fails
     */
    public static void replaceStream(InputStream in, OutputStream out, String search, String replace)
            throws IOException {
        replaceStream(in, out, search, replace, Charset.defaultCharset());
    }

    /**
     * Copies {@code in} to {@code out}, replacing every occurrence of {@code search} with
     * {@code replace}, using {@code charset} for decoding and encoding. Neither stream is
     * closed.
     *
     * @throws IllegalArgumentException if {@code search} is empty
     * @throws IOException              if reading or writing fails
     */
    public static void replaceStream(InputStream in, OutputStream out, String search, String replace,
            Charset charset) throws IOException {
        Writer writer = new OutputStreamWriter(out, charset);
        replaceStream(new InputStreamReader(in, charset), writer, search, replace);
        // The writer buffers encoded output and is not reachable by the caller; without
        // this flush the tail of the output (or all of it) would never reach 'out'.
        writer.flush();
    }

    /**
     * Copies {@code in} to {@code out}, replacing every occurrence of {@code search} with
     * {@code replace}. Matches are found left to right and do not overlap. Neither
     * argument is closed or flushed.
     *
     * @throws IllegalArgumentException if {@code search} is empty
     * @throws IOException              if reading or writing fails
     */
    public static void replaceStream(Reader in, Writer out, String search, String replace)
            throws IOException {
        char[] searchChars = search.toCharArray();
        if (searchChars.length == 0) {
            throw new IllegalArgumentException("search must not be empty");
        }

        // Values handed back after a failed partial match; valid range [pos, end).
        int[] pushedBack = new int[searchChars.length];
        int pos = 0;
        int end = 0;
        int matched = 0;

        while (true) {
            int c = pos < end ? pushedBack[pos++] : in.read();

            if (c == searchChars[matched]) {
                // The char continues the pattern.
                matched++;
                if (matched == searchChars.length) {
                    out.write(replace);
                    matched = 0;
                }
                continue;
            }

            if (matched > 0) {
                // Partial match failed: pass its first char through and scan the rest,
                // plus the mismatching value, again.
                out.write(searchChars[0]);
                int unread = end - pos;
                System.arraycopy(pushedBack, pos, pushedBack, matched, unread);
                for (int i = 1; i < matched; i++) {
                    pushedBack[i - 1] = searchChars[i];
                }
                pushedBack[matched - 1] = c;
                pos = 0;
                end = matched + unread;
                matched = 0;
                continue;
            }

            if (c == -1) {
                // End of input is -1 only; a NUL char (0) is ordinary data.
                break;
            }
            out.write(c);
        }
    }
}