String 计算给定长度字符串可能的最大运行次数
几周前,伦比克 字符串String 计算给定长度字符串可能的最大运行次数,string,algorithm,math,String,Algorithm,Math,几周前,伦比克 字符串w的周期p是任何正整数p,使得w[i]=w[i+p] 只要定义了方程的两边。让per(w)表示 w的最小周期的大小。我们说字符串w是 周期性iffper(w)部分回答。这个想法是从Boyer-Moore字符串搜索算法中选取一个页面,进行适当的修改,以使要匹配的字符串来自源字符串 考虑长度为n的字符串查找周期k的运行的问题,其中2k
w
的周期p
是任何正整数p
,使得w[i]=w[i+p]
只要定义了方程的两边。让per(w)
表示
w
的最小周期的大小。我们说字符串w
是
周期性iff
per(w)部分回答。这个想法是从Boyer-Moore字符串搜索算法中选取一个页面,进行适当的修改,以使要匹配的字符串来自源字符串
考虑长度为n
的字符串查找周期k
的运行的问题,其中2k
。如果这个问题有一个多项式时间算法,那么一般问题也有一个。只需为每个2运行一次这样的算法即可找到所有解决方案
想法
在每个字符串中,最后一个字符只能用于有限的运行次数
最后一个0只能将运行添加到
10 + 0 => 100
从年起
00 + 0 => 000
这只是重复。Anf如果它添加了最小跑步,那么下一个可能添加的最小跑步是
110010 + 0 => 1100100
再记
010010 + 0 => 0100100
这不是一次额外的跑步,而是一次重复。下一个可能的补充是
111001001100100
1111001001100100111001001100100
...
数字可以变化,但最小长度为
3, 7, 15, 31
那是
4^1 - 1, 4^2 - 1, ..., 4^n - 1
在字符串开头不需要其他字符,因此
maxaddlast = 4^n - 2
生成通过添加最后一个字符可以添加的最大运行次数
算法
- 搜索长度n时,所有变量都会记录在[maxNumberOfRuns-maxaddlast(n+1),maxNumberOfRuns]中的运行计数中
- 要找到n+1的maxNumberOfRuns解决方案,只需将所有记录的变量扩展为0和1并选中即可
种子
剩下的问题是调整堆栈大小以收集未来种子所需的所有变体
由于没有足够的数据来猜测有效公式,因此选择了自适应算法:
n的初始堆栈大小是从n-1猜测出来的
对于每个解决方案,都会检查使用的堆栈大小,以确保堆栈中始终有1的空间
如果堆栈在某个n处被完全使用,堆栈大小将增加,计算将在n处重新开始
结果
length 104 with 91 runs
在600秒内到达。然后使用默认设置耗尽内存。使用-Xmx16G或更多。对于较大的数字,必须修改代码以将种子保留在磁盘上,而不是内存中
而且它比蛮力法快得多
**代码**
下面是我的Java示例代码:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import de.bb.util.Pair;
/**
* A search algorithm to find all runs for increasing lengths of strings of 0s
* and 1s.
*
* This algorithm uses a seed to generate the candidates for the next search.
* The seed contains the solutions for rho(n), rho(n) - 1, ..., minstart(n).
* Since the seed size is unknown, it starts with a minimal seed: minstart(n) =
* rho(n) - 1; After the solutions are calculated the all seeds are checked. If
* a seed with minstart(n) was used, that minstart(n) gets decremented and the
* search is restarted at position n + 1. This guarantees that the seed is
* always large enough.
*
* Optional TODO: Since the seed can occupy large amounts of memory, the seed is
* maintained on disk.
*
* @author Stefan "Bebbo" Franke (c) 2016
*/
public class MaxNumberOfRunsAdaptive {
private static long start;
private ArrayList<Pair<byte[], ArrayList<Integer>>> seed = new ArrayList<>();
private int max;
private ArrayList<ArrayList<Pair<byte[], ArrayList<Integer>>>> nextSeedStack;
private ArrayList<Integer> maxs = new ArrayList<>();
private ArrayList<Integer> diffs = new ArrayList<>();
private ArrayList<Integer> totals = new ArrayList<>();
private int total;
private byte[] buffer;
public static void main(String[] args) {
int limit = 9999;
if (args.length == 1) {
try {
limit = Integer.parseInt(args[0]);
} catch (Exception e) {
}
}
start = System.currentTimeMillis();
new MaxNumberOfRunsAdaptive().run(limit);
long took = (System.currentTimeMillis() - start) / 100;
System.out.println("took " + (took / 10.) + "s");
}
/**
* Find a string with the max number of runs for all lengths from 2 to
* limit;
*
* @param limit
* the limit to stop calculation.
*/
private void run(int limit) {
maxs.add(0);
maxs.add(0);
diffs.add(0);
diffs.add(1);
totals.add(0);
totals.add(0);
ArrayList<Integer> n0 = new ArrayList<Integer>();
n0.add(0);
seed.add(Pair.makePair(new byte[] { '0' }, n0));
saveSeed(2);
for (int i = 2; i <= limit;) {
int restart = compose(i);
if (restart < i) {
System.out.println("*** restarting at: " + restart + " ***");
i = restart;
loadSeed(i);
total = totals.get(i - 1);
} else {
saveSeed(i + 1);
++i;
}
}
}
/**
* Load the seed for the length from disk.
*
* @param length
*/
private void loadSeed(int length) {
try {
seed.clear();
final FileReader fr = new FileReader("seed-" + length + ".txt");
final BufferedReader br = new BufferedReader(fr);
for (String line = br.readLine(); line != null; line = br.readLine()) {
final int space = line.indexOf(' ');
final byte[] b = line.substring(0, space).getBytes();
final String sends = line.substring(space + 2, line.length() - 1);
final ArrayList<Integer> ends = new ArrayList<>();
for (final String s : sends.split(",")) {
ends.add(Integer.parseInt(s.trim()));
}
seed.add(Pair.makePair(b, ends));
}
fr.close();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
* Save the seed for the given length to the disk.
*
* @param length
* the length
*/
private void saveSeed(int length) {
try {
final FileWriter fos = new FileWriter("seed-" + length + ".txt");
for (final Pair<byte[], ArrayList<Integer>> p : seed) {
fos.write(new String(p.getFirst()) + " " + p.getSecond().toString() + "\n");
}
fos.close();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
* Compose new strings from all available bases. Also collect the candidates
* for the next base.
*/
private int compose(int length) {
max = 0;
int nextStackSize;
if (diffs.size() > length)
nextStackSize = diffs.get(length) + 1;
else
nextStackSize = diffs.get(length - 1) - 1;
if (nextStackSize < 2)
nextStackSize = 2;
// setup collector for next bases
nextSeedStack = new ArrayList<>();
for (int i = 0; i < nextStackSize; ++i) {
nextSeedStack.add(new ArrayList<Pair<byte[], ArrayList<Integer>>>());
}
buffer = new byte[length];
// extend the bases
for (Pair<byte[], ArrayList<Integer>> e : seed) {
final byte[] s = e.getFirst();
System.arraycopy(s, 0, buffer, 0, length - 1);
if (s.length < 3 || s[s.length - 1] == '1' || s[s.length - 2] == '1' || s[s.length - 3] == '1') {
buffer[length - 1] = '0';
test(length, e.getSecond());
}
if (s.length < 3 || s[s.length - 1] == '0' || s[s.length - 2] == '0' || s[s.length - 3] == '0') {
buffer[length - 1] = '1';
test(length, e.getSecond());
}
}
long took = (System.currentTimeMillis() - start) / 100;
final ArrayList<String> solutions = new ArrayList<String>();
for (Pair<byte[], ArrayList<Integer>> p : nextSeedStack.get(nextSeedStack.size() - 1)) {
solutions.add(new String(p.getFirst()));
}
total += solutions.size();
if (totals.size() <= length)
totals.add(0);
totals.set(length, total);
if (maxs.size() <= length) {
maxs.add(0);
}
maxs.set(length, max);
System.out.println(length + " " + max + " " + (took / 10.) + " " + total + " " + solutions);
seed.clear();
// setup base for next level
for (ArrayList<Pair<byte[], ArrayList<Integer>>> t : nextSeedStack) {
seed.addAll(t);
}
if (diffs.size() <= length) {
diffs.add(1);
}
int restart = length;
// check for restart
for (final String b : solutions) {
for (int i = 2; i < b.length(); ++i) {
int diff = maxs.get(i) - countRuns(b.substring(0, i));
if (diff >= diffs.get(i)) {
if (i < restart)
restart = i;
diffs.set(i, diff + 1);
}
}
}
System.out.println(diffs);
return restart;
}
/**
* Test the current buffer and at it to the next seed stack,
*
* @param l
* the current length
* @param endRuns
* the end runs to store
*/
void test(final int l, final ArrayList<Integer> endRuns) {
final ArrayList<Integer> r = incrementalCountRuns(l, endRuns);
final int n = r.get(r.size() - 1);
// shift the nextBaseStack
while (max < n) {
nextSeedStack.remove(0);
nextSeedStack.add(new ArrayList<Pair<byte[], ArrayList<Integer>>>());
++max;
}
// add to set in stack, if in stack
final int index = nextSeedStack.size() - 1 - max + n;
if (index >= 0)
nextSeedStack.get(index).add(Pair.makePair(buffer.clone(), r));
}
/**
* Find incremental the runs incremental.
*
* @param l
* the lengths
* @param endRuns
* the runs of length-1 ending at length -1
* @return a new array containing the end runs plus the length
*/
private ArrayList<Integer> incrementalCountRuns(final int l, final ArrayList<Integer> endRuns) {
final ArrayList<Integer> res = new ArrayList<Integer>();
int sz = endRuns.size();
// last end run dummy - contains the run count
int n = endRuns.get(--sz);
int pos = 0;
for (int i = l - 2; i >= 0; i -= 2) {
int p = (l - i) / 2;
// found something ?
if (equals(buffer, i, buffer, i + p, p)) {
while (i > 0 && buffer[i - 1] == buffer[i - 1 + p]) {
--i;
}
int lasti = -1;
while (pos < sz) {
lasti = endRuns.get(pos);
if (lasti <= i)
break;
lasti = -1;
++pos;
}
if (lasti != i)
++n;
res.add(i);
}
}
res.add(n);
return res;
}
/**
* Compares one segment of a byte array with a segment of a 2nd byte array.
*
* @param a
* first byte array
* @param aOff
* offset into first byte array
* @param b
* second byte array
* @param bOff
* offset into second byte array
* @param len
* length of the compared segments
* @return true if the segments are equal, otherwise false
*/
public final static boolean equals(byte a[], int aOff, byte b[], int bOff, int len) {
if (a == null || b == null)
return a == b;
while (len-- > 0)
if (a[aOff + len] != b[bOff + len])
return false;
return true;
}
/**
* Simple slow stupid method to count the runs in a String.
*
* @param s
* the string
* @return the count of runs.
*/
static int countRuns(String s) {
int n = 0;
int l = s.length();
for (int i = 0; i < l - 1; ++i) {
for (int k = i + 1; k < l; ++k) {
int p = 0;
while (i + p < k && k + p < l) {
if (s.charAt(i + p) != s.charAt(k + p))
break;
++p;
}
if (i + p == k) {
int jj = k + p - 1;
if (i > 0 && s.charAt(i - 1) == s.charAt(i - 1 + p)) {
continue;
}
while (jj + 1 < l && s.charAt(jj + 1) == s.charAt(jj + 1 - p)) {
++jj;
++k;
}
++n;
}
}
}
return n;
}
}
导入java.io.BufferedReader;
导入java.io.FileReader;
导入java.io.FileWriter;
导入java.util.ArrayList;
导入de.bb.util.Pair;
/**
*查找0字符串长度递增的所有运行的搜索算法
*和1s。
*
*该算法使用种子为下一次搜索生成候选项。
*种子包含rho(n)、rho(n)-1、…、minstart(n)的溶液。
*由于种子大小未知,因此它从最小种子开始:minstart(n)=
*rho(n)-1;计算溶液后,检查所有种子。如果
*使用了一个带有minstart(n)的种子,该minstart(n)递减,并且
*在位置n+1处重新开始搜索。这保证了种子是安全的
*总是足够大。
*
*可选TODO:由于种子可能占用大量内存,因此种子是
*在磁盘上维护。
*
*@作者Stefan“Bebbo”Franke(c)2016
*/
公共类MaxNumberOfRunsAdaptive{
私有静态长启动;
private ArrayList seed=new ArrayList();
私人int max;
私有ArrayList nextSeedStack;
private ArrayList maxs=new ArrayList();
private ArrayList diff=新的ArrayList();
私有ArrayList总计=新ArrayList();
私人整数合计;
专用字节[]缓冲区;
公共静态void main(字符串[]args){
整数极限=9999;
如果(args.length==1){
试一试{
limit=Integer.parseInt(args[0]);
}捕获(例外e){
}
}
start=System.currentTimeMillis();
新的MaxNumberOfRunsAdaptive().run(限制);
长时间=(System.currentTimeMillis()-start)/100;
System.out.println(“take”+(take/10.)+“s”);
}
/**
*查找一个字符串,其所有长度的最大运行次数从2到
*限制;
*
*@param限制
*停止计算的限制。
*/
私有无效运行(整数限制){
最大相加(0);
最大相加(0);
差异相加(0);
差异添加(1);
总计。添加(0);
总计。添加(0);
ArrayList n0=新的ArrayList();
添加(0);
seed.add(Pair.makePair(新字节[]{0'},n0));
种子保存(2);
对于(int i=2;i长度)
nextStackSize=diff.get(长度)+1;
其他的
nextStackSize=diff.get(长度-1)-1;
如果(下一个堆栈大小<2)
nextStackSize=2;
//为下一个基地设置收集器
nextSeedStack=newarraylist();
对于(int i=0;ilength 104 with 91 runs
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import de.bb.util.Pair;
/**
* A search algorithm to find all runs for increasing lengths of strings of 0s
* and 1s.
*
* This algorithm uses a seed to generate the candidates for the next search.
* The seed contains the solutions for rho(n), rho(n) - 1, ..., minstart(n).
* Since the seed size is unknown, it starts with a minimal seed: minstart(n) =
* rho(n) - 1; After the solutions are calculated the all seeds are checked. If
* a seed with minstart(n) was used, that minstart(n) gets decremented and the
* search is restarted at position n + 1. This guarantees that the seed is
* always large enough.
*
* Optional TODO: Since the seed can occupy large amounts of memory, the seed is
* maintained on disk.
*
* @author Stefan "Bebbo" Franke (c) 2016
*/
public class MaxNumberOfRunsAdaptive {
private static long start;
private ArrayList<Pair<byte[], ArrayList<Integer>>> seed = new ArrayList<>();
private int max;
private ArrayList<ArrayList<Pair<byte[], ArrayList<Integer>>>> nextSeedStack;
private ArrayList<Integer> maxs = new ArrayList<>();
private ArrayList<Integer> diffs = new ArrayList<>();
private ArrayList<Integer> totals = new ArrayList<>();
private int total;
private byte[] buffer;
public static void main(String[] args) {
int limit = 9999;
if (args.length == 1) {
try {
limit = Integer.parseInt(args[0]);
} catch (Exception e) {
}
}
start = System.currentTimeMillis();
new MaxNumberOfRunsAdaptive().run(limit);
long took = (System.currentTimeMillis() - start) / 100;
System.out.println("took " + (took / 10.) + "s");
}
/**
* Find a string with the max number of runs for all lengths from 2 to
* limit;
*
* @param limit
* the limit to stop calculation.
*/
private void run(int limit) {
maxs.add(0);
maxs.add(0);
diffs.add(0);
diffs.add(1);
totals.add(0);
totals.add(0);
ArrayList<Integer> n0 = new ArrayList<Integer>();
n0.add(0);
seed.add(Pair.makePair(new byte[] { '0' }, n0));
saveSeed(2);
for (int i = 2; i <= limit;) {
int restart = compose(i);
if (restart < i) {
System.out.println("*** restarting at: " + restart + " ***");
i = restart;
loadSeed(i);
total = totals.get(i - 1);
} else {
saveSeed(i + 1);
++i;
}
}
}
/**
* Load the seed for the length from disk.
*
* @param length
*/
private void loadSeed(int length) {
try {
seed.clear();
final FileReader fr = new FileReader("seed-" + length + ".txt");
final BufferedReader br = new BufferedReader(fr);
for (String line = br.readLine(); line != null; line = br.readLine()) {
final int space = line.indexOf(' ');
final byte[] b = line.substring(0, space).getBytes();
final String sends = line.substring(space + 2, line.length() - 1);
final ArrayList<Integer> ends = new ArrayList<>();
for (final String s : sends.split(",")) {
ends.add(Integer.parseInt(s.trim()));
}
seed.add(Pair.makePair(b, ends));
}
fr.close();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
* Save the seed for the given length to the disk.
*
* @param length
* the length
*/
private void saveSeed(int length) {
try {
final FileWriter fos = new FileWriter("seed-" + length + ".txt");
for (final Pair<byte[], ArrayList<Integer>> p : seed) {
fos.write(new String(p.getFirst()) + " " + p.getSecond().toString() + "\n");
}
fos.close();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
/**
* Compose new strings from all available bases. Also collect the candidates
* for the next base.
*/
private int compose(int length) {
max = 0;
int nextStackSize;
if (diffs.size() > length)
nextStackSize = diffs.get(length) + 1;
else
nextStackSize = diffs.get(length - 1) - 1;
if (nextStackSize < 2)
nextStackSize = 2;
// setup collector for next bases
nextSeedStack = new ArrayList<>();
for (int i = 0; i < nextStackSize; ++i) {
nextSeedStack.add(new ArrayList<Pair<byte[], ArrayList<Integer>>>());
}
buffer = new byte[length];
// extend the bases
for (Pair<byte[], ArrayList<Integer>> e : seed) {
final byte[] s = e.getFirst();
System.arraycopy(s, 0, buffer, 0, length - 1);
if (s.length < 3 || s[s.length - 1] == '1' || s[s.length - 2] == '1' || s[s.length - 3] == '1') {
buffer[length - 1] = '0';
test(length, e.getSecond());
}
if (s.length < 3 || s[s.length - 1] == '0' || s[s.length - 2] == '0' || s[s.length - 3] == '0') {
buffer[length - 1] = '1';
test(length, e.getSecond());
}
}
long took = (System.currentTimeMillis() - start) / 100;
final ArrayList<String> solutions = new ArrayList<String>();
for (Pair<byte[], ArrayList<Integer>> p : nextSeedStack.get(nextSeedStack.size() - 1)) {
solutions.add(new String(p.getFirst()));
}
total += solutions.size();
if (totals.size() <= length)
totals.add(0);
totals.set(length, total);
if (maxs.size() <= length) {
maxs.add(0);
}
maxs.set(length, max);
System.out.println(length + " " + max + " " + (took / 10.) + " " + total + " " + solutions);
seed.clear();
// setup base for next level
for (ArrayList<Pair<byte[], ArrayList<Integer>>> t : nextSeedStack) {
seed.addAll(t);
}
if (diffs.size() <= length) {
diffs.add(1);
}
int restart = length;
// check for restart
for (final String b : solutions) {
for (int i = 2; i < b.length(); ++i) {
int diff = maxs.get(i) - countRuns(b.substring(0, i));
if (diff >= diffs.get(i)) {
if (i < restart)
restart = i;
diffs.set(i, diff + 1);
}
}
}
System.out.println(diffs);
return restart;
}
/**
* Test the current buffer and at it to the next seed stack,
*
* @param l
* the current length
* @param endRuns
* the end runs to store
*/
void test(final int l, final ArrayList<Integer> endRuns) {
final ArrayList<Integer> r = incrementalCountRuns(l, endRuns);
final int n = r.get(r.size() - 1);
// shift the nextBaseStack
while (max < n) {
nextSeedStack.remove(0);
nextSeedStack.add(new ArrayList<Pair<byte[], ArrayList<Integer>>>());
++max;
}
// add to set in stack, if in stack
final int index = nextSeedStack.size() - 1 - max + n;
if (index >= 0)
nextSeedStack.get(index).add(Pair.makePair(buffer.clone(), r));
}
/**
* Find incremental the runs incremental.
*
* @param l
* the lengths
* @param endRuns
* the runs of length-1 ending at length -1
* @return a new array containing the end runs plus the length
*/
private ArrayList<Integer> incrementalCountRuns(final int l, final ArrayList<Integer> endRuns) {
final ArrayList<Integer> res = new ArrayList<Integer>();
int sz = endRuns.size();
// last end run dummy - contains the run count
int n = endRuns.get(--sz);
int pos = 0;
for (int i = l - 2; i >= 0; i -= 2) {
int p = (l - i) / 2;
// found something ?
if (equals(buffer, i, buffer, i + p, p)) {
while (i > 0 && buffer[i - 1] == buffer[i - 1 + p]) {
--i;
}
int lasti = -1;
while (pos < sz) {
lasti = endRuns.get(pos);
if (lasti <= i)
break;
lasti = -1;
++pos;
}
if (lasti != i)
++n;
res.add(i);
}
}
res.add(n);
return res;
}
/**
* Compares one segment of a byte array with a segment of a 2nd byte array.
*
* @param a
* first byte array
* @param aOff
* offset into first byte array
* @param b
* second byte array
* @param bOff
* offset into second byte array
* @param len
* length of the compared segments
* @return true if the segments are equal, otherwise false
*/
public final static boolean equals(byte a[], int aOff, byte b[], int bOff, int len) {
if (a == null || b == null)
return a == b;
while (len-- > 0)
if (a[aOff + len] != b[bOff + len])
return false;
return true;
}
/**
* Simple slow stupid method to count the runs in a String.
*
* @param s
* the string
* @return the count of runs.
*/
static int countRuns(String s) {
int n = 0;
int l = s.length();
for (int i = 0; i < l - 1; ++i) {
for (int k = i + 1; k < l; ++k) {
int p = 0;
while (i + p < k && k + p < l) {
if (s.charAt(i + p) != s.charAt(k + p))
break;
++p;
}
if (i + p == k) {
int jj = k + p - 1;
if (i > 0 && s.charAt(i - 1) == s.charAt(i - 1 + p)) {
continue;
}
while (jj + 1 < l && s.charAt(jj + 1) == s.charAt(jj + 1 - p)) {
++jj;
++k;
}
++n;
}
}
}
return n;
}
}