C++ 在位数组中查找N 1位的字符串
正如标题所说,我想在可变大小(M)的位数组中找到一段由 n 个连续 1 位组成的串。(标签:c++、c、bit-manipulation)
通常的用例是使用查找表展开内部循环 字节有四类:
00000001 - // Bytes ending with one or more 1's. These start a run.
11111111 - // All 1's. These continue a run.
10000000 - // Bytes starting with 1's but ending with 0's. These end a run.
10111000 - // All the rest. These can be enders or short runs.
制作一个查找表,让您能够区分这些。然后一次处理一个字节的位数组
编辑
我希望对查找表的内容不那么含糊。具体来说,我建议您需要三个表,每个表有256个条目,用于以下特性:
Number of bits set.
Number of bits set before first zero.
Number of bits set after last zero.
根据您的具体做法,您可能不需要第一个表。
int nr = 0;
for ( int i = 0; i < M; ++i )
{
if ( bits[i] )
++nr;
else
{
nr = 0; continue;
}
if ( nr == n ) return i - nr + 1; // start position
}
对于(int i=0;i
你说的暴力是什么意思?O(M*N)还是这个O(M)解?如果你是这个意思,那么我不确定你还能优化多少东西
的确,我们可以通过遍历每个字节而不是每个位来实现不断的改进。我想到了这一点:
当我说byte时,我指的是N位的序列
// Probe only every N-th bit: N consecutive positions always contain a
// multiple of N, so any run of N 1s must cover one of the probed bits.
for ( int i = 0; i < M; i += N )
{
    if ( bits[i] == 0 ) // a probed 0 cannot be part of a run; try the next probe
        continue;

    // Count the 1s that end at bit i (bit i itself included), walking backwards.
    int nrprev = 0;
    while ( i - nrprev >= 0 && bits[i - nrprev] ) ++nrprev;

    // Count the 1s that follow bit i. The bounds test comes first so that
    // bits[] is never read at or beyond index M, and the scan stops as soon
    // as the two counts together reach N.
    int nrcurr = 0;
    while ( i + nrcurr + 1 < M && nrcurr + nrprev < N && bits[i + nrcurr + 1] ) ++nrcurr;

    if ( nrcurr + nrprev >= N ) // the run starts at the first of the nrprev bits
        return i - nrprev + 1;
}
for(int i=0;i=0&&bits[i-nrprov])++nrprov;
//搜索当前字节中的位;
int nrcur=0;
while(位[i+nrcur+1]&&nrcur+nrprov=N)//从i-nrprov+1开始的解决方案。
返回i-nrprov+1;
}
没有测试。可能需要一些额外的条件来确保正确性,但这个想法似乎是合理的。我在运行MIPS内核的嵌入式设备上做了类似的事情。MIPS体系结构包括
CLZ
指令(“计数前导零”),该指令将返回指定寄存器的前导零位数。如果需要计算前导一位,只需在调用CLZ
之前反转数据即可
例如,假设您有一个C语言函数CLZ
作为汇编指令的别名:
// Walk the word from its most significant end, alternately skipping a block
// of 0s and measuring the block of 1s that follows it.
// After the loop: a run was found iff numbits >= N, and it starts
// (totalbits - numbits) bits from the most significant end.
unsigned numbits = 0, totalbits = 0;
while (data != 0 && numbits < N) { // "< N": a run longer than N also qualifies
    numbits = CLZ(data); // count leading zeroes
    data <<= numbits; // shift off leading zeroes (data != 0, so numbits < width)
    totalbits += numbits; // keep track of how many bits we've shifted off
    numbits = CLZ(~data); // count leading ones
    // If every remaining bit is 1 the count equals the width of data, and a
    // shift by the full width is undefined in C, so clear data explicitly.
    // NOTE(review): assumes CLZ(0) returns the operand width - confirm for your wrapper.
    data = (numbits < 8 * sizeof data) ? data << numbits : 0; // shift off leading ones
    totalbits += numbits; // keep track of how many bits we've shifted off
}
unsigned numbits=0,totalbits=0;
while(数据!=0&&numbits!=N){
numbits=CLZ(data);//计算前导零
简单回答:
给定要检查的值 V,取 N 个 M 位宽的寄存器。对于 0 ≤ n < N 的每个 n,将第 n 个寄存器设置为 V >> n。
将这 N 个寄存器按位 AND 的结果存入另一个 M 位宽的寄存器。然后只需找到该寄存器中被置位的位,它们就是所有满足条件的连续 1 位串的起点
我确信,如果您没有m
位宽寄存器,您可以将其调整为更小的寄存器大小。这很容易解决,并且您不需要计数零指令
y = x ^ x-1
为您提供一个由1组成的字符串,最大为x
中的最低有效1位
y + 1
是下一个单独的位,可以是1或0,并且
x ^ x-(y+1)
给出从该位到下一个1位的一串1
然后您可以将搜索模式乘以(y+1)并递归
我正在研究提取字符串的算法…等等
是的…很容易解决…当我在研究这个问题时,注意还有一个技巧。如果你把一个单词分成n
位的子串,那么一系列≥2n-1
1必须至少覆盖一个子串。为简单起见,假设子串为4位,字为32位。您可以同时检查子串以快速过滤输入:
// The lowest bit of every 4-bit substring is set, so one addition adds 1 to each substring.
const unsigned int word_starts = 0x11111111;
unsigned int word = whatever; // stand-in for the 32-bit word being tested
// A substring made of four 1s overflows and carries into the lowest bit of the next substring.
unsigned int flips = word + word_starts;
// Pseudo-code: the carry out of the topmost substring is lost from the 32-bit sum and must be tested separately.
if ( carry bit from previous addition ) return true;
// Without an incoming carry the lowest bit of a substring flips; with one it keeps its value.
// The expression therefore leaves a 1 at every substring start that received a carry.
return ~ ( word ^ flips ) & word_starts;
这是因为,在加法之后,flips 中与 word_starts 的 1 位对应的每一位(最低的那一位除外),都等于(根据二进制加法的定义)word 的对应位取反后,再与来自低一级子串的进位异或的结果
你可以通过异或、反运算、和运算来提取进位。如果没有设置进位,1字符串将不存在
不幸的是,您必须检查最后的进位,这是C无法做到的,但大多数处理器都可以做到。如果您在英特尔兼容的平台上,则需要检查BSF(位向前扫描)和BSR(位反向扫描)asm指令可以帮助您删除第一个和最后一个零位。这将比暴力方法更有效。对于您正在做的事情来说,这可能有点过头了,但我需要一些重量级的东西来进行自定义文件系统块分配。如果N<32,则可以删除代码的后半部分
为了向后兼容,第一个字的最高有效位被视为位0
请注意,该算法在末尾使用一个哨兵字(全零)停止任何搜索,而不是持续检查数组的结尾。还请注意,该算法允许从位数组中的任何位置开始搜索(通常是最后一次成功分配的结尾)而不是总是从位数组的开头开始
提供您自己的特定于编译器的msbit32()函数
/* leftMask(x): 32-bit mask with the x most significant bits set, valid for 1 <= x <= 32. */
#define leftMask(x) ((uint32_t)(UINT32_C(0xFFFFFFFF) << (32u - (unsigned int)(x))))
/* rightMask(x): 32-bit mask with the x least significant bits set, valid for 1 <= x <= 32. */
#define rightMask(x) ((uint32_t)(UINT32_C(0xFFFFFFFF) >> (32u - (unsigned int)(x))))

/* Index (0 = least significant bit) of the highest set bit of v; v must be non-zero.
 *
 * Portable stand-in for the compiler-specific msbit32(); swap in an intrinsic
 * (count-leading-zeros style) if the target offers one.
 */
static unsigned int findBitRunMsb(uint32_t v)
{
    unsigned int idx = 0;

    if (v & UINT32_C(0xFFFF0000)) { v >>= 16; idx += 16; }
    if (v & UINT32_C(0x0000FF00)) { v >>= 8;  idx += 8; }
    if (v & UINT32_C(0x000000F0)) { v >>= 4;  idx += 4; }
    if (v & UINT32_C(0x0000000C)) { v >>= 2;  idx += 2; }
    if (v & UINT32_C(0x00000002)) { idx += 1; }
    return idx;
}

/* Given a multi-word bitmap array find a run of consecutive set bits and clear them.
 *
 * runLen     - number of consecutive set bits wanted; values <= 0 never match.
 * pBegin     - word at which the search starts; the search wraps round to
 *              pStartMap so every word is considered once. A pBegin outside
 *              [pStartMap, pEndMap] is treated as pStartMap.
 * pStartMap  - first word of the bitmap.
 * pEndMap    - last word of the bitmap (inclusive). Nothing beyond it is read
 *              or written, so a zero sentinel word after the map is harmless
 *              but not required.
 * foundIndex - receives the index of the first bit of the run; only written
 *              on success.
 *
 * Bit index 0 is the most significant bit of the word at the lowest address.
 * Runs never wrap from the end of the map back to its start.
 *
 * Returns 0 if bitrun not found.
 *         1 if bitrun found; the bits of the run have been cleared.
 */
static int findBitRun(int runLen, uint32_t *pBegin, uint32_t *pStartMap, uint32_t *pEndMap, uint32_t *foundIndex)
{
    const uint32_t allOnes = UINT32_C(0xFFFFFFFF);
    const uint32_t topBit = UINT32_C(0x80000000);
    uint32_t *p;
    unsigned int bit;

    if (runLen <= 0 || pEndMap < pStartMap)
    {
        return 0;
    }
    if (pBegin < pStartMap || pBegin > pEndMap)
    {
        pBegin = pStartMap;
    }
    p = pBegin;

    if (runLen == 1)
    {   // optimise the simple & hopefully common case
        do {
            if (*p)
            {
                bit = findBitRunMsb(*p);
                *p &= ~(UINT32_C(1) << bit);
                *foundIndex = (uint32_t)((unsigned long)(p - pStartMap) * 32ul) + (31u - bit);
                return 1;
            }
            if (++p > pEndMap)
            {
                p = pStartMap;
            }
        } while (p != pBegin);
    }
    else if (runLen < 32)
    {
        const uint32_t rmask = rightMask(runLen);

        do {
            const uint32_t map = *p;

            if (map)
            {
                // AND every bit with the runLen-1 bits to its right. Any bit
                // that survives is the leftmost bit of a run of >= runLen ones.
                // Each step doubles the run length already verified, so the
                // final shift (todo) never exceeds that verified length.
                uint32_t w = map & (map << 1);  // verified length 2
                int todo = runLen - 2;          // bits still to be folded in

                if (todo > 2)
                {
                    w &= w << 2;                // verified length 4
                    todo -= 2;
                    if (todo > 4)
                    {
                        w &= w << 4;            // verified length 8
                        todo -= 4;
                        if (todo > 8)
                        {
                            w &= w << 8;        // verified length 16
                            todo -= 8;
                        }
                    }
                }
                w &= w << todo;                 // fold in whatever is left

                if (w)                          // had run >= runLen within word
                {
                    bit = findBitRunMsb(w);     // start of the leftmost run
                    *p &= ~(rmask << ((bit + 1u) - (unsigned int)runLen));
                    *foundIndex = (uint32_t)((unsigned long)(p - pStartMap) * 32ul) + (31u - bit);
                    return 1;
                }
                else if ((map & 1) && p < pEndMap && (p[1] & topBit))
                {
                    // Possibly a run overlapping two words: rbits ones at the
                    // right of this word (1 <= rbits < runLen because w == 0)
                    // plus runLen - rbits ones at the left of the next word.
                    const unsigned int rbits = findBitRunMsb((map + 1) ^ map);
                    const uint32_t lmask = leftMask((unsigned int)runLen - rbits);

                    if ((p[1] & lmask) == lmask)
                    {
                        p[0] &= ~rightMask(rbits);
                        p[1] &= ~lmask;
                        *foundIndex = (uint32_t)((unsigned long)(p - pStartMap) * 32ul) + (32u - rbits);
                        return 1;
                    }
                }
            }
            if (++p > pEndMap)
            {
                p = pStartMap;
            }
        } while (p != pBegin);
    }
    else    // bit run spans multiple words
    {
        // A run of runLen bits reaches at least extraWords words beyond the
        // word it starts in, so only the first nStarts words can begin one.
        const long extraWords = (runLen - 1) / 32;
        const long nStarts = (long)(pEndMap - pStartMap) + 1 - extraWords;
        long visited = 0;   // candidate first words already ruled out
        uint32_t *pLastStart;

        if (nStarts <= 0)
        {
            return 0;       // the map is too small to hold such a run
        }
        pLastStart = pEndMap - extraWords;
        if (p > pLastStart)
        {
            p = pStartMap;
        }

        while (visited < nStarts)
        {
            uint32_t *ps = p;   // anchor: the word in which the run would start
            const uint32_t map = *p;

            if (map == allOnes && runLen == 32)     // easy case
            {
                *ps = 0;
                *foundIndex = (uint32_t)((unsigned long)(ps - pStartMap) * 32ul);
                return 1;
            }

            // The run must use the ones at the right of this word and carry on
            // at the left of the next word, hence the two words OR to all ones.
            if ((map & 1) && p < pEndMap && ((map | p[1]) == allOnes))
            {
                const unsigned int sbits = (map == allOnes) ? 32u : findBitRunMsb((map + 1) ^ map);
                uint32_t bitsNeeded = (uint32_t)runLen - sbits;     // always >= 1 here
                uint32_t *q;

                while (p < pEndMap && p[1] == allOnes)
                {
                    if (bitsNeeded <= 32)
                    {
                        p[1] &= ~leftMask(bitsNeeded);
                        for (q = p; q != ps; --q)
                        {
                            *q = 0;     // words entirely inside the run
                        }
                        *ps &= ~rightMask(sbits);
                        *foundIndex = (uint32_t)((unsigned long)(ps - pStartMap) * 32ul) + (32u - sbits);
                        return 1;
                    }
                    bitsNeeded -= 32;
                    ++p;
                }

                // The run may end part way into a word that is not all ones.
                if (p < pEndMap && bitsNeeded < 32 && (p[1] & topBit))
                {
                    const uint32_t lmask = leftMask(bitsNeeded);

                    if ((p[1] & lmask) == lmask)
                    {
                        p[1] &= ~lmask;
                        for (q = p; q != ps; --q)
                        {
                            *q = 0;     // words entirely inside the run
                        }
                        *ps &= ~rightMask(sbits);
                        *foundIndex = (uint32_t)((unsigned long)(ps - pStartMap) * 32ul) + (32u - sbits);
                        return 1;
                    }
                }
            }

            // Words ps..p cannot start a run: the all-ones words after ps belong
            // to the same, too short, stretch of ones. Count only the words
            // that are valid starting points so that none is left unvisited.
            visited += (long)(((p > pLastStart) ? pLastStart : p) - ps) + 1;
            if (++p > pLastStart)
            {
                p = pStartMap;
            }
        }
    }
    return 0;
}
#定义leftMask(x)(((int32_t)(0x8000000))>>((x)-1))//强制转换以便使用符号扩展(算术)移位
#定义右掩码(x)(1是固定的,还是您正在寻找最长的序列?到目前为止您尝试了哪些优化?关于平台和编译器的细节?什么是琐碎的实现?查找表?是否可以使用汇编?平台是什么?可以使用汇编程序。平台是c64x+dsp。在该平台上查找表速度较慢。算术i你是对的,你不可能得到比O(M)快的渐近速度(因为在最坏的情况下,你总是要看所有的位),但是因为我们这里讨论的是位,这是可能的
/* leftMask(x): 32-bit mask with the x most significant bits set, valid for 1 <= x <= 32. */
#define leftMask(x) ((uint32_t)(UINT32_C(0xFFFFFFFF) << (32u - (unsigned int)(x))))
/* rightMask(x): 32-bit mask with the x least significant bits set, valid for 1 <= x <= 32. */
#define rightMask(x) ((uint32_t)(UINT32_C(0xFFFFFFFF) >> (32u - (unsigned int)(x))))

/* Index (0 = least significant bit) of the highest set bit of v; v must be non-zero.
 *
 * Portable stand-in for the compiler-specific msbit32(); swap in an intrinsic
 * (count-leading-zeros style) if the target offers one.
 */
static unsigned int findBitRunMsb(uint32_t v)
{
    unsigned int idx = 0;

    if (v & UINT32_C(0xFFFF0000)) { v >>= 16; idx += 16; }
    if (v & UINT32_C(0x0000FF00)) { v >>= 8;  idx += 8; }
    if (v & UINT32_C(0x000000F0)) { v >>= 4;  idx += 4; }
    if (v & UINT32_C(0x0000000C)) { v >>= 2;  idx += 2; }
    if (v & UINT32_C(0x00000002)) { idx += 1; }
    return idx;
}

/* Given a multi-word bitmap array find a run of consecutive set bits and clear them.
 *
 * runLen     - number of consecutive set bits wanted; values <= 0 never match.
 * pBegin     - word at which the search starts; the search wraps round to
 *              pStartMap so every word is considered once. A pBegin outside
 *              [pStartMap, pEndMap] is treated as pStartMap.
 * pStartMap  - first word of the bitmap.
 * pEndMap    - last word of the bitmap (inclusive). Nothing beyond it is read
 *              or written, so a zero sentinel word after the map is harmless
 *              but not required.
 * foundIndex - receives the index of the first bit of the run; only written
 *              on success.
 *
 * Bit index 0 is the most significant bit of the word at the lowest address.
 * Runs never wrap from the end of the map back to its start.
 *
 * Returns 0 if bitrun not found.
 *         1 if bitrun found; the bits of the run have been cleared.
 */
static int findBitRun(int runLen, uint32_t *pBegin, uint32_t *pStartMap, uint32_t *pEndMap, uint32_t *foundIndex)
{
    const uint32_t allOnes = UINT32_C(0xFFFFFFFF);
    const uint32_t topBit = UINT32_C(0x80000000);
    uint32_t *p;
    unsigned int bit;

    if (runLen <= 0 || pEndMap < pStartMap)
    {
        return 0;
    }
    if (pBegin < pStartMap || pBegin > pEndMap)
    {
        pBegin = pStartMap;
    }
    p = pBegin;

    if (runLen == 1)
    {   // optimise the simple & hopefully common case
        do {
            if (*p)
            {
                bit = findBitRunMsb(*p);
                *p &= ~(UINT32_C(1) << bit);
                *foundIndex = (uint32_t)((unsigned long)(p - pStartMap) * 32ul) + (31u - bit);
                return 1;
            }
            if (++p > pEndMap)
            {
                p = pStartMap;
            }
        } while (p != pBegin);
    }
    else if (runLen < 32)
    {
        const uint32_t rmask = rightMask(runLen);

        do {
            const uint32_t map = *p;

            if (map)
            {
                // AND every bit with the runLen-1 bits to its right. Any bit
                // that survives is the leftmost bit of a run of >= runLen ones.
                // Each step doubles the run length already verified, so the
                // final shift (todo) never exceeds that verified length.
                uint32_t w = map & (map << 1);  // verified length 2
                int todo = runLen - 2;          // bits still to be folded in

                if (todo > 2)
                {
                    w &= w << 2;                // verified length 4
                    todo -= 2;
                    if (todo > 4)
                    {
                        w &= w << 4;            // verified length 8
                        todo -= 4;
                        if (todo > 8)
                        {
                            w &= w << 8;        // verified length 16
                            todo -= 8;
                        }
                    }
                }
                w &= w << todo;                 // fold in whatever is left

                if (w)                          // had run >= runLen within word
                {
                    bit = findBitRunMsb(w);     // start of the leftmost run
                    *p &= ~(rmask << ((bit + 1u) - (unsigned int)runLen));
                    *foundIndex = (uint32_t)((unsigned long)(p - pStartMap) * 32ul) + (31u - bit);
                    return 1;
                }
                else if ((map & 1) && p < pEndMap && (p[1] & topBit))
                {
                    // Possibly a run overlapping two words: rbits ones at the
                    // right of this word (1 <= rbits < runLen because w == 0)
                    // plus runLen - rbits ones at the left of the next word.
                    const unsigned int rbits = findBitRunMsb((map + 1) ^ map);
                    const uint32_t lmask = leftMask((unsigned int)runLen - rbits);

                    if ((p[1] & lmask) == lmask)
                    {
                        p[0] &= ~rightMask(rbits);
                        p[1] &= ~lmask;
                        *foundIndex = (uint32_t)((unsigned long)(p - pStartMap) * 32ul) + (32u - rbits);
                        return 1;
                    }
                }
            }
            if (++p > pEndMap)
            {
                p = pStartMap;
            }
        } while (p != pBegin);
    }
    else    // bit run spans multiple words
    {
        // A run of runLen bits reaches at least extraWords words beyond the
        // word it starts in, so only the first nStarts words can begin one.
        const long extraWords = (runLen - 1) / 32;
        const long nStarts = (long)(pEndMap - pStartMap) + 1 - extraWords;
        long visited = 0;   // candidate first words already ruled out
        uint32_t *pLastStart;

        if (nStarts <= 0)
        {
            return 0;       // the map is too small to hold such a run
        }
        pLastStart = pEndMap - extraWords;
        if (p > pLastStart)
        {
            p = pStartMap;
        }

        while (visited < nStarts)
        {
            uint32_t *ps = p;   // anchor: the word in which the run would start
            const uint32_t map = *p;

            if (map == allOnes && runLen == 32)     // easy case
            {
                *ps = 0;
                *foundIndex = (uint32_t)((unsigned long)(ps - pStartMap) * 32ul);
                return 1;
            }

            // The run must use the ones at the right of this word and carry on
            // at the left of the next word, hence the two words OR to all ones.
            if ((map & 1) && p < pEndMap && ((map | p[1]) == allOnes))
            {
                const unsigned int sbits = (map == allOnes) ? 32u : findBitRunMsb((map + 1) ^ map);
                uint32_t bitsNeeded = (uint32_t)runLen - sbits;     // always >= 1 here
                uint32_t *q;

                while (p < pEndMap && p[1] == allOnes)
                {
                    if (bitsNeeded <= 32)
                    {
                        p[1] &= ~leftMask(bitsNeeded);
                        for (q = p; q != ps; --q)
                        {
                            *q = 0;     // words entirely inside the run
                        }
                        *ps &= ~rightMask(sbits);
                        *foundIndex = (uint32_t)((unsigned long)(ps - pStartMap) * 32ul) + (32u - sbits);
                        return 1;
                    }
                    bitsNeeded -= 32;
                    ++p;
                }

                // The run may end part way into a word that is not all ones.
                if (p < pEndMap && bitsNeeded < 32 && (p[1] & topBit))
                {
                    const uint32_t lmask = leftMask(bitsNeeded);

                    if ((p[1] & lmask) == lmask)
                    {
                        p[1] &= ~lmask;
                        for (q = p; q != ps; --q)
                        {
                            *q = 0;     // words entirely inside the run
                        }
                        *ps &= ~rightMask(sbits);
                        *foundIndex = (uint32_t)((unsigned long)(ps - pStartMap) * 32ul) + (32u - sbits);
                        return 1;
                    }
                }
            }

            // Words ps..p cannot start a run: the all-ones words after ps belong
            // to the same, too short, stretch of ones. Count only the words
            // that are valid starting points so that none is left unvisited.
            visited += (long)(((p > pLastStart) ? pLastStart : p) - ps) + 1;
            if (++p > pLastStart)
            {
                p = pStartMap;
            }
        }
    }
    return 0;
}