Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/algorithm/12.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C++ 分段粗筛_C++_Algorithm_Primes_Sieve Of Eratosthenes - Fatal编程技术网

C++ 分段粗筛

C++ 分段粗筛,c++,algorithm,primes,sieve-of-eratosthenes,C++,Algorithm,Primes,Sieve Of Eratosthenes,在互联网上遇到这个高效的分段素筛,请帮助我了解下一个向量的使用 细分市场规模的具体选择如何影响绩效 const int L1D_CACHE_SIZE = 32768; void segmented_sieve(int64_t limit, int segment_size = L1D_CACHE_SIZE) { int sqrt = (int) std::sqrt((double) limit); int64_t count = (limit < 2) ? 0 : 1;

在互联网上看到了这个高效的分段素数筛,请帮我理解其中 next 向量的用途

分段大小(segment size)的具体取值会如何影响性能

const int L1D_CACHE_SIZE = 32768;

/// Counts the primes <= limit with a segmented sieve of Eratosthenes and
/// prints "<count> primes found." to std::cout.
///
/// The range [0, limit] is processed in consecutive segments of
/// `segment_size` numbers so the working buffer stays small. For every
/// sieving prime, `next` remembers the offset (relative to the start of the
/// upcoming segment) of the next multiple to cross off, so sieving resumes
/// exactly where the previous segment stopped.
///
/// @param limit         Inclusive upper bound. Values below 2 report 0 primes.
/// @param segment_size  Count of numbers sieved per segment (one char each).
///                      Values below 1 fall back to L1D_CACHE_SIZE.
///
/// Sieving primes and offsets are stored as `int`, so `segment_size` plus
/// twice the square root of `limit` must fit in an `int`.
void segmented_sieve(int64_t limit, int segment_size = L1D_CACHE_SIZE)
{
    // There are no primes below 2. Returning early also keeps a negative
    // limit away from std::sqrt: converting the resulting NaN to an integer
    // is undefined behaviour.
    if (limit < 2)
    {
        std::cout << 0 << " primes found." << std::endl;
        return;
    }

    // A segment has to contain at least one number, otherwise `low` would
    // never advance and the segment loop would not terminate.
    if (segment_size < 1)
        segment_size = L1D_CACHE_SIZE;

    // Integer square root of limit. The floating-point estimate can be off
    // by one for large inputs, so correct it with overflow-free comparisons
    // (a * a <= limit  <=>  a <= limit / a for positive integers).
    int64_t root = static_cast<int64_t>(std::sqrt(static_cast<double>(limit)));
    while (root > limit / root)
        --root;
    while (root + 1 <= limit / (root + 1))
        ++root;

    int64_t count = 1; // accounts for the prime 2; only odd n are tested below
    int64_t s = 2;     // next candidate sieving prime
    int64_t n = 3;     // next odd number whose primality is read from the sieve

    // vector used for sieving; index j stands for the number low + j
    std::vector<char> sieve(static_cast<std::size_t>(segment_size));

    // generate small primes <= root with a plain sieve
    std::vector<char> is_prime(static_cast<std::size_t>(root) + 1, 1);
    for (int64_t i = 2; i * i <= root; i++)
        if (is_prime[i])
            for (int64_t j = i * i; j <= root; j += i)
                is_prime[j] = 0;

    std::vector<int> primes; // sieving primes discovered so far
    std::vector<int> next;   // next[i]: offset of the next multiple of primes[i]

    for (int64_t low = 0; low <= limit; low += segment_size)
    {
        std::fill(sieve.begin(), sieve.end(), 1);

        // current segment = interval [low, high]
        const int64_t high = std::min(low + segment_size - 1, limit);

        // A prime s becomes relevant once s * s falls inside the current
        // segment; its first crossed-off multiple is s * s itself.
        for (; s * s <= high; s++)
        {
            if (is_prime[s])
            {
                primes.push_back(static_cast<int>(s));
                next.push_back(static_cast<int>(s * s - low));
            }
        }

        // Sieve the current segment. Index 0 holds the prime 2, which is
        // skipped because even numbers are never inspected. Stepping by
        // 2 * prime visits only the odd multiples.
        for (std::size_t i = 1; i < primes.size(); i++)
        {
            int j = next[i];
            for (const int k = primes[i] * 2; j < segment_size; j += k)
                sieve[j] = 0;
            // Re-base the offset so it is relative to the next segment.
            next[i] = j - segment_size;
        }

        // Count the odd numbers of this segment that were not crossed off.
        for (; n <= high; n += 2)
            if (sieve[n - low]) // n is a prime
                count++;
    }

    std::cout << count << " primes found." << std::endl;
}
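const int L1D_CACHE_SIZE = 32768;
void segmented_sieve(int64_t limit, int segment_size = L1D_CACHE_SIZE)
{
    int sqrt = (int) std::sqrt((double) limit);
    int64_t count = (limit < 2) ? 0 : 1;
    int64_t s = 2;
    int64_t n = 3;

    // vector used for sieving
    std::vector<char> sieve(segment_size);

    // generate small primes <= sqrt

我不是这方面的专家,但我的直觉告诉我: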

  • 限制筛表的大小

    使其能够放入 CPU 的一级缓存,以充分利用当前硬件体系结构带来的性能提升

  • next
    vector

    如果要对筛进行分段,就必须为每个用于筛选的素数记住它上一次筛到的位置(索引),例如:

    • 筛分素数:2,3,5
    • 片段大小:8

       |0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7|0 1 2 3 4 5 6 7| // segments
      -----------------------------------------------
      2|-   x   x    x   x   x   x   x    x   x   x   x   
      3|-     x      x      x      x      x      x      x  
      5|-          x          x          x          x      
      -----------------------------------------------
       |                 ^                ^                ^ 
                                  // next value offset for each prime
      
    这样,在填充下一段时,就可以从上一段停下的位置平滑地继续筛选


  • 下面是同一算法的一个更简洁的写法,可以让原理更加清晰(仅为部分代码)。它初始化的是一个压缩的(仅含奇数的)筛,而不是统计素数个数,但所涉及的原理是相同的。下载并运行 .cpp 文件即可查看段大小的影响。基本上,最佳值应当是 CPU 的一级缓存大小。太小,轮数增加带来的开销会开始占主导;太大,则会因二级缓存和三级缓存较慢的访问速度而受到惩罚。另请参阅


    注意:通过一种称为“预筛选”(presieving)的技巧,还可以把这个时间再减少一秒,即把预先计算好的位模式直接批量写入位图,而不是在开始时将其清零。这会使完整筛选的 gcc 计时降低到 2.1 秒。这种技巧与按缓存大小分块的分段筛选配合使用,效果非常好。

    对要 push_back 的向量先调用 reserve。@PratikKumar 顺便说一句,出于某些目的(如非有序/统一使用 isPrime? 测试),周期性筛选的整体性能更快。请看这里(您可能会感兴趣)
    void initialise_packed_sieve_4G (void *data, unsigned segment_bytes = 1 << 15, unsigned end_bit = 1u << 31)
    {
       typedef std::vector<prime_and_offset_t>::iterator prime_iter_t;
       std::vector<prime_and_offset_t> small_factors;
    
       initialise_odd_primes_and_offsets_64K(small_factors);
    
       unsigned segment_bits = segment_bytes * CHAR_BIT;
       unsigned partial_bits = end_bit % segment_bits;
       unsigned segments     = end_bit / segment_bits + (partial_bits != 0);
    
       unsigned char *segment = static_cast<unsigned char *>(data);
       unsigned bytes = segment_bytes;
    
       for ( ; segments--; segment += segment_bytes)
       {
          if (segments == 0 && partial_bits)
          {
             segment_bits = partial_bits;
             bytes = (partial_bits + CHAR_BIT - 1) / CHAR_BIT;
          }
    
          std::memset(segment, 0, bytes);
    
          for (prime_iter_t p = small_factors.begin(); p != small_factors.end(); ++p)
          {
             unsigned n = p->prime;
             unsigned i = p->next_offset;
    
             for ( ; i < segment_bits; i += n)
             {
                set_bit(segment, i);
             }
    
              p->next_offset = i - segment_bits;
          }
       }
    }
    
    for (index_t k = 1; k <= max_factor_bit; ++k)
    {
       if (bitmap_t::traits::bt(bm.bm, k))  continue;
    
       index_t n = (k << 1) + 1;     // == index_for_value(value_for_index(k) * 2) == n
       index_t i = square(n) >> 1;   // == index_for_value(square(n))
    
       if (i < offset)
       {
          i += ((offset - i) / n) * n;
       }
    
       for ( ; i <= new_max_bit; i += n)
       {
          bitmap_t::traits::bts(bm.bm, i); 
       }
    }
    
    sieve bits = 2147483648 (equiv. number = 4294967295)
    
    segment size    4096 (2^12) bytes ...   4.091 s   1001.2 M/s
    segment size    8192 (2^13) bytes ...   3.723 s   1100.2 M/s
    segment size   16384 (2^14) bytes ...   3.534 s   1159.0 M/s
    segment size   32768 (2^15) bytes ...   3.418 s   1198.4 M/s
    segment size   65536 (2^16) bytes ...   3.894 s   1051.9 M/s
    segment size  131072 (2^17) bytes ...   4.265 s    960.4 M/s
    segment size  262144 (2^18) bytes ...   4.453 s    919.8 M/s
    segment size  524288 (2^19) bytes ...   5.002 s    818.9 M/s
    segment size 1048576 (2^20) bytes ...   5.176 s    791.3 M/s
    segment size 2097152 (2^21) bytes ...   5.135 s    797.7 M/s
    segment size 4194304 (2^22) bytes ...   5.251 s    780.0 M/s
    segment size 8388608 (2^23) bytes ...   7.412 s    552.6 M/s
    
    digest { 203280221, 0C903F86, 5B253F12, 774A3204 }