C++：在队列应用中，C++20 信号量似乎比条件变量更慢_C++_Performance_Semaphore_C++20_Condition Variable

C++：在队列应用中，C++20 信号量似乎比条件变量更慢

c++ performance

C++ C++；与条件变量相比，队列应用程序中的20信号量似乎较慢,c++,performance,semaphore,c++20,condition-variable,C++,Performance,Semaphore,C++20,Condition Variable,出于研究目的，我比较了单生产者单消费者队列的实现。所以我比较了条件变量实现和C++20计数信号量实现。我会猜到信号量实现会更快，但事实并非如此。在我的计算机上，在Windows、MSVC下，信号量的实现大约慢25%。我在下面介绍了这两种实现条件变量实现有一个很小的功能优势：可以使用done（）API函数实现中止操作，而信号量实现需要一个特殊的“stop”值排队以解锁和退出拉线程。在我的想象中，单生产者单消费者队列是信号量的典型应用，但显然不是现在我想知道：我是不是做了一些不聪明的事情，

出于研究目的，我比较了单生产者单消费者队列的两种实现：一种基于条件变量，另一种基于 C++20 计数信号量。我原以为信号量实现会更快，但事实并非如此：在我的计算机上（Windows、MSVC），信号量实现大约慢 25%。两种实现都列在下面。

条件变量实现有一个小的功能优势：可以通过 done() API 函数实现中止操作，而信号量实现需要向队列中放入一个特殊的“stop”值，才能解除拉取线程的阻塞并让它退出。在我的设想中，单生产者单消费者队列是信号量的典型应用，但显然并非如此。

现在我想知道：

我是不是做了什么不明智的事情，导致我的信号量实现这么慢？
是不是 Microsoft 的计数信号量实现本身就比较慢？
是我误以为队列是信号量的合适应用场景了吗？
如果队列不是合适的应用场景，那么在哪些应用中信号量的性能会优于条件变量？

条件变量实现：

#include <array>
#include <mutex>
#include <condition_variable>

/*
* locked_single_producer_single_consumer_queue_T is a bounded, blocking ring buffer that passes
* packets between 2 threads. One thread pushes, the other thread pulls.
*
* One slot is always left unused so that "full" can be told apart from "empty"; the queue
* therefore holds at most N - 1 packets at a time. All state is guarded by m_mutex, and a single
* condition variable serves both directions.
*/
template<class T, int N = 16> // N must be a power of 2 and at least 2
class locked_single_producer_single_consumer_queue_T
{
public:
    /* Copies packet into the queue. Returns immediately when a slot is free; otherwise blocks
       until the pulling thread has removed a packet. */
    void push(T const& packet)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [this] { return !is_full(); });
        m_data[m_head] = packet;
        m_head = (m_head + 1) & m_mask;
        lock.unlock(); // release the mutex before notifying, as the original code does
        m_cv.notify_one();
    }
    /* Copies the oldest packet into the out-parameter and returns true. Blocks while the queue
       is empty. Returns false (leaving packet untouched) only when the queue is empty and
       done() has been called. */
    bool pull(T& packet)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [this] { return !is_empty() || m_done; });
        if (!is_empty()) [[likely]]
        {
            packet = m_data[m_tail];
            m_tail = (m_tail + 1) & m_mask;
            lock.unlock();
            m_cv.notify_one();
            return true;
        }
        return false; // woken by done() with nothing left to read
    }
    /* done() indicates that the pushing thread stopped. The pulling thread can continue reading
       the remainder of the queue; once it is drained, pull() returns false. */
    void done()
    {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_done = true;
        }
        m_cv.notify_one();
    }
private:
    // N == 0 would give an empty array and N == 1 a queue with zero usable slots (push would
    // never block and pull would never see data), so both are rejected at compile time.
    static_assert(N >= 2 && (N & (N - 1)) == 0, "N must be a power of 2 and at least 2");
    static constexpr int m_mask = N - 1;
    using data_t = std::array<T, N>;

    /* True when no packet is stored. Caller must hold m_mutex. */
    bool is_empty() const { return ((m_head - m_tail) & m_mask) == 0; }
    /* True when advancing m_head would make it collide with m_tail. Caller must hold m_mutex. */
    bool is_full() const { return ((m_tail - m_head) & m_mask) == 1; }

    data_t m_data;
    std::mutex m_mutex;
    std::condition_variable m_cv;
    int m_tail{ 0 }; // index of the next packet to pull, always in [0, N)
    int m_head{ 0 }; // index of the next slot to fill, always in [0, N)
    bool m_done{};
};

#include <array>
#include <semaphore>
#include <atomic>

/*
* locked_single_producer_single_consumer_queue2_T is a bounded, blocking ring buffer that passes
* packets between 2 threads. One thread pushes, the other thread pulls.
*
* Two counting semaphores do the blocking: m_available_space counts free slots (starts at N) and
* m_available_packages counts stored packets (starts at 0), so the queue holds up to N packets.
* There is no abort operation: pull() blocks until a packet arrives.
*/
template<class T, int N = 16> // N must be a positive power of 2
class locked_single_producer_single_consumer_queue2_T
{
public:
    /* Copies packet into the queue. Returns immediately when a slot is free; otherwise blocks
       until the pulling thread has released one. */
    void push(T const& packet)
    {
        m_available_space.acquire();
        int const head = m_head.load(std::memory_order_acquire);
        m_data[head] = packet;
        // Store the wrapped index: an ever-growing int counter would eventually overflow,
        // which is undefined behaviour for signed integers.
        m_head.store((head + 1) & m_mask, std::memory_order_release);
        m_available_packages.release();
    }
    /* Returns a copy of the oldest packet. Returns immediately when one is stored; otherwise
       blocks until the pushing thread has released one. */
    T pull()
    {
        m_available_packages.acquire();
        int const tail = m_tail.load(std::memory_order_acquire);
        T packet = m_data[tail];
        m_tail.store((tail + 1) & m_mask, std::memory_order_release); // wrapped, see push()
        m_available_space.release();
        return packet;
    }
private:
    // N == 0 passes the bit test but would leave no slot to index.
    static_assert(N > 0 && (N & (N - 1)) == 0, "N must be a positive power of 2");
    static constexpr int m_mask = N - 1;
    using data_t = std::array<T, N>;
    data_t m_data;
    std::atomic_int m_tail{ 0 }; // next slot to read, always in [0, N); only used by pull()
    std::atomic_int m_head{ 0 }; // next slot to write, always in [0, N); only used by push()
    std::counting_semaphore<N> m_available_space{ N };
    std::counting_semaphore<N> m_available_packages{ 0 };
};

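#include <array>
#include <mutex>
#include <condition_variable>
/*
* locked_single_producer_single_consumer_queue_T is responsible for locked packet communication
* between 2 threads. One thread pushes, the other thread pulls.
*/
template<class T, int N = 16> // N must be a power 2
class locked_single_producer_single_consumer_queue_T
{
public:
    /* When packet fits in the queue, then push shall return immediately. Otherwise it will block until it can push the packet. */
    void push(T const& packet)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [this] {return ((m_tail - m_head) & m_mask) != 1; });
        m_data[m_head++] = packet;
        m_head &= m_mask;
        lock.unlock();
        m_cv.notify_one();
    }
    /* When packet could be retrieved from the queue, then pull shall return immediately. Otherwise it will block until it can pull the packet. */
    bool pull(T& packet)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [this] {return (((m_head - m_tail) & m_mask) != 0) || m_done; });
        if(((m_head - m_tail) & m_mask) != 0) [[likely]]
        {
            packet = m_data[m_tail++];
            m_tail &= m_mask;
            lock.unlock();
            m_cv.notify_one();
            return true;
        }
        return false;
    }
    /* done() indicates that the pushing thread stopped. The pulling thread can continue reading
       the remainder of the queue and should then return */
    void done()
    {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_done = true;
        }
        m_cv.notify_one();
    }
private:
    static_assert((N& (N - 1)) == 0, "N must be a power of 2");
    static signed int const m_mask = N - 1;
    using data_t = std::array<T, N>;
    data_t m_data;
    std::mutex m_mutex;
    std::condition_variable m_cv;
    int m_tail{ 0 };
    int m_head{ 0 };
    bool m_done{};
};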

信号量实现：

#include <array>
#include <mutex>
#include <condition_variable>

/*
* locked_single_producer_single_consumer_queue_T is a bounded, blocking ring buffer that passes
* packets between 2 threads. One thread pushes, the other thread pulls.
*
* One slot is always left unused so that "full" can be told apart from "empty"; the queue
* therefore holds at most N - 1 packets at a time. All state is guarded by m_mutex, and a single
* condition variable serves both directions.
*/
template<class T, int N = 16> // N must be a power of 2 and at least 2
class locked_single_producer_single_consumer_queue_T
{
public:
    /* Copies packet into the queue. Returns immediately when a slot is free; otherwise blocks
       until the pulling thread has removed a packet. */
    void push(T const& packet)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [this] { return !is_full(); });
        m_data[m_head] = packet;
        m_head = (m_head + 1) & m_mask;
        lock.unlock(); // release the mutex before notifying, as the original code does
        m_cv.notify_one();
    }
    /* Copies the oldest packet into the out-parameter and returns true. Blocks while the queue
       is empty. Returns false (leaving packet untouched) only when the queue is empty and
       done() has been called. */
    bool pull(T& packet)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [this] { return !is_empty() || m_done; });
        if (!is_empty()) [[likely]]
        {
            packet = m_data[m_tail];
            m_tail = (m_tail + 1) & m_mask;
            lock.unlock();
            m_cv.notify_one();
            return true;
        }
        return false; // woken by done() with nothing left to read
    }
    /* done() indicates that the pushing thread stopped. The pulling thread can continue reading
       the remainder of the queue; once it is drained, pull() returns false. */
    void done()
    {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_done = true;
        }
        m_cv.notify_one();
    }
private:
    // N == 0 would give an empty array and N == 1 a queue with zero usable slots (push would
    // never block and pull would never see data), so both are rejected at compile time.
    static_assert(N >= 2 && (N & (N - 1)) == 0, "N must be a power of 2 and at least 2");
    static constexpr int m_mask = N - 1;
    using data_t = std::array<T, N>;

    /* True when no packet is stored. Caller must hold m_mutex. */
    bool is_empty() const { return ((m_head - m_tail) & m_mask) == 0; }
    /* True when advancing m_head would make it collide with m_tail. Caller must hold m_mutex. */
    bool is_full() const { return ((m_tail - m_head) & m_mask) == 1; }

    data_t m_data;
    std::mutex m_mutex;
    std::condition_variable m_cv;
    int m_tail{ 0 }; // index of the next packet to pull, always in [0, N)
    int m_head{ 0 }; // index of the next slot to fill, always in [0, N)
    bool m_done{};
};

#include <array>
#include <semaphore>
#include <atomic>

/*
* locked_single_producer_single_consumer_queue2_T is a bounded, blocking ring buffer that passes
* packets between 2 threads. One thread pushes, the other thread pulls.
*
* Two counting semaphores do the blocking: m_available_space counts free slots (starts at N) and
* m_available_packages counts stored packets (starts at 0), so the queue holds up to N packets.
* There is no abort operation: pull() blocks until a packet arrives.
*/
template<class T, int N = 16> // N must be a positive power of 2
class locked_single_producer_single_consumer_queue2_T
{
public:
    /* Copies packet into the queue. Returns immediately when a slot is free; otherwise blocks
       until the pulling thread has released one. */
    void push(T const& packet)
    {
        m_available_space.acquire();
        int const head = m_head.load(std::memory_order_acquire);
        m_data[head] = packet;
        // Store the wrapped index: an ever-growing int counter would eventually overflow,
        // which is undefined behaviour for signed integers.
        m_head.store((head + 1) & m_mask, std::memory_order_release);
        m_available_packages.release();
    }
    /* Returns a copy of the oldest packet. Returns immediately when one is stored; otherwise
       blocks until the pushing thread has released one. */
    T pull()
    {
        m_available_packages.acquire();
        int const tail = m_tail.load(std::memory_order_acquire);
        T packet = m_data[tail];
        m_tail.store((tail + 1) & m_mask, std::memory_order_release); // wrapped, see push()
        m_available_space.release();
        return packet;
    }
private:
    // N == 0 passes the bit test but would leave no slot to index.
    static_assert(N > 0 && (N & (N - 1)) == 0, "N must be a positive power of 2");
    static constexpr int m_mask = N - 1;
    using data_t = std::array<T, N>;
    data_t m_data;
    std::atomic_int m_tail{ 0 }; // next slot to read, always in [0, N); only used by pull()
    std::atomic_int m_head{ 0 }; // next slot to write, always in [0, N); only used by push()
    std::counting_semaphore<N> m_available_space{ N };
    std::counting_semaphore<N> m_available_packages{ 0 };
};

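#include <array>
#include <semaphore>
#include <atomic>
/*
* locked_single_producer_single_consumer_queue2_T is responsible for locking packet communication
* between 2 threads. One thread pushes, the other thread pulls.
*/
template<class T, int N = 16> // N must be a power 2
class locked_single_producer_single_consumer_queue2_T
{
public:
    /* When packet fits in the queue, then push shall return immediately. Otherwise it will block until it can push the packet. */
    void push(T const& packet)
    {
        m_available_space.acquire();
        int head = m_head.load(std::memory_order_acquire);
        m_data[head++ & m_mask] = packet;
        m_head.store(head, std::memory_order_release);
        m_available_packages.release();
    }
    /* When packet could be retrieved from the queue, then pull shall return immediately. Otherwise it will block until it can pull the packet. */
    T pull()
    {
        m_available_packages.acquire();
        int tail = m_tail.load(std::memory_order_acquire);
        T packet = m_data[tail++ & m_mask];
        m_tail.store(tail, std::memory_order_release);
        m_available_space.release();
        return packet;
    }
private:
    static_assert((N& (N - 1)) == 0, "N must be a power of 2");
    static signed int const m_mask = N - 1;
    using data_t = std::array<T, N>;
    data_t m_data;
    std::atomic_int m_tail{ 0 };
    std::atomic_int m_head{ 0 };
    std::counting_semaphore<N> m_available_space{ N };
    std::counting_semaphore<N> m_available_packages{ 0 };
};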

***编辑***

根据要求，我还附上了一个完整的测试程序，其中已经包含这两种实现。（需要支持信号量的 C++20 编译器）

#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
#包括
/*
*锁定的\u单个\u生产者\u单个\u消费者\u队列\u T负责锁定的数据包通信
*在两个线程之间。一条线推动，另一条线拉动。
*/
模板//N必须是幂2
类锁定\u单个\u生产者\u单个\u消费者\u队列\u T
{
公众：
/*当数据包进入队列时，push应立即返回。否则，它将阻塞，直到它能够推送数据包为止*/
无效推送（T常量和数据包）
{
std：：唯一的_锁（m_互斥）；
m_cv.wait（lock，[this]{return（（m_tail-m_head）&m_mask）！=1；}）；
m_数据[m_头++]=数据包；
m_头&=m_面罩；
lock.unlock（）；
m_cv.通知_one（）；
}
/*当数据包可以从队列中检索时，pull应立即返回。否则，它将阻塞，直到它可以提取数据包为止*/
布尔拉（T&packet）
{
std：：唯一的_锁（m_互斥）；
m_cv.等待（位置