C 故障排除多个生产者单个消费者循环缓冲区

C 故障排除多个生产者单个消费者循环缓冲区,c,linux,multithreading,circular-buffer,C,Linux,Multithreading,Circular Buffer,我一直在尝试在C for Linux中实现MPSC循环缓冲区。 这里是缓冲区结构: typedef struct mpsc_buffer_s { sem_t semaphore; unsigned char cache_pad_1[CACHE_LINE - sizeof(sem_t)]; uint64_t write_pos; unsigned char cache_pad_2[CACHE_LINE - sizeof(uint64_t)]; size_t

我一直在尝试在 Linux 上用 C 语言实现一个 MPSC(多生产者、单消费者)循环缓冲区。下面是缓冲区的结构定义:

/*
 * Multi-producer / single-consumer ring of pointers.
 *
 * Each field is followed by a pad array sized CACHE_LINE minus the size of
 * that field, so every field starts CACHE_LINE bytes after the previous one.
 * NOTE(review): presumably meant to avoid false sharing between producers
 * and the consumer; a pad size goes negative (compile error) if a field is
 * larger than CACHE_LINE -- confirm sizeof(sem_t) on the target.
 */
typedef struct mpsc_buffer_s {
    sem_t semaphore;      /* posted by add_to_buffer, waited on by get_from_buffer */
    unsigned char cache_pad_1[CACHE_LINE - sizeof(sem_t)];
    uint64_t write_pos;   /* ever-increasing write counter, advanced atomically by producers */
    unsigned char cache_pad_2[CACHE_LINE - sizeof(uint64_t)];
    size_t size;          /* number of slots in buffer[] */
    unsigned char cache_pad_3[CACHE_LINE - sizeof(size_t)];
    uint64_t read_pos;    /* ever-increasing read counter, advanced by get_from_buffer */
    unsigned char cache_pad_4[CACHE_LINE - sizeof(uint64_t)];
    void **buffer;        /* slot array; a NULL slot means "free" */
} mpsc_buffer_t  __attribute__ ((__aligned__(CACHE_LINE)));
以下是相关函数:

/*
 * Allocate and initialise an empty buffer with `size` slots.
 *
 * Returns the new buffer, or NULL when `size` is 0 (the slot index is
 * computed modulo `size`) or when either allocation fails. The caller owns
 * the result: both the slot array and the cache-line-aligned control block
 * must be released with free().
 */
mpsc_buffer_t* init_mpsc_buffer(size_t size) {
    mpsc_buffer_t *new_buffer;
    void *mem = NULL;

    if (size == 0) {
        return NULL;
    }

    // control block, aligned to a cache line; posix_memalign returns
    // non-zero on failure and leaves no usable pointer behind
    if (posix_memalign(&mem, CACHE_LINE, sizeof(mpsc_buffer_t)) != 0) {
        return NULL;
    }
    new_buffer = mem;
    new_buffer->size = size;
    new_buffer->read_pos = 0;
    new_buffer->write_pos = 0;

    // slot array: calloc zeroes it (all slots free) and checks size * sizeof
    // for overflow
    new_buffer->buffer = calloc(size, sizeof(void*));
    if (new_buffer->buffer == NULL) {
        free(new_buffer);
        return NULL;
    }

    // semaphore starts at 0: nothing to consume yet
    int rc = sem_init(&new_buffer->semaphore, 0, 0);
    ABORT_ON_ERR(rc, "Semaphore init failed");
    return new_buffer;
}
/*
 * Producer side: store `element` in the next slot and signal the consumer.
 *
 * A slot is claimed by atomically advancing write_pos; the element is then
 * written with a compare-and-swap that only succeeds once the slot is NULL,
 * so `element` itself must not be NULL. The semaphore is posted only after
 * the slot has been written, but posts from different producers are not
 * ordered by slot: another producer may post for a later slot while this
 * one is still between claiming its slot and writing it.
 */
void add_to_buffer(mpsc_buffer_t *buffer, void *element) {
    // claim a ticket; the slot index is the ticket modulo the ring size
    const uint64_t slot_index = __sync_fetch_and_add(&buffer->write_pos, 1) % buffer->size;

    // busy-wait until the slot is empty, then publish the element into it
    for (;;) {
        if (__sync_bool_compare_and_swap(&(buffer->buffer[slot_index]), NULL, element)) {
            break;
        }
    }

    // one more element is available to the consumer
    int post_rc = sem_post(&buffer->semaphore);
    ABORT_ON_ERR(post_rc, "Semaphore unlock failed");
}

/*
 * Consumer side (single consumer only): block until an element is
 * available and return it.
 *
 * A successful sem_wait() only proves that *some* producer has finished
 * writing; the producer that claimed the slot at read_pos may still be
 * between advancing write_pos and storing its pointer. That slot has
 * already been claimed, so it will be filled; instead of returning NULL we
 * wait for it to become non-NULL. The slot is then cleared with an atomic
 * operation so that producers spinning on it see it become free.
 *
 * Never returns NULL (producers must not enqueue NULL).
 */
void* get_from_buffer(mpsc_buffer_t *buffer) {
    int rc = sem_wait(&buffer->semaphore);
    ABORT_ON_ERR(rc, "Semaphore wait failed");

    uint64_t read_pos = buffer->read_pos % buffer->size;
    void **slot = &buffer->buffer[read_pos];
    void *element;

    // atomic read with a full barrier: CAS(NULL -> NULL) never changes the
    // slot and returns its current content
    while ((element = __sync_val_compare_and_swap(slot, NULL, NULL)) == NULL) {
        // the owning producer has claimed this slot but not stored into it yet
    }

    // release the slot; always succeeds because producers only write into
    // NULL slots and this is the only consumer
    (void) __sync_bool_compare_and_swap(slot, element, NULL);

    buffer->read_pos++;
    return element;
}
我使用这种缓冲区来传递指针。很明显,我不发送空指针

当我将生产者的数量从 2 增加到 3 时,出现了一个奇怪的 bug:消费者开始读到空指针。由于我从不主动发送空指针,这意味着消费者线程成功获得了信号量(计数为正),但随后在读取位置上读到的却是空值

另一方面,缓冲区中的一些指针没有被清除,从而导致潜在的死锁


算法中是否存在逻辑错误,或者这些问题可能与我看不到的缓存机制有关?

在递增写索引和分配条目指针之间存在竞争条件

考虑这样一种情况,生产者A增加写入索引,但耗尽其时间片。同时,生产者B再次增加写索引,填充下一个条目——记住,A还没有填充它的条目——并增加信号量。现在,如果消费者C在A之前醒来,它有充分的理由相信A已经填充了它的条目,并抓住它。因为它还没有被填充,所以它是空的

换言之:

Producer A     Producer B     Consumer C

write_pos++
               write_pos++
               sets buffer[]
               sem_post()
                              sem_wait()
                              read_pos++
                              uses buffer[]
sets buffer[]
sem_post()
                              sem_wait()
                              read_pos++
                              uses buffer[]
生产者越多,出现上述情况的概率就越高

解决方案很简单:添加一个
write_pos2
计数器,用它使各个写入者串行化,从而让它们按照正确的顺序递增(post)信号量

考虑以下示例程序:

#define  _POSIX_C_SOURCE 200809L
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>

/*
 * Circular buffer of pointers, allocated as one block with the slot array
 * as a flexible array member. A NULL slot is a free slot.
 *
 * The three indices only ever increase; a slot is addressed as
 * index % size.
 */
typedef struct {
    sem_t              semaphore; /* posted once per added entry, waited on once per get */
    uint64_t           size;      /* number of slots in entry[] */
    volatile uint64_t  wrnext;  /* Next free write slot */
    volatile uint64_t  wrindex; /* Write index, second half */
    volatile uint64_t  rdindex; /* Read index */
    void              *entry[];   /* the slots themselves */
} cbuffer;

/*
 * Tear down a buffer made by cbuffer_create() and release its memory.
 *
 * Accepts NULL (does nothing). Always returns NULL so the caller can write
 * `cb = cbuffer_destroy(cb);` and not keep a dangling pointer.
 */
static cbuffer *cbuffer_destroy(cbuffer *const cbuf)
{
    if (cbuf == NULL)
        return NULL;

    /* Reset the bookkeeping fields before the block is handed back. */
    cbuf->size = 0;
    cbuf->wrnext = 0;
    cbuf->wrindex = 0;
    cbuf->rdindex = 0;

    sem_destroy(&cbuf->semaphore);
    free(cbuf);

    return NULL;
}

/*
 * Allocate an empty circular buffer with `size` slots.
 *
 * Returns the new buffer, or NULL with errno set:
 *   EINVAL  - size is less than 2
 *   ENOMEM  - the requested size overflows size_t or malloc() failed
 *   other   - whatever sem_init() reported
 * Release the result with cbuffer_destroy().
 */
static cbuffer *cbuffer_create(const size_t size)
{
    cbuffer *cbuf;

    if (size < 2) {
        errno = EINVAL;
        return NULL;
    }

    /* Refuse sizes for which header + slots does not fit in size_t. */
    if (size > (SIZE_MAX - sizeof *cbuf) / sizeof cbuf->entry[0]) {
        errno = ENOMEM;
        return NULL;
    }

    cbuf = malloc(sizeof *cbuf + size * sizeof cbuf->entry[0]);
    if (!cbuf) {
        errno = ENOMEM;
        return NULL;
    }

    /* All slots start out free (NULL). */
    memset(cbuf->entry, 0, size * sizeof cbuf->entry[0]);

    /* Thread-shared semaphore, initially 0: nothing to read yet. */
    if (sem_init(&cbuf->semaphore, 0, 0) == -1) {
        const int saved_errno = errno; /* free() may clobber errno */
        free(cbuf);
        errno = saved_errno;
        return NULL;
    }

    cbuf->size = size;
    cbuf->wrnext = 0;
    cbuf->wrindex = 0;
    cbuf->rdindex = 0;

    return cbuf;
}

/*
 * Append `entry` (must not be NULL) to the buffer and wake one reader.
 *
 * Three steps, in this order:
 *   1. take a ticket by atomically advancing wrnext;
 *   2. store the pointer into slot (ticket % size) once that slot is NULL;
 *   3. advance wrindex from ticket to ticket + 1, which can only succeed
 *      after every earlier ticket has done the same, and then post the
 *      semaphore. This makes the semaphore posts happen in ticket order.
 *
 * NOTE(review): if more producers than slots can be in flight at once, a
 * later ticket could win a slot ahead of an earlier ticket for the same
 * slot and then wait on wrindex -- worth confirming this cannot stall.
 */
static void cbuffer_add(cbuffer *const cbuf, void *const entry)
{
    /* Step 1: our ticket is the previous value of wrnext. */
    const uint64_t ticket = __sync_fetch_and_add(&cbuf->wrnext, (uint64_t)1);

    /* Step 2: busy-wait while the target slot is still occupied. */
    for (;;) {
        if (__sync_bool_compare_and_swap(&cbuf->entry[ticket % cbuf->size], NULL, entry))
            break;
    }

    /* Step 3: busy-wait for our turn, then move wrindex past our ticket. */
    for (;;) {
        if (__sync_bool_compare_and_swap(&cbuf->wrindex, ticket, ticket + (uint64_t)1))
            break;
    }

    /* TODO: check for -1 and errno == EOVERFLOW */
    sem_post(&cbuf->semaphore);
}

/*
 * Block until an entry is available, remove it from the buffer and
 * return it.
 *
 * The semaphore is waited on BEFORE a read index is claimed. Claiming the
 * index first is unsafe with more than one consumer: two consumers could
 * hold indices r and r + 1, and the one holding r + 1 could be the one
 * woken by the post for entry r, reading a slot that is not filled yet.
 * Waiting first means every claimed index is covered by a completed post,
 * and cbuffer_add() posts in slot order.
 */
static void *cbuffer_get(cbuffer *const cbuf)
{
    uint64_t rdindex;

    /* Wait for a published entry; restart if interrupted by a signal. */
    while (sem_wait(&cbuf->semaphore) == -1 && errno == EINTR)
        ;

    /* Get the index of the oldest entry. */
    rdindex = __sync_fetch_and_add(&cbuf->rdindex, (uint64_t)1);

    /* Pop entry: atomically fetch the pointer and leave NULL behind. */
    return __sync_fetch_and_and(&cbuf->entry[rdindex % cbuf->size], NULL);
}

/* Stop flag: set to 1 by main() or by a consumer that reads a NULL entry;
 * polled by every thread at the top of its loop. */
static volatile int done = 0;
/* The one circular buffer shared by all producer and consumer threads. */
static cbuffer     *cb = NULL;

/*
 * Consumer thread body.
 *
 * `payload` carries the consumer's numeric id, cast to a pointer by the
 * creator. Repeatedly takes entries from the shared buffer until `done` is
 * set. A NULL entry means the buffer handed out a slot that was not filled:
 * the thread reports it, sets `done` to stop the whole test, and exits.
 * Always returns NULL.
 */
void *consumer_thread(void *payload)
{
    const long    id = (long)payload;
    unsigned long count = 0UL;
    void         *entry;

    /* main() stops the threads with pthread_cancel(); allow that to take
     * effect at any point, including inside a blocking call. */
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);

    while (1) {
        if (done)
            return NULL;

        entry = cbuffer_get(cb);
        count++;

        if (!entry) {
            printf("Consumer %ld: NULL pointer at %lu encountered!\n", id, count);
            /* The message went to stdout, so that is the stream to flush. */
            fflush(stdout);
            done = 1;
            return NULL;
        }

    }
}

/*
 * Producer thread body.
 *
 * Keeps adding non-NULL dummy pointers to the shared buffer until `done`
 * is set. The values cycle through 256..511 so that consecutive entries
 * differ; they are never dereferenced. `payload` is unused. Always returns
 * NULL.
 */
void *producer_thread(void *payload __attribute__((unused)))
{
    unsigned long count = 0UL;

    /* main() stops the threads with pthread_cancel(); allow that to take
     * effect at any point, including while spinning in cbuffer_add(). */
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);

    while (1) {

        if (done)
            return NULL;

        cbuffer_add(cb, (void *)(256UL + (count & 255UL)));
        /* Advance the counter so the low byte of the payload actually varies. */
        count++;

    }
}

/*
 * Stress-test driver.
 *
 * Usage: PROGRAM SIZE PRODUCERS CONSUMERS
 *
 * Creates one circular buffer with SIZE slots, starts the requested number
 * of consumer and producer threads, and runs until SIGINT/SIGTERM arrives
 * or a thread sets `done`. Returns 0 after a normal shutdown, 1 on a usage
 * or allocation error; exits with status 1 if a thread cannot be started.
 */
int main(int argc, char *argv[])
{
    pthread_attr_t  attrs;
    pthread_t      *producer_id;
    pthread_t      *consumer_id;
    sigset_t        blocked;
    siginfo_t       info;
    struct timespec timeout;
    int             producers, consumers, size, i, result;
    char            dummy;

    if (argc != 4 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, "       %s SIZE PRODUCERS CONSUMERS\n", argv[0]);
        fprintf(stderr, "\n");
        return 1;
    }

    /* " %d %c": the trailing %c only converts if something other than
     * whitespace follows the number, so a result of exactly 1 means the
     * argument was a clean integer. */
    if (sscanf(argv[1], " %d %c", &size, &dummy) != 1 || size < 2) {
        fprintf(stderr, "%s: Invalid circular buffer size.\n", argv[1]);
        return 1;
    }

    if (sscanf(argv[2], " %d %c", &producers, &dummy) != 1 || producers < 1) {
        fprintf(stderr, "%s: Invalid number of producer threads.\n", argv[2]);
        return 1;
    }

    if (sscanf(argv[3], " %d %c", &consumers, &dummy) != 1 || consumers < 1) {
        fprintf(stderr, "%s: Invalid number of consumer threads.\n", argv[3]);
        return 1;
    }

    /* Allocate everything first and check all three results together. */
    cb = cbuffer_create(size);
    producer_id = malloc((size_t)producers * sizeof *producer_id);
    consumer_id = malloc((size_t)consumers * sizeof *consumer_id);
    if (!cb || !producer_id || !consumer_id) {
        fprintf(stderr, "%s.\n", strerror(ENOMEM));
        return 1;
    }

    /* Block SIGINT and SIGTERM so they are not delivered asynchronously;
     * they are collected synchronously by sigtimedwait() in the loop below. */
    sigemptyset(&blocked);
    sigaddset(&blocked, SIGINT);
    sigaddset(&blocked, SIGTERM);
    sigprocmask(SIG_BLOCK, &blocked, NULL);

    /* Worker threads are requested with a 32768-byte stack. */
    pthread_attr_init(&attrs);
    pthread_attr_setstacksize(&attrs, 32768);

    /* Start consumer threads. */
    for (i = 0; i < consumers; i++) {
        /* The payload is the 1-based thread number, passed as a pointer value. */
        result = pthread_create(&consumer_id[i], &attrs, consumer_thread, (void *)(1L + (long)i));
        if (result) {
            fprintf(stderr, "Cannot start consumer threads: %s.\n", strerror(result));
            exit(1);
        }
    }

    /* Start producer threads. */
    for (i = 0; i < producers; i++) {
        result = pthread_create(&producer_id[i], &attrs, producer_thread, (void *)(1L + (long)i));
        if (result) {
            fprintf(stderr, "Cannot start producer threads: %s.\n", strerror(result));
            exit(1);
        }
    }

    pthread_attr_destroy(&attrs);

    printf("Press CTRL+C or send SIGTERM to process %ld to stop testing.\n", (long)getpid());
    fflush(stdout);

    /* Wait loop: re-check `done` every 10 ms; leave as soon as a blocked
     * signal arrives or sigtimedwait() fails for any reason other than the
     * timeout expiring (EAGAIN). */
    while (1) {

        if (done)
            break;

        timeout.tv_sec = (time_t)0;
        timeout.tv_nsec = 10000000L; /* 0.010000000 seconds */
        result = sigtimedwait(&blocked, &info, &timeout);
        if (result != -1 || errno != EAGAIN) {
            done = 1;
            break;
        }
    }

    printf("Exiting...\n");
    fflush(stdout);

    /* Request cancellation of every worker, then wait for each to end. */
    for (i = 0; i < producers; i++)
        pthread_cancel(producer_id[i]);
    for (i = 0; i < consumers; i++)
        pthread_cancel(consumer_id[i]);

    for (i = 0; i < producers; i++)
        pthread_join(producer_id[i], NULL);
    for (i = 0; i < consumers; i++)
        pthread_join(consumer_id[i], NULL);

    /* All workers have been joined; release the buffer and the id arrays. */
    cb = cbuffer_destroy(cb);
    free(producer_id);
    free(consumer_id);

    return 0;
}
请注意,下面代码中的
get_all()
会反转链表,使最早添加的条目位于返回链表的开头。这样消费者就可以方便地按照添加的顺序处理所有条目,并且在常见情况下开销最小


问题?

为什么在结构中使用手动填充?你确定真的需要吗?你有没有做过基准测试?与使用
posix_memalign
而不是普通的
malloc
一样,您是否进行了测量以确定它的价值?@JoachimPileborg我已经习惯性地引入了填充/对齐分配,因为我在其他类型的缓冲区实现中使用了它(事实证明它是值得的)。除了占用内存之外,我看不到其他缺点。我还没有机会对这段代码进行基准测试。草图对这个问题非常清楚。我将进行一些测量,并考虑采用自由列表/堆栈方法
 /*
  * Lock-free LIFO list of nodes. `*list` is the head (NULL when empty);
  * add_one() pushes at the head, get_one() pops the head, get_all()
  * detaches the whole list and returns it oldest-first.
  */
 struct node {
     struct node *next;
     /* whatever data here */
 };

/*
 * Push `item` onto the front of `*list`.
 *
 * The new node must point at the CURRENT head, not at the head's
 * successor; otherwise the current head is dropped from the list. The
 * compare-and-swap retries if another thread changed the head in between.
 */
void add_one(volatile struct node **const list, struct node *item)
{
    do {
        /* Snapshot the head; the cast only drops the volatile qualifier. */
        item->next = (struct node *)*list;
    } while (!__sync_bool_compare_and_swap(list, item->next, item));
}

/*
 * Pop and return the most recently added node, or NULL if the list is
 * empty. The returned node's `next` is cleared.
 *
 * NOTE(review): `first->next` is read before the swap; if several threads
 * can pop and re-push the same nodes concurrently this is exposed to the
 * ABA problem -- confirm there is only one popping thread.
 */
struct node *get_one(volatile struct node **const list)
{
    struct node *first, *next;
    do {
         first = (struct node *)*list;
         next = (first) ? first->next : NULL;
    } while (!__sync_bool_compare_and_swap(list, first, next));
    if (first)
        first->next = NULL;
    return first;
}

/*
 * Atomically detach every node from `*list` (leaving it empty) and return
 * them as a chain in insertion order, oldest first. Returns NULL if the
 * list was empty.
 */
struct node *get_all(volatile struct node **const list)
{
    struct node *all, *root;
    /* Swap the head for NULL; retry if a push or pop got in between. */
    do {
        all = (struct node *)*list;
    } while (!__sync_bool_compare_and_swap(list, all, NULL));
    /* The detached chain is newest-first; reverse it in place. */
    root = NULL;
    while (all) {
        struct node *const curr = all;
        all = all->next;
        curr->next = root;
        root = curr;
    }
    return root;
}