C 故障排除多个生产者单个消费者循环缓冲区
我一直在尝试在 C（Linux）中实现 MPSC（多生产者、单消费者）循环缓冲区。这里是缓冲区结构：
/*
 * MPSC (multi-producer, single-consumer) circular buffer.
 *
 * Each mutable field is followed by padding up to CACHE_LINE bytes —
 * presumably to keep fields touched by different threads on separate
 * cache lines (false-sharing avoidance); NOTE(review): benchmark to
 * confirm the padding pays for its memory cost.
 */
typedef struct mpsc_buffer_s {
sem_t semaphore; /* counts published elements; posted by producers, waited on by the consumer */
unsigned char cache_pad_1[CACHE_LINE - sizeof(sem_t)];
uint64_t write_pos; /* monotonically increasing producer counter; slot = write_pos % size */
unsigned char cache_pad_2[CACHE_LINE - sizeof(uint64_t)];
size_t size; /* number of slots in 'buffer' */
unsigned char cache_pad_3[CACHE_LINE - sizeof(size_t)];
uint64_t read_pos; /* consumer-only counter; slot = read_pos % size */
unsigned char cache_pad_4[CACHE_LINE - sizeof(uint64_t)];
void **buffer; /* array of 'size' element pointers; NULL marks an empty slot */
} mpsc_buffer_t __attribute__ ((__aligned__(CACHE_LINE)));
以下是相关功能:
/*
 * Allocate and initialise a cache-line-aligned MPSC buffer with 'size' slots.
 * Aborts via ABORT_ON_ERR on allocation or semaphore failure.
 * The returned pointer is owned by the caller.
 */
mpsc_buffer_t* init_mpsc_buffer(size_t size) {
    mpsc_buffer_t *new_buffer = NULL;
    /* BUG FIX: posix_memalign's return value was ignored; on failure the
     * pointer is indeterminate and the field stores below would be UB.
     * posix_memalign reports errors via its return value (0 on success),
     * so normalise it to the 0/-1 convention ABORT_ON_ERR is used with. */
    int rc = posix_memalign((void**) &new_buffer, CACHE_LINE, sizeof(mpsc_buffer_t));
    ABORT_ON_ERR(rc != 0 ? -1 : 0, "Aligned buffer allocation failed");
    new_buffer->size = size;
    new_buffer->read_pos = 0;
    new_buffer->write_pos = 0;
    rc = sem_init(&new_buffer->semaphore, 0, 0);
    ABORT_ON_ERR(rc, "Semaphore init failed");
    /* calloc replaces malloc+memset: it zero-fills (NULL == empty slot)
     * and checks the size*sizeof multiplication for overflow. The result
     * was previously passed to memset unchecked — UB on allocation failure. */
    new_buffer->buffer = calloc(new_buffer->size, sizeof(void*));
    ABORT_ON_ERR(new_buffer->buffer == NULL ? -1 : 0, "Element array allocation failed");
    return new_buffer;
}
/*
 * Publish 'element' (must be non-NULL) into the buffer.
 *
 * NOTE(review): there is a race between claiming the slot and posting
 * the semaphore. Producer A can claim slot N, lose its time slice,
 * then producer B fills slot N+1 and posts the semaphore; the consumer
 * wakes, reads slot N (still NULL), and later slot N+1 is never
 * consumed. Serialising producers before sem_post (a second write
 * counter advanced in claim order) fixes this — see cbuffer_add below.
 */
void add_to_buffer(mpsc_buffer_t *buffer, void *element) {
// get next address to write into
uint64_t write_pos = __sync_fetch_and_add(&buffer->write_pos, 1) % buffer->size;
// spin lock until the address is free (buffer full: wait for consumer to clear the slot)
while(!__sync_bool_compare_and_swap(&(buffer->buffer[write_pos]), NULL, element));
// increment semaphore to signal the consumer one more element is available
int rc = sem_post(&buffer->semaphore);
ABORT_ON_ERR(rc, "Semaphore unlock failed");
}
/*
 * Remove and return the oldest element. Blocks until one is available.
 * Single-consumer only: read_pos is therefore accessed without atomics.
 */
void* get_from_buffer(mpsc_buffer_t *buffer) {
    int rc = sem_wait(&buffer->semaphore);
    ABORT_ON_ERR(rc, "Semaphore wait failed");
    uint64_t read_pos = buffer->read_pos % buffer->size;
    void *element = buffer->buffer[read_pos];
    if(!element) {
        /* BUG FIX: "%u" with a uint64_t argument is undefined behavior in
         * printf-style formatting; cast to unsigned long long for "%llu". */
        error_print("cannot get NULL stuff - read_pos %llu", (unsigned long long) read_pos);
    }
    /* Clear the slot so producers spinning on CAS(slot, NULL, elem) can reuse it. */
    buffer->buffer[read_pos] = NULL;
    buffer->read_pos++;
    return element;
}
我使用这种缓冲区来传递指针。很明显,我不发送空指针
当我将生产者的数量从2增加到3时,会出现一个野bug:然后,消费者开始读取空值。由于我不主动发送空指针,这意味着使用者线程获得一个正信号量,但随后从读取位置读取一个空值
另一方面,缓冲区中的一些指针没有被清除,从而导致潜在的死锁
算法中是否存在逻辑错误，或者这些问题可能与我看不到的缓存机制有关？

回答：在递增写索引和填充条目指针之间存在竞争条件。考虑这样一种情况：生产者 A 增加写入索引，但随即耗尽其时间片。同时，生产者 B 再次增加写索引，填充下一个条目——记住，A 还没有填充它的条目——并增加信号量。现在，如果消费者 C 在 A 之前醒来，它有充分的理由相信 A 已经填充了它的条目，于是去读取它。因为该条目还没有被填充，所以读到的是空指针。换言之：
Producer A Producer B Consumer C
write_pos++
write_pos++
sets buffer[]
sem_post()
sem_wait()
read_pos++
uses buffer[]
sets buffer[]
sem_post()
sem_wait()
read_pos++
uses buffer[]
生产商越多,出现上述情况的概率就越高
解决方案很简单:添加一个write_pos2
计数器,它序列化写入程序,以便它们按照正确的顺序发布信号量
考虑以下示例程序:
#define _POSIX_C_SOURCE 200809L
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
/*
 * Lock-free MPMC circular buffer (fixed version).
 * wrnext hands out slots; wrindex is advanced in slot order so that
 * sem_post only happens once all earlier slots are filled.
 */
typedef struct {
sem_t semaphore; /* counts fully published entries */
uint64_t size; /* number of slots in entry[] */
volatile uint64_t wrnext; /* Next free write slot */
volatile uint64_t wrindex; /* Write index, second half: last fully published slot + 1 */
volatile uint64_t rdindex; /* Read index */
void *entry[]; /* flexible array member: 'size' slots; NULL == empty */
} cbuffer;
/*
 * Tear down a buffer created by cbuffer_create().
 * NULL-safe; always returns NULL so callers can write
 * `cb = cbuffer_destroy(cb);`.
 */
static cbuffer *cbuffer_destroy(cbuffer *const cbuf)
{
    if (!cbuf)
        return NULL;

    /* Scrub the bookkeeping fields before releasing the memory. */
    cbuf->size = 0;
    cbuf->wrnext = 0;
    cbuf->wrindex = 0;
    cbuf->rdindex = 0;
    sem_destroy(&cbuf->semaphore);
    free(cbuf);
    return NULL;
}
/*
 * Create a circular buffer with 'size' slots (size >= 2).
 * Returns NULL with errno set (EINVAL/ENOMEM) on failure.
 */
static cbuffer *cbuffer_create(const size_t size)
{
    cbuffer *cbuf;
    if (size < 2) {
        errno = EINVAL;
        return NULL;
    }
    /* Guard the size computation against overflow before allocating. */
    if (size > (SIZE_MAX - sizeof *cbuf) / sizeof cbuf->entry[0]) {
        errno = ENOMEM;
        return NULL;
    }
    cbuf = malloc(sizeof *cbuf + size * sizeof cbuf->entry[0]);
    if (!cbuf) {
        errno = ENOMEM;
        return NULL;
    }
    /* NULL entries mark empty slots. */
    memset(cbuf->entry, 0, size * sizeof cbuf->entry[0]);
    /* BUG FIX: sem_init's return value was ignored; it can fail
     * (e.g. EINVAL), leaving an unusable semaphore behind. */
    if (sem_init(&cbuf->semaphore, 0, 0) == -1) {
        const int saved_errno = errno;
        free(cbuf);
        errno = saved_errno;
        return NULL;
    }
    cbuf->size = size;
    cbuf->wrnext = 0;
    cbuf->wrindex = 0;
    cbuf->rdindex = 0;
    return cbuf;
}
/*
 * Publish 'entry' (must be non-NULL). The key fix over the question's
 * version: producers serialise on wrindex so that sem_post occurs in
 * slot order — a producer cannot signal the consumer before all
 * earlier slots have been filled.
 */
static void cbuffer_add(cbuffer *const cbuf, void *const entry)
{
uint64_t wrnext;
/* Claim the next write slot (atomic ticket). */
wrnext = __sync_fetch_and_add(&cbuf->wrnext, (uint64_t)1);
/* Spin while buffer full: wait until our slot has been consumed (NULL). */
while (!__sync_bool_compare_and_swap(&cbuf->entry[wrnext % cbuf->size], NULL, entry))
;
/* Spin until all producers with earlier tickets have advanced wrindex,
then advance it ourselves — this serialises the sem_post order. */
while (!__sync_bool_compare_and_swap(&cbuf->wrindex, wrnext, wrnext + (uint64_t)1))
;
/* TODO: check for -1 and errno == EOVERFLOW */
sem_post(&cbuf->semaphore);
}
/*
 * Remove and return the oldest entry; blocks until one is published.
 * Supports multiple consumers: each claims a read ticket atomically.
 */
static void *cbuffer_get(cbuffer *const cbuf)
{
uint64_t rdindex;
/* Claim the index of the oldest unclaimed entry. */
rdindex = __sync_fetch_and_add(&cbuf->rdindex, (uint64_t)1);
/* Wait until at least one published entry is available. */
sem_wait(&cbuf->semaphore);
/* Pop entry: fetch-and-AND with NULL (all-zero bits) is an atomic
swap-with-NULL, returning the pointer and clearing the slot in one step. */
return __sync_fetch_and_and(&cbuf->entry[rdindex % cbuf->size], NULL);
}
/* Global stop flag; set by any thread to end the test run.
   NOTE(review): volatile is not a thread-synchronisation primitive,
   but suffices here as a best-effort stop flag. */
static volatile int done = 0;
/* The shared buffer under test, created in main(). */
static cbuffer *cb = NULL;
/*
 * Consumer: pops entries forever; stops the whole test if a NULL
 * pointer is ever popped (the bug being hunted) or 'done' is set.
 * 'payload' carries a small integer id for log messages.
 */
void *consumer_thread(void *payload)
{
    const long id = (long)payload;
    unsigned long count = 0UL;
    void *entry;
    /* Allow main() to cancel us even while blocked in sem_wait(). */
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
    while (1) {
        if (done)
            return NULL;
        entry = cbuffer_get(cb);
        count++;
        if (!entry) {
            printf("Consumer %ld: NULL pointer at %lu encountered!\n", id, count);
            /* BUG FIX: the message goes to stdout via printf, so stdout
             * is the stream that must be flushed (was fflush(stderr)). */
            fflush(stdout);
            done = 1;
            return NULL;
        }
    }
}
/*
 * Producer: pushes non-NULL tagged values (256..511) forever,
 * until 'done' is set.
 */
void *producer_thread(void *payload __attribute__((unused)))
{
    unsigned long count = 0UL;
    /* Allow main() to cancel us even while spinning in cbuffer_add(). */
    pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
    pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
    while (1) {
        if (done)
            return NULL;
        /* BUG FIX: 'count' was never incremented, so (count & 255) was
         * always 0 and every push carried the same value 256; the mask
         * clearly intends a varying 256..511 tag. */
        cbuffer_add(cb, (void *)(256UL + (count & 255UL)));
        count++;
    }
}
/*
 * Test driver: SIZE PRODUCERS CONSUMERS on the command line.
 * Spawns the threads, then waits for SIGINT/SIGTERM or for a
 * consumer to set 'done' (i.e. the NULL-pop bug reproduced).
 */
int main(int argc, char *argv[])
{
pthread_attr_t attrs;
pthread_t *producer_id;
pthread_t *consumer_id;
sigset_t blocked;
siginfo_t info;
struct timespec timeout;
int producers, consumers, size, i, result;
char dummy;
if (argc != 4 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
fprintf(stderr, " %s SIZE PRODUCERS CONSUMERS\n", argv[0]);
fprintf(stderr, "\n");
return 1;
}
/* The trailing " %c" rejects arguments with junk after the number. */
if (sscanf(argv[1], " %d %c", &size, &dummy) != 1 || size < 2) {
fprintf(stderr, "%s: Invalid circular buffer size.\n", argv[1]);
return 1;
}
if (sscanf(argv[2], " %d %c", &producers, &dummy) != 1 || producers < 1) {
fprintf(stderr, "%s: Invalid number of producer threads.\n", argv[2]);
return 1;
}
if (sscanf(argv[3], " %d %c", &consumers, &dummy) != 1 || consumers < 1) {
fprintf(stderr, "%s: Invalid number of consumer threads.\n", argv[3]);
return 1;
}
cb = cbuffer_create(size);
producer_id = malloc((size_t)producers * sizeof *producer_id);
consumer_id = malloc((size_t)consumers * sizeof *consumer_id);
if (!cb || !producer_id || !consumer_id) {
fprintf(stderr, "%s.\n", strerror(ENOMEM));
return 1;
}
/* Block SIGINT/SIGTERM BEFORE creating threads so every thread
   inherits the mask and only main() (via sigtimedwait) sees them. */
sigemptyset(&blocked);
sigaddset(&blocked, SIGINT);
sigaddset(&blocked, SIGTERM);
sigprocmask(SIG_BLOCK, &blocked, NULL);
pthread_attr_init(&attrs);
/* Small stacks: the worker threads need almost none. */
pthread_attr_setstacksize(&attrs, 32768);
/* Start consumer threads. */
for (i = 0; i < consumers; i++) {
result = pthread_create(&consumer_id[i], &attrs, consumer_thread, (void *)(1L + (long)i));
if (result) {
fprintf(stderr, "Cannot start consumer threads: %s.\n", strerror(result));
exit(1);
}
}
/* Start producer threads. */
for (i = 0; i < producers; i++) {
result = pthread_create(&producer_id[i], &attrs, producer_thread, (void *)(1L + (long)i));
if (result) {
fprintf(stderr, "Cannot start producer threads: %s.\n", strerror(result));
exit(1);
}
}
pthread_attr_destroy(&attrs);
printf("Press CTRL+C or send SIGTERM to process %ld to stop testing.\n", (long)getpid());
fflush(stdout);
/* Poll every 10 ms: exit on a caught signal or when a consumer
   sets 'done' after popping a NULL. */
while (1) {
if (done)
break;
timeout.tv_sec = (time_t)0;
timeout.tv_nsec = 10000000L; /* 0.010000000 seconds */
result = sigtimedwait(&blocked, &info, &timeout);
if (result != -1 || errno != EAGAIN) {
done = 1;
break;
}
}
printf("Exiting...\n");
fflush(stdout);
/* Cancel first (threads may be blocked in sem_wait), then join. */
for (i = 0; i < producers; i++)
pthread_cancel(producer_id[i]);
for (i = 0; i < consumers; i++)
pthread_cancel(consumer_id[i]);
for (i = 0; i < producers; i++)
pthread_join(producer_id[i], NULL);
for (i = 0; i < consumers; i++)
pthread_join(consumer_id[i], NULL);
cb = cbuffer_destroy(cb);
free(producer_id);
free(consumer_id);
return 0;
}
请注意，下面的 get_all() 会在取出时反转链表，使最早的条目位于返回列表的第一位。这使得消费者可以轻松地按照添加的顺序处理所有条目，而在常见情况下开销最小。
问题?为什么在结构中使用手动填充?你确定真的需要吗?你有没有做过基准测试?与使用
posix_memalign
而不是普通的malloc
一样,您是否进行了测量以确定它的价值?@JoachimPileborg我已经习惯性地引入了填充/对齐分配,因为我在其他类型的缓冲区实现中使用了它(事实证明它是值得的)。除了占用内存之外,我看不到其他缺点。我还没有机会对这段代码进行基准测试。草图对这个问题非常清楚。我将进行一些测量,并考虑采用自由列表/堆栈方法
/* Singly linked node for the lock-free LIFO free-list sketch below.
   The list head pointer itself is the only shared atomic state. */
struct node {
struct node *next;
/* whatever data here */
};
/*
 * Push 'item' onto the front of the lock-free LIFO list.
 *
 * BUG FIXES vs. original:
 *  1. item->next was set to (*list)->next, which skips the current head
 *     and would lose a node on every successful push; it must be set to
 *     the current head itself.
 *  2. The while condition was missing a closing parenthesis (did not
 *     compile).
 */
void add_one(volatile struct node **const list, struct node *item)
{
    do {
        /* Snapshot the current head; CAS succeeds only if it is unchanged. */
        item->next = (struct node *) *list;
    } while (!__sync_bool_compare_and_swap(list, item->next, item));
}
/*
 * Pop one node from the front of the lock-free LIFO list, or NULL if empty.
 *
 * BUG FIX vs. original: the while condition was missing a closing
 * parenthesis (did not compile).
 *
 * NOTE(review): this pop has the classic ABA hazard — if the head is
 * freed and reallocated between the snapshot and the CAS, the CAS can
 * succeed with a stale 'next'. Safe only if nodes are never returned to
 * a general allocator while other threads may still pop.
 */
struct node *get_one(volatile struct node **const list)
{
    struct node *first, *next;
    do {
        first = (struct node *) *list;
        next = (first) ? first->next : NULL;
    } while (!__sync_bool_compare_and_swap(list, first, next));
    if (first)
        first->next = NULL; /* detach so the caller gets a clean node */
    return first;
}
/*
 * Atomically detach the entire LIFO list and return it reversed,
 * so the oldest entry is first. Returns NULL if the list was empty.
 */
struct node *get_all(volatile struct node **const list)
{
    struct node *chain, *reversed;

    /* Swap the whole list out for NULL in one atomic step. */
    do {
        chain = (struct node *) *list;
    } while (!__sync_bool_compare_and_swap(list, chain, NULL));

    /* Reverse in place: push each node onto a fresh head. */
    reversed = NULL;
    while (chain != NULL) {
        struct node *const curr = chain;
        chain = curr->next;
        curr->next = reversed;
        reversed = curr;
    }
    return reversed;
}