C: Improving shared-memory performance between isolated CPUs on Linux


I am trying to optimize the performance of reads and writes to shared memory. One program writes to the shared memory and another reads from it.

I used the following line in my
/etc/default/grub
file to help isolate the CPUs these two programs will run on:

GRUB_CMDLINE_LINUX_DEFAULT="quiet splash intel_idle.max_cstate=1 isolcpus=6,7"
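
(Aside, not from the original question: after rebooting, the kernel's view of the isolated set can be confirmed with

cat /sys/devices/system/cpu/isolated

which should print 6-7 for the command line above.)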
I am using

taskset -c 6 writer
taskset -c 7 reader

to set these programs to run on those CPUs.

Additionally, I use the following code to give both programs the highest scheduling priority:

struct sched_param param;   
param.sched_priority = sched_get_priority_max(SCHED_FIFO);

if(sched_setscheduler(0, SCHED_FIFO, &param) == -1) 
{
     perror("sched_setscheduler failed");
     exit(-1);
}
I defined a struct for the shared memory that contains the required synchronization tools, as well as a timespec struct and a double to be passed between the two programs, as follows:

typedef struct
{
    // Synchronization objects
    pthread_mutex_t ipc_mutex;
    sem_t ipc_sem;
    // Shared data
    double value;
    volatile int read_cond;
    volatile int end_cond;
    double start_time;
    struct timespec ts;
} shared_data_t;
Shared-memory initialization:

Writer:

// ftok to generate unique key 
key_t key = ftok("shmfile",65); 

// shmget returns an identifier in shmid 
int shmid = shmget(key,1024,0666|IPC_CREAT); 
// note: a System V shmid is not a file descriptor, so this ftruncate call
// does not resize the segment; its size is fixed by the shmget call above
ftruncate(shmid, sizeof(shared_data_t));

// shmat to attach to shared memory 
shared_data_t* sdata = (shared_data_t*) shmat(shmid,(void*)0,0); 
sdata->value = 0;
Reader:

// ftok to generate unique key 
key_t key = ftok("shmfile",65); 

// shmget returns an identifier in shmid 
int shmid = shmget(key,1024,0666|IPC_CREAT); 
// note: as above, this ftruncate call has no effect on a System V segment
ftruncate(shmid, sizeof(shared_data_t));

// shmat to attach to shared memory 
shared_data_t* sdata = (shared_data_t*) shmat(shmid,(void*)0,0); 
Initializing the synchronization tools in the Writer:

pthread_mutexattr_t mutex_attr;
pthread_mutexattr_init(&mutex_attr);
pthread_mutexattr_setpshared(&mutex_attr, PTHREAD_PROCESS_SHARED);
pthread_mutex_init(&sdata->ipc_mutex, &mutex_attr);
sem_init(&sdata->ipc_sem, 1, 0);
Cleanup in the Writer:

pthread_mutex_unlock(&sdata->ipc_mutex);
pthread_mutex_destroy(&sdata->ipc_mutex);

//detach from shared memory 
shmdt(sdata); 

// destroy the shared memory 
shmctl(shmid,IPC_RMID,NULL); 
Writer code:

// BILLION is assumed to be defined elsewhere as 1000000000
for (int i = 0; i < 20000000; ++i)
{
    pthread_mutex_lock(&sdata->ipc_mutex);
    sdata->value++;
    clock_gettime(CLOCK_MONOTONIC, &sdata->ts);
    sdata->start_time = (BILLION*sdata->ts.tv_sec) + sdata->ts.tv_nsec;
    sdata->read_cond = 1;
    pthread_mutex_unlock(&sdata->ipc_mutex);
    sem_wait(&sdata->ipc_sem);
}
fprintf(stderr, "done writing\n" );

pthread_mutex_lock(&sdata->ipc_mutex);
sdata->end_cond = 1;
pthread_mutex_unlock(&sdata->ipc_mutex);
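
(Editor's note: the reader's loop is not shown in the question. Based on the description and the comments at the end, it presumably busy-waits on read_cond by repeatedly unlocking and relocking the mutex, timestamps the read, and posts the semaphore so the writer can continue. The following is a hypothetical reconstruction, not the original code; BILLION is again assumed to be 1000000000.)

struct timespec ts;
double end_time, elapsed;

while (1)
{
    pthread_mutex_lock(&sdata->ipc_mutex);
    // busy-wait: release and immediately re-acquire the lock until the
    // writer publishes a new value or signals the end
    while (!sdata->read_cond && !sdata->end_cond)
    {
        pthread_mutex_unlock(&sdata->ipc_mutex);
        pthread_mutex_lock(&sdata->ipc_mutex);
    }
    if (sdata->end_cond)
    {
        pthread_mutex_unlock(&sdata->ipc_mutex);
        break;
    }
    clock_gettime(CLOCK_MONOTONIC, &ts);
    end_time = (BILLION*ts.tv_sec) + ts.tv_nsec;
    elapsed = end_time - sdata->start_time;  // ns from write to read
    sdata->read_cond = 0;
    pthread_mutex_unlock(&sdata->ipc_mutex);
    sem_post(&sdata->ipc_sem);               // let the writer continue
    // accumulate avg/max/min of elapsed here
}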
Cleanup in the Reader:

//detach from shared memory 
shmdt(sdata); 
The goal is to minimize the time spent between these two operations. Ideally, I would like to be able to guarantee that the time from when a value is written to when it is read is less than 1 microsecond. However, the results I get:

max time: 5852.000000, counter: 0
max time: 18769.000000, counter: 30839
max time: 27416.000000, counter: 66632
max time: 28668.000000, counter: 1820109
max time: 121362.000000, counter: 1853346
done writing
avg time to read: 277.959
max time to read: 121362
min time to read: 60
count above max threshhold of 1000 ns: 1871
indicate that a number of reads (about 0.01% of them) take longer than 1 us, and can take as long as 121 us.

My questions are the following:

1. What is causing these spikes, given that I have set the priority to the highest level and isolated the CPUs these programs run on?

2. From what I have read, I should not expect clock times to have nanosecond accuracy. Are these spikes simply errors in the clock readings? (One way to check is sketched after this list.)

3. The other option I have considered is that these cores (6 and 7) are somehow being interrupted, despite being set to the highest priority.
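
(Aside on question 2, not from the original question: one way to bound how much of this can be clock error is to query the clock's advertised resolution and time two back-to-back reads; spikes of tens of microseconds are far beyond either. A minimal sketch:)

#include <stdio.h>
#include <time.h>

int main(void)
{
    struct timespec res, t1, t2;

    // advertised resolution of the clock (often 1 ns on modern Linux)
    clock_getres(CLOCK_MONOTONIC, &res);
    printf("resolution: %ld ns\n", res.tv_nsec);

    // cost of two consecutive reads; spikes far above this are not
    // explained by the clock itself
    clock_gettime(CLOCK_MONOTONIC, &t1);
    clock_gettime(CLOCK_MONOTONIC, &t2);
    printf("back-to-back delta: %ld ns\n",
           (t2.tv_sec - t1.tv_sec) * 1000000000L + (t2.tv_nsec - t1.tv_nsec));
    return 0;
}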

Any help would be greatly appreciated.

EDIT:

Per the comments below, here are the contents of my
/proc/interrupts
file:

           CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7       
   0:         20          0          0          0          0          0          0          0   IO-APIC    2-edge      timer
   1:          2          0          0          0          0          0          0          0   IO-APIC    1-edge      i8042
   8:          1          0          0          0          0          0          0          0   IO-APIC    8-edge      rtc0
   9:          0          0          0          0          0          0          0          0   IO-APIC    9-fasteoi   acpi
  12:          2          0          0          0          1          1          0          0   IO-APIC   12-edge      i8042
  16:          0          0          0          0          0          0          0          0   IO-APIC   16-fasteoi   i801_smbus, pcim_das1602_16
  19:          2          0          0          0          8         10          6          2   IO-APIC   19-fasteoi 
 120:          0          0          0          0          0          0          0          0   PCI-MSI 16384-edge      aerdrv
 121:         99        406          0          0         14       5960          6          0   PCI-MSI 327680-edge      xhci_hcd
 122:       8726        133         47         28       4126       3910      22638        795   PCI-MSI 376832-edge      ahci[0000:00:17.0]
 123:          2          0          0          0          2          0          3       3663   PCI-MSI 520192-edge      eno1
 124:       3411          0          2          1        176      24498         77         11   PCI-MSI 32768-edge      i915
 125:         45          0          0          0          3          6          0          0   PCI-MSI 360448-edge      mei_me
 126:        432          0          0          0        144        913         28          1   PCI-MSI 514048-edge      snd_hda_intel:card0
 NMI:          1          1          1          1          1          1          1          1   Non-maskable interrupts
 LOC:      12702      10338      10247      10515       9969      10386      16658      13568   Local timer interrupts
 SPU:          0          0          0          0          0          0          0          0   Spurious interrupts
 PMI:          1          1          1          1          1          1          1          1   Performance monitoring interrupts
 IWI:          0          0          0          0          0          0          0          0   IRQ work interrupts
 RTR:          7          0          0          0          0          0          0          0   APIC ICR read retries
 RES:       4060       2253       1026        708        595        846        887        751   Rescheduling interrupts
 CAL:      11906      10423      11418       9894      14562      11000      21479      11223   Function call interrupts
 TLB:      10620       8996      10060       8674      13172       9622      20121       9838   TLB shootdowns
 TRM:          0          0          0          0          0          0          0          0   Thermal event interrupts
 THR:          0          0          0          0          0          0          0          0   Threshold APIC interrupts
 DFR:          0          0          0          0          0          0          0          0   Deferred Error APIC interrupts
 MCE:          0          0          0          0          0          0          0          0   Machine check exceptions
 MCP:          2          2          2          2          2          2          2          2   Machine check polls
 ERR:          0
 MIS:          0
 PIN:          0          0          0          0          0          0          0          0   Posted-interrupt notification event
 PIW:          0          0          0          0          0          0          0          0   Posted-interrupt wakeup event
I tried changing the smp affinity of interrupts 122 and 123 to cores 0 and 1. This does not appear to do anything: when I reset the computer, those affinities are still set to cores 6 and 7, respectively.


Even without resetting, and simply re-running my programs, I see no change in the number of interrupts serviced by those CPU cores.
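
(For reference, an editor's note rather than part of the original attempt: IRQ affinity is normally changed by writing a hexadecimal CPU mask to /proc/irq/<N>/smp_affinity as root, for example mask 3 = CPU0 + CPU1:

echo 3 > /proc/irq/122/smp_affinity
echo 3 > /proc/irq/123/smp_affinity

Note that the irqbalance daemon, if it is running, may rewrite these masks, which would explain settings that do not stick.)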

Comments:

Look at
/proc/interrupts
to find out which cores are servicing which interrupts.

Regarding "pthread_mutex_unlock(&sdata->ipc_mutex); pthread_mutex_lock(&sdata->ipc_mutex);": personally, I don't think putting this kind of thing in your program is a reasonable way to go. What exactly are you trying to achieve here? Why not use a proper
pthread_cond_t
mechanism? (oakad)

@oakad Waking a sleeping thread through a condition variable introduces too much latency for my requirements, which is why I implemented a busy-wait solution here.

Then you should at least
sched_yield
in there; you are giving other processes no chance at all to grab the lock. More generally, if your latency requirements are that demanding, you should consider implementing a so-called "disruptor" algorithm (see page 8 for the general idea; several C++ ports of the idea also exist). (oakad)
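
(Editor's aside on that last comment: the "disruptor" presumably refers to the LMAX Disruptor, a lock-free single-producer ring buffer. As a minimal sketch of the same direction, using C11 atomics and with every name invented here for illustration, a single-slot handoff between one writer and one reader needs neither the mutex nor the semaphore:)

#include <stdatomic.h>

// Illustrative only; none of these names come from the original programs.
typedef struct
{
    double value;
    double start_time;
    atomic_int ready;   // 0 = slot free, 1 = fresh value published
} lockfree_slot_t;

// Writer: wait until the previous value has been consumed, then publish.
static void publish(lockfree_slot_t *s, double v, double t)
{
    while (atomic_load_explicit(&s->ready, memory_order_acquire))
        ;   // spin until the reader has consumed the last value
    s->value = v;
    s->start_time = t;
    atomic_store_explicit(&s->ready, 1, memory_order_release);
}

// Reader: spin until a fresh value appears, then consume it.
static void consume(lockfree_slot_t *s, double *v, double *t)
{
    while (!atomic_load_explicit(&s->ready, memory_order_acquire))
        ;   // spin until the writer has published
    *v = s->value;
    *t = s->start_time;
    atomic_store_explicit(&s->ready, 0, memory_order_release);
}

The release store makes the plain writes to value and start_time visible before ready flips, so with exactly one writer and one reader there is no tearing; a real disruptor generalizes the single slot to a ring buffer so the writer rarely has to wait.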