Multithreading MCS锁实现中的死锁 硬件: 达尔文内核版本13.2.0:Thu Apr 17 23:03:13 PDT 2014;根目录:xnu-2422.100.13~1/RELEASE\u X86\u 64 X86\u 64 原子学 1#如果没有原子效用# 2.定义原子效用 3. 4#包括 5. 6#define BARRIER()_uuasm_uuuvolatile(“:“内存”) 7. 8#定义CPU_RELAX()_uASM_uu volatile(“暂停\n\t”:“内存”) 9 10#定义存储围栏(); 11 12类原子 13 { 14公众: 15 16 /** 17*检查addr处的值是否等于oldval,如果是,则将其替换为newva l 18*并返回旧值 19 */ 20内联静态大小比较索引更改(可变大小添加、大小旧值、大小新值) 21 { 22尺码; 23 \uuuu asm \uuuuuuuuuu易失性(“锁定cmpxchgq%2,%1\n\t” 24:“=a”(ret),“+m”(*addr) 25:“r”(新值),“0”(旧值) 26.“记忆”); 27返回ret; 28 } 29 30 /** 31*以原子方式将x存储到addr并返回上一个 32*存储在addr中 33 */ 34内联静态大小加载和存储(大小x、可变大小x添加) 36 { 37个尺码; 38 \uuuu asm \uuuuuuuuuu易失性(“锁xchgq%1,%0\n\t” 39:“+m”(*addr),“=r”(ret) 40:“1”(x)); 41返回ret; 42 } 43 44 }; 45 46#endif mcs.hpp 1#如果没有MCS锁定# 2#定义MCS#U锁# 3. 4#包括“atomics.hpp” 5#包括 6. 7级麦克斯洛克 8 { 9结构mcs\u锁 10 { 11 mcs_lock_t():next(0),locked(false){ 12结构mcs\U lock\U t*next; 13门上锁; 14 }; 15 16公众: 17 typedef结构mcs_锁和mcs_锁; 18 19私人: 20 mcs_锁**尾; 21静态boost::特定于线程的ptr tls_节点; 22 23公众: 24 MCSLock(mcs\u lock**lock\u tail):tail(lock\u tail) 25 { 26如果(tls_node.get()=0) 27 tls_节点重置(新mcs_锁()); 28 } 29 30无效锁() 31 { 32 mcs_lock*thread_node=tls_node.get(); 33线程\u节点->下一步=0; 34线程\节点->锁定=真; 35 36易失性mcs_-lock*pred=重新解释( 37原子库::loadAndStore( 38重新解释铸件(螺纹节点), 39重新解释铸件(尾部) 40 ) 41 ); 42如果(pred!=0) 43 { 44 pred->next=*尾部; 45 46仓库围栏(); 47//BARRIER();//需要防止在prev->next=tail和thread_node->locked之间重新排序。(WR harzard) 48 49//在局部变量上旋转。有人给我解锁plz!! 50时(线程\节点->锁定) 51 CPU_RELAX(); 52 53 } 54 } 55 56无效解锁() 57 { 58 mcs_lock*thread_node=tls_node.get(); 59如果(线程\节点->下一步==0) 60 { 61//如果为false,则表示新线程请求锁。现在释放新线程的锁 62如果( 63原子库::比较索引更改( 64重新解释铸件(尾部), 65重新解释铸件(螺纹节点), 66 0 67)=重新解释铸件(螺纹节点)68) 69 { 70人返回; 71 } 72 73 while(线程\节点->下一步==0) 74 CPU_RELAX(); 75 } 76 77线程\节点->下一步->锁定=假; 78 } 79 }; 80 81 boost::线程特定的线程ptr MCSLock::tls\u节点; 82#endif mcs_test.cpp 1#包括“mcs.hpp” 2#包括 3#包括 4#包括 5#定义NUM#u线程16 6#定义NUM#u迭代次数100 7. 
8个std::向量元素; 9 MCSLock::mcs_lock*tail=0; 10 11无效*线程运行(无效*数据) 12 { 13麦克斯洛克锁和尾锁; 14用于(int i=0;inext = 0; 34 thread_node->locked = true; 35 36 volatile mcs_lock* pred = reinterpret_cast( 37 AtomicUtils::loadAndStore( 38 reinterpret_cast( thread_node ), 39 reinterpret_cast( tail ) 40 ) 41 ); 42 if( pred != 0 ) 43 { 44 pred->next = *tail; 45 46 STORE_FENCE(); 47 //BARRIER(); // Required to prevent re ordering between prev->next = tail and thread_node->locked. ( WR harzard ) 48 49 // Spin on a local variable. Someone unlock me plz !! 50 while( thread_node->locked ) 51 CPU_RELAX(); 52 53 } 54 } 55 56 void unlock() 57 { 58 mcs_lock* thread_node = tls_node.get(); 59 if( thread_node->next == 0 ) 60 { 61 // If false, then we a new thread has request for lock. Now release t he lock for the new thread 62 if( 63 AtomicUtils::compareAndExchange( 64 reinterpret_cast( tail ), 65 reinterpret_cast( thread_node ), 66 0 67 ) == reinterpret_cast( thread_node ) 68 ) 69 { 70 return; 71 } 72 73 while( thread_node->next == 0 ) 74 CPU_RELAX(); 75 } 76 77 thread_node->next->locked = false; 78 } 79 }; 80 81 boost::thread_specific_ptr MCSLock::tls_node; 82 #endif mcs_test.cpp 1 #include "mcs.hpp" 2 #include <iostream> 3 #include <pthread.h> 4 #include <vector> 5 #define NUM_THREADS 16 6 #define NUM_ITERATIONS 100 7 8 std::vector<int> elements; 9 MCSLock::mcs_lock *tail = 0; 10 11 void* thread_run( void* data ) 12 { 13 MCSLock lock( &tail ); 14 for( int i = 0; i < NUM_ITERATIONS; ++i ) 15 { 16 lock.lock(); 17 elements.push_back( i ); 18 lock.unlock(); 19 } 20 21 return 0; 22 } 23 24 int main() 25 { 26 pthread_t threads[ NUM_THREADS ]; 27 elements.reserve( NUM_THREADS * NUM_ITERATIONS ); 28 29 { 30 for( int i = 0; i < NUM_THREADS; ++i ) 31 pthread_create( &threads[i], NULL, thread_run, NULL ); 32 33 for( int i = 0; i < NUM_THREADS; ++i ) 34 pthread_join( threads[i], NULL ); 35 36 std::cout <<"\nExiting main thread: " << std::endl; 37 } 38 }
clang难道没有内置这些内联asm块(如gcc的“同步”和“比较”和“交换”)吗?为什么要重新发明轮子 第二,我真的想在loadAndStore中添加内存缓冲。在执行xchgq之前,您需要确保编译器在寄存器中保存的任何写入都被刷新到内存中。同样,这将阻止gcc在xchgq之前优化内存读取。这两种情况都是不好的 第三,我将检查asm输出中的while循环(thread\u node->locked和thread\u node->next)。由于这些变量不是易失性的,gcc可能会对此进行优化,以便只执行一次读取Multithreading MCS锁实现中的死锁 硬件: 达尔文内核版本13.2.0:Thu Apr 17 23:03:13 PDT 2014;根目录:xnu-2422.100.13~1/RELEASE\u X86\u 64 X86\u 64 原子学 1#如果没有原子效用# 2.定义原子效用 3. 4#包括 5. 6#define BARRIER()_uuasm_uuuvolatile(“:“内存”) 7. 8#定义CPU_RELAX()_uASM_uu volatile(“暂停\n\t”:“内存”) 9 10#定义存储围栏(); 11 12类原子 13 { 14公众: 15 16 /** 17*检查addr处的值是否等于oldval,如果是,则将其替换为newva l 18*并返回旧值 19 */ 20内联静态大小比较索引更改(可变大小添加、大小旧值、大小新值) 21 { 22尺码; 23 \uuuu asm \uuuuuuuuuu易失性(“锁定cmpxchgq%2,%1\n\t” 24:“=a”(ret),“+m”(*addr) 25:“r”(新值),“0”(旧值) 26.“记忆”); 27返回ret; 28 } 29 30 /** 31*以原子方式将x存储到addr并返回上一个 32*存储在addr中 33 */ 34内联静态大小加载和存储(大小x、可变大小x添加) 36 { 37个尺码; 38 \uuuu asm \uuuuuuuuuu易失性(“锁xchgq%1,%0\n\t” 39:“+m”(*addr),“=r”(ret) 40:“1”(x)); 41返回ret; 42 } 43 44 }; 45 46#endif mcs.hpp 1#如果没有MCS锁定# 2#定义MCS#U锁# 3. 4#包括“atomics.hpp” 5#包括 6. 7级麦克斯洛克 8 { 9结构mcs\u锁 10 { 11 mcs_lock_t():next(0),locked(false){ 12结构mcs\U lock\U t*next; 13门上锁; 14 }; 15 16公众: 17 typedef结构mcs_锁和mcs_锁; 18 19私人: 20 mcs_锁**尾; 21静态boost::特定于线程的ptr tls_节点; 22 23公众: 24 MCSLock(mcs\u lock**lock\u tail):tail(lock\u tail) 25 { 26如果(tls_node.get()=0) 27 tls_节点重置(新mcs_锁()); 28 } 29 30无效锁() 31 { 32 mcs_lock*thread_node=tls_node.get(); 33线程\u节点->下一步=0; 34线程\节点->锁定=真; 35 36易失性mcs_-lock*pred=重新解释( 37原子库::loadAndStore( 38重新解释铸件(螺纹节点), 39重新解释铸件(尾部) 40 ) 41 ); 42如果(pred!=0) 43 { 44 pred->next=*尾部; 45 46仓库围栏(); 47//BARRIER();//需要防止在prev->next=tail和thread_node->locked之间重新排序。(WR harzard) 48 49//在局部变量上旋转。有人给我解锁plz!! 
50时(线程\节点->锁定) 51 CPU_RELAX(); 52 53 } 54 } 55 56无效解锁() 57 { 58 mcs_lock*thread_node=tls_node.get(); 59如果(线程\节点->下一步==0) 60 { 61//如果为false,则表示新线程请求锁。现在释放新线程的锁 62如果( 63原子库::比较索引更改( 64重新解释铸件(尾部), 65重新解释铸件(螺纹节点), 66 0 67)=重新解释铸件(螺纹节点)68) 69 { 70人返回; 71 } 72 73 while(线程\节点->下一步==0) 74 CPU_RELAX(); 75 } 76 77线程\节点->下一步->锁定=假; 78 } 79 }; 80 81 boost::线程特定的线程ptr MCSLock::tls\u节点; 82#endif mcs_test.cpp 1#包括“mcs.hpp” 2#包括 3#包括 4#包括 5#定义NUM#u线程16 6#定义NUM#u迭代次数100 7. 8个std::向量元素; 9 MCSLock::mcs_lock*tail=0; 10 11无效*线程运行(无效*数据) 12 { 13麦克斯洛克锁和尾锁; 14用于(int i=0;inext = 0; 34 thread_node->locked = true; 35 36 volatile mcs_lock* pred = reinterpret_cast( 37 AtomicUtils::loadAndStore( 38 reinterpret_cast( thread_node ), 39 reinterpret_cast( tail ) 40 ) 41 ); 42 if( pred != 0 ) 43 { 44 pred->next = *tail; 45 46 STORE_FENCE(); 47 //BARRIER(); // Required to prevent re ordering between prev->next = tail and thread_node->locked. ( WR harzard ) 48 49 // Spin on a local variable. Someone unlock me plz !! 50 while( thread_node->locked ) 51 CPU_RELAX(); 52 53 } 54 } 55 56 void unlock() 57 { 58 mcs_lock* thread_node = tls_node.get(); 59 if( thread_node->next == 0 ) 60 { 61 // If false, then we a new thread has request for lock. 
Now release t he lock for the new thread 62 if( 63 AtomicUtils::compareAndExchange( 64 reinterpret_cast( tail ), 65 reinterpret_cast( thread_node ), 66 0 67 ) == reinterpret_cast( thread_node ) 68 ) 69 { 70 return; 71 } 72 73 while( thread_node->next == 0 ) 74 CPU_RELAX(); 75 } 76 77 thread_node->next->locked = false; 78 } 79 }; 80 81 boost::thread_specific_ptr MCSLock::tls_node; 82 #endif mcs_test.cpp 1 #include "mcs.hpp" 2 #include <iostream> 3 #include <pthread.h> 4 #include <vector> 5 #define NUM_THREADS 16 6 #define NUM_ITERATIONS 100 7 8 std::vector<int> elements; 9 MCSLock::mcs_lock *tail = 0; 10 11 void* thread_run( void* data ) 12 { 13 MCSLock lock( &tail ); 14 for( int i = 0; i < NUM_ITERATIONS; ++i ) 15 { 16 lock.lock(); 17 elements.push_back( i ); 18 lock.unlock(); 19 } 20 21 return 0; 22 } 23 24 int main() 25 { 26 pthread_t threads[ NUM_THREADS ]; 27 elements.reserve( NUM_THREADS * NUM_ITERATIONS ); 28 29 { 30 for( int i = 0; i < NUM_THREADS; ++i ) 31 pthread_create( &threads[i], NULL, thread_run, NULL ); 32 33 for( int i = 0; i < NUM_THREADS; ++i ) 34 pthread_join( threads[i], NULL ); 35 36 std::cout <<"\nExiting main thread: " << std::endl; 37 } 38 },multithreading,locking,x86-64,atomic,inline-assembly,Multithreading,Locking,X86 64,Atomic,Inline Assembly,clang难道没有内置这些内联asm块(如gcc的“同步”和“比较”和“交换”)吗?为什么要重新发明轮子 第二,我真的想在loadAndStore中添加内存缓冲。在执行xchgq之前,您需要确保编译器在寄存器中保存的任何写入都被刷新到内存中。同样,这将阻止gcc在xchgq之前优化内存读取。这两种情况都是不好的 第三,我将检查asm输出中的while循环(thread\u node->locked和thread\u node->next)。由于这些变量不是易失性的,gcc可能会对此进行优化,以便
这些建议未必能解决你的问题,但我会先从这几处着手排查。顺便说一句,用内联汇编自己实现锁通常没有什么意义。自己编写无锁队列的好处我可以理解,但给普通队列加上库提供的锁会更合理。即使要实现无锁队列,也应该使用 C++11 的
std::atomic
,而不是在 volatile 对象之上手工实现自己的原语。@PeterCordes:我认为需要考虑当时的背景。这个问题是 5-6 年前提出的,当时大多数发行版的稳定版所附带的 GCC 还不支持 C++11 原子原语,有一段时间你只能自己实现这些原语。@MichaelPetch:我注意到了提问日期,那是在 C++11 发布 3 年之后。不过你说得对,这个理由说得通。
Hardware:
Darwin Kernel Version 13.2.0: Thu Apr 17 23:03:13 PDT 2014; root:xnu-2422.100.13~1/RELEASE_X86_64 x86_64
atomics.hpp
1 #ifndef ATOMIC_UTILS_H
2 #define ATOMIC_UTILS_H
3
4 #include
5
/**
 * Compiler-only barrier: emits no instruction, but the "memory" clobber
 * forbids the compiler from caching memory values in registers across it or
 * moving memory accesses from one side to the other.
 */
#define BARRIER() __asm__ volatile( "" : : : "memory" )

/**
 * Body of a spin-wait loop: issues the x86 `pause` instruction. The "memory"
 * clobber also forces the loop condition to be re-read from memory on every
 * iteration, even when the polled variable is not declared volatile.
 */
#define CPU_RELAX() __asm__ volatile( "pause\n\t" : : : "memory" )

/**
 * Hardware fence plus compiler barrier. Despite the name this issues `mfence`
 * (a full load+store fence), not `sfence`.
 *
 * The expansion deliberately carries no trailing semicolon: the caller writes
 * `STORE_FENCE();`, which keeps the macro usable as the sole statement of an
 * unbraced `if ... else`.
 */
#define STORE_FENCE() __asm__ volatile( "mfence" : : : "memory" )
11
/**
 * Word-sized atomic read-modify-write helpers.
 *
 * Both operations are implemented with the compiler's __atomic builtins using
 * sequentially-consistent ordering, so each call is an atomic operation, a
 * hardware fence, and a compiler barrier at once. The previous hand-written
 * exchange had no "memory" clobber, which allowed the compiler to move
 * ordinary loads/stores across it.
 */
class AtomicUtils
{
public:

    /**
     * Atomically compares *addr with oldval and, only if they are equal,
     * replaces *addr with newval.
     *
     * @param addr    word to operate on
     * @param oldval  value *addr is expected to hold
     * @param newval  value to store when the expectation holds
     * @return the value *addr held immediately before the operation; it
     *         equals oldval exactly when the store took place
     */
    inline static size_t compareAndExchange( volatile size_t* addr, size_t oldval, size_t newval )
    {
        // On failure the builtin overwrites `oldval` with the value it
        // observed; on success `oldval` is left untouched. Either way it ends
        // up holding the previous contents of *addr.
        __atomic_compare_exchange_n( addr, &oldval, newval, false,
                                     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST );
        return oldval;
    }

    /**
     * Atomically stores x into *addr.
     *
     * @param x     value to store
     * @param addr  word to operate on
     * @return the value *addr held immediately before the store
     */
    inline static size_t loadAndStore( size_t x, volatile size_t* addr )
    {
        return __atomic_exchange_n( addr, x, __ATOMIC_SEQ_CST );
    }

};
45
46 #endif
mcs.hpp
1 #ifndef MCS_LOCK_H
2 #define MCS_LOCK_H
3
4 #include "atomics.hpp"
5 #include
6
7 class MCSLock
8 {
9 struct mcs_lock_t
10 {
11 mcs_lock_t():next(0), locked(false){}
12 struct mcs_lock_t* next;
13 bool locked;
14 };
15
16 public:
17 typedef struct mcs_lock_t mcs_lock;
18
19 private:
20 mcs_lock** tail;
21 static boost::thread_specific_ptr tls_node;
22
23 public:
24 MCSLock( mcs_lock** lock_tail ):tail( lock_tail )
25 {
26 if( tls_node.get() == 0 )
27 tls_node.reset( new mcs_lock() );
28 }
29
30 void lock()
31 {
32 mcs_lock* thread_node = tls_node.get();
33 thread_node->next = 0;
34 thread_node->locked = true;
35
36 volatile mcs_lock* pred = reinterpret_cast(
37 AtomicUtils::loadAndStore(
38 reinterpret_cast( thread_node ),
39 reinterpret_cast( tail )
40 )
41 );
42 if( pred != 0 )
43 {
44 pred->next = *tail;
45
46 STORE_FENCE();
47 //BARRIER(); // Required to prevent re ordering between prev->next = tail and thread_node->locked. ( WR harzard )
48
49 // Spin on a local variable. Someone unlock me plz !!
50 while( thread_node->locked )
51 CPU_RELAX();
52
53 }
54 }
55
56 void unlock()
57 {
58 mcs_lock* thread_node = tls_node.get();
59 if( thread_node->next == 0 )
60 {
61 // If false, then we a new thread has request for lock. Now release t he lock for the new thread
62 if(
63 AtomicUtils::compareAndExchange(
64 reinterpret_cast( tail ),
65 reinterpret_cast( thread_node ),
66 0
67 ) == reinterpret_cast( thread_node ) 68 )
69 {
70 return;
71 }
72
73 while( thread_node->next == 0 )
74 CPU_RELAX();
75 }
76
77 thread_node->next->locked = false;
78 }
79 };
80
81 boost::thread_specific_ptr MCSLock::tls_node;
82 #endif
mcs_test.cpp
1 #include "mcs.hpp"
2 #include <iostream>
3 #include <pthread.h>
4 #include <vector>
5 #define NUM_THREADS 16 // number of worker threads spawned by main()
6 #define NUM_ITERATIONS 100 // number of push_back calls each worker performs
7
8 std::vector<int> elements; // shared container every worker appends to between lock() and unlock()
9 MCSLock::mcs_lock *tail = 0; // shared MCS queue tail passed to each MCSLock; 0 means no thread holds or waits for the lock
10
11 void* thread_run( void* data )
12 {
13 MCSLock lock( &tail );
14 for( int i = 0; i < NUM_ITERATIONS; ++i )
15 {
16 lock.lock();
17 elements.push_back( i );
18 lock.unlock();
19 }
20
21 return 0;
22 }
23
24 int main()
25 {
26 pthread_t threads[ NUM_THREADS ];
27 elements.reserve( NUM_THREADS * NUM_ITERATIONS );
28
29 {
30 for( int i = 0; i < NUM_THREADS; ++i )
31 pthread_create( &threads[i], NULL, thread_run, NULL );
32
33 for( int i = 0; i < NUM_THREADS; ++i )
34 pthread_join( threads[i], NULL );
35
36 std::cout <<"\nExiting main thread: " << std::endl;
37 }
38 }