2014-07-05 82 views
1
 
Hardware: 
Darwin Kernel Version 13.2.0: Thu Apr 17 23:03:13 PDT 2014; root:xnu-2422.100.13~1/RELEASE_X86_64 x86_64 
 
    atomics.hpp 

    1 #ifndef ATOMIC_UTILS_H 
    2 #define ATOMIC_UTILS_H 
    3 
    4 #include 
    5 
    6 #define BARRIER() __asm__ volatile ("": : :"memory") 
    7 
    8 #define CPU_RELAX() __asm__ volatile("pause\n\t": : :"memory") 
    9 
10 #define STORE_FENCE() __asm__ volatile("mfence" ::: "memory"); 
11 
12 class AtomicUtils 
13 { 
14  public: 
15 
16  /** 
17  * check if the value at addr is equal to oldval, if so replace it with newva l 
18  * and return the oldval 
19  */ 
20  inline static size_t compareAndExchange(volatile size_t* addr, size_t oldval , size_t newval) 
21  { 
22  size_t ret; 
23  __asm__ volatile("lock cmpxchgq %2, %1\n\t" 
24      :"=a"(ret), "+m"(*addr) 
25      : "r"(newval), "0"(oldval) 
26      : "memory"); 
27  return ret; 
28  } 
29 
30  /** 
31  * Atomically stores x into addr and returns the previous 
32  * stored in addr 
33  */ 
34 inline static size_t loadAndStore(size_t x, volatile size_t* addr) 
36  { 
37  size_t ret; 
38  __asm__ volatile("lock xchgq %1, %0\n\t" 
39       : "+m"(*addr), "=r"(ret) 
40       : "1"(x)); 
41  return ret; 
42  } 
43 
44 }; 
45 
46 #endif 
 
    mcs.hpp 

    1 #ifndef MCS_LOCK_H 
    2 #define MCS_LOCK_H 
    3 
    4 #include "atomics.hpp" 
    5 #include 
    6 
    7 class MCSLock 
    8 { 
    9  struct mcs_lock_t 
10  { 
11   mcs_lock_t():next(0), locked(false){} 
12   struct mcs_lock_t* next; 
13   bool locked; 
14  }; 
15 
16  public: 
17  typedef struct mcs_lock_t mcs_lock; 
18 
19  private: 
20  mcs_lock** tail; 
21  static boost::thread_specific_ptr tls_node; 
22 
23  public: 
24  MCSLock(mcs_lock** lock_tail):tail(lock_tail) 
25  { 
26  if(tls_node.get() == 0) 
27   tls_node.reset(new mcs_lock()); 
28  } 
29 
30  void lock() 
31  { 
32  mcs_lock* thread_node = tls_node.get(); 
33  thread_node->next = 0; 
34  thread_node->locked = true; 
35 
36  volatile mcs_lock* pred = reinterpret_cast(
37       AtomicUtils::loadAndStore(
38        reinterpret_cast(thread_node), 
39        reinterpret_cast(tail) 
40       ) 
41      ); 
42  if(pred != 0) 
43  { 
44   pred->next = *tail; 
45 
46   STORE_FENCE(); 
47   //BARRIER(); // Required to prevent re ordering between prev->next = tail  and thread_node->locked. (WR harzard) 
48 
49   // Spin on a local variable. Someone unlock me plz !! 
50   while(thread_node->locked) 
51    CPU_RELAX(); 
52 
53  } 
54  } 
55 
56  void unlock() 
57  { 
58   mcs_lock* thread_node = tls_node.get(); 
59   if(thread_node->next == 0) 
60   { 
61    // If false, then we a new thread has request for lock. Now release t he lock for the new thread 
62    if(
63      AtomicUtils::compareAndExchange(
64       reinterpret_cast(tail), 
65       reinterpret_cast(thread_node), 
66       0 
67     ) == reinterpret_cast(thread_node) 68   ) 
69    { 
70     return; 
71    } 
72 
73    while(thread_node->next == 0) 
74     CPU_RELAX(); 
75   } 
76 
77   thread_node->next->locked = false; 
78  } 
79 }; 
80 
81 boost::thread_specific_ptr MCSLock::tls_node; 
82 #endif 
mcs_test.cpp 

    1 #include "mcs.hpp" 
    2 #include <iostream> 
    3 #include <pthread.h> 
    4 #include <vector> 
    5 #define NUM_THREADS 16 
    6 #define NUM_ITERATIONS 100 
    7 
    8 std::vector<int> elements; 
    9 MCSLock::mcs_lock *tail = 0; 
10 
11 void* thread_run(void* data) 
12 { 
13 MCSLock lock(&tail); 
14 for(int i = 0; i < NUM_ITERATIONS; ++i) 
15 { 
16  lock.lock(); 
17  elements.push_back(i); 
18  lock.unlock(); 
19 } 
20 
21 return 0; 
22 } 
23 
24 int main() 
25 { 
26  pthread_t threads[ NUM_THREADS ]; 
27  elements.reserve(NUM_THREADS * NUM_ITERATIONS); 
28 
29  { 
30   for(int i = 0; i < NUM_THREADS; ++i) 
31    pthread_create(&threads[i], NULL, thread_run, NULL); 
32 
33   for(int i = 0; i < NUM_THREADS; ++i) 
34    pthread_join(threads[i], NULL); 
35 
36   std::cout <<"\nExiting main thread: " << std::endl; 
37  } 
38 } 

上述代碼是使用鐺編譯死鎖在MCS鎖實施

問題:

我看到一個1或2個線程都卡在鎖()在第50行。除了主線程之外,卡在lock()中的線程沒有其他線程存活。這意味着當其他線程調用unlock()時,它們以某種方式不會爲其他變量設置locked = false並退出。

任何關於調試這個指針嗎?

卡住了很多小時,沒有線索。

回答

0

不是鏗鏘有內置這些​​行內彙編塊(如gcc的__sync_val_compare_and_swap)?爲什麼重新發明輪子?

其次,我真的想着將內存clobber添加到loadAndStore。您需要確保在執行xchgq之前,編譯器在寄存器中執行的任何寫操作都會刷新到內存中。同樣,它會阻止gcc優化內存讀取到xchgq之前。要麼是壞的。第三,我會檢查你的while循環(thread_node-> locked和thread_node-> next)的asm輸出。由於這些變量不是不穩定的,所以gcc可以優化它只執行一次讀取。

這些可能無法解決您的問題,但這就是我要開始的地方。