为什么每个人都说SpinLock更快?

我在互联网上阅读了很多文档、文章和帖子。几乎所有人都认为,对于执行时间很短的代码,SpinLock 的速度更快。但我做了一个测试,结果显示简单的 Monitor.Enter 反而比 SpinLock.Enter 运行得更快(测试程序针对 .NET 4.5 编译)。

using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.ComponentModel;
using System.Diagnostics;
using System.Globalization;
using System.Linq;
using System.Net;
using System.Net.Sockets;
using System.Threading;
using System.Threading.Tasks;

/// <summary>
/// Micro-benchmark that increments one shared counter from one thread per
/// logical processor, protecting the increment with Interlocked, SpinLock
/// and Monitor in turn, and prints the wall-clock time of each variant.
/// </summary>
class Program
{
    // Number of increments performed by every worker thread.
    static int _loopsCount = 1000000;

    // Number of worker threads; assigned from Environment.ProcessorCount in Main.
    static int _threadsCount = -1;

    static ProcessPriorityClass _processPriority = ProcessPriorityClass.RealTime;
    static ThreadPriority _threadPriority = ThreadPriority.Highest;

    // The shared counter every variant increments.
    static long _testingVar = 0;

    // Deliberately NOT readonly: SpinLock is a mutable struct, and calling
    // Enter/Exit through a readonly field would operate on a copy.
    static SpinLock _spinLock = new SpinLock();

    static object _locker = new object();

    /// <summary>
    /// Runs the three variants in sequence (Interlocked, SpinLock, Monitor),
    /// prints the elapsed milliseconds of each and waits for Enter.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        _threadsCount = Environment.ProcessorCount;
        Console.WriteLine("Cores/processors count: {0}", Environment.ProcessorCount);

        // Process is IDisposable; release the handle once the priority is set.
        using (Process current = Process.GetCurrentProcess())
        {
            current.PriorityClass = _processPriority;
        }

        TimeSpan tsInterlocked = ExecuteInterlocked();
        TimeSpan tsSpinLock = ExecuteSpinLock();
        TimeSpan tsMonitor = ExecuteMonitor();

        Console.WriteLine(
            "Test with interlocked: {0} ms\r\nTest with SpinLock: {1} ms\r\nTest with Monitor: {2} ms",
            tsInterlocked.TotalMilliseconds,
            tsSpinLock.TotalMilliseconds,
            tsMonitor.TotalMilliseconds);
        Console.ReadLine();
    }

    /// <summary>
    /// Times <c>_loopsCount</c> increments per thread using
    /// <see cref="Interlocked.Increment(ref long)"/>.
    /// </summary>
    /// <returns>Elapsed time from releasing the workers until all have signalled.</returns>
    static TimeSpan ExecuteInterlocked()
    {
        return RunOnAllThreads(() =>
        {
            for (int j = 0; j < _loopsCount; j++)
            {
                Interlocked.Increment(ref _testingVar);
            }
        });
    }

    /// <summary>
    /// Times <c>_loopsCount</c> increments per thread, each guarded by the
    /// shared <see cref="SpinLock"/>.
    /// </summary>
    /// <returns>Elapsed time from releasing the workers until all have signalled.</returns>
    static TimeSpan ExecuteSpinLock()
    {
        return RunOnAllThreads(() =>
        {
            bool lockTaken;
            for (int j = 0; j < _loopsCount; j++)
            {
                // SpinLock.Enter requires lockTaken to be false on entry.
                lockTaken = false;
                try
                {
                    _spinLock.Enter(ref lockTaken);
                    _testingVar++;
                }
                finally
                {
                    if (lockTaken)
                    {
                        _spinLock.Exit();
                    }
                }
            }
        });
    }

    /// <summary>
    /// Times <c>_loopsCount</c> increments per thread, each guarded by
    /// <see cref="Monitor"/> on the shared lock object.
    /// </summary>
    /// <returns>Elapsed time from releasing the workers until all have signalled.</returns>
    static TimeSpan ExecuteMonitor()
    {
        return RunOnAllThreads(() =>
        {
            bool lockTaken;
            for (int j = 0; j < _loopsCount; j++)
            {
                lockTaken = false;
                try
                {
                    Monitor.Enter(_locker, ref lockTaken);
                    _testingVar++;
                }
                finally
                {
                    if (lockTaken)
                    {
                        Monitor.Exit(_locker);
                    }
                }
            }
        });
    }

    /// <summary>
    /// Shared harness: resets the counter, starts <c>_threadsCount</c> threads
    /// that all block on a start gate, releases them together and measures
    /// the time until every thread has signalled completion.
    /// </summary>
    /// <param name="work">
    /// The complete per-thread workload. It is invoked once per thread, so the
    /// measured loop itself carries no extra per-iteration delegate call.
    /// </param>
    /// <returns>Elapsed time between opening the gate and the last completion signal.</returns>
    static TimeSpan RunOnAllThreads(Action work)
    {
        _testingVar = 0;

        using (ManualResetEvent startEvent = new ManualResetEvent(false))
        using (CountdownEvent endCountdown = new CountdownEvent(_threadsCount))
        {
            Thread[] threads = new Thread[_threadsCount];
            for (int i = 0; i < threads.Length; i++)
            {
                threads[i] = new Thread(() =>
                {
                    startEvent.WaitOne();
                    work();
                    endCountdown.Signal();
                });
                threads[i].Priority = _threadPriority;
                threads[i].Start();
            }

            Stopwatch sw = Stopwatch.StartNew();
            startEvent.Set();
            endCountdown.Wait();
            TimeSpan elapsed = sw.Elapsed;

            // Join outside the timed region so that no worker can still be
            // inside Signal() when the events are disposed.
            foreach (Thread thread in threads)
            {
                thread.Join();
            }

            return elapsed;
        }
    }
}

在具有24个2.5 GHz内核的服务器上,使用x64编译的此应用程序产生以下结果:

 Cores/processors count: 24
 Test with interlocked: 1373.0829 ms
 Test with SpinLock: 10894.6283 ms
 Test with Monitor: 1171.1591 ms

您测试的并不是 SpinLock 能够发挥优势的场景。自旋锁的核心思想是:线程上下文切换是非常昂贵的操作,成本在 2000 到 10,000 个 CPU 周期之间。如果线程只需稍等片刻(自旋)就很可能获得锁,那么自旋所消耗的额外周期可以通过避免一次上下文切换而得到回报。

因此有两个基本要求。第一,锁的持有时间必须很短,这一点在您的情况下是满足的。第二,自旋的线程要有合理的机会获得锁,而这一点在您的情况下并不满足:锁被不少于 24 个线程激烈争夺,所有线程都在自旋、白白消耗 CPU 核心,却几乎没有机会获得锁。

在此测试中,Monitor 的效果最好,因为它会让等待获取锁的线程排队。这些线程被挂起,直到锁被释放时才从等待队列中被唤醒,从而获得取锁的机会。这使所有线程都有公平的机会轮流执行,因而最大化了它们同时完成的几率。Interlocked.Increment 的表现也不错,但它无法提供公平性保证。

很难事先判断 SpinLock 是否是正确的选择,您必须实际测量。并发分析器(concurrency analyzer)正是适合这项工作的工具。