@@ -378,7 +378,7 @@ where
378
378
fn remove_cycle (
379
379
query_map : & QueryMap ,
380
380
jobs : & mut Vec < QueryJobId > ,
381
- wakelist : & mut Vec < Arc < QueryWaiter > > ,
381
+ wakelist : & Mutex < Vec < Arc < QueryWaiter > > > ,
382
382
) -> bool {
383
383
let mut visited = FxHashSet :: default ( ) ;
384
384
let mut stack = Vec :: new ( ) ;
@@ -466,7 +466,7 @@ fn remove_cycle(
466
466
* waiter. cycle . lock ( ) = Some ( error) ;
467
467
468
468
// Put the waiter on the list of things to resume
469
- wakelist. push ( waiter) ;
469
+ wakelist. lock ( ) . push ( waiter) ;
470
470
471
471
true
472
472
} else {
@@ -478,36 +478,40 @@ fn remove_cycle(
478
478
/// If a query cycle is found it will break the cycle by finding an edge which
479
479
/// uses a query latch and then resuming that waiter.
480
480
/// There may be multiple cycles involved in a deadlock, so this searches
481
- /// all active queries for cycles before finally resuming all the waiters at once.
481
+ /// all active queries for cycles, but only one waiter is resumed at a time.
482
482
pub fn break_query_cycles ( query_map : QueryMap , registry : & rayon_core:: Registry ) {
483
- let mut wakelist = Vec :: new ( ) ;
483
+ // FIXME: change `remove_cycle` to return one waiter per call
484
+ // so that we can avoid using the global list here.
485
+ static WAKELIST : Mutex < Vec < Arc < QueryWaiter > > > = Mutex :: new ( Vec :: new ( ) ) ;
486
+
484
487
let mut jobs: Vec < QueryJobId > = query_map. keys ( ) . cloned ( ) . collect ( ) ;
485
488
486
489
let mut found_cycle = false ;
487
490
488
491
while jobs. len ( ) > 0 {
489
- if remove_cycle ( & query_map, & mut jobs, & mut wakelist ) {
492
+ if remove_cycle ( & query_map, & mut jobs, & WAKELIST ) {
490
493
found_cycle = true ;
491
494
}
492
495
}
493
-
496
+ let mut wake = WAKELIST . lock ( ) ;
494
497
// Check that a cycle was found. It is possible for a deadlock to occur without
495
498
// a query cycle if a query which can be waited on uses Rayon to do multithreading
496
499
// internally. Such a query (X) may be executing on 2 threads (A and B) and A may
497
500
// wait using Rayon on B. Rayon may then switch to executing another query (Y)
498
501
// which in turn will wait on X causing a deadlock. We have a false dependency from
499
502
// X to Y due to Rayon waiting and a true dependency from Y to X. The algorithm here
500
503
// only considers the true dependency and won't detect a cycle.
501
- if !found_cycle {
504
+ if !found_cycle && wake . is_empty ( ) {
502
505
panic ! (
503
506
"deadlock detected as we're unable to find a query cycle to break\n \
504
507
current query map:\n {:#?}",
505
508
query_map
506
509
) ;
507
510
}
508
511
509
- // FIXME: Ensure this won't cause a deadlock before we return
510
- for waiter in wakelist. into_iter ( ) {
512
+ // Only one waiter is resumed at a time to avoid waking up multiple
513
+ // waiters at the same time and causing deadlock due to thread grabbing.
514
+ if let Some ( waiter) = wake. pop ( ) {
511
515
waiter. notify ( registry) ;
512
516
}
513
517
}
0 commit comments