-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Fix flaky multi-node cluster tests (member removal, SBR, sharding) #8025
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
595bb06
9e60d20
baa6de3
4ce0ee4
9418e9e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,13 +5,15 @@ | |
| // </copyright> | ||
| //----------------------------------------------------------------------- | ||
|
|
||
| using System; | ||
| using System.Linq; | ||
| using System.Threading; | ||
| using System.Threading.Tasks; | ||
| using Akka.Cluster.TestKit; | ||
| using Akka.Configuration; | ||
| using Akka.MultiNode.TestAdapter; | ||
| using Akka.Remote.TestKit; | ||
| using FluentAssertions; | ||
| using FluentAssertions.Extensions; | ||
|
|
||
| namespace Akka.Cluster.Tests.MultiNode | ||
| { | ||
|
|
@@ -47,41 +49,45 @@ protected NodeDowningAndBeingRemovedSpec(NodeDowningAndBeingRemovedSpecSpecConfi | |
| } | ||
|
|
||
| [MultiNodeFact] | ||
| public void NodeDowningAndBeingRemovedSpecs() | ||
| public async Task NodeDowningAndBeingRemovedSpecs() | ||
| { | ||
| Node_that_is_downed_must_eventually_be_removed_from_membership(); | ||
| await Node_that_is_downed_must_eventually_be_removed_from_membership(); | ||
| } | ||
|
|
||
| public void Node_that_is_downed_must_eventually_be_removed_from_membership() | ||
| public async Task Node_that_is_downed_must_eventually_be_removed_from_membership() | ||
| { | ||
| AwaitClusterUp(_config.First, _config.Second, _config.Third); | ||
| await AwaitClusterUpAsync(CancellationToken.None, _config.First, _config.Second, _config.Third); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don't need the |
||
|
|
||
| Within(TimeSpan.FromSeconds(30), () => | ||
| var secondAddress = GetAddress(_config.Second); | ||
| var thirdAddress = GetAddress(_config.Third); | ||
|
|
||
| await WithinAsync(45.Seconds(), async () => | ||
| { | ||
| RunOn(() => | ||
| await RunOnAsync(() => | ||
| { | ||
| Cluster.Down(GetAddress(_config.Second)); | ||
| Cluster.Down(GetAddress(_config.Third)); | ||
| Cluster.Down(secondAddress); | ||
| Cluster.Down(thirdAddress); | ||
| return Task.CompletedTask; | ||
| }, _config.First); | ||
| EnterBarrier("second-and-third-down"); | ||
| await EnterBarrierAsync("second-and-third-down"); | ||
|
|
||
| RunOn(() => | ||
| await RunOnAsync(async () => | ||
| { | ||
| // verify that the node is shut down | ||
| AwaitCondition(() => Cluster.IsTerminated); | ||
| await AwaitConditionAsync(() => Task.FromResult(Cluster.IsTerminated)); | ||
| }, _config.Second, _config.Third); | ||
| EnterBarrier("second-and-third-shutdown"); | ||
| await EnterBarrierAsync("second-and-third-shutdown"); | ||
|
|
||
| RunOn(() => | ||
| await RunOnAsync(async () => | ||
| { | ||
| AwaitAssert(() => | ||
| await AwaitAssertAsync(() => | ||
| { | ||
| ClusterView.Members.Select(c => c.Address).Should().NotContain(GetAddress(_config.Second)); | ||
| ClusterView.Members.Select(c => c.Address).Should().NotContain(GetAddress(_config.Third)); | ||
| ClusterView.Members.Select(c => c.Address).Should().NotContain(secondAddress); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Address caching optimization - just re-using the ones we resolved earlier. |
||
| ClusterView.Members.Select(c => c.Address).Should().NotContain(thirdAddress); | ||
| }); | ||
| }, _config.First); | ||
|
|
||
| EnterBarrier("finished"); | ||
| await EnterBarrierAsync("finished"); | ||
| }); | ||
|
|
||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,13 +5,15 @@ | |
| // </copyright> | ||
| //----------------------------------------------------------------------- | ||
|
|
||
| using System; | ||
| using System.Linq; | ||
| using System.Threading; | ||
| using System.Threading.Tasks; | ||
| using Akka.Cluster.TestKit; | ||
| using Akka.Configuration; | ||
| using Akka.MultiNode.TestAdapter; | ||
| using Akka.Remote.TestKit; | ||
| using FluentAssertions; | ||
| using FluentAssertions.Extensions; | ||
|
|
||
| namespace Akka.Cluster.Tests.MultiNode | ||
| { | ||
|
|
@@ -46,48 +48,52 @@ protected NodeLeavingAndExitingAndBeingRemovedSpec(NodeLeavingAndExitingAndBeing | |
| } | ||
|
|
||
| [MultiNodeFact] | ||
| public void NodeLeavingAndExitingAndBeingRemovedSpecs() | ||
| public async Task NodeLeavingAndExitingAndBeingRemovedSpecs() | ||
| { | ||
| Node_that_is_leaving_non_singleton_cluster_eventually_set_to_removed_and_removed_from_membership_ring_and_seen_table(); | ||
| await Node_that_is_leaving_non_singleton_cluster_eventually_set_to_removed_and_removed_from_membership_ring_and_seen_table(); | ||
| } | ||
|
|
||
| public void Node_that_is_leaving_non_singleton_cluster_eventually_set_to_removed_and_removed_from_membership_ring_and_seen_table() | ||
| public async Task Node_that_is_leaving_non_singleton_cluster_eventually_set_to_removed_and_removed_from_membership_ring_and_seen_table() | ||
| { | ||
| AwaitClusterUp(_config.First, _config.Second, _config.Third); | ||
| await AwaitClusterUpAsync(CancellationToken.None, _config.First, _config.Second, _config.Third); | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don't need the |
||
|
|
||
| Within(TimeSpan.FromSeconds(15), () => | ||
| var secondAddress = GetAddress(_config.Second); | ||
|
|
||
| // Increased timeout from 15s to 45s for CI variability | ||
| await WithinAsync(45.Seconds(), async () => | ||
| { | ||
| RunOn(() => | ||
| await RunOnAsync(() => | ||
| { | ||
| Cluster.Leave(GetAddress(_config.Second)); | ||
| Cluster.Leave(secondAddress); | ||
| return Task.CompletedTask; | ||
| }, _config.First); | ||
| EnterBarrier("second-left"); | ||
| await EnterBarrierAsync("second-left"); | ||
|
|
||
| RunOn(() => | ||
| await RunOnAsync(async () => | ||
| { | ||
| EnterBarrier("second-shutdown"); | ||
| await EnterBarrierAsync("second-shutdown"); | ||
| // this test verifies that the removal is performed via the ExitingCompleted message, | ||
| // otherwise we would have `MarkNodeAsUnavailable(second)` to trigger the FailureDetectorPuppet | ||
|
|
||
| // verify that the 'second' node is no longer part of the 'members'/'unreachable' set | ||
| AwaitAssert(() => | ||
| await AwaitAssertAsync(() => | ||
| { | ||
| ClusterView.Members.Select(c => c.Address).Should().NotContain(GetAddress(_config.Second)); | ||
| ClusterView.Members.Select(c => c.Address).Should().NotContain(secondAddress); | ||
| }); | ||
| AwaitAssert(() => | ||
| await AwaitAssertAsync(() => | ||
| { | ||
| ClusterView.UnreachableMembers.Select(c => c.Address).Should().NotContain(GetAddress(_config.Second)); | ||
| ClusterView.UnreachableMembers.Select(c => c.Address).Should().NotContain(secondAddress); | ||
| }); | ||
| }, _config.First, _config.Third); | ||
|
|
||
| RunOn(() => | ||
| await RunOnAsync(async () => | ||
| { | ||
| // verify that the second node is shut down | ||
| AwaitCondition(() => Cluster.IsTerminated); | ||
| EnterBarrier("second-shutdown"); | ||
| await AwaitConditionAsync(() => Task.FromResult(Cluster.IsTerminated)); | ||
| await EnterBarrierAsync("second-shutdown"); | ||
| }, _config.Second); | ||
|
|
||
| EnterBarrier("finished"); | ||
| await EnterBarrierAsync("finished"); | ||
| }); | ||
|
|
||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, looks like this was a bug