Skip to content

Commit a8c7508

Browse files
author
Rodrigo Valin
authored
CLOUDP-65923: Replication state aware Readinessprobe (#381)
1 parent 2329b89 commit a8c7508

File tree

8 files changed

+457
-7
lines changed

8 files changed

+457
-7
lines changed

cmd/readiness/main.go

Lines changed: 79 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,14 @@ func isPodReady(conf config.Config) bool {
6666
}
6767

6868
// If the agent has reached the goal state - returning true
69-
ok, err := isInGoalState(health, conf)
70-
69+
inGoalState, err := isInGoalState(health, conf)
7170
if err != nil {
7271
logger.Errorf("There was problem checking the health status: %s", err)
7372
panic(err)
7473
}
7574

76-
if ok {
75+
inReadyState := isInReadyState(health)
76+
if inGoalState && inReadyState {
7777
logger.Info("Agent has reached goal state")
7878
return true
7979
}
@@ -198,6 +198,7 @@ func kubernetesClientset() (kubernetes.Interface, error) {
198198
}
199199
return clientset, nil
200200
}
201+
201202
func main() {
202203
clientSet, err := kubernetesClientset()
203204
if err != nil {
@@ -222,3 +223,78 @@ func main() {
222223
os.Exit(1)
223224
}
224225
}
226+
227+
// isInReadyState checks the MongoDB Server state. It returns true if the state
228+
// is PRIMARY or SECONDARY.
229+
// This function will always return true if the agent doesn't publish this state.
230+
func isInReadyState(health health.Status) bool {
231+
if len(health.Healthiness) == 0 {
232+
return true
233+
}
234+
for _, processHealth := range health.Healthiness {
235+
// We know this loop should run only once, in Kubernetes there's
236+
// only 1 server managed per host.
237+
if processHealth.ReplicaStatus == nil {
238+
// We always return true if the Agent does not publish mongodb
239+
// server state
240+
return true
241+
}
242+
243+
if mongoDbServerHasStarted(health) {
244+
// There should be only one entry reported for this Pod.
245+
return processHealth.IsReadyState()
246+
}
247+
}
248+
return false
249+
}
250+
251+
// mongoDbServerHasStarted checks if the current plan includes a Move and a Step
252+
// of type "StartFresh" with a Result of "success".
253+
//
254+
// This function will return true if the agent has been able to successfully
255+
// start the MongoDB server.
256+
func mongoDbServerHasStarted(health health.Status) bool {
257+
plan := findCurrentPlan(health.ProcessPlans)
258+
if plan == nil {
259+
return false
260+
}
261+
262+
for _, move := range plan.Moves {
263+
for _, step := range move.Steps {
264+
if step.Step == "StartFresh" && step.Result == "success" {
265+
return true
266+
}
267+
}
268+
}
269+
270+
return false
271+
}
272+
273+
// findCurrentPlan returns the current plan as informed by the Agent.
274+
//
275+
// The current plan is the last plan from the `processStatuses` parameter, this
276+
// is, the plan that's currently being processed by the agent.
277+
func findCurrentPlan(processStatuses map[string]health.MmsDirectorStatus) *health.PlanStatus {
278+
var currentPlan *health.PlanStatus
279+
if len(processStatuses) == 0 {
280+
// Seems shouldn't happen but let's check anyway - may be needs to be
281+
// changed to Info if this happens.
282+
logger.Warnf("There is no information about Agent process plans")
283+
return nil
284+
}
285+
if len(processStatuses) > 1 {
286+
logger.Errorf("Only one process status is expected but got %d!", len(processStatuses))
287+
return nil
288+
}
289+
// There is only one process managed by the Agent - so will only check one
290+
// iteration.
291+
for k, v := range processStatuses {
292+
if len(v.Plans) == 0 {
293+
logger.Errorf("The process %s doesn't contain any plans!", k)
294+
return nil
295+
}
296+
currentPlan = v.Plans[len(v.Plans)-1]
297+
}
298+
299+
return currentPlan
300+
}

cmd/readiness/readiness_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ func TestHeadlessAgentHasntReachedGoal(t *testing.T) {
100100
assert.False(t, isPodReady(c))
101101
thePod, _ := c.ClientSet.CoreV1().Pods(c.Namespace).Get(context.TODO(), c.Hostname, metav1.GetOptions{})
102102
assert.Equal(t, map[string]string{"agent.mongodb.com/version": "5"}, thePod.Annotations)
103+
104+
os.Unsetenv(headlessAgent)
103105
}
104106

105107
// TestHeadlessAgentReachedGoal verifies that the probe reports "true" if the config version is equal to the
@@ -111,6 +113,34 @@ func TestHeadlessAgentReachedGoal(t *testing.T) {
111113
assert.True(t, isPodReady(c))
112114
thePod, _ := c.ClientSet.CoreV1().Pods(c.Namespace).Get(context.TODO(), c.Hostname, metav1.GetOptions{})
113115
assert.Equal(t, map[string]string{"agent.mongodb.com/version": "5"}, thePod.Annotations)
116+
117+
os.Unsetenv(headlessAgent)
118+
}
119+
120+
func TestPodReadiness(t *testing.T) {
121+
t.Run("MongoDB replication state is reported by agents", func(t *testing.T) {
122+
assert.True(t, isPodReady(testConfig("testdata/health-status-ok-no-replica-status.json")))
123+
})
124+
125+
t.Run("If replication state is not PRIMARY or SECONDARY, Pod is not ready", func(t *testing.T) {
126+
assert.False(t, isPodReady(testConfig("testdata/health-status-not-readable-state.json")))
127+
})
128+
129+
t.Run("If replication state is readable", func(t *testing.T) {
130+
assert.True(t, isPodReady(testConfig("testdata/health-status-readable-state.json")))
131+
})
132+
}
133+
134+
func TestServerHasAlreadyStarted(t *testing.T) {
135+
t.Run("Agent should report server has started", func(t *testing.T) {
136+
healthDoc := readHealthinessFile("testdata/health-status-readable-state.json")
137+
assert.True(t, mongoDbServerHasStarted(healthDoc))
138+
})
139+
140+
t.Run("Agent should report server has not started", func(t *testing.T) {
141+
healthDoc := readHealthinessFile("testdata/health-status-not-started-yet.json")
142+
assert.False(t, mongoDbServerHasStarted(healthDoc))
143+
})
114144
}
115145

116146
func readHealthinessFile(path string) health.Status {
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
{
2+
"mmsStatus": {
3+
"bar": {
4+
"errorString": "",
5+
"errorCode": 0,
6+
"plans": [
7+
{
8+
"moves": [
9+
{
10+
"steps": [
11+
{
12+
"result": "success",
13+
"completed": "2019-09-11T14:20:55.645615846Z",
14+
"started": "2019-09-11T14:20:40.631404367Z",
15+
"isWaitStep": false,
16+
"stepDoc": "Download mongodb binaries (may take a while)",
17+
"step": "Download"
18+
}
19+
],
20+
"moveDoc": "Download mongodb binaries",
21+
"move": "Download"
22+
},
23+
{
24+
"steps": [
25+
{
26+
"result": "success",
27+
"completed": "2019-09-11T14:20:59.325129842Z",
28+
"started": "2019-09-11T14:20:55.645743003Z",
29+
"isWaitStep": false,
30+
"stepDoc": "Start a mongo instance (start fresh)",
31+
"step": "StartFresh"
32+
}
33+
],
34+
"moveDoc": "Start the process",
35+
"move": "Start"
36+
},
37+
{
38+
"steps": [
39+
{
40+
"result": "wait",
41+
"completed": null,
42+
"started": "2019-09-11T14:20:59.325272608Z",
43+
"isWaitStep": true,
44+
"stepDoc": "Wait for the replica set to be initialized by another member",
45+
"step": "WaitRsInit"
46+
}
47+
],
48+
"moveDoc": "Wait for the replica set to be initialized by another member",
49+
"move": "WaitRsInit"
50+
},
51+
{
52+
"steps": [
53+
{
54+
"result": "",
55+
"completed": null,
56+
"started": null,
57+
"isWaitStep": true,
58+
"stepDoc": "Wait for featureCompatibilityVersion to be right",
59+
"step": "WaitFeatureCompatibilityVersionCorrect"
60+
}
61+
],
62+
"moveDoc": "Wait for featureCompatibilityVersion to be right",
63+
"move": "WaitFeatureCompatibilityVersionCorrect"
64+
}
65+
],
66+
"completed": "2019-09-11T14:21:42.034934358Z",
67+
"started": "2019-09-11T14:20:40.631348806Z"
68+
}
69+
],
70+
"lastGoalVersionAchieved": 5,
71+
"name": "bar"
72+
}
73+
},
74+
"statuses": {
75+
"bar": {
76+
"ReplicationStatus": 3,
77+
"ExpectedToBeUp": true,
78+
"LastMongoUpTime": 1568222195,
79+
"IsInGoalState": true
80+
}
81+
}
82+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
{
2+
"mmsStatus": {
3+
"bar": {
4+
"errorString": "",
5+
"errorCode": 0,
6+
"plans": [
7+
{
8+
"moves": [
9+
{
10+
"steps": [
11+
{
12+
"result": "success",
13+
"completed": "2019-09-11T14:20:55.645615846Z",
14+
"started": "2019-09-11T14:20:40.631404367Z",
15+
"isWaitStep": false,
16+
"stepDoc": "Download mongodb binaries (may take a while)",
17+
"step": "Download"
18+
}
19+
],
20+
"moveDoc": "Download mongodb binaries",
21+
"move": "Download"
22+
},
23+
{
24+
"steps": [
25+
{
26+
"result": "wait",
27+
"completed": "2019-09-11T14:20:59.325129842Z",
28+
"started": "2019-09-11T14:20:55.645743003Z",
29+
"isWaitStep": false,
30+
"stepDoc": "Start a mongo instance (start fresh)",
31+
"step": "StartFresh"
32+
}
33+
],
34+
"moveDoc": "Start the process",
35+
"move": "Start"
36+
},
37+
{
38+
"steps": [
39+
{
40+
"result": "wait",
41+
"completed": null,
42+
"started": "2019-09-11T14:20:59.325272608Z",
43+
"isWaitStep": true,
44+
"stepDoc": "Wait for the replica set to be initialized by another member",
45+
"step": "WaitRsInit"
46+
}
47+
],
48+
"moveDoc": "Wait for the replica set to be initialized by another member",
49+
"move": "WaitRsInit"
50+
},
51+
{
52+
"steps": [
53+
{
54+
"result": "",
55+
"completed": null,
56+
"started": null,
57+
"isWaitStep": true,
58+
"stepDoc": "Wait for featureCompatibilityVersion to be right",
59+
"step": "WaitFeatureCompatibilityVersionCorrect"
60+
}
61+
],
62+
"moveDoc": "Wait for featureCompatibilityVersion to be right",
63+
"move": "WaitFeatureCompatibilityVersionCorrect"
64+
}
65+
],
66+
"completed": "2019-09-11T14:21:42.034934358Z",
67+
"started": "2019-09-11T14:20:40.631348806Z"
68+
}
69+
],
70+
"lastGoalVersionAchieved": 5,
71+
"name": "bar"
72+
}
73+
},
74+
"statuses": {
75+
"bar": {
76+
"ReplicationStatus": 1,
77+
"ExpectedToBeUp": true,
78+
"LastMongoUpTime": 1568222195,
79+
"IsInGoalState": true
80+
}
81+
}
82+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
{
2+
"mmsStatus": {
3+
"bar": {
4+
"errorString": "",
5+
"errorCode": 0,
6+
"plans": [
7+
{
8+
"moves": [
9+
{
10+
"steps": [
11+
{
12+
"result": "success",
13+
"completed": "2019-09-11T14:20:55.645615846Z",
14+
"started": "2019-09-11T14:20:40.631404367Z",
15+
"isWaitStep": false,
16+
"stepDoc": "Download mongodb binaries (may take a while)",
17+
"step": "Download"
18+
}
19+
],
20+
"moveDoc": "Download mongodb binaries",
21+
"move": "Download"
22+
},
23+
{
24+
"steps": [
25+
{
26+
"result": "success",
27+
"completed": "2019-09-11T14:20:59.325129842Z",
28+
"started": "2019-09-11T14:20:55.645743003Z",
29+
"isWaitStep": false,
30+
"stepDoc": "Start a mongo instance (start fresh)",
31+
"step": "StartFresh"
32+
}
33+
],
34+
"moveDoc": "Start the process",
35+
"move": "Start"
36+
},
37+
{
38+
"steps": [
39+
{
40+
"result": "wait",
41+
"completed": null,
42+
"started": "2019-09-11T14:20:59.325272608Z",
43+
"isWaitStep": true,
44+
"stepDoc": "Wait for the replica set to be initialized by another member",
45+
"step": "WaitRsInit"
46+
}
47+
],
48+
"moveDoc": "Wait for the replica set to be initialized by another member",
49+
"move": "WaitRsInit"
50+
},
51+
{
52+
"steps": [
53+
{
54+
"result": "",
55+
"completed": null,
56+
"started": null,
57+
"isWaitStep": true,
58+
"stepDoc": "Wait for featureCompatibilityVersion to be right",
59+
"step": "WaitFeatureCompatibilityVersionCorrect"
60+
}
61+
],
62+
"moveDoc": "Wait for featureCompatibilityVersion to be right",
63+
"move": "WaitFeatureCompatibilityVersionCorrect"
64+
}
65+
],
66+
"completed": "2019-09-11T14:21:42.034934358Z",
67+
"started": "2019-09-11T14:20:40.631348806Z"
68+
}
69+
],
70+
"lastGoalVersionAchieved": 5,
71+
"name": "bar"
72+
}
73+
},
74+
"statuses": {
75+
"bar": {
76+
"ReplicationStatus": null,
77+
"ExpectedToBeUp": true,
78+
"LastMongoUpTime": 1568222195,
79+
"IsInGoalState": true
80+
}
81+
}
82+
}

0 commit comments

Comments
 (0)