Skip to content

Commit 30266fb

Browse files
author
Rodrigo Valin
authored
CLOUDP-59610: Pre-stop hook
1 parent dd209c5 commit 30266fb

File tree

20 files changed

+563
-41
lines changed

20 files changed

+563
-41
lines changed

.evergreen.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,14 @@ tasks:
150150
vars:
151151
test: replica_set_scale
152152

153+
- name: e2e_test_replica_set_change_version
154+
commands:
155+
- func: clone
156+
- func: setup_kubernetes_environment
157+
- func: run_e2e_test
158+
vars:
159+
test: replica_set_change_version
160+
153161
buildvariants:
154162
- name: go_unit_tests
155163
display_name: go_unit_tests
@@ -173,6 +181,7 @@ buildvariants:
173181
- name: e2e_test_replica_set
174182
- name: e2e_test_replica_set_readiness_probe
175183
- name: e2e_test_replica_set_scale
184+
- name: e2e_test_replica_set_change_version
176185

177186
- name: init_test_run
178187
display_name: init_test_run

agent/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ RUN curl -LO http://downloads.mongodb.org/linux/mongodb-linux-x86_64-ubuntu1604-
2626
mv mongodb-linux-x86_64-ubuntu1604-4.0.6/bin/mongo /usr/bin && \
2727
rm -rf mongodb-linux-x86_64-ubuntu1604-4.0.6.tgz mongodb-linux-x86_64-ubuntu1604-4.0.6
2828

29-
3029
RUN mkdir -p /var/lib/mongodb-mms-automation/probes/ \
31-
&& curl --retry 3 https://readinessprobe.s3-us-west-1.amazonaws.com/readinessprobe -o /var/lib/mongodb-mms-automation/probes/readinessprobe \
30+
# && curl --retry 3 https://readinessprobe.s3-us-west-1.amazonaws.com/readinessprobe -o /var/lib/mongodb-mms-automation/probes/readinessprobe \
31+
&& curl --retry 3 https://readinessprobe-test.s3-us-west-1.amazonaws.com/readiness -o /var/lib/mongodb-mms-automation/probes/readinessprobe \
3232
&& chmod +x /var/lib/mongodb-mms-automation/probes/readinessprobe \
3333
&& mkdir -p /var/log/mongodb-mms-automation/ \
3434
&& chmod -R +wr /var/log/mongodb-mms-automation/

cmd/prestop/main.go

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"io"
8+
"io/ioutil"
9+
"os"
10+
"strings"
11+
12+
"github.com/mongodb/mongodb-kubernetes-operator/pkg/agenthealth"
13+
"go.uber.org/zap"
14+
corev1 "k8s.io/api/core/v1"
15+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
16+
"k8s.io/client-go/rest"
17+
"sigs.k8s.io/controller-runtime/pkg/client"
18+
)
19+
20+
var logger *zap.SugaredLogger
21+
22+
const (
23+
agentStatusFilePathEnv = "AGENT_STATUS_FILEPATH"
24+
logFilePathEnv = "PRE_STOP_HOOK_LOG_PATH"
25+
defaultNamespace = "default"
26+
)
27+
28+
func getNamespace() (string, error) {
29+
data, err := ioutil.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/namespace")
30+
if err != nil {
31+
return "", err
32+
}
33+
if ns := strings.TrimSpace(string(data)); len(ns) > 0 {
34+
return ns, nil
35+
}
36+
return defaultNamespace, nil
37+
}
38+
39+
// deletePod attempts to delete the pod this mongod is running in
40+
func deletePod() error {
41+
thisPod, err := getThisPod()
42+
if err != nil {
43+
return fmt.Errorf("error getting this pod: %s", err)
44+
}
45+
k8sClient, err := inClusterClient()
46+
if err != nil {
47+
return fmt.Errorf("error getting client: %s", err)
48+
}
49+
50+
if err := k8sClient.Delete(context.TODO(), &thisPod); err != nil {
51+
return fmt.Errorf("error deleting pod: %s", err)
52+
}
53+
return nil
54+
}
55+
56+
func inClusterClient() (client.Client, error) {
57+
config, err := rest.InClusterConfig()
58+
if err != nil {
59+
return nil, fmt.Errorf("error getting cluster config: %+v", err)
60+
}
61+
62+
k8sClient, err := client.New(config, client.Options{})
63+
if err != nil {
64+
return nil, fmt.Errorf("error creating client: %+v", err)
65+
}
66+
return k8sClient, nil
67+
}
68+
69+
// prettyPrint writes i to standard output as indented JSON.
//
// If i cannot be marshalled, only the error is printed; nothing else is
// written for that value.
func prettyPrint(i interface{}) {
	b, err := json.MarshalIndent(i, "", " ")
	if err != nil {
		fmt.Println("error:", err)
		// b is nil here; returning avoids emitting a spurious empty line.
		return
	}
	fmt.Println(string(b))
}
76+
77+
// shouldDeletePod returns a boolean value indicating if this pod should be deleted
78+
// this would be the case if the agent is currently trying to upgrade the version
79+
// of mongodb.
80+
func shouldDeletePod(health agenthealth.Health) (bool, error) {
81+
hostname := os.Getenv("HOSTNAME")
82+
status, ok := health.ProcessPlans[hostname]
83+
if !ok {
84+
return false, fmt.Errorf("hostname %s was not in the process plans", hostname)
85+
}
86+
return isWaitingToBeDeleted(status), nil
87+
}
88+
89+
// getAgentHealthStatus returns an instance of agenthealth.Health read
90+
// from the health file on disk
91+
func getAgentHealthStatus() (agenthealth.Health, error) {
92+
f, err := os.Open(os.Getenv(agentStatusFilePathEnv))
93+
if err != nil {
94+
return agenthealth.Health{}, fmt.Errorf("error opening file: %s", err)
95+
}
96+
defer f.Close()
97+
98+
h, err := readAgentHealthStatus(f)
99+
if err != nil {
100+
return agenthealth.Health{}, fmt.Errorf("error reading health status: %s", err)
101+
}
102+
return h, err
103+
104+
}
105+
106+
// getThisPod returns an instance of corev1.Pod that points to the current pod
107+
func getThisPod() (corev1.Pod, error) {
108+
podName := os.Getenv("HOSTNAME")
109+
if podName == "" {
110+
return corev1.Pod{}, fmt.Errorf("environment variable HOSTNAME was not present")
111+
}
112+
113+
ns, err := getNamespace()
114+
if err != nil {
115+
return corev1.Pod{}, fmt.Errorf("error reading namespace: %+v", err)
116+
}
117+
118+
return corev1.Pod{
119+
ObjectMeta: metav1.ObjectMeta{
120+
Name: podName,
121+
Namespace: ns,
122+
},
123+
}, nil
124+
}
125+
126+
// readAgentHealthStatus reads an instance of health.Health from the provided
127+
// io.Reader
128+
func readAgentHealthStatus(reader io.Reader) (agenthealth.Health, error) {
129+
var h agenthealth.Health
130+
data, err := ioutil.ReadAll(reader)
131+
if err != nil {
132+
return h, err
133+
}
134+
err = json.Unmarshal(data, &h)
135+
return h, err
136+
}
137+
138+
// ensureEnvironmentVariables verifies that every named environment variable
// has a non-empty value.
//
// It returns nil when all of them are set. Otherwise it returns a single
// error listing, comma separated and in argument order, every variable that
// is unset or empty.
func ensureEnvironmentVariables(requiredEnvVars ...string) error {
	var absent []string
	for i := range requiredEnvVars {
		name := requiredEnvVars[i]
		if os.Getenv(name) != "" {
			continue
		}
		absent = append(absent, name)
	}

	if len(absent) == 0 {
		return nil
	}
	return fmt.Errorf("missing envars: %s", strings.Join(absent, ","))
}
150+
151+
func main() {
152+
fmt.Println("Calling pre-stop hook!")
153+
cfg := zap.NewDevelopmentConfig()
154+
if err := ensureEnvironmentVariables(logFilePathEnv, agentStatusFilePathEnv); err != nil {
155+
zap.S().Fatal("Not all required environment variables are present: %s", err)
156+
os.Exit(1)
157+
}
158+
cfg.OutputPaths = []string{
159+
os.Getenv(logFilePathEnv),
160+
}
161+
log, err := cfg.Build()
162+
if err != nil {
163+
zap.S().Errorf("Error building logger config: %s", err)
164+
os.Exit(1)
165+
}
166+
logger = log.Sugar()
167+
health, err := getAgentHealthStatus()
168+
if err != nil {
169+
logger.Errorf("Error getting the agent health file: %s", err)
170+
}
171+
172+
shouldDelete, err := shouldDeletePod(health)
173+
logger.Debugf("shouldDeletePod=%t", shouldDelete)
174+
if err != nil {
175+
logger.Errorf("Error in shouldDeletePod: %s", err)
176+
}
177+
178+
if shouldDelete {
179+
if err := deletePod(); err != nil {
180+
// We should not raise an error if the Pod could not be deleted. It can have even
181+
// worst consequences: Pod being restarted with the same version, and the agent
182+
// killing it immediately after.
183+
logger.Errorf("Could not manually trigger restart of this Pod because of: %s", err)
184+
logger.Errorf("Make sure the Pod is restarted in order for the upgrade process to continue")
185+
}
186+
187+
// If the Pod needs to be killed, we'll wait until the Pod
188+
// is killed by Kubernetes, bringing the new container image
189+
// into play.
190+
var quit = make(chan struct{})
191+
logger.Info("A Pod killed itself, waiting...")
192+
<-quit
193+
}
194+
}
195+
196+
// isWaitingToBeDeleted determines if the agent is currently waiting
197+
// on the mongod pod to be restarted. In order to do this, we need to check the agent
198+
// status file and determine if the mongod has been stopped and if we are in the process
199+
// of a version change.
200+
func isWaitingToBeDeleted(healthStatus agenthealth.MmsDirectorStatus) bool {
201+
if len(healthStatus.Plans) == 0 {
202+
return false
203+
}
204+
lastPlan := healthStatus.Plans[len(healthStatus.Plans)-1]
205+
for _, m := range lastPlan.Moves {
206+
207+
// The next conditions are based on observations on the outcome
208+
// of the agent after they have stopped the mongo server.
209+
210+
switch m.Move {
211+
case "WaitFeatureCompatibilityVersionCorrect":
212+
// First condition observed. This is the Plan reported by the
213+
// agent on the first MongoD stopped.
214+
for _, s := range m.Steps {
215+
if s.Step == "WaitFeatureCompatibilityVersionCorrect" &&
216+
s.Result == "" {
217+
return true
218+
}
219+
}
220+
case "ChangeVersion":
221+
// This is the condition observed in the 2nd and 3rd Pods.
222+
return true
223+
}
224+
}
225+
return false
226+
}

cmd/testrunner/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,8 @@ func withTest(test string) func(obj runtime.Object) {
227227
"--verbose",
228228
"--kubeconfig",
229229
"/etc/config/kubeconfig",
230+
"--go-test-flags",
231+
"-timeout=20m",
230232
}
231233
}
232234
}

deploy/operator.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,5 @@ spec:
3232
value: "mongodb-kubernetes-operator"
3333
- name: AGENT_IMAGE # The MongoDB Agent the operator will deploy to manage MongoDB deployments
3434
value: quay.io/chatton/mongodb-agent
35+
- name: PRE_STOP_HOOK_IMAGE
36+
value: quay.io/mongodb/community-operator-pre-stop-hook

pkg/agenthealth/agenthealth.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package agenthealth
2+
3+
import (
4+
"time"
5+
)
6+
7+
type Health struct {
8+
Healthiness map[string]ProcessHealth `json:"statuses"`
9+
ProcessPlans map[string]MmsDirectorStatus `json:"mmsStatus"`
10+
}
11+
12+
// ProcessHealth is a single entry of the "statuses" section of the agent
// health document.
type ProcessHealth struct {
	// IsInGoalState is decoded from the "IsInGoalState" JSON field.
	IsInGoalState bool `json:"IsInGoalState"`
	// LastMongoUpTime is decoded from the "LastMongoUpTime" JSON field.
	// NOTE(review): the unit is not visible here — presumably a Unix timestamp; confirm.
	LastMongoUpTime int64 `json:"LastMongoUpTime"`
	// ExpectedToBeUp is decoded from the "ExpectedToBeUp" JSON field.
	ExpectedToBeUp bool `json:"ExpectedToBeUp"`
}
17+
18+
type MmsDirectorStatus struct {
19+
Name string `json:"name"`
20+
LastGoalStateClusterConfigVersion int64 `json:"lastGoalVersionAchieved"`
21+
Plans []*PlanStatus `json:"plans"`
22+
}
23+
24+
type PlanStatus struct {
25+
Moves []*MoveStatus `json:"moves"`
26+
Started *time.Time `json:"started"`
27+
Completed *time.Time `json:"completed"`
28+
}
29+
30+
type MoveStatus struct {
31+
Move string `json:"move"`
32+
Steps []*StepStatus `json:"steps"`
33+
}
34+
35+
// StepStatus is one element of the "steps" array of a MoveStatus.
type StepStatus struct {
	// Step is the name of the step, decoded from the "step" JSON field.
	Step string `json:"step"`
	// Started is decoded from the "started" JSON field; nil when absent or null.
	Started *time.Time `json:"started"`
	// Completed is decoded from the "completed" JSON field; nil when absent or null.
	Completed *time.Time `json:"completed"`
	// Result is decoded from the "result" JSON field; empty when absent.
	Result string `json:"result"`
}

pkg/apis/mongodb/v1/mongodb_types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ func (m *MongoDB) UpdateSuccess() {
6565
m.Status.Phase = Running
6666
}
6767

68-
func (m MongoDB) ChangingVersion() bool {
68+
func (m MongoDB) IsChangingVersion() bool {
6969
if lastVersion, ok := m.Annotations[LastVersionAnnotationKey]; ok {
7070
return (m.Spec.Version != lastVersion) && lastVersion != ""
7171
}

0 commit comments

Comments
 (0)