Skip to content

Commit ccc8c53

Browse files
CLOUDP-66661: Enable graceful shutdowns (#105)
1 parent 7402e42 commit ccc8c53

File tree

12 files changed

+95
-111
lines changed

12 files changed

+95
-111
lines changed

.evergreen.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,8 @@ tasks:
184184
- func: setup_virtualenv
185185
- func: build_and_push_image
186186
vars:
187-
image: quay.io/mongodb/community-operator-pre-stop-hook:${version_id}
188-
image_type: prehook
187+
image: quay.io/mongodb/community-operator-version-upgrade-post-start-hook:${version_id}
188+
image_type: versionhook
189189

190190
- name: build_testrunner_image
191191
priority: 60

architecture.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ You create and update MongoDB resources by defining a MongoDB resource definitio
1616
1. Writes the Automation configuration as a [ConfigMap](https://kubernetes.io/docs/concepts/configuration/configmap/) and mounts it to each pod.
1717
1. Creates one [init container](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) and two [containers](https://kubernetes.io/docs/concepts/containers/overview/) in each pod:
1818

19-
- An init container which copies the `cmd/prestop` binary to the main `mongod` container. [This pre-stop hook](https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/) is used during [version upgrades](#example-mongodb-version-upgrade).
19+
- An init container that copies the `cmd/versionhook` binary to the main `mongod` container. The binary runs before `mongod` starts in order to handle [version upgrades](#example-mongodb-version-upgrade).
2020

2121
- A container for the [`mongod`](https://docs.mongodb.com/manual/reference/program/mongod/index.html) process binary. `mongod` is the primary daemon process for the MongoDB system. It handles data requests, manages data access, and performs background management operations.
2222

@@ -61,7 +61,7 @@ When you update the MongoDB version in your resource definition and reapply it t
6161

6262
1. The MongoDB Agent chooses the first pod to upgrade and stops the `mongod` process using a local connection and [`db.shutdownServer`](https://docs.mongodb.com/manual/reference/method/db.shutdownServer/#db.shutdownServer).
6363

64-
1. A pre-stop hook on the database container checks the state of the MongoDB Agent. If the MongoDB Agent expects the `mongod` process to start with a new version, the hook uses a Kubernetes API call to delete the pod.
64+
1. Kubernetes restarts the `mongod` container, which causes the version upgrade hook to run and check the state of the MongoDB Agent. If the MongoDB Agent expects the `mongod` process to start with a new version, the hook uses a Kubernetes API call to delete the pod.
6565

6666
1. The Kubernetes Controller downloads the target version of MongoDB from its default docker registry and restarts the pod with the target version of `mongod` in the database container.
6767

cmd/testrunner/main.go

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -30,34 +30,34 @@ import (
3030
)
3131

3232
type flags struct {
33-
deployDir string
34-
namespace string
35-
operatorImage string
36-
preHookImage string
37-
testImage string
38-
test string
39-
performCleanup string
33+
deployDir string
34+
namespace string
35+
operatorImage string
36+
versionUpgradeHookImage string
37+
testImage string
38+
test string
39+
performCleanup string
4040
}
4141

4242
func parseFlags() flags {
43-
var namespace, deployDir, operatorImage, preHookImage, testImage, test, performCleanup *string
43+
var namespace, deployDir, operatorImage, versionUpgradeHookImage, testImage, test, performCleanup *string
4444
namespace = flag.String("namespace", "default", "the namespace the operator and tests should be deployed in")
4545
deployDir = flag.String("deployDir", "deploy/", "the path to the directory which contains the yaml deployment files")
4646
operatorImage = flag.String("operatorImage", "quay.io/mongodb/community-operator-dev:latest", "the image which should be used for the operator deployment")
47-
preHookImage = flag.String("preHookImage", "quay.io/mongodb/community-operator-prehook:latest", "the prestophook image")
47+
versionUpgradeHookImage = flag.String("versionUpgradeHookImage", "quay.io/mongodb/community-operator-pre-stop-hook:latest", "the version upgrade post-start hook image")
4848
testImage = flag.String("testImage", "quay.io/mongodb/community-operator-e2e:latest", "the image which should be used for the operator e2e tests")
4949
test = flag.String("test", "", "test e2e test that should be run. (name of folder containing the test)")
5050
performCleanup = flag.String("performCleanup", "1", "specifies whether to performing a cleanup the context or not")
5151
flag.Parse()
5252

5353
return flags{
54-
deployDir: *deployDir,
55-
namespace: *namespace,
56-
operatorImage: *operatorImage,
57-
preHookImage: *preHookImage,
58-
testImage: *testImage,
59-
test: *test,
60-
performCleanup: *performCleanup,
54+
deployDir: *deployDir,
55+
namespace: *namespace,
56+
operatorImage: *operatorImage,
57+
versionUpgradeHookImage: *versionUpgradeHookImage,
58+
testImage: *testImage,
59+
test: *test,
60+
performCleanup: *performCleanup,
6161
}
6262
}
6363

@@ -174,7 +174,7 @@ func deployOperator(f flags, c client.Client) error {
174174
&appsv1.Deployment{},
175175
withNamespace(f.namespace),
176176
withOperatorImage(f.operatorImage),
177-
withPreHookImage(f.preHookImage)); err != nil {
177+
withVersionUpgradeHookImage(f.versionUpgradeHookImage)); err != nil {
178178
return fmt.Errorf("error building operator deployment: %v", err)
179179
}
180180
fmt.Println("Successfully created the operator Deployment")
@@ -219,25 +219,25 @@ func withEnvVar(key, val string) func(obj runtime.Object) {
219219
}
220220
}
221221

222-
// withPreHookImage sets the value of the PRE_STOP_HOOK_IMAGE
222+
// withVersionUpgradeHookImage sets the value of the VERSION_UPGRADE_HOOK_IMAGE
223223
// EnvVar from first container to `image`. The EnvVar is updated
224224
// if it exists, or appended if there is no EnvVar with this `Name`.
225-
func withPreHookImage(image string) func(runtime.Object) {
225+
func withVersionUpgradeHookImage(image string) func(runtime.Object) {
226226
return func(obj runtime.Object) {
227227
if dep, ok := obj.(*appsv1.Deployment); ok {
228-
preHookEnv := corev1.EnvVar{
229-
Name: "PRE_STOP_HOOK_IMAGE",
228+
versionUpgradeHookEnv := corev1.EnvVar{
229+
Name: "VERSION_UPGRADE_HOOK_IMAGE",
230230
Value: image,
231231
}
232232
found := false
233233
for idx := range dep.Spec.Template.Spec.Containers[0].Env {
234-
if dep.Spec.Template.Spec.Containers[0].Env[idx].Name == preHookEnv.Name {
235-
dep.Spec.Template.Spec.Containers[0].Env[idx].Value = preHookEnv.Value
234+
if dep.Spec.Template.Spec.Containers[0].Env[idx].Name == versionUpgradeHookEnv.Name {
235+
dep.Spec.Template.Spec.Containers[0].Env[idx].Value = versionUpgradeHookEnv.Value
236236
found = true
237237
}
238238
}
239239
if !found {
240-
dep.Spec.Template.Spec.Containers[0].Env = append(dep.Spec.Template.Spec.Containers[0].Env, preHookEnv)
240+
dep.Spec.Template.Spec.Containers[0].Env = append(dep.Spec.Template.Spec.Containers[0].Env, versionUpgradeHookEnv)
241241
}
242242
}
243243
}

cmd/prestop/main.go renamed to cmd/versionhook/main.go

Lines changed: 18 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020

2121
const (
2222
agentStatusFilePathEnv = "AGENT_STATUS_FILEPATH"
23-
logFilePathEnv = "PRE_STOP_HOOK_LOG_PATH"
2423

2524
defaultNamespace = "default"
2625

@@ -29,19 +28,27 @@ const (
2928
)
3029

3130
func main() {
32-
fmt.Println("Calling pre-stop hook!")
31+
logger := setupLogger()
3332

34-
if err := ensureEnvironmentVariables(logFilePathEnv, agentStatusFilePathEnv); err != nil {
35-
zap.S().Fatal("Not all required environment variables are present: %s", err)
36-
os.Exit(1)
37-
}
33+
logger.Info("Running version change post-start hook")
3834

39-
logger := setupLogger()
35+
if statusPath := os.Getenv(agentStatusFilePathEnv); statusPath == "" {
36+
logger.Fatalf(`Required environment variable "%s" not set`, agentStatusFilePathEnv)
37+
return
38+
}
4039

4140
logger.Info("Waiting for agent health status...")
4241
health, err := waitForAgentHealthStatus()
4342
if err != nil {
44-
logger.Errorf("Error getting the agent health file: %s", err)
43+
// If the pod has just restarted then the status file will not exist.
44+
// In that case we return and let mongod start again.
45+
if os.IsNotExist(err) {
46+
logger.Info("Agent health status file not found, mongod will start")
47+
} else {
48+
logger.Errorf("Error getting the agent health file: %s", err)
49+
}
50+
51+
return
4552
}
4653

4754
shouldDelete, err := shouldDeletePod(health)
@@ -63,32 +70,15 @@ func main() {
6370
// is killed by Kubernetes, bringing the new container image
6471
// into play.
6572
var quit = make(chan struct{})
66-
logger.Info("A Pod killed itself, waiting...")
73+
logger.Info("Pod killed itself, waiting...")
6774
<-quit
6875
} else {
69-
logger.Info("Pod should not be deleted, container will restart...")
76+
logger.Info("Pod should not be deleted, mongod started")
7077
}
7178
}
7279

73-
func ensureEnvironmentVariables(requiredEnvVars ...string) error {
74-
var missingEnvVars []string
75-
for _, envVar := range requiredEnvVars {
76-
if val := os.Getenv(envVar); val == "" {
77-
missingEnvVars = append(missingEnvVars, envVar)
78-
}
79-
}
80-
if len(missingEnvVars) > 0 {
81-
return fmt.Errorf("missing envars: %s", strings.Join(missingEnvVars, ","))
82-
}
83-
return nil
84-
}
85-
8680
func setupLogger() *zap.SugaredLogger {
87-
cfg := zap.NewDevelopmentConfig()
88-
cfg.OutputPaths = []string{
89-
os.Getenv(logFilePathEnv),
90-
}
91-
log, err := cfg.Build()
81+
log, err := zap.NewDevelopment()
9282
if err != nil {
9383
zap.S().Errorf("Error building logger config: %s", err)
9484
os.Exit(1)

contributing.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ by the operator and mounted in the Agent's Pod.
3131

3232
* MongoDB image: Docker image that includes the MongoDB server.
3333

34-
* Pre-stop Hook: This image includes a binary that helps orchestrate the
34+
* Version upgrade post-start hook image: This image includes a binary that helps orchestrate the
3535
restarts of the MongoDB Replica Set members, in particular, when dealing with
3636
version upgrades, which require a very precise set of operations to allow for
3737
seamless upgrades and downgrades, with no downtime.

pkg/controller/mongodb/build_statefulset_test.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,7 @@ import (
1414
)
1515

1616
func init() {
17-
18-
os.Setenv(preStopHookImageEnv, "pre-stop-hook-image")
17+
os.Setenv(versionUpgradeHookImageEnv, "version-upgrade-hook-image")
1918
}
2019

2120
func TestMultipleCalls_DoNotCauseSideEffects(t *testing.T) {
@@ -46,7 +45,7 @@ func assertStatefulSetIsBuiltCorrectly(t *testing.T, mdb mdbv1.MongoDB, sts *app
4645
assert.Equal(t, appsv1.RollingUpdateStatefulSetStrategyType, sts.Spec.UpdateStrategy.Type)
4746
assert.Equal(t, operatorServiceAccountName, sts.Spec.Template.Spec.ServiceAccountName)
4847
assert.Len(t, sts.Spec.Template.Spec.Containers[0].Env, 1)
49-
assert.Len(t, sts.Spec.Template.Spec.Containers[1].Env, 2)
48+
assert.Len(t, sts.Spec.Template.Spec.Containers[1].Env, 1)
5049

5150
agentContainer := sts.Spec.Template.Spec.Containers[0]
5251
assert.Equal(t, "agent-image", agentContainer.Image)
@@ -62,7 +61,7 @@ func assertStatefulSetIsBuiltCorrectly(t *testing.T, mdb mdbv1.MongoDB, sts *app
6261
assert.Len(t, mongodContainer.VolumeMounts, 3)
6362

6463
initContainer := sts.Spec.Template.Spec.InitContainers[0]
65-
assert.Equal(t, preStopHookName, initContainer.Name)
66-
assert.Equal(t, "pre-stop-hook-image", initContainer.Image)
64+
assert.Equal(t, versionUpgradeHookName, initContainer.Name)
65+
assert.Equal(t, "version-upgrade-hook-image", initContainer.Image)
6766
assert.Len(t, initContainer.VolumeMounts, 1)
6867
}

pkg/controller/mongodb/mongodb_controller.go

Lines changed: 15 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,13 @@ import (
4343

4444
const (
4545
agentImageEnv = "AGENT_IMAGE"
46-
preStopHookImageEnv = "PRE_STOP_HOOK_IMAGE"
46+
versionUpgradeHookImageEnv = "VERSION_UPGRADE_HOOK_IMAGE"
4747
agentHealthStatusFilePathEnv = "AGENT_STATUS_FILEPATH"
48-
preStopHookLogFilePathEnv = "PRE_STOP_HOOK_LOG_PATH"
4948

5049
AutomationConfigKey = "automation-config"
5150
agentName = "mongodb-agent"
5251
mongodbName = "mongod"
53-
preStopHookName = "mongod-prehook"
52+
versionUpgradeHookName = "mongod-posthook"
5453
dataVolumeName = "data-volume"
5554
versionManifestFilePath = "/usr/local/version_manifest.json"
5655
readinessProbePath = "/var/lib/mongodb-mms-automation/probes/readinessprobe"
@@ -511,11 +510,11 @@ func mongodbAgentContainer(volumeMounts []corev1.VolumeMount) container.Modifica
511510
)
512511
}
513512

514-
func preStopHookInit(volumeMount []corev1.VolumeMount) container.Modification {
513+
func versionUpgradeHookInit(volumeMount []corev1.VolumeMount) container.Modification {
515514
return container.Apply(
516-
container.WithName(preStopHookName),
517-
container.WithCommand([]string{"cp", "pre-stop-hook", "/hooks/pre-stop-hook"}),
518-
container.WithImage(os.Getenv(preStopHookImageEnv)),
515+
container.WithName(versionUpgradeHookName),
516+
container.WithCommand([]string{"cp", "version-upgrade-hook", "/hooks/version-upgrade"}),
517+
container.WithImage(os.Getenv(versionUpgradeHookImageEnv)),
519518
container.WithImagePullPolicy(corev1.PullAlways),
520519
container.WithVolumeMounts(volumeMount),
521520
)
@@ -525,15 +524,15 @@ func mongodbContainer(version string, volumeMounts []corev1.VolumeMount) contain
525524
mongoDbCommand := []string{
526525
"/bin/sh",
527526
"-c",
528-
// we execute the pre-stop hook once the mongod has been gracefully shut down by the agent.
529-
`while [ ! -f /data/automation-mongod.conf ]; do sleep 3 ; done ; sleep 2 ;
530-
# start mongod with this configuration
531-
mongod -f /data/automation-mongod.conf ;
527+
`
528+
# run post-start hook to handle version changes
529+
/hooks/version-upgrade
530+
531+
# wait for config to be created by the agent
532+
while [ ! -f /data/automation-mongod.conf ]; do sleep 3 ; done ; sleep 2 ;
532533
533-
# start the pre-stop-hook to restart the Pod when needed
534-
# If the Pod does not require to be restarted, the pre-stop-hook will
535-
# exit(0) for Kubernetes to restart the container.
536-
/hooks/pre-stop-hook ;
534+
# start mongod with this configuration
535+
exec mongod -f /data/automation-mongod.conf ;
537536
`,
538537
}
539538

@@ -547,10 +546,6 @@ mongod -f /data/automation-mongod.conf ;
547546
Name: agentHealthStatusFilePathEnv,
548547
Value: "/healthstatus/agent-health-status.json",
549548
},
550-
corev1.EnvVar{
551-
Name: preStopHookLogFilePathEnv,
552-
Value: "/hooks/pre-stop-hook.log",
553-
},
554549
),
555550
container.WithVolumeMounts(volumeMounts),
556551
)
@@ -604,7 +599,7 @@ func buildStatefulSetModificationFunction(mdb mdbv1.MongoDB) statefulset.Modific
604599
podtemplatespec.WithServiceAccount(operatorServiceAccountName),
605600
podtemplatespec.WithContainer(agentName, mongodbAgentContainer([]corev1.VolumeMount{agentHealthStatusVolumeMount, automationConfigVolumeMount, dataVolume})),
606601
podtemplatespec.WithContainer(mongodbName, mongodbContainer(mdb.Spec.Version, []corev1.VolumeMount{mongodHealthStatusVolumeMount, dataVolume, hooksVolumeMount})),
607-
podtemplatespec.WithInitContainer(preStopHookName, preStopHookInit([]corev1.VolumeMount{hooksVolumeMount})),
602+
podtemplatespec.WithInitContainer(versionUpgradeHookName, versionUpgradeHookInit([]corev1.VolumeMount{hooksVolumeMount})),
608603
buildTLSPodSpecModification(mdb),
609604
buildScramPodSpecModification(mdb),
610605
),

scripts/ci/config.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
"repo_url": "quay.io/mongodb",
44
"operator_image": "community-operator-dev",
55
"e2e_image": "community-operator-e2e",
6-
"prestop_hook_image": "community-operator-pre-stop-hook",
6+
"version_upgrade_hook_image": "community-operator-version-upgrade-post-start-hook",
77
"testrunner_image": "community-operator-testrunner"
88
}

scripts/dev/dev_config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ def e2e_image(self) -> str:
3535
return self._config["e2e_image"]
3636

3737
@property
38-
def prestop_hook_image(self) -> str:
39-
return self._config["prestop_hook_image"]
38+
def version_upgrade_hook_image(self) -> str:
39+
return self._config["version_upgrade_hook_image"]
4040

4141
@property
4242
def testrunner_image(self) -> str:

scripts/dev/e2e.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -123,11 +123,11 @@ def build_and_push_e2e(repo_url: str, tag: str, path: str) -> None:
123123
build_and_push_image(repo_url, tag, path, "e2e")
124124

125125

126-
def build_and_push_prehook(repo_url: str, tag: str, path: str) -> None:
126+
def build_and_push_version_upgrade_hook(repo_url: str, tag: str, path: str) -> None:
127127
"""
128-
build_and_push_prehook builds and pushes the pre-stop-hook image.
128+
build_and_push_version_upgrade_hook builds and pushes the version upgrade hook image.
129129
"""
130-
build_and_push_image(repo_url, tag, path, "prehook")
130+
build_and_push_image(repo_url, tag, path, "versionhook")
131131

132132

133133
def _delete_testrunner_pod(config_file: str) -> None:
@@ -216,8 +216,8 @@ def _get_testrunner_pod_body(
216216
"./runner",
217217
"--operatorImage",
218218
f"{dev_config.repo_url}/{dev_config.operator_image}:{tag}",
219-
"--preHookImage",
220-
f"{dev_config.repo_url}/{dev_config.prestop_hook_image}:{tag}",
219+
"--versionUpgradeHookImage",
220+
f"{dev_config.repo_url}/{dev_config.version_upgrade_hook_image}:{tag}",
221221
"--testImage",
222222
f"{dev_config.repo_url}/{dev_config.e2e_image}:{tag}",
223223
f"--test={test}",
@@ -240,7 +240,7 @@ def parse_args() -> argparse.Namespace:
240240
)
241241
parser.add_argument(
242242
"--build-images",
243-
help="Build testrunner, e2e and prestop-hook images",
243+
help="Build testrunner, e2e and version upgrade hook images",
244244
action="store_true",
245245
)
246246
parser.add_argument(
@@ -284,10 +284,10 @@ def build_and_push_images(args: argparse.Namespace, dev_config: DevConfig) -> No
284284
"{}/{}:{}".format(dev_config.repo_url, dev_config.e2e_image, args.tag),
285285
".",
286286
)
287-
build_and_push_prehook(
287+
build_and_push_version_upgrade_hook(
288288
dev_config.repo_url,
289289
"{}/{}:{}".format(
290-
dev_config.repo_url, dev_config.prestop_hook_image, args.tag
290+
dev_config.repo_url, dev_config.version_upgrade_hook_image, args.tag
291291
),
292292
".",
293293
)

0 commit comments

Comments
 (0)