diff --git a/.evergreen-tasks.yml b/.evergreen-tasks.yml index 5059e8a8a..8fb63f71c 100644 --- a/.evergreen-tasks.yml +++ b/.evergreen-tasks.yml @@ -502,7 +502,7 @@ tasks: commands: - func: "e2e_test" - - name: e2e_disable_tls_scale_up + - name: e2e_disable_tls_and_scale tags: [ "patch-run" ] commands: - func: "e2e_test" diff --git a/.evergreen.yml b/.evergreen.yml index 41d5d257c..7592cd6a3 100644 --- a/.evergreen.yml +++ b/.evergreen.yml @@ -679,7 +679,7 @@ task_groups: - e2e_tls_rs_external_access - e2e_replica_set_tls_require - e2e_replica_set_tls_certs_secret_prefix - - e2e_disable_tls_scale_up + - e2e_disable_tls_and_scale - e2e_replica_set_tls_require_and_disable # e2e_x509_task_group - e2e_configure_tls_and_x509_simultaneously_st diff --git a/api/v1/mdb/mongodb_validation.go b/api/v1/mdb/mongodb_validation.go index d33d3167c..651daa644 100644 --- a/api/v1/mdb/mongodb_validation.go +++ b/api/v1/mdb/mongodb_validation.go @@ -369,6 +369,25 @@ func noTopologyMigration(newObj, oldObj MongoDbSpec) v1.ValidationResult { return v1.ValidationSuccess() } +func noSimultaneousTLSDisablingAndScaling(newObj, oldObj MongoDbSpec) v1.ValidationResult { + if newObj.ResourceType != ReplicaSet { + return v1.ValidationSuccess() + } + + // Check if TLS is being disabled (was enabled, now disabled) + tlsWasEnabled := oldObj.IsSecurityTLSConfigEnabled() + tlsWillBeDisabled := !newObj.IsSecurityTLSConfigEnabled() + + // Check if members count is changing + membersChanging := oldObj.Members != newObj.Members + + if tlsWasEnabled && tlsWillBeDisabled && membersChanging { + return v1.ValidationError("Cannot disable TLS and change member count simultaneously. Please apply these changes separately.") + } + + return v1.ValidationSuccess() +} + // specWithExactlyOneSchema checks that exactly one among "Project/OpsManagerConfig/CloudManagerConfig" // is configured, doing the "oneOf" validation in the webhook. func specWithExactlyOneSchema(d DbCommonSpec) v1.ValidationResult { @@ -437,6 +456,7 @@ func (m *MongoDB) RunValidations(old *MongoDB) []v1.ValidationResult { updateValidators := []func(newObj MongoDbSpec, oldObj MongoDbSpec) v1.ValidationResult{ resourceTypeImmutable, noTopologyMigration, + noSimultaneousTLSDisablingAndScaling, } var validationResults []v1.ValidationResult diff --git a/api/v1/mdb/mongodb_validation_test.go b/api/v1/mdb/mongodb_validation_test.go index b295ff742..664bc9100 100644 --- a/api/v1/mdb/mongodb_validation_test.go +++ b/api/v1/mdb/mongodb_validation_test.go @@ -80,6 +80,88 @@ func TestMongoDB_ResourceTypeImmutable(t *testing.T) { assert.Errorf(t, err, "'resourceType' cannot be changed once created") } +func TestMongoDB_NoSimultaneousTLSDisablingAndScaling(t *testing.T) { + tests := []struct { + name string + oldTLSEnabled bool + oldMembers int + newTLSEnabled bool + newMembers int + expectError bool + expectedErrorMessage string + }{ + { + name: "Simultaneous TLS disabling and scaling is blocked", + oldTLSEnabled: true, + oldMembers: 3, + newTLSEnabled: false, + newMembers: 5, + expectError: true, + expectedErrorMessage: "Cannot disable TLS and change member count simultaneously. Please apply these changes separately.", + }, + { + name: "TLS disabling without scaling is allowed", + oldTLSEnabled: true, + oldMembers: 3, + newTLSEnabled: false, + newMembers: 3, + expectError: false, + }, + { + name: "Scaling without TLS changes is allowed", + oldTLSEnabled: true, + oldMembers: 3, + newTLSEnabled: true, + newMembers: 5, + expectError: false, + }, + { + name: "TLS enabling with scaling is allowed", + oldTLSEnabled: false, + oldMembers: 3, + newTLSEnabled: true, + newMembers: 5, + expectError: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Build old ReplicaSet + oldBuilder := NewReplicaSetBuilder() + if tt.oldTLSEnabled { + oldBuilder = oldBuilder.SetSecurityTLSEnabled() + } + oldRs := oldBuilder.Build() + oldRs.Spec.CloudManagerConfig = &PrivateCloudConfig{ + ConfigMapRef: ConfigMapRef{Name: "cloud-manager"}, + } + oldRs.Spec.Members = tt.oldMembers + + // Build new ReplicaSet + newBuilder := NewReplicaSetBuilder() + if tt.newTLSEnabled { + newBuilder = newBuilder.SetSecurityTLSEnabled() + } + newRs := newBuilder.Build() + newRs.Spec.CloudManagerConfig = &PrivateCloudConfig{ + ConfigMapRef: ConfigMapRef{Name: "cloud-manager"}, + } + newRs.Spec.Members = tt.newMembers + + // Validate + _, err := newRs.ValidateUpdate(oldRs) + + if tt.expectError { + require.Error(t, err) + assert.Equal(t, tt.expectedErrorMessage, err.Error()) + } else { + assert.NoError(t, err) + } + }) + } +} + func TestSpecProjectOnlyOneValue(t *testing.T) { rs := NewReplicaSetBuilder().Build() rs.Spec.CloudManagerConfig = &PrivateCloudConfig{ diff --git a/changelog/20251024_fix_block_simultaneous_tls_disabling_and_scaling.md b/changelog/20251024_fix_block_simultaneous_tls_disabling_and_scaling.md new file mode 100644 index 000000000..b5e9afd26 --- /dev/null +++ b/changelog/20251024_fix_block_simultaneous_tls_disabling_and_scaling.md @@ -0,0 +1,6 @@ +--- +kind: fix +date: 2025-10-24 +--- + +* **ReplicaSet**: Blocked disabling TLS and changing member count simultaneously. These operations must now be applied separately to prevent configuration inconsistencies. diff --git a/controllers/om/deployment.go b/controllers/om/deployment.go index fd7f7f776..5de396a0e 100644 --- a/controllers/om/deployment.go +++ b/controllers/om/deployment.go @@ -100,24 +100,6 @@ func NewDeployment() Deployment { return ans } -// TLSConfigurationWillBeDisabled checks that if applying this security configuration the Deployment -// TLS configuration will go from Enabled -> Disabled. -func (d Deployment) TLSConfigurationWillBeDisabled(security *mdbv1.Security) bool { - tlsIsCurrentlyEnabled := false - - // To detect that TLS is enabled, it is sufficient to check for the - // d["tls"]["CAFilePath"] attribute to have a value different from nil. - if tlsMap, ok := d["tls"]; ok { - if caFilePath, ok := tlsMap.(map[string]interface{})["CAFilePath"]; ok { - if caFilePath != nil { - tlsIsCurrentlyEnabled = true - } - } - } - - return tlsIsCurrentlyEnabled && !security.IsTLSEnabled() -} - // ConfigureTLS configures the deployment's TLS settings from the security // specification provided by the user in the mongodb resource. func (d Deployment) ConfigureTLS(security *mdbv1.Security, caFilePath string) { diff --git a/controllers/om/deployment_test.go b/controllers/om/deployment_test.go index 8f0808dae..281731187 100644 --- a/controllers/om/deployment_test.go +++ b/controllers/om/deployment_test.go @@ -152,20 +152,6 @@ func TestConfigureSSL_Deployment(t *testing.T) { assert.Equal(t, d["tls"], map[string]any{"clientCertificateMode": string(automationconfig.ClientCertificateModeOptional)}) } -func TestTLSConfigurationWillBeDisabled(t *testing.T) { - d := Deployment{} - d.ConfigureTLS(&mdbv1.Security{TLSConfig: &mdbv1.TLSConfig{Enabled: false}}, util.CAFilePathInContainer) - - assert.False(t, d.TLSConfigurationWillBeDisabled(&mdbv1.Security{TLSConfig: &mdbv1.TLSConfig{Enabled: false}})) - assert.False(t, d.TLSConfigurationWillBeDisabled(&mdbv1.Security{TLSConfig: &mdbv1.TLSConfig{Enabled: true}})) - - d = Deployment{} - d.ConfigureTLS(&mdbv1.Security{TLSConfig: &mdbv1.TLSConfig{Enabled: true}}, util.CAFilePathInContainer) - - assert.False(t, d.TLSConfigurationWillBeDisabled(&mdbv1.Security{TLSConfig: &mdbv1.TLSConfig{Enabled: true}})) - assert.True(t, d.TLSConfigurationWillBeDisabled(&mdbv1.Security{TLSConfig: &mdbv1.TLSConfig{Enabled: false}})) -} - // TestMergeDeployment_BigReplicaset ensures that adding a big replica set (> 7 members) works correctly and no more than // 7 voting members are added func TestMergeDeployment_BigReplicaset(t *testing.T) { diff --git a/controllers/operator/mongodbreplicaset_controller.go b/controllers/operator/mongodbreplicaset_controller.go index bb30016a9..b79660112 100644 --- a/controllers/operator/mongodbreplicaset_controller.go +++ b/controllers/operator/mongodbreplicaset_controller.go @@ -578,32 +578,8 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c } caFilePath := fmt.Sprintf("%s/ca-pem", util.TLSCaMountPath) - // If current operation is to Disable TLS, then we should the current members of the Replica Set, - // this is, do not scale them up or down util TLS disabling has completed. - shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(conn, r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, membersNumberBefore, rs, log, caFilePath, tlsCertPath) - if err != nil && !isRecovering { - return workflow.Failed(err) - } - - var updatedMembers int - // This lock member logic will be removed soon, we should rather block possibility to disable tls + scale - // Tracked in CLOUDP-349087 - if shouldLockMembers { - // We should not add or remove members during this run, we'll wait for - // TLS to be completely disabled first. - // However, on first reconciliation (membersNumberBefore=0), we need to use replicasTarget - // because the OM deployment is initialized with TLS enabled by default. - log.Debugf("locking members for this reconciliation because TLS was disabled") - if membersNumberBefore == 0 { - updatedMembers = replicasTarget - } else { - updatedMembers = membersNumberBefore - } - } else { - updatedMembers = replicasTarget - } - replicaSet := replicaset.BuildFromMongoDBWithReplicas(r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, rs, updatedMembers, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath) + replicaSet := replicaset.BuildFromMongoDBWithReplicas(r.imageUrls[mcoConstruct.MongodbImageEnv], r.forceEnterprise, rs, replicasTarget, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath) processNames := replicaSet.GetProcessNames() status, additionalReconciliationRequired := r.updateOmAuthentication(ctx, conn, processNames, rs, deploymentOptionsRS.agentCertPath, caFilePath, internalClusterCertPath, isRecovering, log) @@ -668,40 +644,6 @@ func (r *ReconcileMongoDbReplicaSet) updateOmDeploymentRs(ctx context.Context, c return workflow.OK() } -// updateOmDeploymentDisableTLSConfiguration checks if TLS configuration needs -// to be disabled. In which case it will disable it and inform to the calling -// function. -func updateOmDeploymentDisableTLSConfiguration(conn om.Connection, mongoDBImage string, forceEnterprise bool, membersNumberBefore int, rs *mdbv1.MongoDB, log *zap.SugaredLogger, caFilePath, tlsCertPath string) (bool, error) { - tlsConfigWasDisabled := false - - err := conn.ReadUpdateDeployment( - func(d om.Deployment) error { - if !d.TLSConfigurationWillBeDisabled(rs.Spec.GetSecurity()) { - return nil - } - - tlsConfigWasDisabled = true - d.ConfigureTLS(rs.Spec.GetSecurity(), caFilePath) - - // configure as many agents/Pods as we currently have, no more (in case - // there's a scale up change at the same time). - replicaSet := replicaset.BuildFromMongoDBWithReplicas(mongoDBImage, forceEnterprise, rs, membersNumberBefore, rs.CalculateFeatureCompatibilityVersion(), tlsCertPath) - - lastConfig, err := rs.GetLastAdditionalMongodConfigByType(mdbv1.ReplicaSetConfig) - if err != nil { - return err - } - - d.MergeReplicaSet(replicaSet, rs.Spec.AdditionalMongodConfig.ToMap(), lastConfig.ToMap(), log) - - return nil - }, - log, - ) - - return tlsConfigWasDisabled, err -} - func (r *ReconcileMongoDbReplicaSet) OnDelete(ctx context.Context, obj runtime.Object, log *zap.SugaredLogger) error { rs := obj.(*mdbv1.MongoDB) diff --git a/controllers/operator/mongodbreplicaset_controller_test.go b/controllers/operator/mongodbreplicaset_controller_test.go index 787883ffb..35c924de0 100644 --- a/controllers/operator/mongodbreplicaset_controller_test.go +++ b/controllers/operator/mongodbreplicaset_controller_test.go @@ -68,7 +68,7 @@ func TestCreateReplicaSet(t *testing.T) { connection := omConnectionFactory.GetConnection() connection.(*om.MockedOmConnection).CheckDeployment(t, deployment.CreateFromReplicaSet("fake-mongoDBImage", false, rs), "auth", "ssl") - connection.(*om.MockedOmConnection).CheckNumberOfUpdateRequests(t, 2) + connection.(*om.MockedOmConnection).CheckNumberOfUpdateRequests(t, 1) } func TestReplicaSetRace(t *testing.T) { @@ -385,39 +385,6 @@ func TestCreateReplicaSet_TLS(t *testing.T) { assert.Equal(t, "OPTIONAL", sslConfig["clientCertificateMode"]) } -// TestUpdateDeploymentTLSConfiguration a combination of tests checking that: -// -// TLS Disabled -> TLS Disabled: should not lock members -// TLS Disabled -> TLS Enabled: should not lock members -// TLS Enabled -> TLS Enabled: should not lock members -// TLS Enabled -> TLS Disabled: *should lock members* -func TestUpdateDeploymentTLSConfiguration(t *testing.T) { - rsWithTLS := mdbv1.NewReplicaSetBuilder().SetSecurityTLSEnabled().Build() - rsNoTLS := mdbv1.NewReplicaSetBuilder().Build() - deploymentWithTLS := deployment.CreateFromReplicaSet("fake-mongoDBImage", false, rsWithTLS) - deploymentNoTLS := deployment.CreateFromReplicaSet("fake-mongoDBImage", false, rsNoTLS) - - // TLS Disabled -> TLS Disabled - shouldLockMembers, err := updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentNoTLS), "fake-mongoDBImage", false, 3, rsNoTLS, zap.S(), util.CAFilePathInContainer, "") - assert.NoError(t, err) - assert.False(t, shouldLockMembers) - - // TLS Disabled -> TLS Enabled - shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentNoTLS), "fake-mongoDBImage", false, 3, rsWithTLS, zap.S(), util.CAFilePathInContainer, "") - assert.NoError(t, err) - assert.False(t, shouldLockMembers) - - // TLS Enabled -> TLS Enabled - shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentWithTLS), "fake-mongoDBImage", false, 3, rsWithTLS, zap.S(), util.CAFilePathInContainer, "") - assert.NoError(t, err) - assert.False(t, shouldLockMembers) - - // TLS Enabled -> TLS Disabled - shouldLockMembers, err = updateOmDeploymentDisableTLSConfiguration(om.NewMockedOmConnection(deploymentWithTLS), "fake-mongoDBImage", false, 3, rsNoTLS, zap.S(), util.CAFilePathInContainer, "") - assert.NoError(t, err) - assert.True(t, shouldLockMembers) -} - // TestCreateDeleteReplicaSet checks that no state is left in OpsManager on removal of the replicaset func TestCreateDeleteReplicaSet(t *testing.T) { ctx := context.Background() diff --git a/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale.py b/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale.py new file mode 100644 index 000000000..f83dcab96 --- /dev/null +++ b/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale.py @@ -0,0 +1,85 @@ +import pytest +from kubetester.certs import ISSUER_CA_NAME, create_mongodb_tls_certs +from kubetester.kubetester import fixture as load_fixture +from kubetester.mongodb import MongoDB +from kubetester.operator import Operator +from kubetester.phase import Phase + + +@pytest.fixture(scope="module") +def server_certs(issuer: str, namespace: str): + resource_name = "test-tls-base-rs" + return create_mongodb_tls_certs(ISSUER_CA_NAME, namespace, resource_name, "test-tls-base-rs-cert") + + +@pytest.fixture(scope="module") +def replica_set(namespace: str, server_certs: str, issuer_ca_configmap: str) -> MongoDB: + res = MongoDB.from_yaml(load_fixture("test-tls-base-rs.yaml"), namespace=namespace) + res["spec"]["security"]["tls"]["ca"] = issuer_ca_configmap + + # Set this ReplicaSet to allowSSL mode + # this is the only mode that can go to "disabled" state. + res["spec"]["additionalMongodConfig"] = {"net": {"ssl": {"mode": "allowSSL"}}} + + return res.create() + + +@pytest.mark.e2e_disable_tls_and_scale +def test_install_operator(operator: Operator): + operator.assert_is_running() + + +@pytest.mark.e2e_disable_tls_and_scale +def test_rs_is_running(replica_set: MongoDB): + replica_set.assert_reaches_phase(Phase.Running, timeout=400) + + +@pytest.mark.e2e_disable_tls_and_scale +def test_validation_error_on_simultaneous_tls_disable_and_scale(replica_set: MongoDB): + """Test that attempting to disable TLS and scale simultaneously fails validation.""" + replica_set.load() + replica_set["spec"]["members"] = 5 + replica_set["spec"]["security"]["tls"]["enabled"] = False + del replica_set["spec"]["additionalMongodConfig"] + + try: + replica_set.update() + # If update succeeds, the test should fail + assert False, "Expected validation error but update succeeded" + except Exception as e: + # Verify the error message contains our validation error + error_message = str(e) + assert ( + "Cannot disable TLS and change member count simultaneously" in error_message + ), f"Expected validation error about simultaneous TLS disable and scaling, got: {error_message}" + + +@pytest.mark.e2e_disable_tls_and_scale +def test_scale_up_without_tls_change(replica_set: MongoDB): + """Test that scaling up without TLS changes works.""" + replica_set.load() + replica_set["spec"]["members"] = 5 + + replica_set.update() + replica_set.assert_reaches_phase(Phase.Running, timeout=400) + + +@pytest.mark.e2e_disable_tls_and_scale +def test_disable_tls_without_scaling(replica_set: MongoDB): + """Test that disabling TLS without scaling works.""" + replica_set.load() + replica_set["spec"]["security"]["tls"]["enabled"] = False + del replica_set["spec"]["additionalMongodConfig"] + + replica_set.update() + replica_set.assert_reaches_phase(Phase.Running, timeout=800) + + +@pytest.mark.e2e_disable_tls_and_scale +def test_scale_down_after_tls_change(replica_set: MongoDB): + """Test that scaling down after disabling TLS works.""" + replica_set.load() + replica_set["spec"]["members"] = 3 + + replica_set.update() + replica_set.assert_reaches_phase(Phase.Running, timeout=400) diff --git a/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py b/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py deleted file mode 100644 index 3eae37813..000000000 --- a/docker/mongodb-kubernetes-tests/tests/tls/e2e_tls_disable_and_scale_up.py +++ /dev/null @@ -1,55 +0,0 @@ -import pytest -from kubetester.certs import ISSUER_CA_NAME, create_mongodb_tls_certs -from kubetester.kubetester import fixture as load_fixture -from kubetester.mongodb import MongoDB -from kubetester.operator import Operator -from kubetester.phase import Phase - - -@pytest.fixture(scope="module") -def server_certs(issuer: str, namespace: str): - resource_name = "test-tls-base-rs" - return create_mongodb_tls_certs(ISSUER_CA_NAME, namespace, resource_name, "test-tls-base-rs-cert") - - -@pytest.fixture(scope="module") -def replica_set(namespace: str, server_certs: str, issuer_ca_configmap: str) -> MongoDB: - res = MongoDB.from_yaml(load_fixture("test-tls-base-rs.yaml"), namespace=namespace) - res["spec"]["security"]["tls"]["ca"] = issuer_ca_configmap - - # Set this ReplicaSet to allowSSL mode - # this is the only mode that can go to "disabled" state. - res["spec"]["additionalMongodConfig"] = {"net": {"ssl": {"mode": "allowSSL"}}} - - return res.create() - - -@pytest.mark.e2e_disable_tls_scale_up -def test_install_operator(operator: Operator): - operator.assert_is_running() - - -@pytest.mark.e2e_disable_tls_scale_up -def test_rs_is_running(replica_set: MongoDB): - replica_set.assert_reaches_phase(Phase.Running, timeout=400) - - -@pytest.mark.e2e_disable_tls_scale_up -def test_tls_is_disabled_and_scaled_up(replica_set: MongoDB): - replica_set.load() - replica_set["spec"]["members"] = 5 - - replica_set.update() - - -@pytest.mark.e2e_disable_tls_scale_up -def test_tls_is_disabled_and_scaled_up(replica_set: MongoDB): - replica_set.load() - replica_set["spec"]["security"]["tls"]["enabled"] = False - del replica_set["spec"]["additionalMongodConfig"] - - replica_set.update() - - # timeout is longer because the operator first needs to - # disable TLS and then, scale down one by one. - replica_set.assert_reaches_phase(Phase.Running, timeout=800)