Skip to content

Commit 86ef3fb

Browse files
test: fix flake in TestAgent_Metrics_SSH (coder#20447)
Closes coder/internal#921. The flake in the linked issue was caused by the startup script taking longer than 1 second in CI. The existing conditional, that the startup script duration was under a second, was incorrect; the correct conditional is that the metric exists with the `success` label set to `true`.
1 parent 13ca9ea commit 86ef3fb

File tree

1 file changed

+78
-28
lines changed

1 file changed

+78
-28
lines changed

agent/agent_test.go

Lines changed: 78 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -3481,16 +3481,31 @@ func TestAgent_Metrics_SSH(t *testing.T) {
34813481
err = session.Shell()
34823482
require.NoError(t, err)
34833483

3484-
expected := []*proto.Stats_Metric{
3484+
expected := []struct {
3485+
Name string
3486+
Type proto.Stats_Metric_Type
3487+
CheckFn func(float64) error
3488+
Labels []*proto.Stats_Metric_Label
3489+
}{
34853490
{
3486-
Name: "agent_reconnecting_pty_connections_total",
3487-
Type: proto.Stats_Metric_COUNTER,
3488-
Value: 0,
3491+
Name: "agent_reconnecting_pty_connections_total",
3492+
Type: proto.Stats_Metric_COUNTER,
3493+
CheckFn: func(v float64) error {
3494+
if v == 0 {
3495+
return nil
3496+
}
3497+
return xerrors.Errorf("expected 0, got %f", v)
3498+
},
34893499
},
34903500
{
3491-
Name: "agent_sessions_total",
3492-
Type: proto.Stats_Metric_COUNTER,
3493-
Value: 1,
3501+
Name: "agent_sessions_total",
3502+
Type: proto.Stats_Metric_COUNTER,
3503+
CheckFn: func(v float64) error {
3504+
if v == 1 {
3505+
return nil
3506+
}
3507+
return xerrors.Errorf("expected 1, got %f", v)
3508+
},
34943509
Labels: []*proto.Stats_Metric_Label{
34953510
{
34963511
Name: "magic_type",
@@ -3503,24 +3518,44 @@ func TestAgent_Metrics_SSH(t *testing.T) {
35033518
},
35043519
},
35053520
{
3506-
Name: "agent_ssh_server_failed_connections_total",
3507-
Type: proto.Stats_Metric_COUNTER,
3508-
Value: 0,
3521+
Name: "agent_ssh_server_failed_connections_total",
3522+
Type: proto.Stats_Metric_COUNTER,
3523+
CheckFn: func(v float64) error {
3524+
if v == 0 {
3525+
return nil
3526+
}
3527+
return xerrors.Errorf("expected 0, got %f", v)
3528+
},
35093529
},
35103530
{
3511-
Name: "agent_ssh_server_sftp_connections_total",
3512-
Type: proto.Stats_Metric_COUNTER,
3513-
Value: 0,
3531+
Name: "agent_ssh_server_sftp_connections_total",
3532+
Type: proto.Stats_Metric_COUNTER,
3533+
CheckFn: func(v float64) error {
3534+
if v == 0 {
3535+
return nil
3536+
}
3537+
return xerrors.Errorf("expected 0, got %f", v)
3538+
},
35143539
},
35153540
{
3516-
Name: "agent_ssh_server_sftp_server_errors_total",
3517-
Type: proto.Stats_Metric_COUNTER,
3518-
Value: 0,
3541+
Name: "agent_ssh_server_sftp_server_errors_total",
3542+
Type: proto.Stats_Metric_COUNTER,
3543+
CheckFn: func(v float64) error {
3544+
if v == 0 {
3545+
return nil
3546+
}
3547+
return xerrors.Errorf("expected 0, got %f", v)
3548+
},
35193549
},
35203550
{
3521-
Name: "coderd_agentstats_currently_reachable_peers",
3522-
Type: proto.Stats_Metric_GAUGE,
3523-
Value: 1,
3551+
Name: "coderd_agentstats_currently_reachable_peers",
3552+
Type: proto.Stats_Metric_GAUGE,
3553+
CheckFn: func(v float64) error {
3554+
if v == 1 {
3555+
return nil
3556+
}
3557+
return xerrors.Errorf("expected 1, got %f", v)
3558+
},
35243559
Labels: []*proto.Stats_Metric_Label{
35253560
{
35263561
Name: "connection_type",
@@ -3529,9 +3564,14 @@ func TestAgent_Metrics_SSH(t *testing.T) {
35293564
},
35303565
},
35313566
{
3532-
Name: "coderd_agentstats_currently_reachable_peers",
3533-
Type: proto.Stats_Metric_GAUGE,
3534-
Value: 0,
3567+
Name: "coderd_agentstats_currently_reachable_peers",
3568+
Type: proto.Stats_Metric_GAUGE,
3569+
CheckFn: func(f float64) error {
3570+
if f == 0 {
3571+
return nil
3572+
}
3573+
return xerrors.Errorf("expected 0, got %f", f)
3574+
},
35353575
Labels: []*proto.Stats_Metric_Label{
35363576
{
35373577
Name: "connection_type",
@@ -3540,9 +3580,20 @@ func TestAgent_Metrics_SSH(t *testing.T) {
35403580
},
35413581
},
35423582
{
3543-
Name: "coderd_agentstats_startup_script_seconds",
3544-
Type: proto.Stats_Metric_GAUGE,
3545-
Value: 1,
3583+
Name: "coderd_agentstats_startup_script_seconds",
3584+
Type: proto.Stats_Metric_GAUGE,
3585+
CheckFn: func(f float64) error {
3586+
if f >= 0 {
3587+
return nil
3588+
}
3589+
return xerrors.Errorf("expected >= 0, got %f", f)
3590+
},
3591+
Labels: []*proto.Stats_Metric_Label{
3592+
{
3593+
Name: "success",
3594+
Value: "true",
3595+
},
3596+
},
35463597
},
35473598
}
35483599

@@ -3564,11 +3615,10 @@ func TestAgent_Metrics_SSH(t *testing.T) {
35643615
for _, m := range mf.GetMetric() {
35653616
assert.Equal(t, expected[i].Name, mf.GetName())
35663617
assert.Equal(t, expected[i].Type.String(), mf.GetType().String())
3567-
// Value is max expected
35683618
if expected[i].Type == proto.Stats_Metric_GAUGE {
3569-
assert.GreaterOrEqualf(t, expected[i].Value, m.GetGauge().GetValue(), "expected %s to be greater than or equal to %f, got %f", expected[i].Name, expected[i].Value, m.GetGauge().GetValue())
3619+
assert.NoError(t, expected[i].CheckFn(m.GetGauge().GetValue()), "check fn for %s failed", expected[i].Name)
35703620
} else if expected[i].Type == proto.Stats_Metric_COUNTER {
3571-
assert.GreaterOrEqualf(t, expected[i].Value, m.GetCounter().GetValue(), "expected %s to be greater than or equal to %f, got %f", expected[i].Name, expected[i].Value, m.GetCounter().GetValue())
3621+
assert.NoError(t, expected[i].CheckFn(m.GetCounter().GetValue()), "check fn for %s failed", expected[i].Name)
35723622
}
35733623
for j, lbl := range expected[i].Labels {
35743624
assert.Equal(t, m.GetLabel()[j], &promgo.LabelPair{

0 commit comments

Comments
 (0)