Skip to content

Commit d10ae95

Browse files
committed
:sparkling: Add terminal error
This change adds a terminal error into the reconcile package. The purpose of that error is to still be logged and used in metrics but avoid retrying in situations where it is known that that will not help. It also adds a new metric for just terminal errors, as they often indicate that some kind of human intervention is needed.
1 parent 92646a5 commit d10ae95

File tree

8 files changed

+97
-24
lines changed

8 files changed

+97
-24
lines changed

pkg/controller/controllertest/testing.go

+11-5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package controllertest
1818

1919
import (
20+
"sync"
2021
"time"
2122

2223
"k8s.io/apimachinery/pkg/runtime"
@@ -35,28 +36,33 @@ func (ErrorType) GetObjectKind() schema.ObjectKind { return nil }
3536
// DeepCopyObject implements runtime.Object.
3637
func (ErrorType) DeepCopyObject() runtime.Object { return nil }
3738

38-
var _ workqueue.RateLimitingInterface = Queue{}
39+
var _ workqueue.RateLimitingInterface = &Queue{}
3940

4041
// Queue implements a RateLimiting queue as a non-ratelimited queue for testing.
4142
// This helps testing by having functions that use a RateLimiting queue synchronously add items to the queue.
4243
type Queue struct {
4344
workqueue.Interface
45+
AddedRateLimitedLock sync.Mutex
46+
AddedRatelimited []any
4447
}
4548

4649
// AddAfter implements RateLimitingInterface.
47-
func (q Queue) AddAfter(item interface{}, duration time.Duration) {
50+
func (q *Queue) AddAfter(item interface{}, duration time.Duration) {
4851
q.Add(item)
4952
}
5053

5154
// AddRateLimited implements RateLimitingInterface. TODO(community): Implement this.
52-
func (q Queue) AddRateLimited(item interface{}) {
55+
func (q *Queue) AddRateLimited(item interface{}) {
56+
q.AddedRateLimitedLock.Lock()
57+
q.AddedRatelimited = append(q.AddedRatelimited, item)
58+
q.AddedRateLimitedLock.Unlock()
5359
q.Add(item)
5460
}
5561

5662
// Forget implements RateLimitingInterface. TODO(community): Implement this.
57-
func (q Queue) Forget(item interface{}) {}
63+
func (q *Queue) Forget(item interface{}) {}
5864

5965
// NumRequeues implements RateLimitingInterface. TODO(community): Implement this.
60-
func (q Queue) NumRequeues(item interface{}) int {
66+
func (q *Queue) NumRequeues(item interface{}) int {
6167
return 0
6268
}

pkg/handler/eventhandler_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ var _ = Describe("Eventhandler", func() {
4646
var pod *corev1.Pod
4747
var mapper meta.RESTMapper
4848
BeforeEach(func() {
49-
q = controllertest.Queue{Interface: workqueue.New()}
49+
q = &controllertest.Queue{Interface: workqueue.New()}
5050
pod = &corev1.Pod{
5151
ObjectMeta: metav1.ObjectMeta{Namespace: "biz", Name: "baz"},
5252
}

pkg/internal/controller/controller.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,11 @@ func (c *Controller) reconcileHandler(ctx context.Context, obj interface{}) {
314314
result, err := c.Reconcile(ctx, req)
315315
switch {
316316
case err != nil:
317-
c.Queue.AddRateLimited(req)
317+
if errors.Is(err, reconcile.TerminalError(nil)) {
318+
ctrlmetrics.TerminalReconcileErrors.WithLabelValues(c.Name).Inc()
319+
} else {
320+
c.Queue.AddRateLimited(req)
321+
}
318322
ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Inc()
319323
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelError).Inc()
320324
log.Error(err, "Reconciler error")

pkg/internal/controller/controller_test.go

+24-1
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,6 @@ var _ = Describe("controller", func() {
359359
})
360360

361361
It("should requeue a Request if there is an error and continue processing items", func() {
362-
363362
ctx, cancel := context.WithCancel(context.Background())
364363
defer cancel()
365364
go func() {
@@ -372,6 +371,9 @@ var _ = Describe("controller", func() {
372371
By("Invoking Reconciler which will give an error")
373372
fakeReconcile.AddResult(reconcile.Result{}, fmt.Errorf("expected error: reconcile"))
374373
Expect(<-reconciled).To(Equal(request))
374+
queue.AddedRateLimitedLock.Lock()
375+
Expect(queue.AddedRatelimited).To(Equal([]any{request}))
376+
queue.AddedRateLimitedLock.Unlock()
375377

376378
By("Invoking Reconciler a second time without error")
377379
fakeReconcile.AddResult(reconcile.Result{}, nil)
@@ -382,6 +384,27 @@ var _ = Describe("controller", func() {
382384
Eventually(func() int { return queue.NumRequeues(request) }, 1.0).Should(Equal(0))
383385
})
384386

387+
It("should not requeue a Request if there is a terminal error", func() {
388+
ctx, cancel := context.WithCancel(context.Background())
389+
defer cancel()
390+
go func() {
391+
defer GinkgoRecover()
392+
Expect(ctrl.Start(ctx)).NotTo(HaveOccurred())
393+
}()
394+
395+
queue.Add(request)
396+
397+
By("Invoking Reconciler which will give an error")
398+
fakeReconcile.AddResult(reconcile.Result{}, reconcile.TerminalError(fmt.Errorf("expected error: reconcile")))
399+
Expect(<-reconciled).To(Equal(request))
400+
401+
queue.AddedRateLimitedLock.Lock()
402+
Expect(queue.AddedRatelimited).To(BeEmpty())
403+
queue.AddedRateLimitedLock.Unlock()
404+
405+
Expect(queue.Len()).Should(Equal(0))
406+
})
407+
385408
// TODO(directxman12): we should ensure that backoff occurrs with error requeue
386409

387410
It("should not reset backoff until there's a non-error result", func() {

pkg/internal/controller/metrics/metrics.go

+8
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ var (
3939
Help: "Total number of reconciliation errors per controller",
4040
}, []string{"controller"})
4141

42+
// TerminalReconcileErrors is a prometheus counter metrics which holds the total
43+
// number of terminal errors from the Reconciler.
44+
TerminalReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
45+
Name: "controller_runtime_terminal_reconcile_errors_total",
46+
Help: "Total number of terminal reconciliation errors per controller",
47+
}, []string{"controller"})
48+
4249
// ReconcileTime is a prometheus metric which keeps track of the duration
4350
// of reconciliations.
4451
ReconcileTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
@@ -67,6 +74,7 @@ func init() {
6774
metrics.Registry.MustRegister(
6875
ReconcileTotal,
6976
ReconcileErrors,
77+
TerminalReconcileErrors,
7078
ReconcileTime,
7179
WorkerCount,
7280
ActiveWorkers,

pkg/internal/source/internal_test.go

+16-16
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ var _ = Describe("Internal", func() {
7474
set = true
7575
},
7676
}
77-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, funcs, nil)
77+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, funcs, nil)
7878
})
7979

8080
Describe("EventHandler", func() {
@@ -99,38 +99,38 @@ var _ = Describe("Internal", func() {
9999
})
100100

101101
It("should used Predicates to filter CreateEvents", func() {
102-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
102+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
103103
predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return false }},
104104
})
105105
set = false
106106
instance.OnAdd(pod)
107107
Expect(set).To(BeFalse())
108108

109109
set = false
110-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
110+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
111111
predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }},
112112
})
113113
instance.OnAdd(pod)
114114
Expect(set).To(BeTrue())
115115

116116
set = false
117-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
117+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
118118
predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }},
119119
predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return false }},
120120
})
121121
instance.OnAdd(pod)
122122
Expect(set).To(BeFalse())
123123

124124
set = false
125-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
125+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
126126
predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return false }},
127127
predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }},
128128
})
129129
instance.OnAdd(pod)
130130
Expect(set).To(BeFalse())
131131

132132
set = false
133-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
133+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
134134
predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }},
135135
predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }},
136136
})
@@ -157,37 +157,37 @@ var _ = Describe("Internal", func() {
157157

158158
It("should used Predicates to filter UpdateEvents", func() {
159159
set = false
160-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
160+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
161161
predicate.Funcs{UpdateFunc: func(updateEvent event.UpdateEvent) bool { return false }},
162162
})
163163
instance.OnUpdate(pod, newPod)
164164
Expect(set).To(BeFalse())
165165

166166
set = false
167-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
167+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
168168
predicate.Funcs{UpdateFunc: func(event.UpdateEvent) bool { return true }},
169169
})
170170
instance.OnUpdate(pod, newPod)
171171
Expect(set).To(BeTrue())
172172

173173
set = false
174-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
174+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
175175
predicate.Funcs{UpdateFunc: func(event.UpdateEvent) bool { return true }},
176176
predicate.Funcs{UpdateFunc: func(event.UpdateEvent) bool { return false }},
177177
})
178178
instance.OnUpdate(pod, newPod)
179179
Expect(set).To(BeFalse())
180180

181181
set = false
182-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
182+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
183183
predicate.Funcs{UpdateFunc: func(event.UpdateEvent) bool { return false }},
184184
predicate.Funcs{UpdateFunc: func(event.UpdateEvent) bool { return true }},
185185
})
186186
instance.OnUpdate(pod, newPod)
187187
Expect(set).To(BeFalse())
188188

189189
set = false
190-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
190+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
191191
predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }},
192192
predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }},
193193
})
@@ -215,37 +215,37 @@ var _ = Describe("Internal", func() {
215215

216216
It("should used Predicates to filter DeleteEvents", func() {
217217
set = false
218-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
218+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
219219
predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return false }},
220220
})
221221
instance.OnDelete(pod)
222222
Expect(set).To(BeFalse())
223223

224224
set = false
225-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
225+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
226226
predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return true }},
227227
})
228228
instance.OnDelete(pod)
229229
Expect(set).To(BeTrue())
230230

231231
set = false
232-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
232+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
233233
predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return true }},
234234
predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return false }},
235235
})
236236
instance.OnDelete(pod)
237237
Expect(set).To(BeFalse())
238238

239239
set = false
240-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
240+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
241241
predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return false }},
242242
predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return true }},
243243
})
244244
instance.OnDelete(pod)
245245
Expect(set).To(BeFalse())
246246

247247
set = false
248-
instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{
248+
instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{
249249
predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return true }},
250250
predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return true }},
251251
})

pkg/reconcile/reconcile.go

+24
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package reconcile
1818

1919
import (
2020
"context"
21+
"errors"
2122
"time"
2223

2324
"k8s.io/apimachinery/pkg/types"
@@ -100,3 +101,26 @@ var _ Reconciler = Func(nil)
100101

101102
// Reconcile implements Reconciler.
102103
func (r Func) Reconcile(ctx context.Context, o Request) (Result, error) { return r(ctx, o) }
104+
105+
// TerminalError is an error that will not be retried but still be logged
106+
// and recorded in metrics.
107+
func TerminalError(wrapped error) error {
108+
return &terminalError{err: wrapped}
109+
}
110+
111+
type terminalError struct {
112+
err error
113+
}
114+
115+
func (te *terminalError) Unwrap() error {
116+
return te.err
117+
}
118+
119+
func (te *terminalError) Error() string {
120+
return "terminal error: " + te.err.Error()
121+
}
122+
123+
func (te *terminalError) Is(target error) bool {
124+
tp := &terminalError{}
125+
return errors.As(target, &tp)
126+
}

pkg/reconcile/reconcile_test.go

+8
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323

2424
. "github.com/onsi/ginkgo/v2"
2525
. "github.com/onsi/gomega"
26+
apierrors "k8s.io/apimachinery/pkg/api/errors"
2627
"k8s.io/apimachinery/pkg/types"
2728
"sigs.k8s.io/controller-runtime/pkg/reconcile"
2829
)
@@ -88,5 +89,12 @@ var _ = Describe("reconcile", func() {
8889
Expect(actualResult).To(Equal(result))
8990
Expect(actualErr).To(Equal(err))
9091
})
92+
93+
It("should allow unwrapping inner error from terminal error", func() {
94+
inner := apierrors.NewGone("")
95+
terminalError := reconcile.TerminalError(inner)
96+
97+
Expect(apierrors.IsGone(terminalError)).To(BeTrue())
98+
})
9199
})
92100
})

0 commit comments

Comments
 (0)