Skip to content

Commit 8520671

Browse files
authored
Use simpler locking in the Go 1.17 collector (#975)
A previous PR made it so that the Go 1.17 collector locked only around uses of rmSampleBuf, but really that means that Metric values may be sent over the channel containing some values from future metrics.Read calls. While generally speaking this isn't a problem, we lose any consistency guarantees provided by the runtime/metrics package.

Also, that optimization to not just lock around all of Collect was premature. Truthfully, Collect is called relatively infrequently, and its critical path is fairly fast (10s of µs). To prove it, this change also adds a benchmark.

name            old time/op  new time/op  delta
GoCollector-16  43.7µs ± 2%  43.2µs ± 2%  ~ (p=0.190 n=9+9)

Note that because the benchmark is single-threaded it actually looks like it might be getting *slightly* faster, because all those Collect calls for the Metrics are direct calls instead of interface calls.

Signed-off-by: Michael Anthony Knyszek <mknyszek@google.com>
1 parent f63e219 commit 8520671

File tree

3 files changed

+35
-17
lines changed

3 files changed

+35
-17
lines changed

prometheus/go_collector_go117.go

+17-16
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,11 @@ import (
3131
type goCollector struct {
3232
base baseGoCollector
3333

34+
// mu protects updates to all fields ensuring a consistent
35+
// snapshot is always produced by Collect.
36+
mu sync.Mutex
37+
3438
// rm... fields all pertain to the runtime/metrics package.
35-
rmSampleMu sync.Mutex
3639
rmSampleBuf []metrics.Sample
3740
rmSampleMap map[string]*metrics.Sample
3841
rmMetrics []collectorMetric
@@ -135,10 +138,16 @@ func (c *goCollector) Collect(ch chan<- Metric) {
135138
// rmSampleBuf. Just read into rmSampleBuf but write all the data
136139
// we get into our Metrics or MemStats.
137140
//
138-
// Note that we cannot simply read and then clone rmSampleBuf
139-
// because we'd need to perform a deep clone of it, which is likely
140-
// not worth it.
141-
c.rmSampleMu.Lock()
141+
// This lock also ensures that the Metrics we send out are all from
142+
// the same updates, ensuring their mutual consistency insofar as
143+
// is guaranteed by the runtime/metrics package.
144+
//
145+
// N.B. This locking is heavy-handed, but Collect is expected to be called
146+
// relatively infrequently. Also the core operation here, metrics.Read,
147+
// is fast (O(tens of microseconds)) so contention should certainly be
148+
// low, though channel operations and any allocations may add to that.
149+
c.mu.Lock()
150+
defer c.mu.Unlock()
142151

143152
// Populate runtime/metrics sample buffer.
144153
metrics.Read(c.rmSampleBuf)
@@ -157,10 +166,13 @@ func (c *goCollector) Collect(ch chan<- Metric) {
157166
if v1 > v0 {
158167
m.Add(unwrapScalarRMValue(sample.Value) - m.get())
159168
}
169+
m.Collect(ch)
160170
case *gauge:
161171
m.Set(unwrapScalarRMValue(sample.Value))
172+
m.Collect(ch)
162173
case *batchHistogram:
163174
m.update(sample.Value.Float64Histogram(), c.exactSumFor(sample.Name))
175+
m.Collect(ch)
164176
default:
165177
panic("unexpected metric type")
166178
}
@@ -169,17 +181,6 @@ func (c *goCollector) Collect(ch chan<- Metric) {
169181
// populate the old metrics from it.
170182
var ms runtime.MemStats
171183
memStatsFromRM(&ms, c.rmSampleMap)
172-
173-
c.rmSampleMu.Unlock()
174-
175-
// Export all the metrics to ch.
176-
// At this point we must not access rmSampleBuf or rmSampleMap, because
177-
// a concurrent caller could use it. It's safe to Collect all our Metrics,
178-
// however, because they're updated in a thread-safe way while MemStats
179-
// is local to this call of Collect.
180-
for _, m := range c.rmMetrics {
181-
m.Collect(ch)
182-
}
183184
for _, i := range c.msMetrics {
184185
ch <- MustNewConstMetric(i.desc, i.valType, i.eval(&ms))
185186
}

prometheus/go_collector_go117_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ func TestGoCollectorConcurrency(t *testing.T) {
292292
go func() {
293293
ch := make(chan Metric)
294294
go func() {
295-
// Drain all metrics recieved until the
295+
// Drain all metrics received until the
296296
// channel is closed.
297297
for range ch {
298298
}

prometheus/go_collector_test.go

+17
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,20 @@ func TestGoCollectorGC(t *testing.T) {
154154
break
155155
}
156156
}
157+
158+
func BenchmarkGoCollector(b *testing.B) {
159+
c := NewGoCollector().(*goCollector)
160+
161+
b.ResetTimer()
162+
for i := 0; i < b.N; i++ {
163+
ch := make(chan Metric, 8)
164+
go func() {
165+
// Drain all metrics received until the
166+
// channel is closed.
167+
for range ch {
168+
}
169+
}()
170+
c.Collect(ch)
171+
close(ch)
172+
}
173+
}

0 commit comments

Comments
 (0)