Skip to content

Commit 94d156b

Browse files
committed
pprof-heap 估算部分
1 parent ccbda40 commit 94d156b

File tree

1 file changed

+138
-61
lines changed

1 file changed

+138
-61
lines changed

pprof.md

Lines changed: 138 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ profiles.m = map[string]*Profile{
2222
```go
2323

2424
var allocsProfile = &Profile{
25-
name: "allocs",
26-
count: countHeap, // identical to heap profile
27-
write: writeAlloc,
25+
name: "allocs",
26+
count: countHeap, // identical to heap profile
27+
write: writeAlloc,
2828
}
2929
```
3030
- writeAlloc (主要涉及以下几个 api)
@@ -40,14 +40,14 @@ var allocsProfile = &Profile{
4040
// collection cycle.
4141
func ReadMemStats(m *MemStats) {
4242
// STW 操作
43-
stopTheWorld("read mem stats")
43+
stopTheWorld("read mem stats")
4444
// systemstack 切换
45-
systemstack(func() {
45+
systemstack(func() {
4646
// 将 memstats 通过 copy 操作复制给 m
47-
readmemstats_m(m)
48-
})
47+
readmemstats_m(m)
48+
})
4949

50-
startTheWorld()
50+
startTheWorld()
5151
}
5252
```
5353

@@ -74,12 +74,12 @@ func ReadMemStats(m *MemStats) {
7474
// the testing package's -test.memprofile flag instead
7575
// of calling MemProfile directly.
7676
func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
77-
lock(&proflock)
78-
// If we're between mProf_NextCycle and mProf_Flush, take care
79-
// of flushing to the active profile so we only have to look
80-
// at the active profile below.
81-
mProf_FlushLocked()
82-
clear := true
77+
lock(&proflock)
78+
// If we're between mProf_NextCycle and mProf_Flush, take care
79+
// of flushing to the active profile so we only have to look
80+
// at the active profile below.
81+
mProf_FlushLocked()
82+
clear := true
8383
/*
8484
* 记住这个 mbuckets -- memory profile buckets
8585
* allocs 的采样都是记录在这个全局变量内, 下面会进行详细分析
@@ -91,46 +91,46 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
9191
* runtime.bucket *runtime.mbuckets;
9292
* (gdb)
9393
*/
94-
for b := mbuckets; b != nil; b = b.allnext {
95-
mp := b.mp()
96-
if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
97-
n++
98-
}
99-
if mp.active.allocs != 0 || mp.active.frees != 0 {
100-
clear = false
101-
}
102-
}
103-
if clear {
104-
// Absolutely no data, suggesting that a garbage collection
105-
// has not yet happened. In order to allow profiling when
106-
// garbage collection is disabled from the beginning of execution,
107-
// accumulate all of the cycles, and recount buckets.
108-
n = 0
109-
for b := mbuckets; b != nil; b = b.allnext {
110-
mp := b.mp()
111-
for c := range mp.future {
112-
mp.active.add(&mp.future[c])
113-
mp.future[c] = memRecordCycle{}
114-
}
115-
if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
116-
n++
117-
}
118-
}
119-
}
120-
if n <= len(p) {
121-
ok = true
122-
idx := 0
123-
for b := mbuckets; b != nil; b = b.allnext {
124-
mp := b.mp()
125-
if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
94+
for b := mbuckets; b != nil; b = b.allnext {
95+
mp := b.mp()
96+
if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
97+
n++
98+
}
99+
if mp.active.allocs != 0 || mp.active.frees != 0 {
100+
clear = false
101+
}
102+
}
103+
if clear {
104+
// Absolutely no data, suggesting that a garbage collection
105+
// has not yet happened. In order to allow profiling when
106+
// garbage collection is disabled from the beginning of execution,
107+
// accumulate all of the cycles, and recount buckets.
108+
n = 0
109+
for b := mbuckets; b != nil; b = b.allnext {
110+
mp := b.mp()
111+
for c := range mp.future {
112+
mp.active.add(&mp.future[c])
113+
mp.future[c] = memRecordCycle{}
114+
}
115+
if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
116+
n++
117+
}
118+
}
119+
}
120+
if n <= len(p) {
121+
ok = true
122+
idx := 0
123+
for b := mbuckets; b != nil; b = b.allnext {
124+
mp := b.mp()
125+
if inuseZero || mp.active.alloc_bytes != mp.active.free_bytes {
126126
// mbuckets 数据拷贝
127-
record(&p[idx], b)
128-
idx++
129-
}
130-
}
131-
}
132-
unlock(&proflock)
133-
return
127+
record(&p[idx], b)
128+
idx++
129+
}
130+
}
131+
}
132+
unlock(&proflock)
133+
return
134134
}
135135
```
136136

@@ -143,12 +143,12 @@ func MemProfile(p []MemProfileRecord, inuseZero bool) (n int, ok bool) {
143143
```go
144144
var mbuckets *bucket // memory profile buckets
145145
type bucket struct {
146-
next *bucket
147-
allnext *bucket
148-
typ bucketType // memBucket or blockBucket (includes mutexProfile)
149-
hash uintptr
150-
size uintptr
151-
nstk uintptr
146+
next *bucket
147+
allnext *bucket
148+
typ bucketType // memBucket or blockBucket (includes mutexProfile)
149+
hash uintptr
150+
size uintptr
151+
nstk uintptr
152152
}
153153
```
154154

@@ -164,7 +164,7 @@ type bucket struct {
164164
------------------
165165
|
166166
|
167-
| create && insert new bucket into mbuckets
167+
| create_or_get && insert_or_update bucket into mbuckets
168168
|
169169
|
170170
--------------------------------------
@@ -223,10 +223,87 @@ var MemProfileRate int = 512 * 1024
223223
224224
(本文讨论的基于 1.14.3 版本, 如有差异请进行版本确认)
225225
226-
#### pprof/mallocs 总结
226+
#### pprof/allocs 总结
227227
- 开启后会对 runtime 产生额外压力, 采样时会在 `runtime malloc` 时记录额外信息以供后续分析
228228
- 可以人为选择是否开启, 以及采样频率, 通过设置 `runtime.MemProfileRate` 参数, 不同 go 版本存在差异(是否默认开启), 与用户代码内是否引用(linker)相关模块/变量有关, 默认大小为 512 KB
229229
230+
`allocs` 部分还包含了 `heap` 情况的近似计算, 放在下一节分析
231+
## heap
232+
>allocs: A sampling of all past memory allocations
233+
234+
>heap: A sampling of memory allocations of live objects. You can specify the gc GET parameter to run GC before taking the heap sample.
230235
236+
对比一下 `allocs` 与 `heap` 在官方说明上的区别, 一个是分析所有内存分配的情况, 一个是当前 `heap` 上的分配情况. `heap` 还能使用额外参数运行一次 `GC` 后再进行分析
237+
238+
看起来两者差别很大。。。不过实质上在代码层面, 两者除了一次 `GC` 可以人为调用, 以及生成的文件类型不同 (debug == 0 的时候) 之外没啥区别.
239+
240+
### heap 采样(伪)
241+
```go
242+
// p 为上文提到过的 MemProfileRecord 采样记录
243+
for _, r := range p {
244+
hideRuntime := true
245+
for tries := 0; tries < 2; tries++ {
246+
stk := r.Stack()
247+
// For heap profiles, all stack
248+
// addresses are return PCs, which is
249+
// what appendLocsForStack expects.
250+
if hideRuntime {
251+
for i, addr := range stk {
252+
if f := runtime.FuncForPC(addr); f != nil && strings.HasPrefix(f.Name(), "runtime.") {
253+
continue
254+
}
255+
// Found non-runtime. Show any runtime uses above it.
256+
stk = stk[i:]
257+
break
258+
}
259+
}
260+
locs = b.appendLocsForStack(locs[:0], stk)
261+
if len(locs) > 0 {
262+
break
263+
}
264+
hideRuntime = false // try again, and show all frames next time.
265+
}
266+
// rate 即为 runtime.MemProfileRate
267+
values[0], values[1] = scaleHeapSample(r.AllocObjects, r.AllocBytes, rate)
268+
values[2], values[3] = scaleHeapSample(r.InUseObjects(), r.InUseBytes(), rate)
269+
var blockSize int64
270+
if r.AllocObjects > 0 {
271+
blockSize = r.AllocBytes / r.AllocObjects
272+
}
273+
b.pbSample(values, locs, func() {
274+
if blockSize != 0 {
275+
b.pbLabel(tagSample_Label, "bytes", "", blockSize)
276+
}
277+
})
278+
}
279+
```
280+
```go
281+
// scaleHeapSample adjusts the data from a heap Sample to
282+
// account for its probability of appearing in the collected
283+
// data. heap profiles are a sampling of the memory allocations
284+
// requests in a program. We estimate the unsampled value by dividing
285+
// each collected sample by its probability of appearing in the
286+
// profile. heap profiles rely on a poisson process to determine
287+
// which samples to collect, based on the desired average collection
288+
// rate R. The probability of a sample of size S to appear in that
289+
// profile is 1-exp(-S/R).
290+
func scaleHeapSample(count, size, rate int64) (int64, int64) {
291+
if count == 0 || size == 0 {
292+
return 0, 0
293+
}
294+
295+
if rate <= 1 {
296+
// if rate==1 all samples were collected so no adjustment is needed.
297+
// if rate<1 treat as unknown and skip scaling.
298+
return count, size
299+
}
300+
301+
avgSize := float64(size) / float64(count)
302+
scale := 1 / (1 - math.Exp(-avgSize/float64(rate)))
303+
304+
return int64(float64(count) * scale), int64(float64(size) * scale)
305+
}
306+
```
307+
为什么要在标题里加个伪? 看上面代码片段也可以注意到, 实质上在 `pprof` 分析的时候并没有扫描所有堆上内存进行分析 (想想也不现实) , 而是通过之前采样出的数据, 进行计算 (现有对象数量, 大小, 采样率等) 来估算出 `heap` 上的情况, 当然给我们参考一般来说是足够了
231308
# 参考资料
232309
https://go-review.googlesource.com/c/go/+/299671

0 commit comments

Comments
 (0)