Skip to content

Commit bcd3410

Browse files
committed Feb 7, 2019
Add script to categorize and list functions and data in all sections of an object
1 parent 84be3f0 commit bcd3410

File tree

1 file changed

+357
-0
lines changed

1 file changed

+357
-0
lines changed
 

‎utils/analyze_code_size.py

+357
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,357 @@
1+
#!/usr/bin/env python
2+
3+
import argparse
4+
import re
5+
import subprocess
6+
import sys
7+
8+
9+
def main(arguments):
    """Parse the command line and run the selected report.

    `arguments` is the argument list without the program name.  The binary
    is parsed once with parse_segments(); exactly one report is then run,
    chosen in this order of precedence: -categorize, -uncategorized,
    -list-category, and finally a plain listing of every symbol.

    Returns None.
    """
    parser = argparse.ArgumentParser(
        description='Analyze the code size in a binary')
    parser.add_argument(
        '-arch', type=str, default='arm64', help='the arch to look at')
    parser.add_argument(
        '-categorize', dest='build_categories', action='store_true',
        default=False, help='categorize symbols')
    parser.add_argument(
        '-list-category', type=str, help='list symbols in category')
    parser.add_argument(
        '-uncategorized', dest='show_uncategorized', action='store_true',
        default=False, help='show all uncategorized symbols')
    parser.add_argument('bin', help='the binary')

    options = parser.parse_args(arguments)
    segments = parse_segments(options.bin, options.arch)

    # The flags are not mutually exclusive; the first one set wins.
    if options.build_categories:
        categorize(segments)
        return
    if options.show_uncategorized:
        uncategorized(segments)
        return
    if options.list_category:
        list_category(segments, options.list_category)
        return
    show_all(segments)
37+
38+
39+
class Symbol(object):
    """A single symbol entry: demangled name, mangled name and size."""

    def __init__(self, name, mangled_name, size):
        # `size` is coerced with int(), so numeric strings are accepted.
        self.name, self.mangled_name = name, mangled_name
        self.count = 1
        self.size = int(size)
45+
46+
47+
def get_symbol_size(sym):
    """Return the `size` attribute of `sym` (usable as a sort key)."""
    size = sym.size
    return size
49+
50+
51+
class Segment(object):
    """A named segment holding an initially empty list of sections."""

    def __init__(self, name):
        self.sections = []
        self.name = name
55+
56+
57+
class Section(object):
    """A named section with a size and an initially empty symbol list."""

    def __init__(self, name, size):
        self.symbols = []
        self.name, self.size = name, size
62+
63+
64+
class Category(object):
    """A named bucket of symbols together with their accumulated size."""

    def __init__(self, name):
        self.symbols = []
        self.size = 0
        self.name = name

    def add(self, symbol):
        """Append `symbol` to the bucket and add its size to the total."""
        self.symbols.append(symbol)
        self.size = self.size + symbol.size
73+
74+
75+
class Categories(object):
    """Sorts symbols into named categories and reports per-category sizes.

    A symbol is classified first by its demangled name and, failing that,
    by its mangled name.  Both pattern tables are ordered: the first
    matching entry wins, so more specific patterns must precede more
    general ones (e.g. 'Type metadata accessor' before 'Type metadata').
    Symbols matching nothing are collected under 'Unknown'.
    """

    def __init__(self):
        # [category name, compiled pattern] pairs applied to the demangled
        # name.  Patterns are applied with match(), i.e. anchored at the
        # start of the name; entries that should match anywhere start
        # with '.*'.
        self.category_matching = [
            ['Objective-C function', re.compile(r'.*[+-]\[')],
            ['C++', re.compile(r'_+swift')],
            ['Merged function', re.compile(r'merged ')],
            ['Key path', re.compile(r'key path')],
            ['Function signature specialization',
             re.compile(r'function signature specialization')],
            ['Generic specialization', re.compile(r'generic specialization')],
            ['Reabstraction thunk helper',
             re.compile(r'reabstraction thunk helper')],
            ['vtable thunk', re.compile(r'vtable thunk for')],
            ['@objc thunk', re.compile(r'@objc')],
            ['@nonobjc thunk', re.compile(r'@nonobjc')],
            ['Value witness', re.compile(r'.*value witness for')],
            ['Block copy helper', re.compile(r'_block_copy_helper')],
            ['Block destroy helper', re.compile(r'_block_destroy_helper')],
            ['Block literal global', re.compile(r'___block_literal_global')],
            ['Destroy helper block', re.compile(r'___destroy_helper_block')],
            ['Copy helper block', re.compile(r'___copy_helper_block')],
            ['Object destroy', re.compile(r'_objectdestroy')],
            ['Partial apply forwarder',
             re.compile(r'partial apply forwarder')],
            ['Closure function', re.compile(r'closure #')],
            ['ObjC metadata update function',
             re.compile(r'ObjC metadata update function for')],
            ['Variable initialization expression',
             re.compile(r'variable initialization expression of')],
            ['Global initialization', re.compile(r'_globalinit_')],
            ['Unnamed', re.compile(r'___unnamed_')],
            ['Dyld stubs', re.compile(r'DYLD-STUB\$')],
            ['Witness table accessor',
             re.compile(r'.*witness table accessor for')],
            ['Protocol witness', re.compile(r'protocol witness for')],
            ['Outlined variable', re.compile(r'outlined variable #')],
            ['Outlined value function (copy,destroy,release...)',
             re.compile(r'outlined')],
            ['_symbolic', re.compile(r'_symbolic')],
            ['_associated conformance',
             re.compile(r'_associated conformance')],
            ['Direct field offset', re.compile(r'direct field offset for')],
            ['Value witness tables', re.compile(r'.*value witness table')],
            ['Protocol witness table',
             re.compile(r'.*protocol witness table for')],
            ['Protocol conformance descriptor',
             re.compile(r'protocol conformance descriptor for')],
            ['Lazy protocol witness table cache var',
             re.compile(
                 r'lazy protocol witness table cache variable for type')],
            ['Nominal type descriptor',
             re.compile(r'nominal type descriptor for')],
            ['ObjC class', re.compile(r'_OBJC_CLASS_')],
            ['ObjC metaclass', re.compile(r'_OBJC_METACLASS')],
            ['ObjC ivar', re.compile(r'_OBJC_IVAR')],
            ['Metaclass', re.compile(r'metaclass for')],
            ['Block descriptor', re.compile(r'_+block_descriptor')],
            ['Extension descriptor', re.compile(r'extension descriptor')],
            ['Module descriptor', re.compile(r'module descriptor')],
            ['Associated type descriptor',
             re.compile(r'associated type descriptor for')],
            ['Associated conformance descriptor',
             re.compile(r'associated conformance descriptor for')],
            ['Protocol descriptor', re.compile(r'protocol descriptor for')],
            ['Base conformance descriptor',
             re.compile(r'base conformance descriptor for')],
            ['Protocol requirements base descriptor',
             re.compile(r'protocol requirements base descriptor for')],
            ['Property descriptor', re.compile(r'property descriptor for')],
            ['Method descriptor', re.compile(r'method descriptor for')],
            ['Anonymous descriptor', re.compile(r'anonymous descriptor')],
            ['Type metadata accessor',
             re.compile(r'.*type metadata accessor')],
            ['Type metadata', re.compile(r'.*type metadata')],
            ['Reflection metadata descriptor',
             re.compile(r'reflection metadata .* descriptor')],
        ]

        # [category name, compiled pattern] pairs applied to the mangled
        # name.  All of them require the '_$s' prefix and distinguish
        # entities by the trailing characters; 'Swift unknown' is the
        # catch-all for any other '_$s' symbol and must stay last.
        self.category_mangled_matching = [
            ['Swift variable storage', re.compile(r'^_\$s.*[v][p][Z]?$')],
            ['Swift constructor', re.compile(r'^_\$s.*[f][cC]$')],
            ['Swift initializer', re.compile(r'^_\$s.*[f][ie]$')],
            ['Swift destructor/destroyer', re.compile(r'^_\$s.*[f][dDE]$')],
            ['Swift getter', re.compile(r'^_\$s.*[iv][gG]$')],
            ['Swift setter', re.compile(r'^_\$s.*[iv][swW]$')],
            ['Swift materializeForSet', re.compile(r'^_\$s.*[iv][m]$')],
            ['Swift modify', re.compile(r'^_\$s.*[iv][M]$')],
            ['Swift read', re.compile(r'^_\$s.*[iv][r]$')],
            ['Swift addressor', re.compile(r'^_\$s.*[iv][al][uOop]$')],
            ['Swift function', re.compile(r'^_\$s.*F$')],
            ['Swift unknown', re.compile(r'^_\$s.*')],
        ]
        # Maps category name -> Category; filled lazily by add_symbol().
        self.categories = {}

    def categorize_by_name(self, symbol):
        """Return the first category whose pattern matches `symbol.name`.

        Returns None when no demangled-name pattern matches.
        """
        for category_name, pattern in self.category_matching:
            if pattern.match(symbol.name):
                return category_name
        return None

    def categorize_by_mangled_name(self, symbol):
        """Return the first category matching `symbol.mangled_name`.

        Returns None when no mangled-name pattern matches.
        """
        for category_name, pattern in self.category_mangled_matching:
            if pattern.match(symbol.mangled_name):
                return category_name
        return None

    def add_symbol(self, category_name, symbol):
        """Add `symbol` to the named category, creating it on first use."""
        category = self.categories.get(category_name)
        if category is None:
            category = Category(category_name)
            self.categories[category_name] = category
        category.add(symbol)

    def add(self, symbol):
        """Classify `symbol` and file it under the resulting category.

        The demangled name is tried first, then the mangled name; a symbol
        matching neither goes to 'Unknown'.
        """
        category_name = (self.categorize_by_name(symbol) or
                         self.categorize_by_mangled_name(symbol) or
                         'Unknown')
        self.add_symbol(category_name, symbol)

    def categorize(self, symbols):
        """Classify every symbol in `symbols`."""
        for sym in symbols:
            self.add(sym)

    def print_summary(self, section_size):
        """Print one line per non-empty category, then a TOTAL line.

        Categories are listed in pattern-table order with 'Unknown' last.
        Each percentage is relative to `section_size`; when `section_size`
        is 0 the percentage is reported as 0.00 instead of dividing by
        zero.  The TOTAL line always shows 100%.
        """
        names = [c[0] for c in self.category_matching]
        names.extend(c[0] for c in self.category_mangled_matching)
        names.append('Unknown')

        total_size = 0
        for name in names:
            category = self.categories.get(name)
            size = category.size if category else 0
            total_size += size
            if size > 0:
                # Guard the division: an empty section has no meaningful
                # share to report.
                if section_size:
                    percent = (float(size) * 100) / section_size
                else:
                    percent = 0.0
                print("%60s: %8d (%6.2f%%)" % (name, size, percent))
        print("%60s: %8d (%6.2f%%)" % ('TOTAL', total_size, float(100)))

    def uncatorizedSymbols(self):
        """Return the symbols filed under 'Unknown', or None if none were.

        The misspelled name is kept as-is because callers use it.
        """
        category = self.categories.get('Unknown')
        if category:
            return category.symbols
        return None

    def print_uncategorizedSymbols(self):
        """Print 'mangled demangled size' for each uncategorized symbol."""
        syms = self.uncatorizedSymbols()
        if not syms:
            return
        for symbol in syms:
            print(symbol.mangled_name + " " + symbol.name + " " +
                  str(symbol.size))

    def print_category(self, category):
        """Print the symbols of the named category, smallest first.

        Prints nothing when the category does not exist or is empty.
        """
        category = self.categories.get(category)
        if not category or not category.symbols:
            return
        for sym in sorted(category.symbols, key=lambda s: s.size):
            print('%8d %s %s' % (sym.size, sym.name, sym.mangled_name))

    def has_category(self, category):
        """Return True if the named category holds at least one symbol."""
        category = self.categories.get(category)
        return bool(category and category.symbols)
248+
249+
250+
def parse_segments(path, arch):
    """Run `symbols` on a binary and parse the listing into segments.

    The `symbols` tool is invoked on `path` for architecture `arch` with
    demangling disabled, and its output is additionally piped through
    `xcrun swift-demangle`.  The two listings are walked line by line in
    parallel so that every symbol gets both its demangled and its mangled
    name.

    Returns a list of Segment objects in the order encountered; each
    holds its Section objects, which hold their Symbol objects.  A symbol
    whose demangled name has already been seen is not appended again; its
    size is added to the first occurrence instead.

    Raises subprocess.CalledProcessError if `symbols` exits non-zero,
    IndexError if the demangled listing has more lines than the mangled
    one, and AssertionError if a demangled symbol line has no matching
    mangled symbol line.
    """
    # universal_newlines=True makes both tools exchange text rather than
    # bytes, so the str patterns below also work on Python 3.
    mangled = subprocess.check_output(
        ['symbols', '-noSources', '-noDemangling', '-arch', arch, path],
        universal_newlines=True)
    demangle = subprocess.Popen(
        ['xcrun', 'swift-demangle'], stdin=subprocess.PIPE,
        stdout=subprocess.PIPE, universal_newlines=True)
    demangled = demangle.communicate(mangled)[0]

    # Demangled name -> first Symbol seen with that name.
    symbols = {}
    segments = []

    # NOTE(review): segment_regex and section_regex are textually
    # identical as written here, so any line accepted by one is accepted
    # by the other and the section branch below can never be reached.
    # Presumably they are meant to differ in leading indentation --
    # confirm against real `symbols` output before relying on this.
    segment_regex = re.compile(
        r"^ 0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
        r"(?P<name>.+?) (?P<name2>.+?)$")
    section_regex = re.compile(
        r"^ 0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
        r"(?P<name>.+?) (?P<name2>.+?)$")
    symbol_regex = re.compile(
        r"^ 0x[0-9a-f]+ \(\s*0x(?P<size>[0-9a-f]+)\) "
        r"(?P<name>.+?) \[[^\]]+\] $")

    mangled_lines = mangled.splitlines()

    for line_number, line in enumerate(demangled.splitlines()):
        # Lines are paired purely by position in the two listings.
        mangled_line = mangled_lines[line_number]

        # Match a segment entry.
        segment_match = segment_regex.match(line)
        if segment_match:
            segments.append(Segment(segment_match.group('name')))
            continue

        # Match a section entry; it belongs to the most recent segment.
        section_match = section_regex.match(line)
        if section_match:
            new_section = Section(section_match.group('name2'),
                                  int(section_match.group('size'), 16))
            segments[-1].sections.append(new_section)
            continue

        # Match a symbol entry; anything else is ignored.
        symbol_match = symbol_regex.match(line)
        if not symbol_match:
            continue
        mangled_symbol_match = symbol_regex.match(mangled_line)
        if not mangled_symbol_match:
            print('mangled and demangled mismatch')
            print(mangled_line)
            print(line)
            # Raised explicitly: a bare `assert False` disappears under
            # `python -O` and would be followed by a confusing
            # AttributeError on None.
            raise AssertionError('mangled and demangled mismatch')

        symbol = Symbol(symbol_match.group('name'),
                        mangled_symbol_match.group('name'),
                        int(symbol_match.group('size'), 16))
        existing = symbols.get(symbol.name)
        if existing:
            # Same demangled name seen before: fold the size into it.
            existing.size += symbol.size
        else:
            symbols[symbol.name] = symbol
            segments[-1].sections[-1].symbols.append(symbol)

    return segments
313+
314+
315+
def show_all(segments):
    """Print 'size name mangled_name' for every symbol in every section."""
    for segment in segments:
        for section in segment.sections:
            for entry in section.symbols:
                fields = (str(entry.size), entry.name, entry.mangled_name)
                print(' '.join(fields))
321+
322+
323+
def categorize(segments):
    """Print a per-category size summary for each section.

    Every section gets a 'Section <segment>;<section>: <size>' heading,
    its own freshly built Categories summary, and a trailing blank line.
    """
    for segment in segments:
        for section in segment.sections:
            heading = segment.name + ';' + section.name
            print('Section %52s: %8d' % (heading, section.size))
            breakdown = Categories()
            breakdown.categorize(section.symbols)
            breakdown.print_summary(section.size)
            print('')
333+
334+
335+
def uncategorized(segments):
    """Print the symbols of each section that fall into no category."""
    for segment in segments:
        for section in segment.sections:
            breakdown = Categories()
            breakdown.categorize(section.symbols)
            breakdown.print_uncategorizedSymbols()
342+
343+
344+
def list_category(segments, category):
    """Print the symbols belonging to `category`, section by section.

    Sections with no symbol in that category are skipped entirely; the
    others get a 'Section <segment>;<section>: <size>' heading followed
    by the category's symbols.
    """
    for segment in segments:
        for section in segment.sections:
            grouped = Categories()
            grouped.categorize(section.symbols)
            if not grouped.has_category(category):
                continue
            heading = segment.name + ';' + section.name
            print('Section %22s: %8d' % (heading, section.size))
            grouped.print_category(category)
354+
355+
356+
if __name__ == '__main__':
    # Forward the command-line arguments (minus the program name) and use
    # main()'s return value as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)

0 commit comments

Comments
 (0)