Skip to content

Commit 9782ac4

Browse files
committed
Allow sanity check to pass without locking the cache
We check the sanity file once, without holding the lock, and if that fails we acquire the lock and check again. This means that if some other process is performing some long running cache operation (e.g building libc) it won't block all emscripten processes from running (e.g. compile processes won't be blocked by link process).
1 parent e9e05b8 commit 9782ac4

File tree

6 files changed

+65
-54
lines changed

6 files changed

+65
-54
lines changed

.circleci/config.yml

+1-7
Original file line numberDiff line numberDiff line change
@@ -538,13 +538,7 @@ jobs:
538538
# skip other.test_stack_switching_size as we do not currently install
539539
# d8 on mac
540540
test_targets: "
541-
other
542-
sockets.test_nodejs_sockets_echo*
543-
core0.test_lua
544-
core0.test_longjmp_standalone
545-
core2.test_sse1
546-
skip:other.test_native_link_error_message
547-
skip:other.test_stack_switching_size"
541+
other.test_recursive_cache_lock"
548542

549543
workflows:
550544
build-test:

emcc.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -3274,16 +3274,19 @@ def consume_arg_file():
32743274
elif check_arg('--cache'):
32753275
config.CACHE = os.path.normpath(consume_arg())
32763276
shared.reconfigure_cache()
3277+
# Ensure child processes share the same cache (e.g. when using emcc to compiler system
3278+
# libraries)
3279+
os.environ['EM_CACHE'] = config.CACHE
32773280
elif check_flag('--clear-cache'):
32783281
logger.info('clearing cache as requested by --clear-cache: `%s`', shared.Cache.dirname)
32793282
shared.Cache.erase()
3280-
shared.check_sanity(force=True) # this is a good time for a sanity check
3283+
shared.perform_sanity_checks() # this is a good time for a sanity check
32813284
should_exit = True
32823285
elif check_flag('--clear-ports'):
32833286
logger.info('clearing ports and cache as requested by --clear-ports')
32843287
ports.clear()
32853288
shared.Cache.erase()
3286-
shared.check_sanity(force=True) # this is a good time for a sanity check
3289+
shared.perform_sanity_checks() # this is a good time for a sanity check
32873290
should_exit = True
32883291
elif check_flag('--check'):
32893292
print(version_string(), file=sys.stderr)

test/test_other.py

+17
Original file line numberDiff line numberDiff line change
@@ -12406,3 +12406,20 @@ def test_warn_once(self):
1240612406
}
1240712407
''')
1240812408
self.do_runf('main.c', 'warning: foo\ndone\n')
12409+
12410+
@with_env_modify({'EM_FROZEN_CACHE': ''})
12411+
def test_compile_with_cache_lock(self):
12412+
# Verify that, after warming the cache, running emcc does not require the cache lock.
12413+
# Previously we would acquire the lock during sanity checking (even when the check
12414+
# passed) which meant the second process here would deadlock.
12415+
self.run_process([EMCC, '-c', test_file('hello_world.c')])
12416+
config.FROZEN_CACHE = False
12417+
with shared.Cache.lock('testing'):
12418+
self.run_process([EMCC, '-c', test_file('hello_world.c')])
12419+
12420+
@with_env_modify({'EM_FROZEN_CACHE': ''})
12421+
def test_recursive_cache_lock(self):
12422+
config.FROZEN_CACHE = False
12423+
with shared.Cache.lock('testing'):
12424+
err = self.run_process([EMBUILDER, 'build', 'libc', '--force'], stderr=PIPE, check=False).stderr
12425+
self.assertContained('AssertionError: attempt to lock the cache while a parent process is holding the lock', err)

tools/cache.py

+13-26
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,6 @@
1616

1717
# Permanent cache for system librarys and ports
1818
class Cache:
19-
# If EM_EXCLUSIVE_CACHE_ACCESS is true, this process is allowed to have direct
20-
# access to the Emscripten cache without having to obtain an interprocess lock
21-
# for it. Generally this is false, and this is used in the case that
22-
# Emscripten process recursively calls to itself when building the cache, in
23-
# which case the parent Emscripten process has already locked the cache.
24-
# Essentially the env. var EM_EXCLUSIVE_CACHE_ACCESS signals from parent to
25-
# child process that the child can reuse the lock that the parent already has
26-
# acquired.
27-
EM_EXCLUSIVE_CACHE_ACCESS = int(os.environ.get('EM_EXCLUSIVE_CACHE_ACCESS', '0'))
2819

2920
def __init__(self, dirname):
3021
# figure out the root directory for all caching
@@ -37,42 +28,38 @@ def __init__(self, dirname):
3728
self.filelock_name = Path(dirname, 'cache.lock')
3829
self.filelock = filelock.FileLock(self.filelock_name)
3930

40-
def acquire_cache_lock(self):
31+
def acquire_cache_lock(self, reason):
4132
if config.FROZEN_CACHE:
4233
# Raise an exception here rather than exit_with_error since in practice this
4334
# should never happen
4435
raise Exception('Attempt to lock the cache but FROZEN_CACHE is set')
4536

46-
if not self.EM_EXCLUSIVE_CACHE_ACCESS and self.acquired_count == 0:
37+
if self.acquired_count == 0:
4738
logger.debug(f'PID {os.getpid()} acquiring multiprocess file lock to Emscripten cache at {self.dirname}')
39+
assert 'EM_CACHE_IS_LOCKED' not in os.environ, f'attempt to lock the cache while a parent process is holding the lock ({reason})'
4840
try:
4941
self.filelock.acquire(60)
5042
except filelock.Timeout:
51-
# The multiprocess cache locking can be disabled altogether by setting EM_EXCLUSIVE_CACHE_ACCESS=1 environment
52-
# variable before building. (in that case, use "embuilder.py build ALL" to prepopulate the cache)
53-
logger.warning(f'Accessing the Emscripten cache at "{self.dirname}" is taking a long time, another process should be writing to it. If there are none and you suspect this process has deadlocked, try deleting the lock file "{self.filelock_name}" and try again. If this occurs deterministically, consider filing a bug.')
43+
logger.warning(f'Accessing the Emscripten cache at "{self.dirname}" (for "{reason}") is taking a long time, another process should be writing to it. If there are none and you suspect this process has deadlocked, try deleting the lock file "{self.filelock_name}" and try again. If this occurs deterministically, consider filing a bug.')
5444
self.filelock.acquire()
5545

56-
self.prev_EM_EXCLUSIVE_CACHE_ACCESS = os.environ.get('EM_EXCLUSIVE_CACHE_ACCESS')
57-
os.environ['EM_EXCLUSIVE_CACHE_ACCESS'] = '1'
46+
os.environ['EM_CACHE_IS_LOCKED'] = '1'
5847
logger.debug('done')
5948
self.acquired_count += 1
6049

6150
def release_cache_lock(self):
6251
self.acquired_count -= 1
6352
assert self.acquired_count >= 0, "Called release more times than acquire"
64-
if not self.EM_EXCLUSIVE_CACHE_ACCESS and self.acquired_count == 0:
65-
if self.prev_EM_EXCLUSIVE_CACHE_ACCESS:
66-
os.environ['EM_EXCLUSIVE_CACHE_ACCESS'] = self.prev_EM_EXCLUSIVE_CACHE_ACCESS
67-
else:
68-
del os.environ['EM_EXCLUSIVE_CACHE_ACCESS']
53+
if self.acquired_count == 0:
54+
assert os.environ['EM_CACHE_IS_LOCKED'] == '1'
55+
del os.environ['EM_CACHE_IS_LOCKED']
6956
self.filelock.release()
7057
logger.debug(f'PID {os.getpid()} released multiprocess file lock to Emscripten cache at {self.dirname}')
7158

7259
@contextlib.contextmanager
73-
def lock(self):
60+
def lock(self, reason):
7461
"""A context manager that performs actions in the given directory."""
75-
self.acquire_cache_lock()
62+
self.acquire_cache_lock(reason)
7663
try:
7764
yield
7865
finally:
@@ -82,7 +69,7 @@ def ensure(self):
8269
utils.safe_ensure_dirs(self.dirname)
8370

8471
def erase(self):
85-
with self.lock():
72+
with self.lock('erase'):
8673
# Delete everything except the lockfile itself
8774
utils.delete_contents(self.dirname, exclude=[os.path.basename(self.filelock_name)])
8875

@@ -128,7 +115,7 @@ def erase_lib(self, name):
128115
self.erase_file(self.get_lib_name(name))
129116

130117
def erase_file(self, shortname):
131-
with self.lock():
118+
with self.lock('erase: ' + shortname):
132119
name = Path(self.dirname, shortname)
133120
if name.exists():
134121
logger.info(f'deleting cached file: {name}')
@@ -152,7 +139,7 @@ def get(self, shortname, creator, what=None, force=False):
152139
# should never happen
153140
raise Exception(f'FROZEN_CACHE is set, but cache file is missing: "{shortname}" (in cache root path "{self.dirname}")')
154141

155-
with self.lock():
142+
with self.lock(shortname):
156143
if cachename.exists() and not force:
157144
return str(cachename)
158145
if what is None:

tools/ports/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def fetch_project(name, url, subdir, sha512hash=None):
174174
if local_ports:
175175
logger.warning('using local ports: %s' % local_ports)
176176
local_ports = [pair.split('=', 1) for pair in local_ports.split(',')]
177-
with shared.Cache.lock():
177+
with shared.Cache.lock('local ports'):
178178
for local in local_ports:
179179
if name == local[0]:
180180
path = local[1]
@@ -243,7 +243,7 @@ def up_to_date():
243243

244244
# main logic. do this under a cache lock, since we don't want multiple jobs to
245245
# retrieve the same port at once
246-
with shared.Cache.lock():
246+
with shared.Cache.lock('unpack port'):
247247
if os.path.exists(fullpath):
248248
# Another early out in case another process build the library while we were
249249
# waiting for the lock

tools/shared.py

+27-17
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,8 @@ def check_sanity(force=False):
389389
# not re-run the tests.
390390
os.environ['EMCC_SKIP_SANITY_CHECK'] = '1'
391391

392+
# In DEBUG mode we perform the sanity checks even when
393+
# early return due to the file being up-to-date.
392394
if DEBUG:
393395
force = True
394396

@@ -404,31 +406,39 @@ def check_sanity(force=False):
404406
expected = generate_sanity()
405407

406408
sanity_file = Cache.get_path('sanity.txt')
407-
with Cache.lock():
409+
410+
def sanity_is_correct():
408411
if os.path.exists(sanity_file):
409412
sanity_data = utils.read_file(sanity_file)
410-
if sanity_data != expected:
411-
logger.debug('old sanity: %s' % sanity_data)
412-
logger.debug('new sanity: %s' % expected)
413-
logger.info('(Emscripten: config changed, clearing cache)')
414-
Cache.erase()
415-
# the check actually failed, so definitely write out the sanity file, to
416-
# avoid others later seeing failures too
417-
force = False
418-
else:
413+
if sanity_data == expected:
414+
logger.debug(f'sanity file up-to-date: {sanity_file}')
419415
if force:
420-
logger.debug(f'sanity file up-to-date but check forced: {sanity_file}')
421-
else:
422-
logger.debug(f'sanity file up-to-date: {sanity_file}')
423-
return # all is well
416+
perform_sanity_checks()
417+
return True # all is well
418+
return False
419+
420+
if sanity_is_correct():
421+
# Early return without taking the cache lock
422+
return
423+
424+
with Cache.lock('sanity'):
425+
# Check again once the cache lock as aquired
426+
if sanity_is_correct():
427+
return
428+
429+
if os.path.exists(sanity_file):
430+
sanity_data = utils.read_file(sanity_file)
431+
logger.debug('old sanity: %s' % sanity_data)
432+
logger.debug('new sanity: %s' % expected)
433+
logger.info('(Emscripten: config changed, clearing cache)')
434+
Cache.erase()
424435
else:
425436
logger.debug(f'sanity file not found: {sanity_file}')
426437

427438
perform_sanity_checks()
428439

429-
if not force:
430-
# Only create/update this file if the sanity check succeeded, i.e., we got here
431-
utils.write_file(sanity_file, expected)
440+
# Only create/update this file if the sanity check succeeded, i.e., we got here
441+
utils.write_file(sanity_file, expected)
432442

433443

434444
# Some distributions ship with multiple llvm versions so they add

0 commit comments

Comments
 (0)