Skip to content

Commit 152e837

Browse files
committed
Allow sanity check to pass without locking the cache
We check the sanity file once, without holding the lock, and if that fails we acquire the lock and check again. This means that if some other process is performing some long running cache operation (e.g building libc) it won't block all emscripten processes from running (e.g. compile processes won't be blocked by link process).
1 parent 9a0a4bc commit 152e837

File tree

4 files changed

+56
-45
lines changed

4 files changed

+56
-45
lines changed

tests/test_other.py

+15
Original file line numberDiff line numberDiff line change
@@ -12290,3 +12290,18 @@ def test_bigint64array_polyfill(self):
1229012290

1229112291
for m, [v1, v2] in output['assertEquals']:
1229212292
self.assertEqual(v1, v2, msg=m)
12293+
12294+
@with_env_modify({'EMCC_FROZEN_CACHE': '0'})
12295+
def test_compile_with_cache_lock(self):
12296+
# Verify that, after warming the cache, running emcc does not require the cache lock.
12297+
# Previously we would acquire the lock during sanity checking (even when the check
12298+
# passed) which meant the second process here would deadlock.
12299+
self.run_process([EMCC, '-c', test_file('hello_world.c')])
12300+
with shared.Cache.lock('testing'):
12301+
self.run_process([EMCC, '-c', test_file('hello_world.c')])
12302+
12303+
@with_env_modify({'EMCC_FROZEN_CACHE': '0'})
12304+
def test_recursive_cache_lock(self):
12305+
with shared.Cache.lock('testing'):
12306+
err = self.run_process([EMBUILDER, 'build', 'libc', '--force'], stderr=PIPE, check=False).stderr
12307+
self.assertContained('AssertionError: attempt to lock the cache while a parent process is holding the lock', err)

tools/cache.py

+12-26
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,6 @@
1616

1717
# Permanent cache for system librarys and ports
1818
class Cache:
19-
# If EM_EXCLUSIVE_CACHE_ACCESS is true, this process is allowed to have direct
20-
# access to the Emscripten cache without having to obtain an interprocess lock
21-
# for it. Generally this is false, and this is used in the case that
22-
# Emscripten process recursively calls to itself when building the cache, in
23-
# which case the parent Emscripten process has already locked the cache.
24-
# Essentially the env. var EM_EXCLUSIVE_CACHE_ACCESS signals from parent to
25-
# child process that the child can reuse the lock that the parent already has
26-
# acquired.
27-
EM_EXCLUSIVE_CACHE_ACCESS = int(os.environ.get('EM_EXCLUSIVE_CACHE_ACCESS', '0'))
2819

2920
def __init__(self, dirname):
3021
# figure out the root directory for all caching
@@ -37,42 +28,37 @@ def __init__(self, dirname):
3728
self.filelock_name = Path(dirname, 'cache.lock')
3829
self.filelock = filelock.FileLock(self.filelock_name)
3930

40-
def acquire_cache_lock(self):
31+
def acquire_cache_lock(self, reason):
4132
if config.FROZEN_CACHE:
4233
# Raise an exception here rather than exit_with_error since in practice this
4334
# should never happen
4435
raise Exception('Attempt to lock the cache but FROZEN_CACHE is set')
4536

46-
if not self.EM_EXCLUSIVE_CACHE_ACCESS and self.acquired_count == 0:
37+
if self.acquired_count == 0:
4738
logger.debug(f'PID {os.getpid()} acquiring multiprocess file lock to Emscripten cache at {self.dirname}')
39+
assert 'EM_CACHE_IS_LOCKED' not in os.environ, 'attempt to lock the cache while a parent process is holding the lock'
4840
try:
4941
self.filelock.acquire(60)
5042
except filelock.Timeout:
51-
# The multiprocess cache locking can be disabled altogether by setting EM_EXCLUSIVE_CACHE_ACCESS=1 environment
52-
# variable before building. (in that case, use "embuilder.py build ALL" to prepopulate the cache)
53-
logger.warning(f'Accessing the Emscripten cache at "{self.dirname}" is taking a long time, another process should be writing to it. If there are none and you suspect this process has deadlocked, try deleting the lock file "{self.filelock_name}" and try again. If this occurs deterministically, consider filing a bug.')
43+
logger.warning(f'Accessing the Emscripten cache at "{self.dirname}" (for "{reason}") is taking a long time, another process should be writing to it. If there are none and you suspect this process has deadlocked, try deleting the lock file "{self.filelock_name}" and try again. If this occurs deterministically, consider filing a bug.')
5444
self.filelock.acquire()
5545

56-
self.prev_EM_EXCLUSIVE_CACHE_ACCESS = os.environ.get('EM_EXCLUSIVE_CACHE_ACCESS')
57-
os.environ['EM_EXCLUSIVE_CACHE_ACCESS'] = '1'
46+
os.environ['EM_CACHE_IS_LOCKED'] = '1'
5847
logger.debug('done')
5948
self.acquired_count += 1
6049

6150
def release_cache_lock(self):
6251
self.acquired_count -= 1
6352
assert self.acquired_count >= 0, "Called release more times than acquire"
64-
if not self.EM_EXCLUSIVE_CACHE_ACCESS and self.acquired_count == 0:
65-
if self.prev_EM_EXCLUSIVE_CACHE_ACCESS:
66-
os.environ['EM_EXCLUSIVE_CACHE_ACCESS'] = self.prev_EM_EXCLUSIVE_CACHE_ACCESS
67-
else:
68-
del os.environ['EM_EXCLUSIVE_CACHE_ACCESS']
53+
if self.acquired_count == 0:
54+
del os.environ['EM_CACHE_IS_LOCKED']
6955
self.filelock.release()
7056
logger.debug(f'PID {os.getpid()} released multiprocess file lock to Emscripten cache at {self.dirname}')
7157

7258
@contextlib.contextmanager
73-
def lock(self):
59+
def lock(self, reason):
7460
"""A context manager that performs actions in the given directory."""
75-
self.acquire_cache_lock()
61+
self.acquire_cache_lock(reason)
7662
try:
7763
yield
7864
finally:
@@ -82,7 +68,7 @@ def ensure(self):
8268
utils.safe_ensure_dirs(self.dirname)
8369

8470
def erase(self):
85-
with self.lock():
71+
with self.lock('erase'):
8672
if self.dirname.exists():
8773
for f in os.listdir(self.dirname):
8874
tempfiles.try_delete(Path(self.dirname, f))
@@ -129,7 +115,7 @@ def erase_lib(self, name):
129115
self.erase_file(self.get_lib_name(name))
130116

131117
def erase_file(self, shortname):
132-
with self.lock():
118+
with self.lock('erase: ' + shortname):
133119
name = Path(self.dirname, shortname)
134120
if name.exists():
135121
logger.info(f'deleting cached file: {name}')
@@ -153,7 +139,7 @@ def get(self, shortname, creator, what=None, force=False):
153139
# should never happen
154140
raise Exception(f'FROZEN_CACHE is set, but cache file is missing: "{shortname}" (in cache root path "{self.dirname}")')
155141

156-
with self.lock():
142+
with self.lock(shortname):
157143
if cachename.exists() and not force:
158144
return str(cachename)
159145
if what is None:

tools/ports/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ def fetch_project(name, url, subdir, sha512hash=None):
175175
if local_ports:
176176
logger.warning('using local ports: %s' % local_ports)
177177
local_ports = [pair.split('=', 1) for pair in local_ports.split(',')]
178-
with shared.Cache.lock():
178+
with shared.Cache.lock('local ports'):
179179
for local in local_ports:
180180
if name == local[0]:
181181
path = local[1]
@@ -244,7 +244,7 @@ def up_to_date():
244244

245245
# main logic. do this under a cache lock, since we don't want multiple jobs to
246246
# retrieve the same port at once
247-
with shared.Cache.lock():
247+
with shared.Cache.lock('unpack port'):
248248
if os.path.exists(fullpath):
249249
# Another early out in case another process build the library while we were
250250
# waiting for the lock

tools/shared.py

+27-17
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,8 @@ def check_sanity(force=False):
390390
# not re-run the tests.
391391
os.environ['EMCC_SKIP_SANITY_CHECK'] = '1'
392392

393+
# In DEBUG mode we perform the sanity checks even when
394+
# early return due to the file being up-to-date.
393395
if DEBUG:
394396
force = True
395397

@@ -405,31 +407,39 @@ def check_sanity(force=False):
405407
expected = generate_sanity()
406408

407409
sanity_file = Cache.get_path('sanity.txt')
408-
with Cache.lock():
410+
411+
def sanity_is_correct():
409412
if os.path.exists(sanity_file):
410413
sanity_data = utils.read_file(sanity_file)
411-
if sanity_data != expected:
412-
logger.debug('old sanity: %s' % sanity_data)
413-
logger.debug('new sanity: %s' % expected)
414-
logger.info('(Emscripten: config changed, clearing cache)')
415-
Cache.erase()
416-
# the check actually failed, so definitely write out the sanity file, to
417-
# avoid others later seeing failures too
418-
force = False
419-
else:
414+
if sanity_data == expected:
415+
logger.debug(f'sanity file up-to-date: {sanity_file}')
420416
if force:
421-
logger.debug(f'sanity file up-to-date but check forced: {sanity_file}')
422-
else:
423-
logger.debug(f'sanity file up-to-date: {sanity_file}')
424-
return # all is well
417+
perform_sanity_checks()
418+
return True # all is well
419+
return False
420+
421+
if sanity_is_correct():
422+
# Early return without taking the cache lock
423+
return
424+
425+
with Cache.lock('sanity'):
426+
# Check again once the cache lock as aquired
427+
if sanity_is_correct():
428+
return
429+
430+
if os.path.exists(sanity_file):
431+
sanity_data = utils.read_file(sanity_file)
432+
logger.debug('old sanity: %s' % sanity_data)
433+
logger.debug('new sanity: %s' % expected)
434+
logger.info('(Emscripten: config changed, clearing cache)')
435+
Cache.erase()
425436
else:
426437
logger.debug(f'sanity file not found: {sanity_file}')
427438

428439
perform_sanity_checks()
429440

430-
if not force:
431-
# Only create/update this file if the sanity check succeeded, i.e., we got here
432-
utils.write_file(sanity_file, expected)
441+
# Only create/update this file if the sanity check succeeded, i.e., we got here
442+
utils.write_file(sanity_file, expected)
433443

434444

435445
# Some distributions ship with multiple llvm versions so they add

0 commit comments

Comments
 (0)