1 | 1 | import asyncio
2 | 2 |
3 | 3 | import aiohttp
4 |   | -from traitlets import Instance, Integer, Float, Dict, Union, Unicode, default
  | 4 | +from traitlets import (
  | 5 | +    Instance,
  | 6 | +    Integer,
  | 7 | +    Float,
  | 8 | +    Dict,
  | 9 | +    Union,
  | 10 | +    Unicode,
  | 11 | +    default,
  | 12 | +    validate,
  | 13 | +    observe,
  | 14 | +)
5 | 15 | from traitlets.config import LoggingConfigurable, Configurable
6 | 16 |
7 | 17 | from .. import models
8 | 18 | from ..options import Options
9 | 19 | from ..traitlets import MemoryLimit, Type, Callable, Command
10 |    | -from ..utils import awaitable
   | 20 | +from ..utils import awaitable, format_bytes
11 | 21 |
12 | 22 |
13 | 23 | __all__ = ("Backend", "ClusterConfig")
@@ -320,6 +330,126 @@ class ClusterConfig(Configurable):
320 | 330 |         config=True,
321 | 331 |     )
322 | 332 |
    | 333 | +    cluster_max_memory = MemoryLimit(
    | 334 | +        None,
    | 335 | +        help="""
    | 336 | +        The maximum amount of memory (in bytes) available to this cluster.
    | 337 | +        Allows the following suffixes:
    | 338 | +
    | 339 | +        - K -> Kibibytes
    | 340 | +        - M -> Mebibytes
    | 341 | +        - G -> Gibibytes
    | 342 | +        - T -> Tebibytes
    | 343 | +
    | 344 | +        Set to ``None`` for no memory limit (default).
    | 345 | +        """,
    | 346 | +        min=0,
    | 347 | +        allow_none=True,
    | 348 | +        config=True,
    | 349 | +    )
    | 350 | +
    | 351 | +    cluster_max_cores = Float(
    | 352 | +        None,
    | 353 | +        help="""
    | 354 | +        The maximum number of cores available to this cluster.
    | 355 | +
    | 356 | +        Set to ``None`` for no cores limit (default).
    | 357 | +        """,
    | 358 | +        min=0.0,
    | 359 | +        allow_none=True,
    | 360 | +        config=True,
    | 361 | +    )
    | 362 | +
    | 363 | +    cluster_max_workers = Integer(
    | 364 | +        help="""
    | 365 | +        The maximum number of workers available to this cluster.
    | 366 | +
    | 367 | +        Note that this will be combined with ``cluster_max_cores`` and
    | 368 | +        ``cluster_max_memory`` at runtime to determine the actual maximum
    | 369 | +        number of workers available to this cluster.
    | 370 | +        """,
    | 371 | +        allow_none=True,
    | 372 | +        min=0,
    | 373 | +        config=True,
    | 374 | +    )
    | 375 | +
    | 376 | +    def _check_scheduler_memory(self, scheduler_memory, cluster_max_memory):
    | 377 | +        if cluster_max_memory is not None and scheduler_memory > cluster_max_memory:
    | 378 | +            memory = format_bytes(scheduler_memory)
    | 379 | +            limit = format_bytes(cluster_max_memory)
    | 380 | +            raise ValueError(
    | 381 | +                f"Scheduler memory request of {memory} exceeds cluster memory "
    | 382 | +                f"limit of {limit}"
    | 383 | +            )
    | 384 | +
    | 385 | +    def _check_scheduler_cores(self, scheduler_cores, cluster_max_cores):
    | 386 | +        if cluster_max_cores is not None and scheduler_cores > cluster_max_cores:
    | 387 | +            raise ValueError(
    | 388 | +                f"Scheduler cores request of {scheduler_cores} exceeds cluster "
    | 389 | +                f"cores limit of {cluster_max_cores}"
    | 390 | +            )
    | 391 | +
    | 392 | +    def _worker_limit_from_resources(self):
    | 393 | +        inf = max_workers = float("inf")
    | 394 | +        if self.cluster_max_memory is not None:
    | 395 | +            max_workers = min(
    | 396 | +                (self.cluster_max_memory - self.scheduler_memory) // self.worker_memory,
    | 397 | +                max_workers,
    | 398 | +            )
    | 399 | +        if self.cluster_max_cores is not None:
    | 400 | +            max_workers = min(
    | 401 | +                (self.cluster_max_cores - self.scheduler_cores) // self.worker_cores,
    | 402 | +                max_workers,
    | 403 | +            )
    | 404 | +
    | 405 | +        if max_workers == inf:
    | 406 | +            return None
    | 407 | +        return max(0, int(max_workers))
    | 408 | +
    | 409 | +    @validate("scheduler_memory")
    | 410 | +    def _validate_scheduler_memory(self, proposal):
    | 411 | +        self._check_scheduler_memory(proposal.value, self.cluster_max_memory)
    | 412 | +        return proposal.value
    | 413 | +
    | 414 | +    @validate("scheduler_cores")
    | 415 | +    def _validate_scheduler_cores(self, proposal):
    | 416 | +        self._check_scheduler_cores(proposal.value, self.cluster_max_cores)
    | 417 | +        return proposal.value
    | 418 | +
    | 419 | +    @validate("cluster_max_memory")
    | 420 | +    def _validate_cluster_max_memory(self, proposal):
    | 421 | +        self._check_scheduler_memory(self.scheduler_memory, proposal.value)
    | 422 | +        return proposal.value
    | 423 | +
    | 424 | +    @validate("cluster_max_cores")
    | 425 | +    def _validate_cluster_max_cores(self, proposal):
    | 426 | +        self._check_scheduler_cores(self.scheduler_cores, proposal.value)
    | 427 | +        return proposal.value
    | 428 | +
    | 429 | +    @validate("cluster_max_workers")
    | 430 | +    def _validate_cluster_max_workers(self, proposal):
    | 431 | +        lim = self._worker_limit_from_resources()
    | 432 | +        if lim is None:
    | 433 | +            return proposal.value
    | 434 | +        if proposal.value is None:
    | 435 | +            return lim
    | 436 | +        return min(proposal.value, lim)
    | 437 | +
    | 438 | +    @observe("cluster_max_workers")
    | 439 | +    def _observe_cluster_max_workers(self, change):
    | 440 | +        # This shouldn't be needed, but traitlets validators don't run
    | 441 | +        # if a value is `None` and `allow_none` is true, so we need to
    | 442 | +        # add an observer to handle the event of an *explicit* `None`
    | 443 | +        # set for `cluster_max_workers`
    | 444 | +        if change.new is None:
    | 445 | +            lim = self._worker_limit_from_resources()
    | 446 | +            if lim is not None:
    | 447 | +                self.cluster_max_workers = lim
    | 448 | +
    | 449 | +    @default("cluster_max_workers")
    | 450 | +    def _default_cluster_max_workers(self):
    | 451 | +        return self._worker_limit_from_resources()
    | 452 | +
323 | 453 |     def to_dict(self):
324 | 454 |         return {
325 | 455 |             k: getattr(self, k)
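To make the runtime combination concrete, here is a small standalone sketch of the arithmetic in `_worker_limit_from_resources`, using made-up numbers rather than values from this PR:

```python
GiB = 2**30

def worker_limit(max_memory, scheduler_memory, worker_memory,
                 max_cores, scheduler_cores, worker_cores):
    # Mirrors ClusterConfig._worker_limit_from_resources: whatever resources
    # remain after the scheduler's share are floor-divided among workers,
    # and the tighter of the memory/cores bounds wins.
    max_workers = float("inf")
    if max_memory is not None:
        max_workers = min((max_memory - scheduler_memory) // worker_memory, max_workers)
    if max_cores is not None:
        max_workers = min((max_cores - scheduler_cores) // worker_cores, max_workers)
    return None if max_workers == float("inf") else max(0, int(max_workers))

# 18 GiB left after the scheduler / 4 GiB per worker -> 4 workers;
# 9 cores left / 2 cores per worker -> 4 workers; the minimum is 4.
print(worker_limit(20 * GiB, 2 * GiB, 4 * GiB, 10.0, 1, 2))  # 4
```

This resource-derived bound is also what the `cluster_max_workers` validator clamps against: an explicit value is reduced to `min(value, limit)` whenever a limit exists.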