Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ Join our discord community via [this invite link](https://discord.gg/bxgXW8jJGh)
| <a name="input_runners_scale_up_lambda_memory_size"></a> [runners\_scale\_up\_lambda\_memory\_size](#input\_runners\_scale\_up\_lambda\_memory\_size) | Memory size limit in MB for scale-up lambda. | `number` | `512` | no |
| <a name="input_runners_scale_up_lambda_timeout"></a> [runners\_scale\_up\_lambda\_timeout](#input\_runners\_scale\_up\_lambda\_timeout) | Time out for the scale up lambda in seconds. | `number` | `30` | no |
| <a name="input_runners_ssm_housekeeper"></a> [runners\_ssm\_housekeeper](#input\_runners\_ssm\_housekeeper) | Configuration for the SSM housekeeper lambda. This lambda deletes token / JIT config from SSM.<br/><br/> `schedule_expression`: is used to configure the schedule for the lambda.<br/> `enabled`: enable or disable the lambda trigger via the EventBridge.<br/> `lambda_memory_size`: lambda memory size limit.<br/> `lambda_timeout`: timeout for the lambda in seconds.<br/> `config`: configuration for the lambda function. Token path will be read by default from the module. | <pre>object({<br/> schedule_expression = optional(string, "rate(1 day)")<br/> enabled = optional(bool, true)<br/> lambda_memory_size = optional(number, 512)<br/> lambda_timeout = optional(number, 60)<br/> config = object({<br/> tokenPath = optional(string)<br/> minimumDaysOld = optional(number, 1)<br/> dryRun = optional(bool, false)<br/> })<br/> })</pre> | <pre>{<br/> "config": {}<br/>}</pre> | no |
| <a name="input_scale_down_parameter_store_tier"></a> [scale\_down\_parameter\_store\_tier](#input\_scale\_down\_parameter\_store\_tier) | SSM Parameter Store tier to use for scale-down configuration parameters. | `string` | `"Standard"` | no |
| <a name="input_scale_down_schedule_expression"></a> [scale\_down\_schedule\_expression](#input\_scale\_down\_schedule\_expression) | Schedule expression that controls how often the scale-down check runs. | `string` | `"cron(*/5 * * * ? *)"` | no |
| <a name="input_scale_up_reserved_concurrent_executions"></a> [scale\_up\_reserved\_concurrent\_executions](#input\_scale\_up\_reserved\_concurrent\_executions) | Amount of reserved concurrent executions for the scale-up lambda function. A value of 0 disables lambda from being triggered and -1 removes any concurrency limitations. | `number` | `1` | no |
| <a name="input_ssm_paths"></a> [ssm\_paths](#input\_ssm\_paths) | The root path used in SSM to store configuration and secrets. | <pre>object({<br/> root = optional(string, "github-action-runners")<br/> app = optional(string, "app")<br/> runners = optional(string, "runners")<br/> webhook = optional(string, "webhook")<br/> use_prefix = optional(bool, true)<br/> })</pre> | `{}` | no |
Expand Down
2 changes: 1 addition & 1 deletion examples/multi-runner/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ module "runners" {
# runner_extra_labels = ["amazon"]
# runners_maximum_count = 1
# enable_ephemeral_runners = true
# scale_down_schedule_expression = "cron(* * * * ? *)"
# }
# }
# }
Expand All @@ -107,6 +106,7 @@ module "runners" {
subnet_ids = module.base.vpc.private_subnets
runners_scale_up_lambda_timeout = 60
runners_scale_down_lambda_timeout = 60
scale_down_schedule_expression = "cron(* * * * ? *)"
prefix = local.environment
tags = {
Project = "ProjectX"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ runner_config:
id_ssm_parameter_arn: ${ami_id_ssm_parameter_arn}
runners_maximum_count: 1
delay_webhook_event: 0
scale_down_schedule_expression: cron(* * * * ? *)
runner_hook_job_started: |
echo "Running pre job hook as $(whoami)"
runner_hook_job_completed: |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ runner_config:
- m5a.large
runners_maximum_count: 1
delay_webhook_event: 0
scale_down_schedule_expression: cron(* * * * ? *)
userdata_template: ./templates/user-data.sh
ami:
owners:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ runner_config:
- m5a.large
runners_maximum_count: 1
delay_webhook_event: 0
scale_down_schedule_expression: cron(* * * * ? *)
userdata_template: ./templates/user-data.sh
ami:
owners:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ runner_config:
enable_on_demand_failover_for_errors: ['InsufficientInstanceCapacity']
create_service_linked_role_spot: true
delay_webhook_event: 0
scale_down_schedule_expression: cron(* * * * ? *)
runner_metadata_options:
instance_metadata_tags: disabled
http_endpoint: enabled
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ runner_config:
- c5.large
runners_maximum_count: 1
delay_webhook_event: 5
scale_down_schedule_expression: cron(* * * * ? *)
runner_boot_time_in_minutes: 20
ami_filter:
name:
Expand Down
3 changes: 1 addition & 2 deletions lambdas/functions/control-plane/src/aws/runners.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,8 +298,7 @@ async function createInstances(
}

// If launchTime is undefined, this will return false
export function bootTimeExceeded(ec2Runner: { launchTime?: Date }): boolean {
const runnerBootTimeInMinutes = process.env.RUNNER_BOOT_TIME_IN_MINUTES;
export function bootTimeExceeded(ec2Runner: { launchTime?: Date }, runnerBootTimeInMinutes: number): boolean {
const launchTimePlusBootTime = moment(ec2Runner.launchTime).utc().add(runnerBootTimeInMinutes, 'minutes');
return launchTimePlusBootTime < moment(new Date()).utc();
}
3 changes: 2 additions & 1 deletion lambdas/functions/control-plane/src/modules.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ declare namespace NodeJS {
PARAMETER_GITHUB_APP_CLIENT_SECRET_NAME: string;
PARAMETER_GITHUB_APP_ID_NAME: string;
PARAMETER_GITHUB_APP_KEY_BASE64_NAME: string;
RUNNER_BOOT_TIME_IN_MINUTES: string;
RUNNER_OWNER: string;
SCALE_DOWN_CONFIG: string;
SCALE_DOWN_CONFIG_SSM_PATH_PREFIX: string;
SSM_TOKEN_PATH: string;
SSM_CLEANUP_CONFIG: string;
SUBNET_IDS: string;
Expand Down
7 changes: 4 additions & 3 deletions lambdas/functions/control-plane/src/pool/pool.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export async function adjust(event: PoolEvent): Promise<void> {
const instanceAllocationStrategy = process.env.INSTANCE_ALLOCATION_STRATEGY || 'lowest-price'; // same as AWS default
const runnerOwner = process.env.RUNNER_OWNER;
const amiIdSsmParameterName = process.env.AMI_ID_SSM_PARAMETER_NAME;
const runnerBootTimeInMinutes = parseInt(process.env.RUNNER_BOOT_TIME_IN_MINUTES || '5');
const tracingEnabled = yn(process.env.POWERTOOLS_TRACE_ENABLED, { default: false });
const onDemandFailoverOnError = process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS
? (JSON.parse(process.env.ENABLE_ON_DEMAND_FAILOVER_FOR_ERRORS) as [string])
Expand All @@ -63,7 +64,7 @@ export async function adjust(event: PoolEvent): Promise<void> {
statuses: ['running'],
});

const numberOfRunnersInPool = calculatePooSize(ec2runners, runnerStatusses);
const numberOfRunnersInPool = calculatePooSize(ec2runners, runnerStatusses, runnerBootTimeInMinutes);
const topUp = event.poolSize - numberOfRunnersInPool;

if (topUp > 0) {
Expand Down Expand Up @@ -115,7 +116,7 @@ async function getInstallationId(ghesApiUrl: string, org: string): Promise<numbe
).data.id;
}

function calculatePooSize(ec2runners: RunnerList[], runnerStatus: Map<string, RunnerStatus>): number {
function calculatePooSize(ec2runners: RunnerList[], runnerStatus: Map<string, RunnerStatus>, runnerBootTimeInMinutes: number): number {
// Runner should be considered idle if it is still booting, or is idle in GitHub
let numberOfRunnersInPool = 0;
for (const ec2Instance of ec2runners) {
Expand All @@ -127,7 +128,7 @@ function calculatePooSize(ec2runners: RunnerList[], runnerStatus: Map<string, Ru
logger.debug(`Runner ${ec2Instance.instanceId} is idle in GitHub and counted as part of the pool`);
} else if (runnerStatus.get(ec2Instance.instanceId) != null) {
logger.debug(`Runner ${ec2Instance.instanceId} is not idle in GitHub and NOT counted as part of the pool`);
} else if (!bootTimeExceeded(ec2Instance)) {
} else if (!bootTimeExceeded(ec2Instance, runnerBootTimeInMinutes)) {
numberOfRunnersInPool++;
logger.info(`Runner ${ec2Instance.instanceId} is still booting and counted as part of the pool`);
} else {
Expand Down
Loading
Loading