Skip to content

Commit f93da5c

Browse files
committed
[docs] Break up documentation into sections
1 parent 308f8ad commit f93da5c

File tree

13 files changed

+319
-29
lines changed

13 files changed

+319
-29
lines changed

docs/content/docs/operations/configuration.md

+21-9
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ To learn more about metrics and logging configuration please refer to the dedica
5252

5353
## Dynamic Operator Configuration
5454

55-
The Kubernetes operator supports dynamic config changes through the operator ConfigMaps. Dynamic operator configuration is enabled by default, and can be disabled by setting `kubernetes.operator.dynamic.config.enabled` to false. Time interval for checking dynamic config changes is specified by `kubernetes.operator.dynamic.config.check.interval` of which default value is 5 minutes.
55+
The Kubernetes operator supports dynamic config changes through the operator ConfigMaps. Dynamic operator configuration is enabled by default, and can be disabled by setting `kubernetes.operator.dynamic.config.enabled` to false. The time interval for checking dynamic config changes is specified by `kubernetes.operator.dynamic.config.check.interval`, which defaults to 5 minutes.
5656

5757
To verify that dynamic operator configuration updates are enabled, check that the `deploy/flink-kubernetes-operator` log contains:
5858

@@ -70,14 +70,26 @@ Verify whether the config value of `kubernetes.operator.reconcile.interval` is u
7070

7171
## Operator Configuration Reference
7272

73-
{{< generated/kubernetes_operator_config_configuration >}}
73+
### System Configuration
7474

75-
## Job Specific Configuration Reference
75+
General operator system configuration. Cannot be overridden on a per-resource basis.
7676

77-
Job specific configuration can be configured under `spec.flinkConfiguration` and it will override flink configurations defined in `flink-conf.yaml`.
77+
{{< generated/system_section >}}
7878

79-
- For application clusters, `spec.flinkConfiguration` will be located in `FlinkDeployment` CustomResource.
80-
- For session clusters, configuring `spec.flinkConfiguration` in parent `FlinkDeployment` will be applied to all session jobs within the session cluster.
81-
- You can configure some additional job specific supplemental configuration through `spec.flinkConfiguration` in `FlinkSessionJob` CustomResource.
82-
Those session job level configurations will override the parent session cluster's Flink configuration. Please note only the following configurations are considered to be valid configurations.
83-
- `kubernetes.operator.user.artifacts.http.header`
79+
### Resource/User Configuration
80+
81+
These options can be configured on both an operator and a per-resource level. When set under `spec.flinkConfiguration` for the Flink resources, they override the default values provided in the operator default configuration (`flink-conf.yaml`).
82+
83+
{{< generated/dynamic_section >}}
84+
85+
### System Metrics Configuration
86+
87+
Operator system metrics configuration. Cannot be overridden on a per-resource basis.
88+
89+
{{< generated/kubernetes_operator_metric_configuration >}}
90+
91+
### Advanced System Configuration
92+
93+
Advanced operator system configuration. Cannot be overridden on a per-resource basis.
94+
95+
{{< generated/system_advanced_section >}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
<table class="configuration table table-bordered">
2+
<thead>
3+
<tr>
4+
<th class="text-left" style="width: 20%">Key</th>
5+
<th class="text-left" style="width: 15%">Default</th>
6+
<th class="text-left" style="width: 10%">Type</th>
7+
<th class="text-left" style="width: 55%">Description</th>
8+
</tr>
9+
</thead>
10+
<tbody>
11+
<tr>
12+
<td><h5>kubernetes.operator.deployment.readiness.timeout</h5></td>
13+
<td style="word-wrap: break-word;">1 min</td>
14+
<td>Duration</td>
15+
<td>The timeout for deployments to become ready/stable before being rolled back if rollback is enabled.</td>
16+
</tr>
17+
<tr>
18+
<td><h5>kubernetes.operator.deployment.rollback.enabled</h5></td>
19+
<td style="word-wrap: break-word;">false</td>
20+
<td>Boolean</td>
21+
<td>Whether to enable rolling back failed deployment upgrades.</td>
22+
</tr>
23+
<tr>
24+
<td><h5>kubernetes.operator.jm-deployment-recovery.enabled</h5></td>
25+
<td style="word-wrap: break-word;">true</td>
26+
<td>Boolean</td>
27+
<td>Whether to enable recovery of missing/deleted jobmanager deployments.</td>
28+
</tr>
29+
<tr>
30+
<td><h5>kubernetes.operator.job.upgrade.ignore-pending-savepoint</h5></td>
31+
<td style="word-wrap: break-word;">false</td>
32+
<td>Boolean</td>
33+
<td>Whether to ignore pending savepoint during job upgrade.</td>
34+
</tr>
35+
<tr>
36+
<td><h5>kubernetes.operator.job.upgrade.last-state-fallback.enabled</h5></td>
37+
<td style="word-wrap: break-word;">true</td>
38+
<td>Boolean</td>
39+
<td>Enables last-state fallback for savepoint upgrade mode. When the job is not running, and thus a savepoint cannot be triggered, but HA metadata is available for last-state restore, the operator can initiate the upgrade process when the flag is enabled.</td>
40+
</tr>
41+
<tr>
42+
<td><h5>kubernetes.operator.periodic.savepoint.interval</h5></td>
43+
<td style="word-wrap: break-word;">0 ms</td>
44+
<td>Duration</td>
45+
<td>Interval at which periodic savepoints will be triggered. The triggering schedule is not guaranteed, savepoints will be triggered as part of the regular reconcile loop.</td>
46+
</tr>
47+
<tr>
48+
<td><h5>kubernetes.operator.savepoint.history.max.age</h5></td>
49+
<td style="word-wrap: break-word;">86400000 ms</td>
50+
<td>Duration</td>
51+
<td>Maximum age for savepoint history entries to retain. Due to lazy clean-up, the most recent savepoint may live longer than the max age.</td>
52+
</tr>
53+
<tr>
54+
<td><h5>kubernetes.operator.savepoint.history.max.count</h5></td>
55+
<td style="word-wrap: break-word;">10</td>
56+
<td>Integer</td>
57+
<td>Maximum number of savepoint history entries to retain.</td>
58+
</tr>
59+
<tr>
60+
<td><h5>kubernetes.operator.savepoint.trigger.grace-period</h5></td>
61+
<td style="word-wrap: break-word;">1 min</td>
62+
<td>Duration</td>
63+
<td>The interval before a savepoint trigger attempt is marked as unsuccessful.</td>
64+
</tr>
65+
<tr>
66+
<td><h5>kubernetes.operator.user.artifacts.http.header</h5></td>
67+
<td style="word-wrap: break-word;">(none)</td>
68+
<td>Map</td>
69+
<td>Custom HTTP header for HttpArtifactFetcher. The header will be applied when getting the session job artifacts. Expected format: headerKey1:headerValue1,headerKey2:headerValue2.</td>
70+
</tr>
71+
</tbody>
72+
</table>

docs/layouts/shortcodes/generated/kubernetes_operator_config_configuration.html

-6
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,6 @@
8080
<td>Boolean</td>
8181
<td>Enables last-state fallback for savepoint upgrade mode. When the job is not running, and thus a savepoint cannot be triggered, but HA metadata is available for last-state restore, the operator can initiate the upgrade process when the flag is enabled.</td>
8282
</tr>
83-
<tr>
84-
<td><h5>kubernetes.operator.josdk.metrics.enabled</h5></td>
85-
<td style="word-wrap: break-word;">true</td>
86-
<td>Boolean</td>
87-
<td>Enable forwarding of Java Operator SDK metrics to the Flink metric registry.</td>
88-
</tr>
8983
<tr>
9084
<td><h5>kubernetes.operator.observer.progress-check.interval</h5></td>
9185
<td style="word-wrap: break-word;">10 s</td>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<table class="configuration table table-bordered">
2+
<thead>
3+
<tr>
4+
<th class="text-left" style="width: 20%">Key</th>
5+
<th class="text-left" style="width: 15%">Default</th>
6+
<th class="text-left" style="width: 10%">Type</th>
7+
<th class="text-left" style="width: 55%">Description</th>
8+
</tr>
9+
</thead>
10+
<tbody>
11+
<tr>
12+
<td><h5>kubernetes.operator.josdk.metrics.enabled</h5></td>
13+
<td style="word-wrap: break-word;">true</td>
14+
<td>Boolean</td>
15+
<td>Enable forwarding of Java Operator SDK metrics to the Flink metric registry.</td>
16+
</tr>
17+
<tr>
18+
<td><h5>metrics.scope.k8soperator.resource</h5></td>
19+
<td style="word-wrap: break-word;">"&lt;host&gt;.k8soperator.&lt;namespace&gt;.&lt;name&gt;.resource.&lt;resourcens&gt;.&lt;resourcename&gt;"</td>
20+
<td>String</td>
21+
<td>Defines the scope format string that is applied to all metrics scoped to the kubernetes operator resource.</td>
22+
</tr>
23+
<tr>
24+
<td><h5>metrics.scope.k8soperator.resourcens</h5></td>
25+
<td style="word-wrap: break-word;">"&lt;host&gt;.k8soperator.&lt;namespace&gt;.&lt;name&gt;.namespace.&lt;resourcens&gt;"</td>
26+
<td>String</td>
27+
<td>Defines the scope format string that is applied to all metrics scoped to the kubernetes operator resource namespace.</td>
28+
</tr>
29+
<tr>
30+
<td><h5>metrics.scope.k8soperator.system</h5></td>
31+
<td style="word-wrap: break-word;">"&lt;host&gt;.k8soperator.&lt;namespace&gt;.&lt;name&gt;.system"</td>
32+
<td>String</td>
33+
<td>Defines the scope format string that is applied to all metrics scoped to the kubernetes operator.</td>
34+
</tr>
35+
</tbody>
36+
</table>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
<table class="configuration table table-bordered">
2+
<thead>
3+
<tr>
4+
<th class="text-left" style="width: 20%">Key</th>
5+
<th class="text-left" style="width: 15%">Default</th>
6+
<th class="text-left" style="width: 10%">Type</th>
7+
<th class="text-left" style="width: 55%">Description</th>
8+
</tr>
9+
</thead>
10+
<tbody>
11+
<tr>
12+
<td><h5>kubernetes.operator.config.cache.size</h5></td>
13+
<td style="word-wrap: break-word;">1000</td>
14+
<td>Integer</td>
15+
<td>Max config cache size.</td>
16+
</tr>
17+
<tr>
18+
<td><h5>kubernetes.operator.config.cache.timeout</h5></td>
19+
<td style="word-wrap: break-word;">10 min</td>
20+
<td>Duration</td>
21+
<td>Expiration time for cached configs.</td>
22+
</tr>
23+
<tr>
24+
<td><h5>kubernetes.operator.dynamic.config.check.interval</h5></td>
25+
<td style="word-wrap: break-word;">5 min</td>
26+
<td>Duration</td>
27+
<td>Time interval for checking config changes.</td>
28+
</tr>
29+
<tr>
30+
<td><h5>kubernetes.operator.dynamic.config.enabled</h5></td>
31+
<td style="word-wrap: break-word;">true</td>
32+
<td>Boolean</td>
33+
<td>Whether to enable on-the-fly config changes through the operator configmap.</td>
34+
</tr>
35+
<tr>
36+
<td><h5>kubernetes.operator.observer.progress-check.interval</h5></td>
37+
<td style="word-wrap: break-word;">10 s</td>
38+
<td>Duration</td>
39+
<td>The interval for observing status for in-progress operations such as deployment and savepoints.</td>
40+
</tr>
41+
<tr>
42+
<td><h5>kubernetes.operator.observer.rest-ready.delay</h5></td>
43+
<td style="word-wrap: break-word;">10 s</td>
44+
<td>Duration</td>
45+
<td>Final delay before deployment is marked ready after port becomes accessible.</td>
46+
</tr>
47+
<tr>
48+
<td><h5>kubernetes.operator.savepoint.history.max.age.threshold</h5></td>
49+
<td style="word-wrap: break-word;">(none)</td>
50+
<td>Duration</td>
51+
<td>Maximum age threshold for savepoint history entries to retain.</td>
52+
</tr>
53+
<tr>
54+
<td><h5>kubernetes.operator.savepoint.history.max.count.threshold</h5></td>
55+
<td style="word-wrap: break-word;">(none)</td>
56+
<td>Integer</td>
57+
<td>Maximum number threshold of savepoint history entries to retain.</td>
58+
</tr>
59+
</tbody>
60+
</table>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
<table class="configuration table table-bordered">
2+
<thead>
3+
<tr>
4+
<th class="text-left" style="width: 20%">Key</th>
5+
<th class="text-left" style="width: 15%">Default</th>
6+
<th class="text-left" style="width: 10%">Type</th>
7+
<th class="text-left" style="width: 55%">Description</th>
8+
</tr>
9+
</thead>
10+
<tbody>
11+
<tr>
12+
<td><h5>kubernetes.operator.dynamic.namespaces.enabled</h5></td>
13+
<td style="word-wrap: break-word;">false</td>
14+
<td>Boolean</td>
15+
<td>Enables dynamic change of watched/monitored namespaces.</td>
16+
</tr>
17+
<tr>
18+
<td><h5>kubernetes.operator.flink.client.cancel.timeout</h5></td>
19+
<td style="word-wrap: break-word;">1 min</td>
20+
<td>Duration</td>
21+
<td>The timeout for the reconciler to wait for flink to cancel the job.</td>
22+
</tr>
23+
<tr>
24+
<td><h5>kubernetes.operator.flink.client.timeout</h5></td>
25+
<td style="word-wrap: break-word;">10 s</td>
26+
<td>Duration</td>
27+
<td>The timeout for the observer to wait for the flink rest client to return.</td>
28+
</tr>
29+
<tr>
30+
<td><h5>kubernetes.operator.reconcile.interval</h5></td>
31+
<td style="word-wrap: break-word;">1 min</td>
32+
<td>Duration</td>
33+
<td>The interval for the controller to reschedule the reconcile process.</td>
34+
</tr>
35+
<tr>
36+
<td><h5>kubernetes.operator.reconcile.parallelism</h5></td>
37+
<td style="word-wrap: break-word;">5</td>
38+
<td>Integer</td>
39+
<td>The maximum number of threads running the reconciliation loop. Use -1 for infinite.</td>
40+
</tr>
41+
<tr>
42+
<td><h5>kubernetes.operator.resource.cleanup.timeout</h5></td>
43+
<td style="word-wrap: break-word;">1 min</td>
44+
<td>Duration</td>
45+
<td>The timeout for the resource clean up to wait for flink to shut down the cluster.</td>
46+
</tr>
47+
<tr>
48+
<td><h5>kubernetes.operator.retry.initial.interval</h5></td>
49+
<td style="word-wrap: break-word;">5 s</td>
50+
<td>Duration</td>
51+
<td>Initial interval of automatic reconcile retries on recoverable errors.</td>
52+
</tr>
53+
<tr>
54+
<td><h5>kubernetes.operator.retry.interval.multiplier</h5></td>
55+
<td style="word-wrap: break-word;">2.0</td>
56+
<td>Double</td>
57+
<td>Interval multiplier of automatic reconcile retries on recoverable errors.</td>
58+
</tr>
59+
<tr>
60+
<td><h5>kubernetes.operator.retry.max.attempts</h5></td>
61+
<td style="word-wrap: break-word;">10</td>
62+
<td>Integer</td>
63+
<td>Max attempts of automatic reconcile retries on recoverable errors.</td>
64+
</tr>
65+
<tr>
66+
<td><h5>kubernetes.operator.user.artifacts.base.dir</h5></td>
67+
<td style="word-wrap: break-word;">"/opt/flink/artifacts"</td>
68+
<td>String</td>
69+
<td>The base dir to put the session job artifacts.</td>
70+
</tr>
71+
<tr>
72+
<td><h5>kubernetes.operator.watched.namespaces</h5></td>
73+
<td style="word-wrap: break-word;">"JOSDK_ALL_NAMESPACES"</td>
74+
<td>String</td>
75+
<td>Comma separated list of namespaces the operator monitors for custom resources.</td>
76+
</tr>
77+
</tbody>
78+
</table>

flink-kubernetes-docs/src/main/java/org/apache/flink/kubernetes/operator/docs/configuration/ConfigOptionsDocGenerator.java

+3-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ public class ConfigOptionsDocGenerator {
7272
static final OptionsClassLocation[] LOCATIONS =
7373
new OptionsClassLocation[] {
7474
new OptionsClassLocation(
75-
"flink-kubernetes-operator", "org.apache.flink.kubernetes.operator.config")
75+
"flink-kubernetes-operator", "org.apache.flink.kubernetes.operator.config"),
76+
new OptionsClassLocation(
77+
"flink-kubernetes-operator", "org.apache.flink.kubernetes.operator.metrics")
7678
};
7779
static final String DEFAULT_PATH_PREFIX = "src/main/java";
7880

flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/FlinkOperator.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ private void overrideOperatorConfigs(ConfigurationServiceOverrider overrider) {
110110
LOG.info("Configuring operator with {} reconciliation threads.", parallelism);
111111
overrider.withConcurrentReconciliationThreads(parallelism);
112112
}
113-
if (configManager.getOperatorConfiguration().getJosdkMetricsEnabled()) {
113+
if (configManager.getOperatorConfiguration().isJosdkMetricsEnabled()) {
114114
overrider.withMetrics(
115115
new OperatorJosdkMetrics(metricGroup, configManager.getDefaultConfig()));
116116
}

flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/config/FlinkConfigManager.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ public void updateDefaultConfig(Configuration newConf) {
132132
.orElse(Set.of());
133133
this.operatorConfiguration = FlinkOperatorConfiguration.fromConfiguration(newConf);
134134
var newNs = this.operatorConfiguration.getWatchedNamespaces();
135-
if (this.operatorConfiguration.getDynamicNamespacesEnabled() && !oldNs.equals(newNs)) {
135+
if (this.operatorConfiguration.isDynamicNamespacesEnabled() && !oldNs.equals(newNs)) {
136136
this.namespaceListener.accept(operatorConfiguration.getWatchedNamespaces());
137137
}
138138
this.defaultConfig = newConf.clone();

flink-kubernetes-operator/src/main/java/org/apache/flink/kubernetes/operator/config/FlinkOperatorConfiguration.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
package org.apache.flink.kubernetes.operator.config;
2020

2121
import org.apache.flink.configuration.Configuration;
22+
import org.apache.flink.kubernetes.operator.metrics.KubernetesOperatorMetricOptions;
2223
import org.apache.flink.kubernetes.operator.utils.EnvUtils;
2324

2425
import io.javaoperatorsdk.operator.api.config.RetryConfiguration;
@@ -42,8 +43,8 @@ public class FlinkOperatorConfiguration {
4243
Duration flinkClientTimeout;
4344
String flinkServiceHostOverride;
4445
Set<String> watchedNamespaces;
45-
Boolean dynamicNamespacesEnabled;
46-
Boolean josdkMetricsEnabled;
46+
boolean dynamicNamespacesEnabled;
47+
boolean josdkMetricsEnabled;
4748
Duration flinkCancelJobTimeout;
4849
Duration flinkShutdownClusterTimeout;
4950
String artifactsBaseDir;
@@ -110,7 +111,7 @@ public static FlinkOperatorConfiguration fromConfiguration(Configuration operato
110111
KubernetesOperatorConfigOptions.OPERATOR_DYNAMIC_NAMESPACES_ENABLED);
111112

112113
boolean josdkMetricsEnabled =
113-
operatorConfig.get(KubernetesOperatorConfigOptions.OPERATOR_JOSDK_METRICS_ENABLED);
114+
operatorConfig.get(KubernetesOperatorMetricOptions.OPERATOR_JOSDK_METRICS_ENABLED);
114115

115116
RetryConfiguration retryConfiguration = new FlinkOperatorRetryConfiguration(operatorConfig);
116117

0 commit comments

Comments
 (0)