Skip to content

Commit 13bee4a

Browse files
authored
🌱 Add pre-provision command to check baremetal machines. (#1543)
1 parent d049268 commit 13bee4a

29 files changed

+1781
-40
lines changed

api/v1beta1/conditions_const.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -189,14 +189,6 @@ const (
189189
CheckDiskFailedReason = "CheckDiskFailed"
190190
)
191191

192-
const (
193-
// SSHAfterInstallImageSucceededCondition indicates that the host is reachable via ssh after installImage.
194-
SSHAfterInstallImageSucceededCondition clusterv1.ConditionType = "SSHAfterInstallImageSucceeded"
195-
196-
// SSHAfterInstallImageFailedReason indicates that the host was not reachable via ssh.
197-
SSHAfterInstallImageFailedReason = "SSHAfterInstallImageFailed"
198-
)
199-
200192
const (
201193
// HostAssociateSucceededCondition indicates that a host has been associated.
202194
HostAssociateSucceededCondition clusterv1.ConditionType = "HostAssociateSucceeded"

api/v1beta1/hetznerbaremetalhost_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ const (
161161
// StateRegistering means we are getting hardware details.
162162
StateRegistering ProvisioningState = "registering"
163163

164+
// StatePreProvisioning means we run the pre-provision-command (if given).
165+
StatePreProvisioning ProvisioningState = "pre-provisioning"
166+
164167
// StateImageInstalling means we install a new image.
165168
StateImageInstalling ProvisioningState = "image-installing"
166169

controllers/controllers_suite_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,11 @@ var _ = BeforeSuite(func() {
8888
}).SetupWithManager(ctx, testEnv.Manager, controller.Options{})).To(Succeed())
8989

9090
Expect((&HetznerBareMetalHostReconciler{
91-
Client: testEnv.Manager.GetClient(),
92-
APIReader: testEnv.Manager.GetAPIReader(),
93-
RobotClientFactory: testEnv.RobotClientFactory,
94-
SSHClientFactory: testEnv.SSHClientFactory,
91+
Client: testEnv.Manager.GetClient(),
92+
APIReader: testEnv.Manager.GetAPIReader(),
93+
RobotClientFactory: testEnv.RobotClientFactory,
94+
SSHClientFactory: testEnv.SSHClientFactory,
95+
PreProvisionCommand: "dummy-pre-provision-command",
9596
}).SetupWithManager(ctx, testEnv.Manager, controller.Options{})).To(Succeed())
9697

9798
Expect((&HetznerBareMetalMachineReconciler{

controllers/hetznerbaremetalhost_controller.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,12 @@ import (
5252
// HetznerBareMetalHostReconciler reconciles a HetznerBareMetalHost object.
5353
type HetznerBareMetalHostReconciler struct {
5454
client.Client
55-
RateLimitWaitTime time.Duration
56-
APIReader client.Reader
57-
RobotClientFactory robotclient.Factory
58-
SSHClientFactory sshclient.Factory
59-
WatchFilterValue string
55+
RateLimitWaitTime time.Duration
56+
APIReader client.Reader
57+
RobotClientFactory robotclient.Factory
58+
SSHClientFactory sshclient.Factory
59+
WatchFilterValue string
60+
PreProvisionCommand string
6061
}
6162

6263
//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=hetznerbaremetalhosts,verbs=get;list;watch;create;update;patch;delete
@@ -200,6 +201,7 @@ func (r *HetznerBareMetalHostReconciler) Reconcile(ctx context.Context, req ctrl
200201
OSSSHSecret: osSSHSecret,
201202
RescueSSHSecret: rescueSSHSecret,
202203
SecretManager: secretManager,
204+
PreProvisionCommand: r.PreProvisionCommand,
203205
})
204206
if err != nil {
205207
return reconcile.Result{}, fmt.Errorf("failed to create scope: %w", err)

controllers/hetznerbaremetalhost_controller_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,9 @@ var _ = Describe("HetznerBareMetalHostReconciler", func() {
348348
"should-state", infrav1.StateImageInstalling)
349349
return false
350350
}, 10*time.Second).Should(BeTrue())
351+
352+
By("Checking if pre-provision-command was executed")
353+
rescueSSHClient.AssertCalled(GinkgoT(), "ExecutePreProvisionCommand", mock.Anything, mock.Anything)
351354
})
352355
})
353356

@@ -898,6 +901,7 @@ name="eth0" model="Realtek Semiconductor Co., Ltd. RTL8111/8168/8411 PCI Express
898901
sshClient.On("Reboot").Return(sshclient.Output{})
899902
sshClient.On("GetCloudInitOutput").Return(sshclient.Output{StdOut: "dummy content of /var/log/cloud-init-output.log"})
900903
sshClient.On("DetectLinuxOnAnotherDisk", mock.Anything).Return(sshclient.Output{})
904+
sshClient.On("ExecutePreProvisionCommand", mock.Anything, mock.Anything).Return(0, "", nil)
901905
sshClient.On("GetInstallImageState").Return(sshclient.InstallImageStateFinished, nil)
902906
sshClient.On("GetResultOfInstallImage").Return(hostpkg.PostInstallScriptFinished, nil)
903907
}

docs/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,4 @@ This is the official documentation of Cluster API Provider Hetzner. Before start
3434
- [Tilt](/docs/caph/04-developers/02-tilt.md)
3535
- [Releasing](/docs/caph/04-developers/03-releasing.md)
3636
- [Updating Kubernetes version](/docs/caph/04-developers/04-updating-kubernetes-version.md)
37+
- [pre-provision-command](/docs/caph/04-developers/05-pre-provision-command.md)
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
---
2+
title: pre-provision-command
3+
metatitle: Cluster API Provider Hetzner Check Bare Metal Server before Provisioning
4+
sidebar: pre-provision-command
5+
description: Documentation on the CAPH pre-provision-command.
6+
---
7+
8+
The `--pre-provision-command` for the caph controller can be used to execute a custom command
9+
before install-image starts.
10+
11+
This gives you a flexible way to check whether the bare metal server is healthy.
12+
13+
Example:
14+
15+
```sh
16+
--pre-provision-command=/shared/my-pre-provision-command.sh
17+
```
18+
19+
The script/binary will be copied into the Hetzner Rescue System and executed.
20+
21+
If the exit code is zero, then all is fine.
22+
23+
If the exit code is non-zero, then provisioning of that machine will be stopped.
24+
25+
The CAPH controller runs in a Kubernetes Pod. The container of that pod needs access to the file.
26+
27+
There are several ways to make this command available:
28+
29+
* You could mount a configMap/secret.
30+
* You create a container image, and use that as init-container.
31+
* You build a custom image of CAPH. We do not recommend that.
32+
33+
In this example we use an init-container to provide the script.
34+
35+
In the directory `images/pre-provision-command/` you see these files:
36+
37+
* my-pre-provision-command.sh: A simple Bash script which prints a message and exits with 0.
38+
* Dockerfile: Needed to create a container image.
39+
* build-and-push.sh: A script to build the container image and push it to a container registry.
40+
41+
Once the container image has been uploaded, you need to adapt the CAPH deployment:
42+
43+
```yaml
44+
45+
# Init container, which makes the command available to caph.
46+
initContainers:
47+
- command:
48+
- /bin/sh
49+
- -c
50+
- cp /my-pre-provision-command.sh /shared/
51+
image: ghcr.io/syself/caph-staging:pre-provision-command
52+
imagePullPolicy: Always
53+
name: init-container
54+
resources: {}
55+
terminationMessagePath: /dev/termination-log
56+
terminationMessagePolicy: File
57+
volumeMounts:
58+
- mountPath: /shared
59+
name: shared
60+
61+
62+
# Add this to the args of the caph container:
63+
args:
64+
- --pre-provision-command=/shared/my-pre-provision-command.sh
65+
66+
# Add this to the caph container
67+
volumeMounts:
68+
- mountPath: /shared
69+
name: shared
70+
71+
# Add this after "containers"
72+
volumes:
73+
- emptyDir: {}
74+
name: shared
75+
```

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ go 1.22.6
44

55
require (
66
github.com/blang/semver/v4 v4.0.0
7+
github.com/bramvdbogaerde/go-scp v1.5.0
78
github.com/go-logr/logr v1.4.2
89
github.com/go-logr/zapr v1.3.0
910
github.com/hetznercloud/hcloud-go/v2 v2.13.1

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
2626
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
2727
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
2828
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
29+
github.com/bramvdbogaerde/go-scp v1.5.0 h1:a9BinAjTfQh273eh7vd3qUgmBC+bx+3TRDtkZWmIpzM=
30+
github.com/bramvdbogaerde/go-scp v1.5.0/go.mod h1:on2aH5AxaFb2G0N5Vsdy6B0Ml7k9HuHSwfo1y0QzAbQ=
2931
github.com/bwesterb/go-ristretto v1.2.0/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0=
3032
github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM=
3133
github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
#!/usr/bin/env bash
2+
# Copyright 2025 The Kubernetes Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# This script updates the caph deployment in a mgt-cluster. Be sure to be connected to
17+
# a development cluster.
18+
# It takes the current code, creates a new image and updates the deployment.
19+
# By default the image will be uploaded to:
20+
# ghcr.io/syself/caph-staging:dev-$USER-YOUR_GIT_BRANCH
21+
# You can change the image path with the --image-path flag, if you want to use a different registry.
22+
23+
# Usually it is better to write a test than to use this script.
24+
25+
trap 'echo "Warning: A command has failed. Exiting the script. Line was ($0:$LINENO): $(sed -n "${LINENO}p" "$0")"; exit 3' ERR
26+
set -Eeuo pipefail
27+
28+
image_path="ghcr.io/syself"
29+
30+
while [[ "$#" -gt 0 ]]; do
31+
case $1 in
32+
--image-path)
33+
image_path="$2"
34+
shift
35+
;;
36+
*)
37+
echo "Unknown parameter passed: $1"
38+
exit 1
39+
;;
40+
esac
41+
shift
42+
done
43+
44+
# remove trailing slash
45+
image_path="${image_path%/}"
46+
47+
if ! kubectl cluster-info >/dev/null; then
48+
echo
49+
echo "No kubernetes cluster found."
50+
echo "You can use alm to create a mgt-cluster"
51+
echo "docs: https://github.com/syself/autopilot-lifecycle-manager"
52+
exit 1
53+
fi
54+
55+
branch=$(git branch --show-current)
56+
if [ "$branch" == "" ]; then
57+
echo "failed to get branch name"
58+
exit 1
59+
fi
60+
61+
tag="dev-$USER-$branch"
62+
tag="$(echo -n "$tag" | tr -c 'a-zA-Z0-9_.-' '-')"
63+
64+
image="$image_path/caph-staging:$tag"
65+
66+
echo "Building image: $image"
67+
68+
docker build -f images/caph/Dockerfile -t "$image" .
69+
70+
docker push "$image"
71+
72+
# Note: Up to now changes in the CRD are not supported by this script.
73+
74+
kubectl scale --replicas=1 -n mgt-system deployment/caph-controller-manager
75+
76+
kubectl set image -n mgt-system deployment/caph-controller-manager manager="$image"
77+
78+
kubectl patch deployment -n mgt-system -p '[{"op": "replace", "path": "/spec/template/spec/containers/0/imagePullPolicy", "value": "Always"}]' --type='json' caph-controller-manager
79+
80+
kubectl rollout restart -n mgt-system deployment caph-controller-manager
81+
82+
trap "echo 'Interrupted! Exiting...'; exit 1" SIGINT
83+
84+
while ! kubectl rollout status deployment --timeout=3s -n mgt-system caph-controller-manager; do
85+
echo "Rollout failed"
86+
kubectl events -n mgt-system | grep caph-controller-manager | tail -n 5
87+
echo
88+
echo
89+
done
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Copyright 2025 The Kubernetes Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
FROM bash
16+
COPY ./my-pre-provision-command.sh /my-pre-provision-command.sh
17+
RUN chmod +x /my-pre-provision-command.sh
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#!/usr/bin/env bash
2+
# Copyright 2025 The Kubernetes Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# This script builds and pushes the container image for the init-container.
17+
# See README.md for more information.
18+
19+
# Bash Strict Mode: https://github.com/guettli/bash-strict-mode
20+
trap 'echo "Warning: A command has failed. Exiting the script. Line was ($0:$LINENO): $(sed -n "${LINENO}p" "$0")"; exit 3' ERR
21+
set -Eeuo pipefail
22+
23+
DIR="$(dirname "$0")"
24+
25+
docker build -t ghcr.io/syself/caph-staging:pre-provision-command "$DIR"
26+
27+
docker push ghcr.io/syself/caph-staging:pre-provision-command
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/usr/bin/env bash
2+
# Copyright 2025 The Kubernetes Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
echo "dummy script for caph --pre-provision-command. All is fine."
17+
exit 0

0 commit comments

Comments
 (0)