-
Notifications
You must be signed in to change notification settings - Fork 98
K8s control plane high-availability mode #940
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
fa014a2
b00bdc7
b9b62b8
b91ec0f
71ca4be
c9a464e
a167a9a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Signed-off-by: Lazar Cvetković <[email protected]>
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/sh
# keepalived health-check script for the HA Kubernetes control plane.
# Verifies that the local HAProxy fronting the API servers answers on the
# load-balanced port, and — if this node currently holds the virtual IP —
# that the API server is also reachable through the VIP.

errorExit() {
    echo "*** $*" 1>&2
    exit 1
}

# The local HAProxy must answer on the load-balanced API server port.
curl --silent --max-time 2 --insecure https://localhost:8443/ -o /dev/null || errorExit "Error GET https://localhost:8443/"

# Probe the VIP only when this node owns it. Escape the dots and match a
# whole word so addresses like 10.0.1.2540 or 10.021.254 cannot produce a
# false positive (unescaped '.' in grep matches any character).
if ip addr | grep -qw '10\.0\.1\.254'; then
    curl --silent --max-time 2 --insecure https://10.0.1.254:8443/ -o /dev/null || errorExit "Error GET https://10.0.1.254:8443/"
fi
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# /etc/haproxy/haproxy.cfg
#---------------------------------------------------------------------
# Global settings
#---------------------------------------------------------------------
global
    log /dev/log local0
    log /dev/log local1 notice
    daemon

#---------------------------------------------------------------------
# common defaults that all the 'listen' and 'backend' sections will
# use if not designated in their block
#---------------------------------------------------------------------
defaults
    mode http
    log global
    option httplog
    option dontlognull
    option http-server-close
    option forwardfor except 127.0.0.0/8
    option redispatch
    retries 1
    timeout http-request 10s
    timeout queue 20s
    timeout connect 5s
    timeout client 20s
    timeout server 20s
    timeout http-keep-alive 10s
    timeout check 10s

#---------------------------------------------------------------------
# apiserver frontend which proxies to the control plane nodes
#---------------------------------------------------------------------
frontend apiserver
    bind *:8443
    mode tcp
    option tcplog
    default_backend apiserverbackend

#---------------------------------------------------------------------
# round robin balancing for apiserver
#---------------------------------------------------------------------
backend apiserverbackend
    mode tcp
    # Health-check each apiserver by issuing GET /healthz over TLS.
    # NOTE: 'option httpchk' and 'option ssl-hello-chk' are mutually
    # exclusive in HAProxy — the last one declared wins, so combining
    # them silently disables the /healthz check. Use 'check-ssl' on the
    # servers instead so the HTTP check runs over TLS (certificate
    # verification is skipped because the apiserver cert is self-signed).
    option httpchk GET /healthz
    http-check expect status 200
    balance roundrobin
    server control_plane_1 10.0.1.1:6443 check check-ssl verify none
    server control_plane_2 10.0.1.2:6443 check check-ssl verify none
    server control_plane_3 10.0.1.3:6443 check check-ssl verify none
    server control_plane_4 10.0.1.4:6443 check check-ssl verify none
    server control_plane_5 10.0.1.5:6443 check check-ssl verify none
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
! /etc/keepalived/keepalived.conf
! Configuration File for keepalived (BACKUP control-plane node)
global_defs {
    router_id LVS_DEVEL
}
! Periodically run the apiserver health check; a failing check lowers
! this node's effective priority by 'weight', allowing failover.
vrrp_script check_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 3
    weight -2
    fall 10
    rise 2
}

vrrp_instance VI_1 {
    state BACKUP
    interface enp4s0f1
    virtual_router_id 51
    ! Must be strictly lower than the MASTER's priority (101); with equal
    ! priorities the VRRP election is effectively decided by IP address.
    priority 100
    authentication {
        auth_type PASS
        auth_pass 42
    }
    virtual_ipaddress {
        10.0.1.254
    }
    track_script {
        check_apiserver
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
! /etc/keepalived/keepalived.conf
! Configuration File for keepalived (MASTER control-plane node)
global_defs {
    router_id LVS_DEVEL
}
! Health check for the local apiserver/HAProxy stack: run every 3s; after
! 10 consecutive failures reduce this node's priority by 2 so a backup
! can take over the VIP; 2 consecutive successes restore it.
vrrp_script check_apiserver {
    script "/etc/keepalived/check_apiserver.sh"
    interval 3
    weight -2
    fall 10
    rise 2
}

vrrp_instance VI_1 {
    state MASTER
    interface enp4s0f1
    virtual_router_id 51
    priority 101
    authentication {
        auth_type PASS
        auth_pass 42
    }
    ! Virtual IP shared by all control-plane nodes; owned by this node
    ! while it remains the VRRP master.
    virtual_ipaddress {
        10.0.1.254
    }
    track_script {
        check_apiserver
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,21 +25,27 @@ package cluster | |
import ( | ||
"fmt" | ||
"os" | ||
"strconv" | ||
"strings" | ||
"time" | ||
|
||
configs "github.com/vhive-serverless/vHive/scripts/configs" | ||
utils "github.com/vhive-serverless/vHive/scripts/utils" | ||
) | ||
|
||
func CreateMultinodeCluster(stockContainerd string) error { | ||
func CreateMultinodeCluster(stockContainerd string, rawHaReplicaCount string) error { | ||
// Original Bash Scripts: scripts/cluster/create_multinode_cluster.sh | ||
|
||
haReplicaCount, err := strconv.Atoi(rawHaReplicaCount) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
if err := CreateMasterKubeletService(); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This command fixes #967 for the first master node. Without it, we still use the control IP on the master node if we use CloudLab, which can lead to the suspension of the account if traffic is big enough. All other master nodes use CreateWorkerKubeletService, which has the fix for this problem. But it has a different one: in the Firecracker setup, we use our CRI socket, which we don't need to use on controller nodes. Please consider running CreateMasterKubeletService for all master nodes. UPD: running for each node is up to invitro setup scripts, not vhive's, but we need to make it possible from here. And there is not much difference between the master and worker commands. I think we can combine them into a single command setup_kubelet with a sandbox type as an argument. When setting up the node, we always choose containerd for all master and backup nodes and change it only for regular nodes. |
||
return err | ||
} | ||
|
||
if err := DeployKubernetes(); err != nil { | ||
if err := DeployKubernetes(haReplicaCount); err != nil { | ||
return err | ||
} | ||
|
||
|
@@ -95,19 +101,28 @@ EOF'` | |
} | ||
|
||
// Deploy Kubernetes | ||
func DeployKubernetes() error { | ||
|
||
func DeployKubernetes(haReplicaCount int) error { | ||
utils.WaitPrintf("Deploying Kubernetes(version %s)", configs.Kube.K8sVersion) | ||
masterNodeIp, iperr := utils.ExecShellCmd(`ip route | awk '{print $(NF)}' | awk '/^10\..*/'`) | ||
if iperr != nil { | ||
return iperr | ||
} | ||
shellCmd := fmt.Sprintf(`sudo kubeadm init --v=%d \ | ||
|
||
command := `sudo kubeadm init --v=%d \ | ||
--apiserver-advertise-address=%s \ | ||
leokondrashov marked this conversation as resolved.
Show resolved
Hide resolved
|
||
--cri-socket unix:///run/containerd/containerd.sock \ | ||
--kubernetes-version %s \ | ||
--pod-network-cidr="%s" `, | ||
configs.System.LogVerbosity, masterNodeIp, configs.Kube.K8sVersion, configs.Kube.PodNetworkCidr) | ||
--pod-network-cidr="%s" ` | ||
args := []any{configs.System.LogVerbosity, masterNodeIp, configs.Kube.K8sVersion, configs.Kube.PodNetworkCidr} | ||
|
||
if haReplicaCount > 0 { | ||
command += ` \ | ||
--control-plane-endpoint "%s:%s" \ | ||
--upload-certs` | ||
args = append(args, configs.Kube.CPHAEndpoint, configs.Kube.CPHAPort) | ||
} | ||
|
||
shellCmd := fmt.Sprintf(command, args) | ||
if len(configs.Kube.AlternativeImageRepo) > 0 { | ||
shellCmd = fmt.Sprintf(shellCmd+"--image-repository %s ", configs.Kube.AlternativeImageRepo) | ||
} | ||
|
@@ -141,24 +156,37 @@ func KubectlForNonRoot() error { | |
func ExtractMasterNodeInfo() error { | ||
// Extract master node information from logs | ||
utils.WaitPrintf("Extracting master node information from logs") | ||
|
||
// API Server address, port, token | ||
shellOut, err := utils.ExecShellCmd("sed -n '/.*kubeadm join.*/p' < %s/masterNodeInfo | sed -n 's/.*join \\(.*\\):\\(\\S*\\) --token \\(\\S*\\).*/\\1 \\2 \\3/p'", configs.System.TmpDir) | ||
if !utils.CheckErrorWithMsg(err, "Failed to extract master node information from logs!\n") { | ||
if !utils.CheckErrorWithMsg(err, "Failed to extract API Server address, port, and token from logs!\n") { | ||
return err | ||
} | ||
splittedOut := strings.Split(shellOut, " ") | ||
configs.Kube.ApiserverAdvertiseAddress = splittedOut[0] | ||
configs.Kube.ApiserverPort = splittedOut[1] | ||
configs.Kube.ApiserverToken = splittedOut[2] | ||
|
||
// API Server discovery token | ||
shellOut, err = utils.ExecShellCmd("sed -n '/.*sha256:.*/p' < %s/masterNodeInfo | sed -n 's/.*\\(sha256:\\S*\\).*/\\1/p'", configs.System.TmpDir) | ||
Comment on lines
150
to
160
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These commands parsing |
||
if !utils.CheckErrorWithTagAndMsg(err, "Failed to extract master node information from logs!\n") { | ||
if !utils.CheckErrorWithTagAndMsg(err, "Failed to extract API Server discovery token from logs!\n") { | ||
return err | ||
} | ||
configs.Kube.ApiserverDiscoveryToken = shellOut | ||
|
||
// API Server certificate key | ||
shellOut, err = utils.ExecShellCmd("sed -n 's/^.*--certificate-key //p' < %s/masterNodeInfo", configs.System.TmpDir) | ||
if !utils.CheckErrorWithTagAndMsg(err, "Failed to extract API Server certificate key from logs!\n") { | ||
return err | ||
} | ||
configs.Kube.ApiserverTokenHash = shellOut | ||
configs.Kube.ApiserverCertificateKey = shellOut | ||
|
||
masterKeyYamlTemplate := | ||
"ApiserverAdvertiseAddress: %s\n" + | ||
"ApiserverPort: %s\n" + | ||
"ApiserverToken: %s\n" + | ||
"ApiserverTokenHash: %s" | ||
"ApiserverDiscoveryToken: %s\n" + | ||
"ApiserverCertificateKey: %s" | ||
|
||
// Create masterKey.yaml with master node information | ||
utils.WaitPrintf("Creating masterKey.yaml with master node information") | ||
|
@@ -172,14 +200,17 @@ func ExtractMasterNodeInfo() error { | |
configs.Kube.ApiserverAdvertiseAddress, | ||
configs.Kube.ApiserverPort, | ||
configs.Kube.ApiserverToken, | ||
configs.Kube.ApiserverTokenHash) | ||
configs.Kube.ApiserverDiscoveryToken) | ||
_, err = masterKeyYamlFile.WriteString(masterKeyYaml) | ||
if !utils.CheckErrorWithTagAndMsg(err, "Failed to create masterKey.yaml with master node information!\n") { | ||
return err | ||
} | ||
|
||
utils.SuccessPrintf("Join cluster from worker nodes as a new control plane node with command: sudo kubeadm join %s:%s --token %s --discovery-token-ca-cert-hash %s --control-plane --certificate-key %s\n", | ||
configs.Kube.ApiserverAdvertiseAddress, configs.Kube.ApiserverPort, configs.Kube.ApiserverToken, configs.Kube.ApiserverDiscoveryToken, configs.Kube.ApiserverCertificateKey) | ||
|
||
utils.SuccessPrintf("Join cluster from worker nodes with command: sudo kubeadm join %s:%s --token %s --discovery-token-ca-cert-hash %s\n", | ||
configs.Kube.ApiserverAdvertiseAddress, configs.Kube.ApiserverPort, configs.Kube.ApiserverToken, configs.Kube.ApiserverTokenHash) | ||
configs.Kube.ApiserverAdvertiseAddress, configs.Kube.ApiserverPort, configs.Kube.ApiserverToken, configs.Kube.ApiserverDiscoveryToken) | ||
|
||
return nil | ||
} | ||
|
Uh oh!
There was an error while loading. Please reload this page.