pre master

This commit is contained in:
Karim Naufal 2022-02-10 03:01:40 +01:00
parent 036404c983
commit cd6b5e2768
8 changed files with 2542 additions and 26 deletions

File diff suppressed because it is too large Load Diff

View File

@ -33,7 +33,7 @@ _Please note that we are not affiliated to Hetzner, this is just an open source
- Maintenance free with auto-upgrade to the latest version of MicroOS and k3s. - Maintenance free with auto-upgrade to the latest version of MicroOS and k3s.
- Proper use of the underlying Hetzner private network to remove the need for encryption and make the cluster both fast and secure. - Proper use of the underlying Hetzner private network to remove the need for encryption and make the cluster both fast and secure.
- Automatic HA with the default setting of two control-plane and agents nodes. - Automatic HA with the default setting of three control-plane and two agent nodes.
- Ability to add or remove as many nodes as you want while the cluster stays running. - Ability to add or remove as many nodes as you want while the cluster stays running.
- Automatic Traefik ingress controller attached to a Hetzner load balancer with proxy protocol turned on. - Automatic Traefik ingress controller attached to a Hetzner load balancer with proxy protocol turned on.
- (Optional) Out of the box config of Traefik with SSL certificate auto-generation. - (Optional) Out of the box config of Traefik with SSL certificate auto-generation.
@ -93,11 +93,15 @@ When the cluster is up and running, you can do whatever you wish with it! 🎉
You can scale the number of nodes up and down without any issues. If you are going to scale down, just make sure to properly `kubectl drain` the nodes in question first. Then just edit these variables in `terraform.tfvars` and re-apply terraform with `terraform apply -auto-approve`. You can scale the number of nodes up and down without any issues. If you are going to scale down, just make sure to properly `kubectl drain` the nodes in question first. Then just edit these variables in `terraform.tfvars` and re-apply terraform with `terraform apply -auto-approve`.
**If you want to remain HA, it's important to keep a number of control planes of at least 3, see [Rancher's doc on HA](https://rancher.com/docs/k3s/latest/en/installation/ha-embedded/).**
Otherwise (with 2 or fewer control-plane nodes), you may want to turn off automated updates and reboots of the control-plane nodes and perform this maintenance manually.
For instance: For instance:
```tfvars ```tfvars
servers_num = 2 servers_num = 3
agents_num = 3 agents_num = 2
``` ```
### Useful commands ### Useful commands
@ -143,6 +147,7 @@ If you want to takedown the cluster, you can proceed as follows:
kubectl delete -k hetzner/csi kubectl delete -k hetzner/csi
kubectl delete -k hetzner/ccm kubectl delete -k hetzner/ccm
hcloud load-balancer delete traefik hcloud load-balancer delete traefik
hcloud network delete k3s
terraform destroy -auto-approve terraform destroy -auto-approve
``` ```

View File

@ -8,7 +8,7 @@ resource "hcloud_server" "agents" {
location = var.location location = var.location
ssh_keys = [hcloud_ssh_key.k3s.id] ssh_keys = [hcloud_ssh_key.k3s.id]
firewall_ids = [hcloud_firewall.k3s.id] firewall_ids = [hcloud_firewall.k3s.id]
placement_group_id = hcloud_placement_group.k3s_placement_group.id placement_group_id = hcloud_placement_group.k3s.id
labels = { labels = {
@ -53,7 +53,7 @@ resource "hcloud_server" "agents" {
command = <<-EOT command = <<-EOT
until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null
do do
echo Waiting for ssh to be ready... echo "Waiting for ssh to be ready..."
sleep 2 sleep 2
done done
EOT EOT
@ -98,10 +98,20 @@ resource "hcloud_server" "agents" {
provisioner "remote-exec" { provisioner "remote-exec" {
inline = [ inline = [
"set -ex", "set -ex",
# set the hostname in a persistent fashion
"hostnamectl set-hostname ${self.name}",
# first we disable automatic reboot (after transactional updates), and configure the reboot method as kured # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured
"rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf",
# then turn on k3s and join the cluster # then we enable and start the k3s-agent unit (agent nodes do not run k3s-server) and join the cluster
"systemctl --now enable k3s-agent", "systemctl enable k3s-agent",
<<-EOT
until systemctl status k3s-agent > /dev/null
do
systemctl start k3s-agent
echo "Starting k3s-agent and joining the cluster..."
sleep 2
done
EOT
] ]
connection { connection {

View File

@ -187,8 +187,8 @@ resource "local_file" "traefik_config" {
} }
resource "hcloud_placement_group" "k3s_placement_group" { resource "hcloud_placement_group" "k3s" {
name = "k3s-placement-group" name = "k3s"
type = "spread" type = "spread"
labels = { labels = {
"provisioner" = "terraform", "provisioner" = "terraform",

View File

@ -7,7 +7,7 @@ resource "hcloud_server" "first_control_plane" {
location = var.location location = var.location
ssh_keys = [hcloud_ssh_key.k3s.id] ssh_keys = [hcloud_ssh_key.k3s.id]
firewall_ids = [hcloud_firewall.k3s.id] firewall_ids = [hcloud_firewall.k3s.id]
placement_group_id = hcloud_placement_group.k3s_placement_group.id placement_group_id = hcloud_placement_group.k3s.id
labels = { labels = {
"provisioner" = "terraform", "provisioner" = "terraform",
@ -51,7 +51,7 @@ resource "hcloud_server" "first_control_plane" {
command = <<-EOT command = <<-EOT
until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null
do do
echo Waiting for ssh to be ready... echo "Waiting for ssh to be ready..."
sleep 2 sleep 2
done done
EOT EOT
@ -84,10 +84,20 @@ resource "hcloud_server" "first_control_plane" {
provisioner "remote-exec" { provisioner "remote-exec" {
inline = [ inline = [
"set -ex", "set -ex",
# set the hostname in a persistent fashion
"hostnamectl set-hostname ${self.name}",
# first we disable automatic reboot (after transactional updates), and configure the reboot method as kured # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured
"rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf",
# then we initiate the cluster # then we initiate the cluster
"systemctl --now enable k3s-server", "systemctl enable k3s-server",
<<-EOT
until systemctl status k3s-server > /dev/null
do
systemctl start k3s-server
echo "Initiating the cluster..."
sleep 2
done
EOT
] ]
connection { connection {
@ -102,10 +112,18 @@ resource "hcloud_server" "first_control_plane" {
provisioner "local-exec" { provisioner "local-exec" {
command = <<-EOT command = <<-EOT
set -ex set -ex
sleep 30 until ssh -q ${local.ssh_args} root@${self.ipv4_address} [[ -f /etc/rancher/k3s/k3s.yaml ]]
do
echo "Waiting for the k3s config file to be ready..."
sleep 2
done
scp ${local.ssh_args} root@${self.ipv4_address}:/etc/rancher/k3s/k3s.yaml ${path.module}/kubeconfig.yaml scp ${local.ssh_args} root@${self.ipv4_address}:/etc/rancher/k3s/k3s.yaml ${path.module}/kubeconfig.yaml
sed -i -e 's/127.0.0.1/${self.ipv4_address}/g' ${path.module}/kubeconfig.yaml sed -i -e 's/127.0.0.1/${self.ipv4_address}/g' ${path.module}/kubeconfig.yaml
sleep 10 && until kubectl get node ${self.name} --kubeconfig ${path.module}/kubeconfig.yaml; do sleep 5; done until kubectl get node ${self.name} --kubeconfig ${path.module}/kubeconfig.yaml 2> /dev/null || false
do
echo "Waiting for the node to become available...";
sleep 2
done
EOT EOT
} }

View File

@ -8,7 +8,7 @@ resource "hcloud_server" "control_planes" {
location = var.location location = var.location
ssh_keys = [hcloud_ssh_key.k3s.id] ssh_keys = [hcloud_ssh_key.k3s.id]
firewall_ids = [hcloud_firewall.k3s.id] firewall_ids = [hcloud_firewall.k3s.id]
placement_group_id = hcloud_placement_group.k3s_placement_group.id placement_group_id = hcloud_placement_group.k3s.id
labels = { labels = {
"provisioner" = "terraform", "provisioner" = "terraform",
@ -52,7 +52,7 @@ resource "hcloud_server" "control_planes" {
command = <<-EOT command = <<-EOT
until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null
do do
echo Waiting for ssh to be ready... echo "Waiting for ssh to be ready..."
sleep 2 sleep 2
done done
EOT EOT
@ -84,14 +84,24 @@ resource "hcloud_server" "control_planes" {
} }
} }
# Run the other control plane # Run another control plane server
provisioner "remote-exec" { provisioner "remote-exec" {
inline = [ inline = [
"set -ex", "set -ex",
# set the hostname in a persistent fashion
"hostnamectl set-hostname ${self.name}",
# first we disable automatic reboot (after transactional updates), and configure the reboot method as kured # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured
"rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf",
# then we initiate the cluster # then we start k3s in server mode and join the cluster
"systemctl --now enable k3s-server", "systemctl enable k3s-server",
<<-EOT
until systemctl status k3s-server > /dev/null
do
systemctl start k3s-server
echo "Waiting on other 'learning' control planes, patience is the mother of virtues..."
sleep 2
done
EOT
] ]
connection { connection {

View File

@ -14,17 +14,17 @@
}, },
"storage": { "storage": {
"files": [ "files": [
{
"path": "/etc/hostname",
"mode": 420,
"overwrite": true,
"contents": { "source": "data:,${name}" }
},
{ {
"path": "/etc/sysconfig/network/ifcfg-eth1", "path": "/etc/sysconfig/network/ifcfg-eth1",
"mode": 420, "mode": 420,
"overwrite": true, "overwrite": true,
"contents": { "source": "data:,BOOTPROTO%3D%27dhcp%27%0ASTARTMODE%3D%27auto%27" } "contents": { "source": "data:,BOOTPROTO%3D%27dhcp%27%0ASTARTMODE%3D%27auto%27" }
},
{
"path": "/etc/ssh/sshd_config.d/kube-hetzner.conf",
"mode": 420,
"overwrite": true,
"contents": { "source": "data:,PasswordAuthentication%20no%0AX11Forwarding%20no%0AMaxAuthTries%202%0AAllowTcpForwarding%20no%0AAllowAgentForwarding%20no%0AAuthorizedKeysFile%20.ssh%2Fauthorized_keys" }
} }
] ]
} }

View File

@ -12,7 +12,11 @@ network_region = "eu-central" # change to `us-east` if location is as
agent_server_type = "cpx21" agent_server_type = "cpx21"
control_plane_server_type = "cpx11" control_plane_server_type = "cpx11"
lb_server_type = "lb11" lb_server_type = "lb11"
servers_num = 2
# At least 3 server nodes are recommended for HA; otherwise you need to turn off automatic upgrades (see the README).
servers_num = 3
# For agent nodes, at least 2 is recommended for HA, but you can keep automatic upgrades.
agents_num = 2 agents_num = 2
# If you want to use a specific Hetzner CCM and CSI version, set them below, otherwise leave as is for the latest versions # If you want to use a specific Hetzner CCM and CSI version, set them below, otherwise leave as is for the latest versions