Merge branch 'kube-hetzner:master' into master

This commit is contained in:
Michael 2022-03-05 07:44:38 +01:00 committed by GitHub
commit e636be1a0e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 203 additions and 118 deletions

5
.gitignore vendored
View File

@ -4,5 +4,6 @@ crash.log
kubeconfig.yaml
kubeconfig.yaml-e
terraform.tfvars
plans.yaml
traefik_config.yaml
plans-custom.yaml
traefik-custom.yaml
kured-custom.yaml

View File

@ -149,7 +149,6 @@ spec:
tls:
- hosts:
- example.com
secretName: example-tls
rules:
- host: example.com
http:
@ -166,6 +165,21 @@ spec:
</details>
<details>
<summary>single-node cluster</summary>
Running a development cluster on a single node, without any high availability, is possible as well.
In this case, we don't deploy an external load balancer, but use the [k3s service load balancer](https://rancher.com/docs/k3s/latest/en/networking/#service-load-balancer) on the host itself and open up ports 80 and 443 in the firewall.
``` terraform
control_plane_count = 1
allow_scheduling_on_control_plane = true
agent_nodepools = {}
```
</details>
## Debugging
First and foremost, it depends, but it's always good to be able to take a quick look at your Hetzner resources without having to log in to the UI. That is where the `hcloud` CLI comes in.

View File

@ -44,11 +44,11 @@ resource "null_resource" "agents" {
provisioner "file" {
content = yamlencode({
node-name = module.agents[each.key].name
server = "https://${local.first_control_plane_network_ipv4}:6443"
server = "https://${module.control_planes[0].private_ipv4_address}:6443"
token = random_password.k3s_token.result
kubelet-arg = "cloud-provider=external"
flannel-iface = "eth1"
node-ip = module.agents[each.key].ipv4_address
node-ip = module.agents[each.key].private_ipv4_address
node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : []
})
destination = "/tmp/config.yaml"

View File

@ -53,7 +53,6 @@ resource "null_resource" "control_planes" {
kubelet-arg = "cloud-provider=external"
node-ip = module.control_planes[count.index].private_ipv4_address
advertise-address = module.control_planes[count.index].private_ipv4_address
tls-san = module.control_planes[count.index].private_ipv4_address
node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"]
node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : []
})

View File

@ -9,7 +9,6 @@ spec:
tls:
- hosts:
- example.com
secretName: example-tls
rules:
- host: example.com
http:

21
init.tf
View File

@ -13,12 +13,11 @@ resource "null_resource" "first_control_plane" {
token = random_password.k3s_token.result
cluster-init = true
disable-cloud-controller = true
disable = ["servicelb", "local-storage"]
disable = concat(["local-storage"], local.is_single_node_cluster ? [] : ["servicelb"])
flannel-iface = "eth1"
kubelet-arg = "cloud-provider=external"
node-ip = module.control_planes[0].private_ipv4_address
advertise-address = module.control_planes[0].private_ipv4_address
tls-san = module.control_planes[0].private_ipv4_address
node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"]
node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : []
})
@ -30,7 +29,7 @@ resource "null_resource" "first_control_plane" {
inline = local.install_k3s_server
}
# Upon reboot verify that the k3s server is starts, and wait for k3s to be ready to receive commands
# Upon reboot start k3s and wait for it to be ready to receive commands
provisioner "remote-exec" {
inline = [
"systemctl start k3s",
@ -75,13 +74,12 @@ resource "null_resource" "kustomization" {
content = yamlencode({
apiVersion = "kustomize.config.k8s.io/v1beta1"
kind = "Kustomization"
resources = [
resources = concat([
"https://github.com/hetznercloud/hcloud-cloud-controller-manager/releases/download/${local.ccm_version}/ccm-networks.yaml",
"https://raw.githubusercontent.com/hetznercloud/csi-driver/${local.csi_version}/deploy/kubernetes/hcloud-csi.yml",
"https://github.com/weaveworks/kured/releases/download/${local.kured_version}/kured-${local.kured_version}-dockerhub.yaml",
"https://raw.githubusercontent.com/rancher/system-upgrade-controller/master/manifests/system-upgrade-controller.yaml",
"traefik.yaml",
]
], local.is_single_node_cluster ? [] : ["traefik.yaml"]),
patchesStrategicMerge = [
file("${path.module}/kustomize/kured.yaml"),
file("${path.module}/kustomize/ccm.yaml"),
@ -93,7 +91,7 @@ resource "null_resource" "kustomization" {
# Upload traefik config
provisioner "file" {
content = templatefile(
content = local.is_single_node_cluster ? "" : templatefile(
"${path.module}/templates/traefik_config.yaml.tpl",
{
load_balancer_disable_ipv6 = var.load_balancer_disable_ipv6
@ -127,7 +125,7 @@ resource "null_resource" "kustomization" {
# Deploy our post-installation kustomization
provisioner "remote-exec" {
inline = [
inline = concat([
"set -ex",
# This ugly hack is here, because terraform serializes the
# embedded yaml files with "- |2", when there is more than
@ -141,8 +139,9 @@ resource "null_resource" "kustomization" {
"kubectl apply -k /tmp/post_install",
"echo 'Waiting for the system-upgrade-controller deployment to become available...'",
"kubectl -n system-upgrade wait --for=condition=available --timeout=120s deployment/system-upgrade-controller",
"kubectl -n system-upgrade apply -f /tmp/post_install/plans.yaml",
<<-EOT
"kubectl -n system-upgrade apply -f /tmp/post_install/plans.yaml"
],
local.is_single_node_cluster ? [] : [<<-EOT
timeout 120 bash <<EOF
until [ -n "\$(kubectl get -n kube-system service/traefik --output=jsonpath='{.status.loadBalancer.ingress[0].ip}' 2> /dev/null)" ]; do
echo "Waiting for load-balancer to get an IP..."
@ -150,7 +149,7 @@ resource "null_resource" "kustomization" {
done
EOF
EOT
]
])
}
depends_on = [

View File

@ -1,7 +1,7 @@
locals {
first_control_plane_network_ipv4 = module.control_planes[0].private_ipv4_address
ssh_public_key = trimspace(file(var.public_key))
# If we are in a single-node cluster config, we use the default klipper LB instead of the Hetzner LB
is_single_node_cluster = var.control_plane_count + length(keys(var.agent_nodepools)) == 1
ssh_public_key = trimspace(file(var.public_key))
# ssh_private_key is either the contents of var.private_key or null to use a ssh agent.
ssh_private_key = var.private_key == null ? null : trimspace(file(var.private_key))
# ssh_identity is not set if the private key is passed directly, but if ssh agent is used, the public key tells ssh agent which private key to use.
@ -29,7 +29,7 @@ locals {
"127.0.0.1/32",
]
base_firewall_rules = [
base_firewall_rules = concat([
# Allowing internal cluster traffic and Hetzner metadata service and cloud API IPs
{
direction = "in"
@ -133,7 +133,26 @@ locals {
"0.0.0.0/0"
]
}
]
], !local.is_single_node_cluster ? [] : [
# Allow incoming web traffic for single node clusters, because we are using k3s servicelb there,
# not an external load-balancer.
{
direction = "in"
protocol = "tcp"
port = "80"
source_ips = [
"0.0.0.0/0"
]
},
{
direction = "in"
protocol = "tcp"
port = "443"
source_ips = [
"0.0.0.0/0"
]
}
])
common_commands_install_k3s = [
"set -ex",
@ -145,9 +164,10 @@ locals {
"[ -e /etc/rancher/k3s/k3s.yaml ] && exit 0",
]
install_k3s_server = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_SKIP_START=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=server sh -"])
apply_k3s_selinux = ["/sbin/semodule -v -i /usr/share/selinux/packages/k3s.pp"]
install_k3s_agent = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_SKIP_START=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=agent sh -"])
install_k3s_server = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=server sh -"], local.apply_k3s_selinux)
install_k3s_agent = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=agent sh -"], local.apply_k3s_selinux)
agent_nodepools = merge([
for nodepool_name, nodepool_obj in var.agent_nodepools : {

13
main.tf
View File

@ -13,12 +13,22 @@ resource "hcloud_network" "k3s" {
ip_range = var.network_ipv4_range
}
# This is the default subnet to be used by the load balancer.
# It is created inside the k3s network, and the user-defined subnets below
# declare an explicit depends_on so that this one is created first.
# NOTE(review): the range is hard-coded to 10.0.0.0/16 — presumably it must lie
# inside var.network_ipv4_range and must not overlap any entry of
# var.network_ipv4_subnets; confirm against the defaults.
resource "hcloud_network_subnet" "default" {
network_id = hcloud_network.k3s.id
type = "cloud"
network_zone = var.network_region
ip_range = "10.0.0.0/16"
}
resource "hcloud_network_subnet" "subnet" {
for_each = var.network_ipv4_subnets
network_id = hcloud_network.k3s.id
type = "cloud"
network_zone = var.network_region
ip_range = each.value
depends_on = [hcloud_network_subnet.default]
}
resource "hcloud_firewall" "k3s" {
@ -46,7 +56,8 @@ resource "hcloud_placement_group" "k3s" {
}
data "hcloud_load_balancer" "traefik" {
name = "traefik"
count = local.is_single_node_cluster ? 0 : 1
name = "traefik"
depends_on = [null_resource.kustomization]
}

View File

@ -10,65 +10,4 @@ locals {
ssh_identity_file = var.private_key == null ? var.public_key : var.private_key
# shared flags for ssh to ignore host keys, to use our ssh identity file for all connections during provisioning.
ssh_args = "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i ${local.ssh_identity_file}"
microOS_install_commands = [
"set -ex",
"apt-get update",
"apt-get install -y aria2",
"aria2c --follow-metalink=mem https://download.opensuse.org/tumbleweed/appliances/openSUSE-MicroOS.x86_64-kvm-and-xen.qcow2.meta4",
"qemu-img convert -p -f qcow2 -O host_device $(ls -a | grep -ie '^opensuse.*microos.*qcow2$') /dev/sda",
"sgdisk -e /dev/sda",
"parted -s /dev/sda resizepart 4 99%",
"parted -s /dev/sda mkpart primary ext2 99% 100%",
"partprobe /dev/sda && udevadm settle && fdisk -l /dev/sda",
"mount /dev/sda4 /mnt/ && btrfs filesystem resize max /mnt && umount /mnt",
"mke2fs -L ignition /dev/sda5",
"mount /dev/sda5 /mnt",
"mkdir /mnt/ignition",
"cp /root/config.ign /mnt/ignition/config.ign",
"mkdir /mnt/combustion",
"cp /root/script /mnt/combustion/script",
"umount /mnt"
]
ignition_config = jsonencode({
ignition = {
version = "3.0.0"
}
passwd = {
users = [{
name = "root"
sshAuthorizedKeys = concat([local.ssh_public_key], var.additional_public_keys)
}]
}
storage = {
files = [
{
path = "/etc/sysconfig/network/ifcfg-eth1"
mode = 420
overwrite = true
contents = { "source" = "data:,BOOTPROTO%3D%27dhcp%27%0ASTARTMODE%3D%27auto%27" }
},
{
path = "/etc/ssh/sshd_config.d/kube-hetzner.conf"
mode = 420
overwrite = true
contents = { "source" = "data:,PasswordAuthentication%20no%0AX11Forwarding%20no%0AMaxAuthTries%202%0AAllowTcpForwarding%20no%0AAllowAgentForwarding%20no%0AAuthorizedKeysFile%20.ssh%2Fauthorized_keys" }
}
]
}
})
combustion_script = <<EOF
#!/bin/bash
sed -i 's#NETCONFIG_NIS_SETDOMAINNAME="yes"#NETCONFIG_NIS_SETDOMAINNAME="no"#g' /etc/sysconfig/network/config
sed -i 's#WAIT_FOR_INTERFACES="30"#WAIT_FOR_INTERFACES="60"#g' /etc/sysconfig/network/config
sed -i 's#CHECK_DUPLICATE_IP="yes"#CHECK_DUPLICATE_IP="no"#g' /etc/sysconfig/network/config
# combustion: network
rpm --import https://rpm.rancher.io/public.key
zypper refresh
zypper --gpg-auto-import-keys install -y https://rpm.rancher.io/k3s/stable/common/microos/noarch/k3s-selinux-0.4-1.sle.noarch.rpm
udevadm settle || true
EOF
}

View File

@ -8,10 +8,20 @@ resource "hcloud_server" "server" {
ssh_keys = var.ssh_keys
firewall_ids = var.firewall_ids
placement_group_id = var.placement_group_id
user_data = data.template_cloudinit_config.config.rendered
labels = var.labels
# Prevent destroying the whole cluster if the user changes
# any of the attributes that would force the servers to be recreated.
lifecycle {
ignore_changes = [
location,
ssh_keys,
user_data,
]
}
connection {
user = "root"
private_key = local.ssh_private_key
@ -19,30 +29,21 @@ resource "hcloud_server" "server" {
host = self.ipv4_address
}
provisioner "file" {
content = local.ignition_config
destination = "/root/config.ign"
}
# Combustion script file to install k3s-selinux
provisioner "file" {
content = local.combustion_script
destination = "/root/script"
}
# Install MicroOS
provisioner "remote-exec" {
inline = local.microOS_install_commands
inline = [
"set -ex",
"apt-get update",
"apt-get install -y aria2",
"aria2c --follow-metalink=mem https://download.opensuse.org/tumbleweed/appliances/openSUSE-MicroOS.x86_64-OpenStack-Cloud.qcow2.meta4",
"qemu-img convert -p -f qcow2 -O host_device $(ls -a | grep -ie '^opensuse.*microos.*qcow2$') /dev/sda",
]
}
# Issue a reboot command
provisioner "local-exec" {
command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3"
}
# Wait for MicroOS to reboot and be ready
# Issue a reboot command and wait for MicroOS to reboot and be ready
provisioner "local-exec" {
command = <<-EOT
ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3
until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null
do
echo "Waiting for MicroOS to reboot and become available..."
@ -51,16 +52,25 @@ resource "hcloud_server" "server" {
EOT
}
# Install k3s-selinux (compatible version)
provisioner "remote-exec" {
inline = [
# Disable automatic reboot (after transactional updates), and configure the reboot method as kured
"set -ex",
"rebootmgrctl set-strategy off",
"echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf",
# set the hostname
"hostnamectl set-hostname ${self.name}"
"transactional-update pkg install -y k3s-selinux"
]
}
# Issue a reboot command and wait for MicroOS to reboot and be ready
provisioner "local-exec" {
command = <<-EOT
ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3
until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null
do
echo "Waiting for MicroOS to reboot and become available..."
sleep 3
done
EOT
}
}
resource "hcloud_server_network" "server" {
@ -68,3 +78,21 @@ resource "hcloud_server_network" "server" {
server_id = hcloud_server.server.id
subnet_id = var.ipv4_subnet_id
}
# Renders the cloud-init user data that is passed to the server via user_data.
# The rendered payload is gzip-compressed and base64-encoded.
data "template_cloudinit_config" "config" {
gzip = true
base64_encode = true
# Main cloud-config configuration file.
part {
filename = "init.cfg"
content_type = "text/cloud-config"
content = templatefile(
"${path.module}/templates/userdata.yaml.tpl",
{
# Values substituted into the template: the node's hostname and the list of
# authorized SSH public keys (the main key followed by any additional ones).
hostname = var.name
sshAuthorizedKeys = concat([local.ssh_public_key], var.additional_public_keys)
}
)
}
}

View File

@ -0,0 +1,58 @@
#cloud-config
write_files:
# Configure the private network interface
- content: |
BOOTPROTO='dhcp'
STARTMODE='auto'
path: /etc/sysconfig/network/ifcfg-eth1
# Disable ssh password authentication
- content: |
PasswordAuthentication no
X11Forwarding no
MaxAuthTries 2
AllowTcpForwarding no
AllowAgentForwarding no
AuthorizedKeysFile .ssh/authorized_keys
path: /etc/ssh/sshd_config.d/kube-hetzner.conf
# Set reboot method as "kured"
- content: |
REBOOT_METHOD=kured
path: /etc/transactional-update.conf
# Add ssh authorized keys
ssh_authorized_keys:
%{ for key in sshAuthorizedKeys ~}
- ${key}
%{ endfor ~}
# Resize /var, not /, as that's the last partition in MicroOS image.
growpart:
devices: ["/var"]
# Make sure the hostname is set correctly
hostname: ${hostname}
preserve_hostname: true
runcmd:
# As above, make sure the hostname is not reset
- [sed, '-i', 's/NETCONFIG_NIS_SETDOMAINNAME="yes"/NETCONFIG_NIS_SETDOMAINNAME="no"/g', /etc/sysconfig/network/config]
- [sed, '-i', 's/DHCLIENT_SET_HOSTNAME="yes"/DHCLIENT_SET_HOSTNAME="no"/g', /etc/sysconfig/network/dhcp]
# We set Cloudflare DNS servers, followed by Google as a backup
- [sed, '-i', 's/NETCONFIG_DNS_STATIC_SERVERS=""/NETCONFIG_DNS_STATIC_SERVERS="1.1.1.1 1.0.0.1 8.8.8.8"/g', /etc/sysconfig/network/config]
# Bounds the amount of logs that can survive on the system
- [sed, '-i', 's/#SystemMaxUse=/SystemMaxUse=3G/g', /etc/systemd/journald.conf]
- [sed, '-i', 's/#MaxRetentionSec=/MaxRetentionSec=1week/g', /etc/systemd/journald.conf]
# Reduces the default snapshot limits: NUMBER_LIMIT from "2-10" to 4, and NUMBER_LIMIT_IMPORTANT from "4-10" to 3
- [sed, '-i', 's/NUMBER_LIMIT="2-10"/NUMBER_LIMIT="4"/g', /etc/snapper/configs/root]
- [sed, '-i', 's/NUMBER_LIMIT_IMPORTANT="4-10"/NUMBER_LIMIT_IMPORTANT="3"/g', /etc/snapper/configs/root]
# Disables unneeded services
- [systemctl, disable, '--now', 'rebootmgr.service']

View File

@ -12,5 +12,9 @@ terraform {
source = "tenstad/remote"
version = "~> 0.0.23"
}
template = {
source = "hashicorp/template"
version = "~> 2.2.0"
}
}
}

View File

@ -12,7 +12,7 @@ output "agents_public_ipv4" {
output "load_balancer_public_ipv4" {
description = "The public IPv4 address of the Hetzner load balancer"
value = data.hcloud_load_balancer.traefik.ipv4
value = local.is_single_node_cluster ? module.control_planes[0].ipv4_address : data.hcloud_load_balancer.traefik[0].ipv4
}
output "kubeconfig_file" {

View File

@ -1,7 +1,15 @@
# You need to replace these
# Only the first values starting with a * are obligatory, the rest can remain with their default values, or you
# could adapt them to your needs.
#
# Note that some values, notably "location" and "public_key", have no effect after the initial cluster has been set up.
# This keeps terraform from re-provisioning all nodes at once, which would lose data. If you want to update
# those values, you should instead change them here and then manually re-provision each node one by one. Grep for "lifecycle".
# * Your Hetzner project API token
hcloud_token = "xxxxxxxxxxxxxxxxxxYYYYYYYYYYYYYYYYYYYzzzzzzzzzzzzzzzzzzzzz"
# * Your public key
public_key = "/home/username/.ssh/id_ed25519.pub"
# Must be "private_key = null" when you want to use ssh-agent, for a Yubikey like device auth or an SSH key-pair with passphrase
# * Your private key, must be "private_key = null" when you want to use ssh-agent, for a Yubikey like device auth or an SSH key-pair with passphrase
private_key = "/home/username/.ssh/id_ed25519"
# These can be customized, or left with the default values
@ -10,9 +18,6 @@ private_key = "/home/username/.ssh/id_ed25519"
location = "fsn1" # change to `ash` for us-east Ashburn, Virginia location
network_region = "eu-central" # change to `us-east` if location is ash
# It's best to leave the network range as is, unless you know what you are doing. The default is "10.0.0.0/8".
# network_ipv4_range = "10.0.0.0/8"
# You can have as many subnets as you want (preferably in the form of 10.X.0.0/16),
# their primary use is to logically separate the nodes.
# The control_plane network is mandatory.
@ -25,13 +30,14 @@ network_ipv4_subnets = {
# At least 3 server nodes is recommended for HA, otherwise you need to turn off automatic upgrade (see ReadMe).
# As per rancher docs, it must be always an odd number, never even! See https://rancher.com/docs/k3s/latest/en/installation/ha-embedded/
# For instance, 1 is ok (non-HA), 2 not ok, 3 is ok (becomes HA).
control_plane_count = 3
control_plane_count = 3
# The type of control plane nodes, see https://www.hetzner.com/cloud, the minimum instance supported is cpx11 (just a few cents more than cx11)
control_plane_server_type = "cpx11"
# As for the agent nodepools, below is just an example. If you do not want several nodepools, just use one
# and change its name to whatever you want (it need not be "agent-big" or "agent-small"); also give each nodepool the subnet you prefer.
# For single node clusters set this equal to {}
agent_nodepools = {
agent-big = {
server_type = "cpx21",
@ -48,6 +54,11 @@ agent_nodepools = {
# That will depend on how much load you want it to handle, see https://www.hetzner.com/cloud/load-balancer
load_balancer_type = "lb11"
### The following values are fully optional
# It's best to leave the network range as is, unless you know what you are doing. The default is "10.0.0.0/8".
# network_ipv4_range = "10.0.0.0/8"
# If you want to use a specific Hetzner CCM and CSI version, set them below, otherwise leave as is for the latest versions
# hetzner_ccm_version = ""
# hetzner_csi_version = ""
@ -57,6 +68,7 @@ load_balancer_type = "lb11"
# traefik_acme_email = "mail@example.com"
# If you want to allow non-control-plane workloads to run on the control-plane nodes set "true" below. The default is "false".
# Also good for single node clusters.
# allow_scheduling_on_control_plane = true
# If you want to disable automatic upgrade of k3s, you can set this to false, default is "true".

View File

@ -65,6 +65,7 @@ variable "load_balancer_disable_ipv6" {
variable "agent_nodepools" {
description = "Number of agent nodes."
type = map(any)
default = {}
}
variable "hetzner_ccm_version" {