Merge pull request #149 from kube-hetzner/control-plane-pool

Control plane pool
This commit is contained in:
Karim Naufal 2022-04-12 18:52:02 +02:00 committed by GitHub
commit 3578d3b6a5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 121 additions and 82 deletions

View File

@ -33,9 +33,10 @@ _Please note that we are not affiliated to Hetzner, this is just an open source
- Maintenance free with auto-upgrade to the latest version of MicroOS and k3s.
- Proper use of the Hetzner private network to minimize latency and remove the need for encryption.
- Automatic HA with the default setting of three control-plane nodes and two agent nodepools.
- Automatic HA with the default setting of three control-plane nodes and two agent nodes.
- Node pools for both control-plane and agent nodes. Different locations possible (super-HA).
- Ability to add or remove as many nodes as you want while the cluster stays running.
- Automatic Traefik ingress controller attached to a Hetzner load balancer with proxy protocol turned on.
- Traefik ingress controller attached to a Hetzner load balancer with proxy protocol turned on.
- Tons of flexible configuration options to suit all needs.
_It uses Terraform to deploy as it's easy to use, and Hetzner provides a great [Hetzner Terraform Provider](https://registry.terraform.io/providers/hetznercloud/hcloud/latest/docs)._
@ -169,14 +170,9 @@ spec:
<summary>Single-node cluster</summary>
Running a development cluster on a single node, without any high-availability is possible as well.
In this case, we don't deploy an external load-balancer, but use [k3s service load balancer](https://rancher.com/docs/k3s/latest/en/networking/#service-load-balancer) on the host itself and open up port 80 & 443 in the firewall.
Running a development cluster on a single node, without any high-availability is possible as well. You need one control plane nodepool with a count of 1, and one agent nodepool with a count of 0.
``` terraform
control_plane_count = 1
allow_scheduling_on_control_plane = true
agent_nodepools = []
```
In this case, we don't deploy an external load-balancer, but use the default [k3s service load balancer](https://rancher.com/docs/k3s/latest/en/networking/#service-load-balancer) on the host itself and open up port 80 & 443 in the firewall (done automatically).
</details>

View File

@ -12,11 +12,11 @@ module "agents" {
placement_group_id = hcloud_placement_group.k3s.id
location = each.value.location
server_type = each.value.server_type
ipv4_subnet_id = hcloud_network_subnet.subnet[[for i, v in var.agent_nodepools : i if v.name == each.value.nodepool_name][0] + 2].id
ipv4_subnet_id = hcloud_network_subnet.subnet[[for i, v in var.agent_nodepools : i if v.name == each.value.nodepool_name][0] + length(var.control_plane_nodepools) + 1].id
# We leave some room so that 100 eventual Hetzner LBs can be created perfectly safely
# It leaves the subnet with 254 x 254 - 100 = 64416 IPs to use, so probably enough.
private_ipv4 = cidrhost(local.network_ipv4_subnets[[for i, v in var.agent_nodepools : i if v.name == each.value.nodepool_name][0] + 2], each.value.index + 101)
private_ipv4 = cidrhost(local.network_ipv4_subnets[[for i, v in var.agent_nodepools : i if v.name == each.value.nodepool_name][0] + length(var.control_plane_nodepools) + 1], each.value.index + 101)
labels = {
"provisioner" = "terraform",
@ -46,7 +46,7 @@ resource "null_resource" "agents" {
provisioner "file" {
content = yamlencode({
node-name = module.agents[each.key].name
server = "https://${module.control_planes[0].private_ipv4_address}:6443"
server = "https://${module.control_planes[keys(module.control_planes)[0]].private_ipv4_address}:6443"
token = random_password.k3s_token.result
kubelet-arg = "cloud-provider=external"
flannel-iface = "eth1"

View File

@ -1,21 +1,22 @@
module "control_planes" {
source = "./modules/host"
count = var.control_plane_count
name = "${var.use_cluster_name_in_node_name ? "${var.cluster_name}-" : ""}control-plane"
for_each = local.control_plane_nodepools
name = "${var.use_cluster_name_in_node_name ? "${var.cluster_name}-" : ""}${each.value.nodepool_name}"
ssh_keys = [hcloud_ssh_key.k3s.id]
public_key = var.public_key
private_key = var.private_key
additional_public_keys = var.additional_public_keys
firewall_ids = [hcloud_firewall.k3s.id]
placement_group_id = hcloud_placement_group.k3s.id
location = var.location
server_type = var.control_plane_server_type
ipv4_subnet_id = hcloud_network_subnet.subnet[1].id
location = each.value.location
server_type = each.value.server_type
ipv4_subnet_id = hcloud_network_subnet.subnet[[for i, v in var.control_plane_nodepools : i if v.name == each.value.nodepool_name][0] + 1].id
# We leave some room so that 100 eventual Hetzner LBs can be created perfectly safely
# It leaves the subnet with 254 x 254 - 100 = 64416 IPs to use, so probably enough.
private_ipv4 = cidrhost(local.network_ipv4_subnets[1], count.index + 101)
private_ipv4 = cidrhost(local.network_ipv4_subnets[[for i, v in var.control_plane_nodepools : i if v.name == each.value.nodepool_name][0] + 1], each.value.index + 101)
labels = {
"provisioner" = "terraform",
@ -28,33 +29,33 @@ module "control_planes" {
}
resource "null_resource" "control_planes" {
count = var.control_plane_count
for_each = local.control_plane_nodepools
triggers = {
control_plane_id = module.control_planes[count.index].id
control_plane_id = module.control_planes[each.key].id
}
connection {
user = "root"
private_key = local.ssh_private_key
agent_identity = local.ssh_identity
host = module.control_planes[count.index].ipv4_address
host = module.control_planes[each.key].ipv4_address
}
# Generating k3s server config file
provisioner "file" {
content = yamlencode({
node-name = module.control_planes[count.index].name
server = "https://${element(module.control_planes.*.private_ipv4_address, count.index > 0 ? 0 : 1)}:6443"
node-name = module.control_planes[each.key].name
server = length(module.control_planes) == 1 ? null : "https://${module.control_planes[each.key].private_ipv4_address == module.control_planes[keys(module.control_planes)[0]].private_ipv4_address ? module.control_planes[keys(module.control_planes)[1]].private_ipv4_address : module.control_planes[keys(module.control_planes)[0]].private_ipv4_address}:6443"
token = random_password.k3s_token.result
disable-cloud-controller = true
disable = local.disable_extras
flannel-iface = "eth1"
kubelet-arg = "cloud-provider=external"
node-ip = module.control_planes[count.index].private_ipv4_address
advertise-address = module.control_planes[count.index].private_ipv4_address
node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"]
node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : []
node-ip = module.control_planes[each.key].private_ipv4_address
advertise-address = module.control_planes[each.key].private_ipv4_address
node-label = each.value.labels
node-taint = each.value.taints
})
destination = "/tmp/config.yaml"
}

16
init.tf
View File

@ -3,23 +3,23 @@ resource "null_resource" "first_control_plane" {
user = "root"
private_key = local.ssh_private_key
agent_identity = local.ssh_identity
host = module.control_planes[0].ipv4_address
host = module.control_planes[keys(module.control_planes)[0]].ipv4_address
}
# Generating k3s master config file
provisioner "file" {
content = yamlencode({
node-name = module.control_planes[0].name
node-name = module.control_planes[keys(module.control_planes)[0]].name
token = random_password.k3s_token.result
cluster-init = true
disable-cloud-controller = true
disable = local.disable_extras
flannel-iface = "eth1"
kubelet-arg = "cloud-provider=external"
node-ip = module.control_planes[0].private_ipv4_address
advertise-address = module.control_planes[0].private_ipv4_address
node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"]
node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : []
node-ip = module.control_planes[keys(module.control_planes)[0]].private_ipv4_address
advertise-address = module.control_planes[keys(module.control_planes)[0]].private_ipv4_address
node-taint = local.control_plane_nodepools[keys(module.control_planes)[0]].taints
node-label = local.control_plane_nodepools[keys(module.control_planes)[0]].labels
})
destination = "/tmp/config.yaml"
}
@ -66,7 +66,7 @@ resource "null_resource" "kustomization" {
user = "root"
private_key = local.ssh_private_key
agent_identity = local.ssh_identity
host = module.control_planes[0].ipv4_address
host = module.control_planes[keys(module.control_planes)[0]].ipv4_address
}
# Upload kustomization.yaml, containing Hetzner CSI & CSM, as well as kured.
@ -97,7 +97,7 @@ resource "null_resource" "kustomization" {
name = "${var.cluster_name}-traefik"
load_balancer_disable_ipv6 = var.load_balancer_disable_ipv6
load_balancer_type = var.load_balancer_type
location = var.location
location = var.load_balancer_location
traefik_acme_tls = var.traefik_acme_tls
traefik_acme_email = var.traefik_acme_email
traefik_additional_options = var.traefik_additional_options

View File

@ -1,7 +1,7 @@
data "remote_file" "kubeconfig" {
conn {
host = module.control_planes[0].ipv4_address
host = module.control_planes[keys(module.control_planes)[0]].ipv4_address
port = 22
user = "root"
private_key = local.ssh_private_key
@ -13,7 +13,7 @@ data "remote_file" "kubeconfig" {
}
locals {
kubeconfig_external = replace(data.remote_file.kubeconfig.content, "127.0.0.1", module.control_planes[0].ipv4_address)
kubeconfig_external = replace(data.remote_file.kubeconfig.content, "127.0.0.1", module.control_planes[keys(module.control_planes)[0]].ipv4_address)
kubeconfig_parsed = yamldecode(local.kubeconfig_external)
kubeconfig_data = {
host = local.kubeconfig_parsed["clusters"][0]["cluster"]["server"]

View File

@ -1,6 +1,6 @@
locals {
# if we are in a single-node cluster config, we use the default klipper lb instead of Hetzner LB
is_single_node_cluster = var.control_plane_count + sum(concat([for v in var.agent_nodepools : v.count], [0])) == 1
is_single_node_cluster = sum(concat([for v in var.control_plane_nodepools : v.count], [0])) + sum(concat([for v in var.agent_nodepools : v.count], [0])) == 1
ssh_public_key = trimspace(file(var.public_key))
# ssh_private_key is either the contents of var.private_key or null to use a ssh agent.
ssh_private_key = var.private_key == null ? null : trimspace(file(var.private_key))
@ -169,16 +169,30 @@ locals {
install_k3s_server = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=server sh -"], local.apply_k3s_selinux)
install_k3s_agent = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=agent sh -"], local.apply_k3s_selinux)
agent_nodepools = merge([
for nodepool_obj in var.agent_nodepools : {
for index in range(nodepool_obj.count) :
format("%s-%s", nodepool_obj.name, index) => {
control_plane_nodepools = merge([
for pool_index, nodepool_obj in var.control_plane_nodepools : {
for node_index in range(nodepool_obj.count) :
format("%s-%s-%s", pool_index, node_index, nodepool_obj.name) => {
nodepool_name : nodepool_obj.name,
server_type : nodepool_obj.server_type,
location : nodepool_obj.location,
labels : concat(local.default_labels, nodepool_obj.labels),
labels : concat(local.default_control_plane_labels, nodepool_obj.labels),
taints : concat(local.default_control_plane_taints, nodepool_obj.taints),
index : node_index
}
}
]...)
agent_nodepools = merge([
for pool_index, nodepool_obj in var.agent_nodepools : {
for node_index in range(nodepool_obj.count) :
format("%s-%s-%s", pool_index, node_index, nodepool_obj.name) => {
nodepool_name : nodepool_obj.name,
server_type : nodepool_obj.server_type,
location : nodepool_obj.location,
labels : concat(local.default_agent_labels, nodepool_obj.labels),
taints : nodepool_obj.taints,
index : index
index : node_index
}
}
]...)
@ -188,12 +202,17 @@ locals {
# The first two subnets are respectively the default subnet 10.0.0.0/16, used for potentially anything, and 10.1.0.0/16, used for control plane nodes.
# The rest of the subnets are for the agent nodes in each nodepool.
network_ipv4_subnets = [for index in range(length(var.agent_nodepools) + 2) : cidrsubnet(local.network_ipv4_cidr, 8, index)]
network_ipv4_subnets = [for index in range(length(var.control_plane_nodepools) + length(var.agent_nodepools) + 1) : cidrsubnet(local.network_ipv4_cidr, 8, index)]
# disable k3s extras
disable_extras = concat(["local-storage"], local.is_single_node_cluster ? [] : ["servicelb"], var.traefik_enabled ? [] : ["traefik"], var.metrics_server_enabled ? [] : ["metrics-server"])
# Default k3s node labels
default_labels = concat([], var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [])
default_agent_labels = concat([], var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [])
default_control_plane_labels = concat([], var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [])
allow_scheduling_on_control_plane = local.is_single_node_cluster ? true : var.allow_scheduling_on_control_plane
# Default k3s node taints
default_control_plane_taints = concat([], local.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"])
}

View File

@ -4,7 +4,9 @@ output "cluster_name" {
}
output "control_planes_public_ipv4" {
value = module.control_planes.*.ipv4_address
value = [
for obj in module.control_planes : obj.ipv4_address
]
description = "The public IPv4 addresses of the controlplane server."
}
@ -17,7 +19,9 @@ output "agents_public_ipv4" {
output "load_balancer_public_ipv4" {
description = "The public IPv4 address of the Hetzner load balancer"
value = local.is_single_node_cluster ? module.control_planes[0].ipv4_address : var.traefik_enabled == false ? null : data.hcloud_load_balancer.traefik[0].ipv4
value = local.is_single_node_cluster ? [
for obj in module.control_planes : obj.ipv4_address
][0] : var.traefik_enabled == false ? null : data.hcloud_load_balancer.traefik[0].ipv4
}
output "kubeconfig_file" {

View File

@ -14,37 +14,58 @@ private_key = "/home/username/.ssh/id_ed25519"
# These can be customized, or left with the default values
# For Hetzner locations see https://docs.hetzner.com/general/others/data-centers-and-connection/
location = "fsn1" # change to `ash` for us-east Ashburn, Virginia location
network_region = "eu-central" # change to `us-east` if location is ash
# At least 3 server nodes is recommended for HA, otherwise you need to turn off automatic upgrade (see ReadMe).
# For the control-planes, at least 3 nodes is recommended for HA, otherwise you need to turn off automatic upgrade (see ReadMe).
# As per rancher docs, it must be always an odd number, never even! See https://rancher.com/docs/k3s/latest/en/installation/ha-embedded/
# For instance, 1 is ok (non-HA), 2 not ok, 3 is ok (becomes HA).
control_plane_count = 3
# For instance, 1 is ok (non-HA), 2 not ok, 3 is ok (becomes HA). It does not matter if they are in the same nodepool or not! So they can be in different locations, and of different types.
# The type of control plane nodes, see https://www.hetzner.com/cloud, the minimum instance supported is cpx11 (just a few cents more than cx11)
control_plane_server_type = "cpx11"
# Of course, you can choose any number of nodepools you want, with the location you want. The only constraint on the location is that you need to stay in the same network region, basically Europe or US, see above.
# For the server type of the nodes, the minimum instance supported is cpx11 (just a few cents more than cx11), see https://www.hetzner.com/cloud.
# As for the agent nodepools, below is just an example, if you do not want nodepools, just use one,
# and change the name to what you want, it need not be "agent-big" or "agent-small", also give them the subnet prefer.
# For single node clusters set this equal to [] or just set the counts to 0.
# IMPORTANT: Once the cluster is created, you can change the count, and even set it to 0, but do not remove a nodepool from the list.
# You can add others at the end of the list if you want.
# For Hetzner locations see https://docs.hetzner.com/general/others/data-centers-and-connection/
# For Hetzner server types see https://www.hetzner.com/cloud
agent_nodepools = [
# IMPORTANT: Once the cluster is created, you can change nodepool count, and even set it to 0 (in the case of the first control-plane nodepool, the minimum is 1),
# You can also rename it (if the count is taken to 0), but do not remove a nodepool from the list after the cluster is created. This is due to how IPs are allocated.
# Once the cluster is initialized, you cannot add more control plane nodepools. You can freely add other agent nodepools at the end of the list if you want!
# Also, before decreasing the count of any nodepool to 0, it's important to drain and cordon the nodes in question, otherwise it will leave your cluster in a bad state.
# Before initializing the cluster, you can change all parameters and add or remove any nodepools.
# If you want to have a single node cluster, just have 1 control plane nodepool with a count of 1, and one agent nodepool with a count of 0.
# Example below:
control_plane_nodepools = [
{
name = "agent-small",
name = "control-plane-fsn1",
server_type = "cpx11",
location = "fsn1",
labels = [],
taints = [],
count = 2
},
{
name = "control-plane-nbg1",
server_type = "cpx11",
location = "nbg1",
labels = [],
taints = [],
count = 1
}
]
agent_nodepools = [
{
name = "agent-small",
server_type = "cpx11",
location = "fsn1",
labels = [],
taints = [],
count = 1
},
{
name = "agent-large",
server_type = "cpx21",
location = "fsn1",
location = "nbg1",
labels = [],
taints = [],
count = 1
@ -66,6 +87,8 @@ agent_nodepools = [
# That will depend on how much load you want it to handle, see https://www.hetzner.com/cloud/load-balancer
load_balancer_type = "lb11"
load_balancer_location = "fsn1"
### The following values are fully optional
# If you want to use a specific Hetzner CCM and CSI version, set them below, otherwise leave as is for the latest versions
@ -84,7 +107,7 @@ load_balancer_type = "lb11"
# metrics_server_enabled = false
# If you want to allow non-control-plane workloads to run on the control-plane nodes set "true" below. The default is "false".
# Also good for single node clusters.
# True by default for single node clusters.
# allow_scheduling_on_control_plane = true
# If you want to disable automatic upgrade of k3s, you can set this to false, default is "true".

View File

@ -20,26 +20,16 @@ variable "additional_public_keys" {
default = []
}
variable "location" {
description = "Default server location"
type = string
}
variable "network_region" {
description = "Default region for network"
type = string
}
variable "control_plane_server_type" {
description = "Default control plane server type"
variable "load_balancer_location" {
description = "Default load balancer location"
type = string
}
variable "control_plane_count" {
description = "Number of control plane nodes."
type = number
}
variable "load_balancer_type" {
description = "Default load balancer server type"
type = string
@ -51,6 +41,12 @@ variable "load_balancer_disable_ipv6" {
default = false
}
# Control plane nodepool definitions. Each entry is read for the fields
# name, server_type, location, labels, taints and count when the per-node
# map is built in locals.
variable "control_plane_nodepools" {
  description = "List of control plane nodepools (each with name, server_type, location, labels, taints and count)."
  type        = list(any)
  default     = []
}
variable "agent_nodepools" {
description = "Number of agent nodes."
type = list(any)