diff --git a/.gitignore b/.gitignore
index 5f02e99..7d77aef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,6 @@ kubeconfig.yaml-e
 terraform.tfvars
 plans-custom.yaml
 traefik-custom.yaml
-kured-custom.yaml
\ No newline at end of file
+kured-custom.yaml
+kustomization.yaml
+kustomization_backup.yaml
\ No newline at end of file
diff --git a/README.md b/README.md
index 0af61ae..e7a8518 100644
--- a/README.md
+++ b/README.md
@@ -114,25 +114,38 @@ Otherwise, it's essential to turn off automatic OS upgrades (k3s can continue to
 
 ## Automatic Upgrade
 
+### The Default Setting
+
 By default, MicroOS gets upgraded automatically on each node and reboots safely via [Kured](https://github.com/weaveworks/kured) installed in the cluster.
 
 As for k3s, it also automatically upgrades thanks to Rancher's [system upgrade controller](https://github.com/rancher/system-upgrade-controller). By default, it follows the k3s `stable` channel, but you can also change to the `latest` one if needed or specify a target version to upgrade to via the upgrade plan.
 
 You can copy and modify the [one in the templates](https://github.com/kube-hetzner/kube-hetzner/blob/master/templates/plans.yaml.tpl) for that! More on the subject in [k3s upgrades](https://rancher.com/docs/k3s/latest/en/upgrades/basic/).
 
-_If you wish to turn off automatic MicroOS upgrades on a specific node, you need to ssh into it and issue the following command:_
+### Turning Off Automatic Upgrade
+
+_If you wish to turn off automatic MicroOS upgrades (important if you are not running an HA setup, which requires at least 3 control-plane nodes), you need to ssh into each node and issue the following command:_
 
 ```sh
 systemctl --now disable transactional-update.timer
 ```
 
-_To turn off k3s upgrades, you can either set the `k3s_upgrade=true` label in the node you want or set it to `false`. To remove it, apply:_
+_To turn off k3s upgrades, you can either remove the `k3s_upgrade=true` label or set it to `false`. This needs to happen for all the nodes too! To remove it, apply:_
 
 ```sh
 kubectl -n system-upgrade label node <node-name> k3s_upgrade-
 ```
 
+### Individual Components Upgrade
+
+Rarely needed, but can be handy in the long run. During the installation, we automatically download a backup of the kustomization to a `kustomization_backup.yaml` file. You will find it next to your `kubeconfig.yaml` at the root of your project.
+
+1. First, create a duplicate of that file and name it `kustomization.yaml`, keeping the original intact in case you need to restore the old config.
+2. Edit the `kustomization.yaml` file: at the very bottom, you will find the links to the different source files; grab the latest version of each on GitHub and replace the old links.
+3. If present, remove any local reference to `traefik_config.yaml`, as Traefik is updated automatically by the system upgrade controller.
+4. Apply the updated `kustomization.yaml` with `kubectl apply -k ./`.
+
 ## Examples
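The four-step component upgrade described in the README hunk above boils down to a few commands. Here is a minimal sketch, assuming you run it from the project root (where `kubeconfig.yaml` and `kustomization_backup.yaml` live) and that `$EDITOR` stands in for whatever editor you use:

```sh
# Step 1: duplicate the backup, keeping the original intact.
cp kustomization_backup.yaml kustomization.yaml
# Steps 2 and 3: bump the source links at the bottom of the file to the latest
# GitHub versions, and drop any local traefik_config.yaml reference.
$EDITOR kustomization.yaml
# Step 4: apply the updated kustomization.
kubectl apply -k ./
```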
diff --git a/init.tf b/init.tf
index dd72fbb..54534f6 100644
--- a/init.tf
+++ b/init.tf
@@ -40,7 +40,7 @@ resource "null_resource" "first_control_plane" {
     inline = [
       "systemctl start k3s",
      # prepare the post_install directory
-      "mkdir -p /tmp/post_install",
+      "mkdir -p /var/post_install",
      # wait for k3s to become ready
      <<-EOT
      timeout 120 bash <<EOF
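The hunk above ends on the opening of a heredoc that waits for k3s to become ready; the heredoc body itself is not part of the changed lines. For illustration only, a `timeout`-bounded wait of that shape could look like this (a hypothetical sketch, not the verbatim contents of init.tf):

```sh
# Hypothetical readiness wait: poll until the k3s kubeconfig exists, giving up
# after 120 seconds (timeout then kills the subshell).
timeout 120 bash <<EOF
  until [ -e /etc/rancher/k3s/k3s.yaml ]; do
    echo "Waiting for the k3s server to start..."
    sleep 2
  done
EOF
```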
diff --git a/locals.tf b/locals.tf
--- a/locals.tf
+++ b/locals.tf
@@ ... @@ locals {
+  control_plane_nodes = merge([
+    for pool_index, nodepool_obj in var.control_plane_nodepools : {
+      for node_index in range(nodepool_obj.count) :
+      format("%s-%s-%s", pool_index, node_index, nodepool_obj.name) => {
+        nodepool_name : nodepool_obj.name,
+        server_type : nodepool_obj.server_type,
+        location : nodepool_obj.location,
+        labels : concat(local.default_control_plane_labels, nodepool_obj.labels),
+        taints : concat(local.default_control_plane_taints, nodepool_obj.taints),
+        index : node_index
+      }
+    }
+  ]...)
+
+  agent_nodes = merge([
+    for pool_index, nodepool_obj in var.agent_nodepools : {
+      for node_index in range(nodepool_obj.count) :
+      format("%s-%s-%s", pool_index, node_index, nodepool_obj.name) => {
+        nodepool_name : nodepool_obj.name,
+        server_type : nodepool_obj.server_type,
+        location : nodepool_obj.location,
+        labels : concat(local.default_agent_labels, nodepool_obj.labels),
+        taints : nodepool_obj.taints,
+        index : node_index
+      }
+    }
+  ]...)
+
+  # The main network cidr that all subnets will be created upon
+  network_ipv4_cidr = "10.0.0.0/8"
+
+  # The first two subnets are respectively the default subnet 10.0.0.0/16, used for potentially anything, and 10.1.0.0/16, used for control plane nodes.
+  # The rest of the subnets are for the agent nodes in each nodepool.
+  network_ipv4_subnets = [for index in range(256) : cidrsubnet(local.network_ipv4_cidr, 8, index)]
+
+  # If we are in a single-node cluster config, we use the default Klipper LB instead of the Hetzner LB.
+  control_plane_count    = sum([for v in var.control_plane_nodepools : v.count])
+  agent_count            = sum([for v in var.agent_nodepools : v.count])
+  is_single_node_cluster = local.control_plane_count + local.agent_count == 1
+
+  # disable k3s extras
+  disable_extras = concat(["local-storage"], local.is_single_node_cluster ? [] : ["servicelb"], var.traefik_enabled ? [] : ["traefik"], var.metrics_server_enabled ? [] : ["metrics-server"])
+
+  # Default k3s node labels
+  default_agent_labels         = concat([], var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [])
+  default_control_plane_labels = concat([], var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [])
+
+  allow_scheduling_on_control_plane = local.is_single_node_cluster ? true : var.allow_scheduling_on_control_plane
+
+  # Default k3s node taints
+  default_control_plane_taints = concat([], local.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"])
+
   # The following IPs are important to be whitelisted because they communicate with Hetzner services and enable the CCM and CSI to work properly.
   # Source https://github.com/hetznercloud/csi-driver/issues/204#issuecomment-848625566
   hetzner_metadata_service_ipv4 = "169.254.169.254/32"
@@ -156,66 +218,4 @@ locals {
      ]
    }
  ])
-
-  common_commands_install_k3s = [
-    "set -ex",
-    # prepare the k3s config directory
-    "mkdir -p /etc/rancher/k3s",
-    # move the config file into place
-    "mv /tmp/config.yaml /etc/rancher/k3s/config.yaml",
-    # if the server has already been initialized just stop here
-    "[ -e /etc/rancher/k3s/k3s.yaml ] && exit 0",
-  ]
-
-  apply_k3s_selinux = ["/sbin/semodule -v -i /usr/share/selinux/packages/k3s.pp"]
-
-  install_k3s_server = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=server sh -"], local.apply_k3s_selinux)
-  install_k3s_agent = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=agent sh -"], local.apply_k3s_selinux)
-
-  control_plane_nodes = merge([
-    for pool_index, nodepool_obj in var.control_plane_nodepools : {
-      for node_index in range(nodepool_obj.count) :
-      format("%s-%s-%s", pool_index, node_index, nodepool_obj.name) => {
-        nodepool_name : nodepool_obj.name,
-        server_type : nodepool_obj.server_type,
-        location : nodepool_obj.location,
-        labels : concat(local.default_control_plane_labels, nodepool_obj.labels),
-        taints : concat(local.default_control_plane_taints, nodepool_obj.taints),
-        index : node_index
-      }
-    }
-  ]...)
-
-  agent_nodes = merge([
-    for pool_index, nodepool_obj in var.agent_nodepools : {
-      for node_index in range(nodepool_obj.count) :
-      format("%s-%s-%s", pool_index, node_index, nodepool_obj.name) => {
-        nodepool_name : nodepool_obj.name,
-        server_type : nodepool_obj.server_type,
-        location : nodepool_obj.location,
-        labels : concat(local.default_agent_labels, nodepool_obj.labels),
-        taints : nodepool_obj.taints,
-        index : node_index
-      }
-    }
-  ]...)
-
-  # The main network cidr that all subnets will be created upon
-  network_ipv4_cidr = "10.0.0.0/8"
-
-  # The first two subnets are respectively the default subnet 10.0.0.0/16 use for potientially anything and 10.1.0.0/16 used for control plane nodes.
-  # the rest of the subnets are for agent nodes in each nodepools.
-  network_ipv4_subnets = [for index in range(256) : cidrsubnet(local.network_ipv4_cidr, 8, index)]
-
-  # disable k3s extras
-  disable_extras = concat(["local-storage"], local.is_single_node_cluster ? [] : ["servicelb"], var.traefik_enabled ? [] : ["traefik"], var.metrics_server_enabled ? [] : ["metrics-server"])
-
-  # Default k3s node labels
-  default_agent_labels = concat([], var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [])
-  default_control_plane_labels = concat([], var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [])
-
-  allow_scheduling_on_control_plane = local.is_single_node_cluster ? true : var.allow_scheduling_on_control_plane
-
-  # Default k3s node taints
-  default_control_plane_taints = concat([], local.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"])
 }
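The naming scheme and subnet math in the locals above are easy to sanity-check with `terraform console`. A small sketch, assuming a hypothetical agent nodepool named `agent-small` at pool index 1 with a count of 2 (run it from an empty directory so no project variables are required):

```sh
terraform console <<'EOF'
{ for node_index in range(2) : format("%s-%s-%s", 1, node_index, "agent-small") => node_index }
cidrsubnet("10.0.0.0/8", 8, 2)
EOF
# Expected: keys "1-0-agent-small" and "1-1-agent-small", plus "10.2.0.0/16",
# i.e. the third /16 after 10.0.0.0/16 (default) and 10.1.0.0/16 (control plane).
```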
[] : ["node-role.kubernetes.io/master:NoSchedule"]) } diff --git a/terraform.tfvars.example b/terraform.tfvars.example index 8c5e274..f78d929 100644 --- a/terraform.tfvars.example +++ b/terraform.tfvars.example @@ -33,7 +33,7 @@ network_region = "eu-central" # change to `us-east` if location is ash # Also, before decreasing the count of any nodepools to 0, it's essential to drain and cordon the nodes in question. Otherwise, it will leave your cluster in a bad state. # Before initializing the cluster, you can change all parameters and add or remove any nodepools. You need at least one nodepool of each kind, control plane, and agent. -# The nodepool names are entirely arbitrary, you can choose whatever you want, but no special characters or underscore; only alphanumeric characters and dashes are allowed. +# The nodepool names are entirely arbitrary, you can choose whatever you want, but no special characters or underscore, and they must be unique; only alphanumeric characters and dashes are allowed. # If you want to have a single node cluster, have one control plane nodepools with a count of 1, and one agent nodepool with a count of 0. @@ -108,11 +108,13 @@ load_balancer_location = "fsn1" # hetzner_csi_version = "" -# If you want to use letsencrypt with tls Challenge, the email address is used to send you certificates expiration notices +# We give you the possibility to use letsencrypt directly with Traefik because it's an easy setup, however it's not optimal, +# as the free version of Traefik causes a little bit of downtime when when the certificates get renewed. For proper SSL management, +# we instead recommend you to use cert-manager, that you can easily deploy with helm; see https://cert-manager.io/. # traefik_acme_tls = true # traefik_acme_email = "mail@example.com" -# If you want to disable the Traefik ingress controller, you can. Default is "true". +# If you want to disable the Traefik ingress controller, you can can set this to "false". Default is "true". # traefik_enabled = false # If you want to disable the metric server, you can! Default is "true". @@ -162,6 +164,8 @@ load_balancer_location = "fsn1" # If you want to configure a different CNI for k3s, use this flag # possible values: flannel (Default), calico +# Cilium or other would be easy to add, you can mirror how Calico was added. PRs are welcome! +# CAVEATS: Calico is not supported for single node setups, because of the following issue https://github.com/k3s-io/klipper-lb/issues/6. # cni_plugin = "calico" # If you want to disable the k3s default network policy controller, use this flag