added kustomization backup

This commit is contained in:
Karim Naufal 2022-04-14 13:59:05 +02:00
parent 0dd979a763
commit 353a12aba6
No known key found for this signature in database
GPG Key ID: 9CB4A7C28C139CA5
6 changed files with 119 additions and 82 deletions

4
.gitignore vendored
View File

@ -6,4 +6,6 @@ kubeconfig.yaml-e
terraform.tfvars
plans-custom.yaml
traefik-custom.yaml
kured-custom.yaml
kured-custom.yaml
kustomization.yaml
kustomization_backup.yaml

View File

@ -114,25 +114,38 @@ Otherwise, it's essential to turn off automatic OS upgrades (k3s can continue to
## Automatic Upgrade
### The Default Setting
By default, MicroOS gets upgraded automatically on each node and reboots safely via [Kured](https://github.com/weaveworks/kured) installed in the cluster.
As for k3s, it also automatically upgrades thanks to Rancher's [system upgrade controller](https://github.com/rancher/system-upgrade-controller). By default, it follows the k3s `stable` channel, but you can also change to the `latest` one if needed or specify a target version to upgrade to via the upgrade plan.
You can copy and modify the [one in the templates](https://github.com/kube-hetzner/kube-hetzner/blob/master/templates/plans.yaml.tpl) for that! More on the subject in [k3s upgrades](https://rancher.com/docs/k3s/latest/en/upgrades/basic/).
_If you wish to turn off automatic MicroOS upgrades on a specific node, you need to ssh into it and issue the following command:_
### Turning Off Automatic Upgrade
_If you wish to turn off automatic MicroOS upgrades (Important if you are not launching an HA setup which requires at least 3 control-plane nodes), you need to ssh into each node and issue the following command:_
```sh
systemctl --now disable transactional-update.timer
```
_To turn off k3s upgrades, you can either set the `k3s_upgrade=true` label in the node you want or set it to `false`. To remove it, apply:_
_To turn off k3s upgrades, you can either remove the `k3s_upgrade=true` label or set it to `false`. This needs to happen for all the nodes too! To remove it, apply:_
```sh
kubectl -n system-upgrade label node <node-name> k3s_upgrade-
```
### Individual Components Upgrade
Rarely needed, but can be handy in the long run. During the installation, we automatically download a backup of the kustomization to a `kustomization_backup.yaml` file. You will find it next to your `kubeconfig.yaml` at the root of your project.
1. First create a duplicate of that file and name it `kustomization.yaml`, keeping the original file intact, in case you need to restore the old config.
2. Edit the `kustomization.yaml` file; you want to go to the very bottom where you have the links to the different source files; grab the latest versions for each on GitHub, and replace them.
3. If present, remove any local reference to `traefik_config.yaml`, as Traefik is updated automatically by the system upgrade controller.
4. Apply the updated `kustomization.yaml` with `kubectl apply -k ./`.
## Examples
<details>

16
init.tf
View File

@ -40,7 +40,7 @@ resource "null_resource" "first_control_plane" {
inline = [
"systemctl start k3s",
# prepare the post_install directory
"mkdir -p /tmp/post_install",
"mkdir -p /var/post_install",
# wait for k3s to become ready
<<-EOT
timeout 120 bash <<EOF
@ -85,7 +85,7 @@ resource "null_resource" "kustomization" {
"https://raw.githubusercontent.com/hetznercloud/csi-driver/${local.csi_version}/deploy/kubernetes/hcloud-csi.yml",
"https://github.com/weaveworks/kured/releases/download/${local.kured_version}/kured-${local.kured_version}-dockerhub.yaml",
"https://raw.githubusercontent.com/rancher/system-upgrade-controller/master/manifests/system-upgrade-controller.yaml",
], local.is_single_node_cluster ? [] : var.traefik_enabled ? ["traefik.yaml"] : []
], local.is_single_node_cluster ? [] : var.traefik_enabled ? ["traefik_config.yaml"] : []
, var.cni_plugin == "calico" ? ["https://projectcalico.docs.tigera.io/manifests/calico.yaml"] : []),
patchesStrategicMerge = concat([
file("${path.module}/kustomize/kured.yaml"),
@ -93,7 +93,7 @@ resource "null_resource" "kustomization" {
file("${path.module}/kustomize/system-upgrade-controller.yaml")
], var.cni_plugin == "calico" ? [file("${path.module}/kustomize/calico.yaml")] : [])
})
destination = "/tmp/post_install/kustomization.yaml"
destination = "/var/post_install/kustomization.yaml"
}
# Upload traefik config
@ -109,7 +109,7 @@ resource "null_resource" "kustomization" {
traefik_acme_email = var.traefik_acme_email
traefik_additional_options = var.traefik_additional_options
})
destination = "/tmp/post_install/traefik.yaml"
destination = "/var/post_install/traefik_config.yaml"
}
# Upload the system upgrade controller plans config
@ -119,7 +119,7 @@ resource "null_resource" "kustomization" {
{
channel = var.initial_k3s_channel
})
destination = "/tmp/post_install/plans.yaml"
destination = "/var/post_install/plans.yaml"
}
# Deploy secrets, logging is automatically disabled due to sensitive variables
@ -143,11 +143,11 @@ resource "null_resource" "kustomization" {
# replace lines like "- |3" by "- |" (yaml block syntax).
# due to indentation this should not change the embedded
# manifests themselves
"sed -i 's/^- |[0-9]\\+$/- |/g' /tmp/post_install/kustomization.yaml",
"kubectl apply -k /tmp/post_install",
"sed -i 's/^- |[0-9]\\+$/- |/g' /var/post_install/kustomization.yaml",
"kubectl apply -k /var/post_install",
"echo 'Waiting for the system-upgrade-controller deployment to become available...'",
"kubectl -n system-upgrade wait --for=condition=available --timeout=120s deployment/system-upgrade-controller",
"kubectl -n system-upgrade apply -f /tmp/post_install/plans.yaml"
"kubectl -n system-upgrade apply -f /var/post_install/plans.yaml"
],
local.is_single_node_cluster || var.traefik_enabled == false ? [] : [<<-EOT
timeout 120 bash <<EOF

18
kustomization_backup.tf Normal file
View File

@ -0,0 +1,18 @@
# Reads the kustomization.yaml back from a control plane node over SSH,
# so that its content can be saved as a local backup.
data "remote_file" "kustomization_backup" {
conn {
# Connect to the control plane module instance under the first key returned by keys()
host = module.control_planes[keys(module.control_planes)[0]].ipv4_address
port = 22
user = "root"
private_key = local.ssh_private_key
# Use the ssh agent only when no private key was provided
agent = var.private_key == null
}
# Location on the node where the kustomization file is uploaded and applied from
path = "/var/post_install/kustomization.yaml"
# Only read the file once the kustomization resource has completed
depends_on = [null_resource.kustomization]
}
# Writes the content fetched from the node to kustomization_backup.yaml on the local machine.
resource "local_file" "kustomization_backup" {
content = data.remote_file.kustomization_backup.content
filename = "kustomization_backup.yaml"
# Permission mode applied to the written file
file_permission = "600"
}

136
locals.tf
View File

@ -1,10 +1,5 @@
locals {
# if we are in a single cluster config, we use the default klipper lb instead of Hetzner LB
total_node_count = sum(concat([for v in var.control_plane_nodepools : v.count], [0])) + sum(concat([for v in var.agent_nodepools : v.count], [0]))
control_plane_count = sum(concat([for v in var.control_plane_nodepools : v.count], [0]))
agent_count = sum(concat([for v in var.agent_nodepools : v.count], [0]))
is_single_node_cluster = local.total_node_count == 1
ssh_public_key = trimspace(file(var.public_key))
ssh_public_key = trimspace(file(var.public_key))
# ssh_private_key is either the contents of var.private_key or null to use a ssh agent.
ssh_private_key = var.private_key == null ? null : trimspace(file(var.private_key))
# ssh_identity is not set if the private key is passed directly, but if ssh agent is used, the public key tells ssh agent which private key to use.
@ -20,6 +15,73 @@ locals {
csi_version = var.hetzner_csi_version != null ? var.hetzner_csi_version : data.github_release.hetzner_csi.release_tag
kured_version = data.github_release.kured.release_tag
common_commands_install_k3s = [
"set -ex",
# prepare the k3s config directory
"mkdir -p /etc/rancher/k3s",
# move the config file into place
"mv /tmp/config.yaml /etc/rancher/k3s/config.yaml",
# if the server has already been initialized just stop here
"[ -e /etc/rancher/k3s/k3s.yaml ] && exit 0",
]
apply_k3s_selinux = ["/sbin/semodule -v -i /usr/share/selinux/packages/k3s.pp"]
install_k3s_server = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=server sh -"], local.apply_k3s_selinux)
install_k3s_agent = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=agent sh -"], local.apply_k3s_selinux)
control_plane_nodes = merge([
for pool_index, nodepool_obj in var.control_plane_nodepools : {
for node_index in range(nodepool_obj.count) :
format("%s-%s-%s", pool_index, node_index, nodepool_obj.name) => {
nodepool_name : nodepool_obj.name,
server_type : nodepool_obj.server_type,
location : nodepool_obj.location,
labels : concat(local.default_control_plane_labels, nodepool_obj.labels),
taints : concat(local.default_control_plane_taints, nodepool_obj.taints),
index : node_index
}
}
]...)
agent_nodes = merge([
for pool_index, nodepool_obj in var.agent_nodepools : {
for node_index in range(nodepool_obj.count) :
format("%s-%s-%s", pool_index, node_index, nodepool_obj.name) => {
nodepool_name : nodepool_obj.name,
server_type : nodepool_obj.server_type,
location : nodepool_obj.location,
labels : concat(local.default_agent_labels, nodepool_obj.labels),
taints : nodepool_obj.taints,
index : node_index
}
}
]...)
# The main network cidr that all subnets will be created upon
network_ipv4_cidr = "10.0.0.0/8"
# The first two subnets are respectively the default subnet 10.0.0.0/16 used for potentially anything and 10.1.0.0/16 used for control plane nodes.
# the rest of the subnets are for agent nodes in each nodepools.
network_ipv4_subnets = [for index in range(256) : cidrsubnet(local.network_ipv4_cidr, 8, index)]
# Total node counts per role. The trailing [0] keeps sum() from failing when a
# nodepool list is empty, because Terraform's sum() errors on an empty collection.
control_plane_count = sum(concat([for v in var.control_plane_nodepools : v.count], [0]))
agent_count = sum(concat([for v in var.agent_nodepools : v.count], [0]))
# if we are in a single node cluster config, we use the default klipper lb instead of Hetzner LB
is_single_node_cluster = local.control_plane_count + local.agent_count == 1
# disable k3s extras
disable_extras = concat(["local-storage"], local.is_single_node_cluster ? [] : ["servicelb"], var.traefik_enabled ? [] : ["traefik"], var.metrics_server_enabled ? [] : ["metrics-server"])
# Default k3s node labels
default_agent_labels = concat([], var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [])
default_control_plane_labels = concat([], var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [])
allow_scheduling_on_control_plane = local.is_single_node_cluster ? true : var.allow_scheduling_on_control_plane
# Default k3s node taints
default_control_plane_taints = concat([], local.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"])
# The following IPs are important to be whitelisted because they communicate with Hetzner services and enable the CCM and CSI to work properly.
# Source https://github.com/hetznercloud/csi-driver/issues/204#issuecomment-848625566
hetzner_metadata_service_ipv4 = "169.254.169.254/32"
@ -156,66 +218,4 @@ locals {
]
}
])
common_commands_install_k3s = [
"set -ex",
# prepare the k3s config directory
"mkdir -p /etc/rancher/k3s",
# move the config file into place
"mv /tmp/config.yaml /etc/rancher/k3s/config.yaml",
# if the server has already been initialized just stop here
"[ -e /etc/rancher/k3s/k3s.yaml ] && exit 0",
]
apply_k3s_selinux = ["/sbin/semodule -v -i /usr/share/selinux/packages/k3s.pp"]
install_k3s_server = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=server sh -"], local.apply_k3s_selinux)
install_k3s_agent = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_SKIP_SELINUX_RPM=true INSTALL_K3S_CHANNEL=${var.initial_k3s_channel} INSTALL_K3S_EXEC=agent sh -"], local.apply_k3s_selinux)
control_plane_nodes = merge([
for pool_index, nodepool_obj in var.control_plane_nodepools : {
for node_index in range(nodepool_obj.count) :
format("%s-%s-%s", pool_index, node_index, nodepool_obj.name) => {
nodepool_name : nodepool_obj.name,
server_type : nodepool_obj.server_type,
location : nodepool_obj.location,
labels : concat(local.default_control_plane_labels, nodepool_obj.labels),
taints : concat(local.default_control_plane_taints, nodepool_obj.taints),
index : node_index
}
}
]...)
agent_nodes = merge([
for pool_index, nodepool_obj in var.agent_nodepools : {
for node_index in range(nodepool_obj.count) :
format("%s-%s-%s", pool_index, node_index, nodepool_obj.name) => {
nodepool_name : nodepool_obj.name,
server_type : nodepool_obj.server_type,
location : nodepool_obj.location,
labels : concat(local.default_agent_labels, nodepool_obj.labels),
taints : nodepool_obj.taints,
index : node_index
}
}
]...)
# The main network cidr that all subnets will be created upon
network_ipv4_cidr = "10.0.0.0/8"
# The first two subnets are respectively the default subnet 10.0.0.0/16 used for potentially anything and 10.1.0.0/16 used for control plane nodes.
# the rest of the subnets are for agent nodes in each nodepools.
network_ipv4_subnets = [for index in range(256) : cidrsubnet(local.network_ipv4_cidr, 8, index)]
# disable k3s extras
disable_extras = concat(["local-storage"], local.is_single_node_cluster ? [] : ["servicelb"], var.traefik_enabled ? [] : ["traefik"], var.metrics_server_enabled ? [] : ["metrics-server"])
# Default k3s node labels
default_agent_labels = concat([], var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [])
default_control_plane_labels = concat([], var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [])
allow_scheduling_on_control_plane = local.is_single_node_cluster ? true : var.allow_scheduling_on_control_plane
# Default k3s node taints
default_control_plane_taints = concat([], local.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"])
}

View File

@ -33,7 +33,7 @@ network_region = "eu-central" # change to `us-east` if location is ash
# Also, before decreasing the count of any nodepools to 0, it's essential to drain and cordon the nodes in question. Otherwise, it will leave your cluster in a bad state.
# Before initializing the cluster, you can change all parameters and add or remove any nodepools. You need at least one nodepool of each kind, control plane, and agent.
# The nodepool names are entirely arbitrary, you can choose whatever you want, but no special characters or underscore; only alphanumeric characters and dashes are allowed.
# The nodepool names are entirely arbitrary, you can choose whatever you want, but they must be unique and contain no special characters or underscores; only alphanumeric characters and dashes are allowed.
# If you want to have a single node cluster, have one control plane nodepool with a count of 1, and one agent nodepool with a count of 0.
@ -108,11 +108,13 @@ load_balancer_location = "fsn1"
# hetzner_csi_version = ""
# If you want to use letsencrypt with tls Challenge, the email address is used to send you certificates expiration notices
# We give you the possibility to use letsencrypt directly with Traefik because it's an easy setup, however it's not optimal,
# as the free version of Traefik causes a little bit of downtime when the certificates get renewed. For proper SSL management,
# we instead recommend you to use cert-manager, that you can easily deploy with helm; see https://cert-manager.io/.
# traefik_acme_tls = true
# traefik_acme_email = "mail@example.com"
# If you want to disable the Traefik ingress controller, you can. Default is "true".
# If you want to disable the Traefik ingress controller, you can set this to "false". Default is "true".
# traefik_enabled = false
# If you want to disable the metric server, you can! Default is "true".
@ -162,6 +164,8 @@ load_balancer_location = "fsn1"
# If you want to configure a different CNI for k3s, use this flag
# possible values: flannel (Default), calico
# Cilium or other would be easy to add, you can mirror how Calico was added. PRs are welcome!
# CAVEATS: Calico is not supported for single node setups, because of the following issue https://github.com/k3s-io/klipper-lb/issues/6.
# cni_plugin = "calico"
# If you want to disable the k3s default network policy controller, use this flag