From 7ac37f0ddf88366ef1d38be1c3e7463a439f4af8 Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Tue, 15 Feb 2022 23:54:55 +0100 Subject: [PATCH 01/16] k3s-install init --- agents.tf => temp/agents.tf | 0 output.tf => temp/output.tf | 0 servers.tf => temp/servers.tf | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename agents.tf => temp/agents.tf (100%) rename output.tf => temp/output.tf (100%) rename servers.tf => temp/servers.tf (100%) diff --git a/agents.tf b/temp/agents.tf similarity index 100% rename from agents.tf rename to temp/agents.tf diff --git a/output.tf b/temp/output.tf similarity index 100% rename from output.tf rename to temp/output.tf diff --git a/servers.tf b/temp/servers.tf similarity index 100% rename from servers.tf rename to temp/servers.tf From 1f0c825b234b5cf5a8b2a081c1b1cc9b7ebedd3d Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Wed, 16 Feb 2022 00:13:02 +0100 Subject: [PATCH 02/16] switch image to standard microos --- locals.tf | 4 ++-- main.tf | 4 ++-- master.tf | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/locals.tf b/locals.tf index 4cb851a..479cb3d 100644 --- a/locals.tf +++ b/locals.tf @@ -21,8 +21,8 @@ locals { MicroOS_install_commands = [ "set -ex", "apt-get install -y aria2", - "aria2c --follow-metalink=mem https://download.opensuse.org/tumbleweed/appliances/openSUSE-MicroOS.x86_64-k3s-kvm-and-xen.qcow2.meta4", - "qemu-img convert -p -f qcow2 -O host_device $(ls -a | grep -ie '^opensuse.*microos.*k3s.*qcow2$') /dev/sda", + "aria2c --follow-metalink=mem https://download.opensuse.org/tumbleweed/appliances/openSUSE-MicroOS.x86_64-kvm-and-xen.qcow2.meta4", + "qemu-img convert -p -f qcow2 -O host_device $(ls -a | grep -ie '^opensuse.*microos.*qcow2$') /dev/sda", "sgdisk -e /dev/sda", "parted -s /dev/sda resizepart 4 99%", "parted -s /dev/sda mkpart primary ext2 99% 100%", diff --git a/main.tf b/main.tf index 230d9d5..ccf919b 100644 --- a/main.tf +++ b/main.tf @@ -153,8 +153,8 @@ resource 
"hcloud_placement_group" "k3s" { } } -data "hcloud_load_balancer" "traefik" { +/* data "hcloud_load_balancer" "traefik" { name = "traefik" depends_on = [hcloud_server.agents[0]] -} +} */ diff --git a/master.tf b/master.tf index 48a1d4c..6d62863 100644 --- a/master.tf +++ b/master.tf @@ -67,7 +67,7 @@ resource "hcloud_server" "first_control_plane" { destination = "/etc/rancher/k3s/config.yaml" } - # Run the first control plane + /* # Run the first control plane provisioner "remote-exec" { inline = [ # set the hostname in a persistent fashion @@ -154,7 +154,7 @@ resource "hcloud_server" "first_control_plane" { "sed -i 's/^- |[0-9]\\+$/- |/g' /tmp/post_install/kustomization.yaml", "kubectl apply -k /tmp/post_install", ] - } + } */ network { network_id = hcloud_network.k3s.id From fec695086ad9edc4d9bf53d7c1d06b27945f6962 Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Wed, 16 Feb 2022 03:18:40 +0100 Subject: [PATCH 03/16] k3s-install ready for testing --- temp/agents.tf => agents.tf | 70 ++++++++++++++++++++++------------- locals.tf | 14 ++++++- master.tf | 66 +++++++++++++++++++++++---------- temp/output.tf => output.tf | 0 temp/servers.tf => servers.tf | 46 ++++++++++++++--------- templates/agent.conf.tpl | 3 -- templates/plans.yaml.tpl | 50 +++++++++++++++++++++++++ terraform.tfvars.example | 9 +++++ variables.tf | 13 +++++++ 9 files changed, 205 insertions(+), 66 deletions(-) rename temp/agents.tf => agents.tf (60%) rename temp/output.tf => output.tf (100%) rename temp/servers.tf => servers.tf (72%) delete mode 100644 templates/agent.conf.tpl create mode 100644 templates/plans.yaml.tpl diff --git a/temp/agents.tf b/agents.tf similarity index 60% rename from temp/agents.tf rename to agents.tf index 9b107d3..31a26c3 100644 --- a/temp/agents.tf +++ b/agents.tf @@ -33,15 +33,13 @@ resource "hcloud_server" "agents" { # Install MicroOS provisioner "remote-exec" { - inline = local.MicroOS_install_commands + inline = local.microOS_install_commands } - # Issue a reboot 
command + # Issue a reboot command and wait for the node to reboot provisioner "local-exec" { command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" } - - # Wait for MicroOS to reboot and be ready provisioner "local-exec" { command = <<-EOT until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null @@ -52,16 +50,6 @@ resource "hcloud_server" "agents" { EOT } - - # Generating and uploading the agent.conf file - provisioner "file" { - content = templatefile("${path.module}/templates/agent.conf.tpl", { - server = "https://${local.first_control_plane_network_ip}:6443" - token = random_password.k3s_token.result - }) - destination = "/etc/rancher/k3s/agent.conf" - } - # Generating k3s agent config file provisioner "file" { content = yamlencode({ @@ -69,30 +57,60 @@ resource "hcloud_server" "agents" { kubelet-arg = "cloud-provider=external" flannel-iface = "eth1" node-ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 257 + count.index) + node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [] }) - destination = "/etc/rancher/k3s/config.yaml" + destination = "/tmp/config.yaml" } - # Run the agent + # Install k3s agent provisioner "remote-exec" { inline = [ - # set the hostname in a persistent fashion - "hostnamectl set-hostname ${self.name}", + "set -ex", # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", - # then we start k3s agent and join the cluster - "systemctl enable k3s-agent", + # prepare the k3s config directory + "mkdir -p /etc/rancher/k3s", + # move the config file into place + "mv /tmp/config.yaml /etc/rancher/k3s/config.yaml", + # install k3s <<-EOT - until systemctl status k3s-agent > /dev/null - do - systemctl start k3s-agent - echo "Starting k3s-agent and joining the cluster..." 
- sleep 2 - done + INSTALL_K3S_SKIP_START=true \ + K3S_URL=${"https://${local.first_control_plane_network_ip}:6443"} \ + K3S_TOKEN=${random_password.k3s_token.result} \ + curl -sfL https://get.k3s.io | sh - EOT ] } + # Issue a reboot command and wait for the node to reboot + provisioner "local-exec" { + command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" + } + provisioner "local-exec" { + command = <<-EOT + until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null + do + echo "Waiting for MicroOS to reboot and become available..." + sleep 2 + done + EOT + } + + # Upon reboot verify that k3s agent starts correctly + provisioner "remote-exec" { + inline = [ + <<-EOT + timeout 120 bash < /dev/null; do + echo "Waiting for the k3s agent to start..." + sleep 1 + done + EOF + EOT + ] + } + + network { network_id = hcloud_network.k3s.id ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 257 + count.index) diff --git a/locals.tf b/locals.tf index 479cb3d..68ac33c 100644 --- a/locals.tf +++ b/locals.tf @@ -18,7 +18,7 @@ locals { csi_version = var.hetzner_csi_version != null ? 
var.hetzner_csi_version : data.github_release.hetzner_csi.release_tag kured_version = data.github_release.kured.release_tag - MicroOS_install_commands = [ + microOS_install_commands = [ "set -ex", "apt-get install -y aria2", "aria2c --follow-metalink=mem https://download.opensuse.org/tumbleweed/appliances/openSUSE-MicroOS.x86_64-kvm-and-xen.qcow2.meta4", @@ -34,4 +34,16 @@ locals { "cp /root/config.ign /mnt/ignition/config.ign", "umount /mnt" ] + + install_k3s_server = [ + "set -ex", + # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured + "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", + # prepare the k3s config directory + "mkdir -p /etc/rancher/k3s", + # move the config file into place + "mv /tmp/config.yaml /etc/rancher/k3s/config.yaml", + # install k3s + "INSTALL_K3S_SKIP_START=true curl -sfL https://get.k3s.io | sh -", + ] } diff --git a/master.tf b/master.tf index 6d62863..6eb8991 100644 --- a/master.tf +++ b/master.tf @@ -31,15 +31,13 @@ resource "hcloud_server" "first_control_plane" { # Install MicroOS provisioner "remote-exec" { - inline = local.MicroOS_install_commands + inline = local.microOS_install_commands } - # Issue a reboot command + # Issue a reboot command and wait for the node to reboot provisioner "local-exec" { command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" } - - # Wait for MicroOS to reboot and be ready provisioner "local-exec" { command = <<-EOT until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null @@ -63,27 +61,42 @@ resource "hcloud_server" "first_control_plane" { advertise-address = local.first_control_plane_network_ip token = random_password.k3s_token.result node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"] + node-label = var.automatically_upgrade_k3s ? 
["k3s_upgrade=true"] : [] }) - destination = "/etc/rancher/k3s/config.yaml" + destination = "/tmp/config.yaml" } - /* # Run the first control plane + + + # Install k3s server + provisioner "remote-exec" { + inline = local.install_k3s_server + } + + # Issue a reboot command and wait for the node to reboot + provisioner "local-exec" { + command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" + } + provisioner "local-exec" { + command = <<-EOT + until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null + do + echo "Waiting for MicroOS to reboot and become available..." + sleep 2 + done + EOT + } + + # Upon reboot verify that the k3s server is starts, and wait for k3s to be ready to receive commands provisioner "remote-exec" { inline = [ - # set the hostname in a persistent fashion - "hostnamectl set-hostname ${self.name}", - # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured - "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", - # prepare a directory for our post-installation kustomizations + # prepare the post_install directory "mkdir -p /tmp/post_install", - # then we initiate the cluster - "systemctl enable k3s-server", - # wait for k3s to get ready + # wait for k3s to become ready <<-EOT timeout 120 bash < /dev/null; do - systemctl start k3s-server - echo "Initiating the cluster..." + until systemctl status k3s > /dev/null; do + echo "Waiting for the k3s server to start..." 
sleep 1 done until [ -e /etc/rancher/k3s/k3s.yaml ]; do @@ -108,7 +121,8 @@ resource "hcloud_server" "first_control_plane" { "https://github.com/hetznercloud/hcloud-cloud-controller-manager/releases/download/${local.ccm_version}/ccm-networks.yaml", "https://raw.githubusercontent.com/hetznercloud/csi-driver/${local.csi_version}/deploy/kubernetes/hcloud-csi.yml", "https://github.com/weaveworks/kured/releases/download/${local.kured_version}/kured-${local.kured_version}-dockerhub.yaml", - "./traefik.yaml" + "https://raw.githubusercontent.com/rancher/system-upgrade-controller/master/manifests/system-upgrade-controller.yaml", + "./traefik.yaml", ] patchesStrategicMerge = [ file("${path.module}/patches/kured.yaml"), @@ -132,9 +146,20 @@ resource "hcloud_server" "first_control_plane" { destination = "/tmp/post_install/traefik.yaml" } + # Upload the system upgrade controller plans config + provisioner "file" { + content = templatefile( + "${path.module}/templates/plans.yaml.tpl", + { + channel = var.k3s_upgrade_channel + }) + destination = "/tmp/post_install/plans.yaml" + } + # Deploy secrets, logging is automatically disabled due to sensitive variables provisioner "remote-exec" { inline = [ + "set -ex", "kubectl -n kube-system create secret generic hcloud --from-literal=token=${var.hcloud_token} --from-literal=network=${hcloud_network.k3s.name}", "kubectl -n kube-system create secret generic hcloud-csi --from-literal=token=${var.hcloud_token}", ] @@ -143,6 +168,7 @@ resource "hcloud_server" "first_control_plane" { # Deploy our post-installation kustomization provisioner "remote-exec" { inline = [ + "set -ex", # This ugly hack is here, because terraform serializes the # embedded yaml files with "- |2", when there is more than # one yamldocument in the embedded file. 
Kustomize does not understand @@ -153,8 +179,10 @@ resource "hcloud_server" "first_control_plane" { # manifests themselves "sed -i 's/^- |[0-9]\\+$/- |/g' /tmp/post_install/kustomization.yaml", "kubectl apply -k /tmp/post_install", + "echo 'Waiting for the system-upgrade-controller deployment to become available...' && kubectl -n system-upgrade wait --for=condition=available --timeout=300s deployment/system-upgrade-controller", + "kubectl apply -f /tmp/post_install/plans.yaml" ] - } */ + } network { network_id = hcloud_network.k3s.id diff --git a/temp/output.tf b/output.tf similarity index 100% rename from temp/output.tf rename to output.tf diff --git a/temp/servers.tf b/servers.tf similarity index 72% rename from temp/servers.tf rename to servers.tf index 5f1d82c..b3835e9 100644 --- a/temp/servers.tf +++ b/servers.tf @@ -32,15 +32,13 @@ resource "hcloud_server" "control_planes" { # Install MicroOS provisioner "remote-exec" { - inline = local.MicroOS_install_commands + inline = local.microOS_install_commands } - # Issue a reboot command + # Issue a reboot command and wait for the node to reboot provisioner "local-exec" { command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" } - - # Wait for MicroOS to reboot and be ready provisioner "local-exec" { command = <<-EOT until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null @@ -66,26 +64,40 @@ resource "hcloud_server" "control_planes" { tls-san = cidrhost(hcloud_network_subnet.k3s.ip_range, 3 + count.index) token = random_password.k3s_token.result node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"] + node-label = var.automatically_upgrade_k3s ? 
["k3s_upgrade=true"] : [] }) - destination = "/etc/rancher/k3s/config.yaml" + destination = "/tmp/config.yaml" } - # Run an other control plane server + # Install k3s server + provisioner "remote-exec" { + inline = local.install_k3s_server + } + + # Issue a reboot command and wait for the node to reboot + provisioner "local-exec" { + command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" + } + provisioner "local-exec" { + command = <<-EOT + until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null + do + echo "Waiting for MicroOS to reboot and become available..." + sleep 2 + done + EOT + } + + # Upon reboot verify that the k3s server starts correctly provisioner "remote-exec" { inline = [ - # set the hostname in a persistent fashion - "hostnamectl set-hostname ${self.name}", - # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured - "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", - # then then we start k3s in server mode and join the cluster - "systemctl enable k3s-server", <<-EOT - until systemctl status k3s-server > /dev/null - do - systemctl start k3s-server - echo "Waiting on other 'learning' control planes, patience is the mother of all virtues..." - sleep 2 + timeout 120 bash < /dev/null; do + echo "Waiting for the k3s server to start..." 
+ sleep 1 done + EOF EOT ] } diff --git a/templates/agent.conf.tpl b/templates/agent.conf.tpl deleted file mode 100644 index fad0449..0000000 --- a/templates/agent.conf.tpl +++ /dev/null @@ -1,3 +0,0 @@ -SERVER_URL="${server}" -NODE_TOKEN="${token}" -AGENT_OPTS="" diff --git a/templates/plans.yaml.tpl b/templates/plans.yaml.tpl new file mode 100644 index 0000000..337aa59 --- /dev/null +++ b/templates/plans.yaml.tpl @@ -0,0 +1,50 @@ +# Doc: https://rancher.com/docs/k3s/latest/en/upgrades/automated/ +# agent plan +apiVersion: upgrade.cattle.io/v1 +kind: Plan +metadata: + name: k3s-agent + namespace: system-upgrade + labels: + k3s_upgrade: agent +spec: + concurrency: 1 + channel: https://update.k3s.io/v1-release/channels/${channel} + nodeSelector: + matchExpressions: + - {key: k3s_upgrade, operator: Exists} + - {key: k3s_upgrade, operator: NotIn, values: ["disabled", "false"]} + - {key: node-role.kubernetes.io/master, operator: NotIn, values: ["true"]} + serviceAccountName: system-upgrade + prepare: + image: rancher/k3s-upgrade + args: ["prepare", "k3s-server"] + drain: + force: true + skipWaitForDeleteTimeout: 60 + upgrade: + image: rancher/k3s-upgrade +--- +# server plan +apiVersion: upgrade.cattle.io/v1 +kind: Plan +metadata: + name: k3s-server + namespace: system-upgrade + labels: + k3s_upgrade: server +spec: + concurrency: 1 + channel: https://update.k3s.io/v1-release/channels/${channel} + nodeSelector: + matchExpressions: + - {key: k3s_upgrade, operator: Exists} + - {key: k3s_upgrade, operator: NotIn, values: ["disabled", "false"]} + - {key: node-role.kubernetes.io/master, operator: In, values: ["true"]} + tolerations: + - {key: node-role.kubernetes.io/master, effect: NoSchedule, operator: Exists} + - {key: CriticalAddonsOnly, effect: NoExecute, operator: Exists} + serviceAccountName: system-upgrade + cordon: true + upgrade: + image: rancher/k3s-upgrade \ No newline at end of file diff --git a/terraform.tfvars.example b/terraform.tfvars.example index 
d0bd3a1..d2876d1 100644 --- a/terraform.tfvars.example +++ b/terraform.tfvars.example @@ -29,3 +29,12 @@ agents_num = 2 # If you want to allow non-control-plane workloads to run on the control-plane nodes set "true" below. The default is "false". # allow_scheduling_on_control_plane = true + +# If you want to disable automatic upgrade of k3s (stable channel), you can set this to false, default is "true". +# automatically_upgrade_k3s = false + +# If you would like to specify the k3s upgrade channel from the get go, you can do so, the default is "stable". +# For a list of available channels, see https://rancher.com/docs/k3s/latest/en/upgrades/basic/ and https://update.k3s.io/v1-release/channels +# k3s_upgrade_channel = "latest" + + diff --git a/variables.tf b/variables.tf index e8734df..b8ed1d1 100644 --- a/variables.tf +++ b/variables.tf @@ -84,3 +84,16 @@ variable "allow_scheduling_on_control_plane" { default = false description = "Whether to allow non-control-plane workloads to run on the control-plane nodes" } + +variable "k3s_upgrade_channel" { + type = string + default = "stable" + description = "Allows you to specify the k3s upgrade channel" +} + +variable "automatically_upgrade_k3s" { + type = bool + default = true + description = "Whether to automatically upgrade k3s based on the selected channel" +} + From d9201326948d8d0b0e3071af66b60349be773705 Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Wed, 16 Feb 2022 04:24:20 +0100 Subject: [PATCH 04/16] k3s install method ok --- agents.tf | 21 ++++----------------- locals.tf | 16 ++++++++++++++-- main.tf | 4 ++-- master.tf | 9 +++++---- servers.tf | 4 ++-- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/agents.tf b/agents.tf index 31a26c3..c21cdf0 100644 --- a/agents.tf +++ b/agents.tf @@ -54,6 +54,8 @@ resource "hcloud_server" "agents" { provisioner "file" { content = yamlencode({ node-name = self.name + server = "https://${local.first_control_plane_network_ip}:6443" + token = 
random_password.k3s_token.result kubelet-arg = "cloud-provider=external" flannel-iface = "eth1" node-ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 257 + count.index) @@ -64,22 +66,7 @@ resource "hcloud_server" "agents" { # Install k3s agent provisioner "remote-exec" { - inline = [ - "set -ex", - # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured - "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", - # prepare the k3s config directory - "mkdir -p /etc/rancher/k3s", - # move the config file into place - "mv /tmp/config.yaml /etc/rancher/k3s/config.yaml", - # install k3s - <<-EOT - INSTALL_K3S_SKIP_START=true \ - K3S_URL=${"https://${local.first_control_plane_network_ip}:6443"} \ - K3S_TOKEN=${random_password.k3s_token.result} \ - curl -sfL https://get.k3s.io | sh - - EOT - ] + inline = local.install_k3s_agent } # Issue a reboot command and wait for the node to reboot @@ -103,7 +90,7 @@ resource "hcloud_server" "agents" { timeout 120 bash < /dev/null; do echo "Waiting for the k3s agent to start..." 
- sleep 1 + sleep 2 done EOF EOT diff --git a/locals.tf b/locals.tf index 68ac33c..b6085bc 100644 --- a/locals.tf +++ b/locals.tf @@ -43,7 +43,19 @@ locals { "mkdir -p /etc/rancher/k3s", # move the config file into place "mv /tmp/config.yaml /etc/rancher/k3s/config.yaml", - # install k3s - "INSTALL_K3S_SKIP_START=true curl -sfL https://get.k3s.io | sh -", + # install k3s server + "curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_EXEC=server sh -", + ] + + install_k3s_agent = [ + "set -ex", + # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured + "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", + # prepare the k3s config directory + "mkdir -p /etc/rancher/k3s", + # move the config file into place + "mv /tmp/config.yaml /etc/rancher/k3s/config.yaml", + # install k3s server + "curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_EXEC=agent sh -", ] } diff --git a/main.tf b/main.tf index ccf919b..230d9d5 100644 --- a/main.tf +++ b/main.tf @@ -153,8 +153,8 @@ resource "hcloud_placement_group" "k3s" { } } -/* data "hcloud_load_balancer" "traefik" { +data "hcloud_load_balancer" "traefik" { name = "traefik" depends_on = [hcloud_server.agents[0]] -} */ +} diff --git a/master.tf b/master.tf index 6eb8991..7aeeda0 100644 --- a/master.tf +++ b/master.tf @@ -97,15 +97,15 @@ resource "hcloud_server" "first_control_plane" { timeout 120 bash < /dev/null; do echo "Waiting for the k3s server to start..." - sleep 1 + sleep 2 done until [ -e /etc/rancher/k3s/k3s.yaml ]; do echo "Waiting for kubectl config..." - sleep 1 + sleep 2 done until [[ "\$(kubectl get --raw='/readyz' 2> /dev/null)" == "ok" ]]; do echo "Waiting for the cluster to become ready..." 
- sleep 1 + sleep 2 done EOF EOT @@ -179,7 +179,8 @@ resource "hcloud_server" "first_control_plane" { # manifests themselves "sed -i 's/^- |[0-9]\\+$/- |/g' /tmp/post_install/kustomization.yaml", "kubectl apply -k /tmp/post_install", - "echo 'Waiting for the system-upgrade-controller deployment to become available...' && kubectl -n system-upgrade wait --for=condition=available --timeout=300s deployment/system-upgrade-controller", + "echo 'Waiting for the system-upgrade-controller deployment to become available...'", + "kubectl -n system-upgrade wait --for=condition=available --timeout=300s deployment/system-upgrade-controller", "kubectl apply -f /tmp/post_install/plans.yaml" ] } diff --git a/servers.tf b/servers.tf index b3835e9..7092847 100644 --- a/servers.tf +++ b/servers.tf @@ -54,6 +54,7 @@ resource "hcloud_server" "control_planes" { content = yamlencode({ node-name = self.name server = "https://${local.first_control_plane_network_ip}:6443" + token = random_password.k3s_token.result cluster-init = true disable-cloud-controller = true disable = "servicelb, local-storage" @@ -62,7 +63,6 @@ resource "hcloud_server" "control_planes" { node-ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 3 + count.index) advertise-address = cidrhost(hcloud_network_subnet.k3s.ip_range, 3 + count.index) tls-san = cidrhost(hcloud_network_subnet.k3s.ip_range, 3 + count.index) - token = random_password.k3s_token.result node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"] node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [] }) @@ -95,7 +95,7 @@ resource "hcloud_server" "control_planes" { timeout 120 bash < /dev/null; do echo "Waiting for the k3s server to start..." 
- sleep 1 + sleep 2 done EOF EOT From dcf9af0e3c1880ad1a2db95addb13d96822f7511 Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Wed, 16 Feb 2022 05:01:13 +0100 Subject: [PATCH 05/16] k3s install method ok --- master.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/master.tf b/master.tf index 7aeeda0..9e46956 100644 --- a/master.tf +++ b/master.tf @@ -180,7 +180,7 @@ resource "hcloud_server" "first_control_plane" { "sed -i 's/^- |[0-9]\\+$/- |/g' /tmp/post_install/kustomization.yaml", "kubectl apply -k /tmp/post_install", "echo 'Waiting for the system-upgrade-controller deployment to become available...'", - "kubectl -n system-upgrade wait --for=condition=available --timeout=300s deployment/system-upgrade-controller", + "kubectl -n system-upgrade wait --for=condition=available --timeout=120s deployment/system-upgrade-controller", "kubectl apply -f /tmp/post_install/plans.yaml" ] } From bffc921ed87ee824ddde4a26acb0e66cce423673 Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Wed, 16 Feb 2022 05:01:53 +0100 Subject: [PATCH 06/16] k3s install method ok --- master.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/master.tf b/master.tf index 9e46956..19272fd 100644 --- a/master.tf +++ b/master.tf @@ -181,7 +181,7 @@ resource "hcloud_server" "first_control_plane" { "kubectl apply -k /tmp/post_install", "echo 'Waiting for the system-upgrade-controller deployment to become available...'", "kubectl -n system-upgrade wait --for=condition=available --timeout=120s deployment/system-upgrade-controller", - "kubectl apply -f /tmp/post_install/plans.yaml" + "kubectl -n system-upgrade apply -f /tmp/post_install/plans.yaml" ] } From addf44c39fc9e1f20e18b320d1d5fd1fc8dae1d9 Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Wed, 16 Feb 2022 05:27:52 +0100 Subject: [PATCH 07/16] k3s install method ok --- locals.tf | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/locals.tf b/locals.tf index b6085bc..e9a90ac 
100644 --- a/locals.tf +++ b/locals.tf @@ -35,27 +35,17 @@ locals { "umount /mnt" ] - install_k3s_server = [ + common_commands_install_k3s = [ "set -ex", # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", # prepare the k3s config directory "mkdir -p /etc/rancher/k3s", # move the config file into place - "mv /tmp/config.yaml /etc/rancher/k3s/config.yaml", - # install k3s server - "curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_EXEC=server sh -", + "mv /tmp/config.yaml /etc/rancher/k3s/config.yaml" ] - install_k3s_agent = [ - "set -ex", - # first we disable automatic reboot (after transactional updates), and configure the reboot method as kured - "rebootmgrctl set-strategy off && echo 'REBOOT_METHOD=kured' > /etc/transactional-update.conf", - # prepare the k3s config directory - "mkdir -p /etc/rancher/k3s", - # move the config file into place - "mv /tmp/config.yaml /etc/rancher/k3s/config.yaml", - # install k3s server - "curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_EXEC=agent sh -", - ] + install_k3s_server = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_EXEC=server sh -"]) + + install_k3s_agent = concat(local.common_commands_install_k3s, ["curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_START=true INSTALL_K3S_EXEC=agent sh -"]) } From bcdc5603a368ea53868dcb412f2fae04957e58c2 Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Wed, 16 Feb 2022 05:30:09 +0100 Subject: [PATCH 08/16] k3s install method ok --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3ec4b3d..6dad3b4 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,6 @@ crash.log kubeconfig.yaml kubeconfig.yaml-e -terraform.tfvars \ No newline at end of file +terraform.tfvars 
+plans.yaml +traefik_config.yaml \ No newline at end of file From 4aabee0dadd458fdd72c6bbeb9fbd2bbcf34973d Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Wed, 16 Feb 2022 09:23:28 +0100 Subject: [PATCH 09/16] fix the upgrade node label --- agents.tf | 2 +- master.tf | 2 +- servers.tf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/agents.tf b/agents.tf index c21cdf0..0242a0d 100644 --- a/agents.tf +++ b/agents.tf @@ -59,7 +59,7 @@ resource "hcloud_server" "agents" { kubelet-arg = "cloud-provider=external" flannel-iface = "eth1" node-ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 257 + count.index) - node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [] + node-label = var.automatically_upgrade_k3s ? ["k3s-upgrade=true"] : [] }) destination = "/tmp/config.yaml" } diff --git a/master.tf b/master.tf index 19272fd..d1ec8b7 100644 --- a/master.tf +++ b/master.tf @@ -61,7 +61,7 @@ resource "hcloud_server" "first_control_plane" { advertise-address = local.first_control_plane_network_ip token = random_password.k3s_token.result node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"] - node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [] + node-label = var.automatically_upgrade_k3s ? ["k3s-upgrade=true"] : [] }) destination = "/tmp/config.yaml" } diff --git a/servers.tf b/servers.tf index 7092847..65294c2 100644 --- a/servers.tf +++ b/servers.tf @@ -64,7 +64,7 @@ resource "hcloud_server" "control_planes" { advertise-address = cidrhost(hcloud_network_subnet.k3s.ip_range, 3 + count.index) tls-san = cidrhost(hcloud_network_subnet.k3s.ip_range, 3 + count.index) node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"] - node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [] + node-label = var.automatically_upgrade_k3s ? 
["k3s-upgrade=true"] : [] }) destination = "/tmp/config.yaml" } From 1165389fc3e71561a639b41fbc0be7b4f95099ce Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Wed, 16 Feb 2022 10:56:22 +0100 Subject: [PATCH 10/16] fix ip_not_available error --- agents.tf | 4 ++-- locals.tf | 2 +- servers.tf | 8 ++++---- templates/traefik_config.yaml.tpl | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/agents.tf b/agents.tf index 0242a0d..58f3af5 100644 --- a/agents.tf +++ b/agents.tf @@ -58,7 +58,7 @@ resource "hcloud_server" "agents" { token = random_password.k3s_token.result kubelet-arg = "cloud-provider=external" flannel-iface = "eth1" - node-ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 257 + count.index) + node-ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 513 + count.index) node-label = var.automatically_upgrade_k3s ? ["k3s-upgrade=true"] : [] }) destination = "/tmp/config.yaml" @@ -100,7 +100,7 @@ resource "hcloud_server" "agents" { network { network_id = hcloud_network.k3s.id - ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 257 + count.index) + ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 513 + count.index) } depends_on = [ diff --git a/locals.tf b/locals.tf index e9a90ac..6afe266 100644 --- a/locals.tf +++ b/locals.tf @@ -1,5 +1,5 @@ locals { - first_control_plane_network_ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 2) + first_control_plane_network_ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 257) hcloud_image_name = "ubuntu-20.04" ssh_public_key = trimspace(file(var.public_key)) diff --git a/servers.tf b/servers.tf index 65294c2..f85a617 100644 --- a/servers.tf +++ b/servers.tf @@ -60,9 +60,9 @@ resource "hcloud_server" "control_planes" { disable = "servicelb, local-storage" flannel-iface = "eth1" kubelet-arg = "cloud-provider=external" - node-ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 3 + count.index) - advertise-address = cidrhost(hcloud_network_subnet.k3s.ip_range, 3 + count.index) - tls-san = 
cidrhost(hcloud_network_subnet.k3s.ip_range, 3 + count.index) + node-ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 258 + count.index) + advertise-address = cidrhost(hcloud_network_subnet.k3s.ip_range, 258 + count.index) + tls-san = cidrhost(hcloud_network_subnet.k3s.ip_range, 258 + count.index) node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"] node-label = var.automatically_upgrade_k3s ? ["k3s-upgrade=true"] : [] }) @@ -104,7 +104,7 @@ resource "hcloud_server" "control_planes" { network { network_id = hcloud_network.k3s.id - ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 3 + count.index) + ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 258 + count.index) } depends_on = [ diff --git a/templates/traefik_config.yaml.tpl b/templates/traefik_config.yaml.tpl index 62e9c26..f8156f8 100644 --- a/templates/traefik_config.yaml.tpl +++ b/templates/traefik_config.yaml.tpl @@ -10,9 +10,9 @@ spec: type: LoadBalancer annotations: "load-balancer.hetzner.cloud/name": "traefik" - # make hetzners load-balancer connect to our nodes via our private k3s-net. + # make hetzners load-balancer connect to our nodes via our private k3s "load-balancer.hetzner.cloud/use-private-ip": "true" - # keep hetzner-ccm from exposing our private ingress ip, which in general isn't routeable from the public internet. 
+ # keep hetzner-ccm from exposing our private ingress ip, which in general isn't routeable from the public internet "load-balancer.hetzner.cloud/disable-private-ingress": "true" # disable ipv6 by default, because external-dns doesn't support AAAA for hcloud yet https://github.com/kubernetes-sigs/external-dns/issues/2044 "load-balancer.hetzner.cloud/ipv6-disabled": "${lb_disable_ipv6}" From 9fa2fc9edc3bdc0fde17cce7b108c23725006073 Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Wed, 16 Feb 2022 11:06:47 +0100 Subject: [PATCH 11/16] fix ip_not_available error --- agents.tf | 2 +- master.tf | 2 +- servers.tf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/agents.tf b/agents.tf index 58f3af5..46a57c0 100644 --- a/agents.tf +++ b/agents.tf @@ -59,7 +59,7 @@ resource "hcloud_server" "agents" { kubelet-arg = "cloud-provider=external" flannel-iface = "eth1" node-ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 513 + count.index) - node-label = var.automatically_upgrade_k3s ? ["k3s-upgrade=true"] : [] + node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [] }) destination = "/tmp/config.yaml" } diff --git a/master.tf b/master.tf index d1ec8b7..19272fd 100644 --- a/master.tf +++ b/master.tf @@ -61,7 +61,7 @@ resource "hcloud_server" "first_control_plane" { advertise-address = local.first_control_plane_network_ip token = random_password.k3s_token.result node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"] - node-label = var.automatically_upgrade_k3s ? ["k3s-upgrade=true"] : [] + node-label = var.automatically_upgrade_k3s ? 
["k3s_upgrade=true"] : [] }) destination = "/tmp/config.yaml" } diff --git a/servers.tf b/servers.tf index f85a617..0db1ea5 100644 --- a/servers.tf +++ b/servers.tf @@ -64,7 +64,7 @@ resource "hcloud_server" "control_planes" { advertise-address = cidrhost(hcloud_network_subnet.k3s.ip_range, 258 + count.index) tls-san = cidrhost(hcloud_network_subnet.k3s.ip_range, 258 + count.index) node-taint = var.allow_scheduling_on_control_plane ? [] : ["node-role.kubernetes.io/master:NoSchedule"] - node-label = var.automatically_upgrade_k3s ? ["k3s-upgrade=true"] : [] + node-label = var.automatically_upgrade_k3s ? ["k3s_upgrade=true"] : [] }) destination = "/tmp/config.yaml" } From f59225bf9cd86067f5b06718179777fa45e5ee71 Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Wed, 16 Feb 2022 12:12:05 +0100 Subject: [PATCH 12/16] updated readme for k3s install --- README.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index f9a2a77..d4c0581 100644 --- a/README.md +++ b/README.md @@ -128,20 +128,26 @@ By default, we have 3 control planes configured and 2 agents, with automatic upg **But if you want to remain HA, it's important to keep a number of control planes nodes of at least 3 (2 to maintain quorum when 1 goes down for automated upgrades and reboot for instance), see [Rancher's doc on HA](https://rancher.com/docs/k3s/latest/en/installation/ha-embedded/).** -Otherwise, it's important to turn off automatic upgrades (see below) and reboots for the control-plane nodes (2 or less), and do the maintenance yourself. +Otherwise, it's important to turn off automatic upgrades (see below) for the control-plane nodes (2 or less), and do the maintenance yourself. ## Automatic upgrade -By default, MicroOS and its embedded k3s instance get upgraded automatically on each node, and reboot safely via [Kured](https://github.com/weaveworks/kured) installed in the cluster. 
+By default, MicroOS gets upgraded automatically on each node, and reboots safely via [Kured](https://github.com/weaveworks/kured) installed in the cluster. -_About [Kured](https://github.com/weaveworks/kured), it does not have a latest tag present for its image, but it's pretty compatible, so you can just manually update the tag from once every year for instance._ +As for k3s, it is also automatically upgraded thanks to Rancher's [system upgrade controller](https://github.com/rancher/system-upgrade-controller). By default it follows the k3s `stable` channel, but you can also change to the `latest` one if needed, or specify a target version to upgrade to via the upgrade plan. You can copy and modify the one in the templates for that! More on the subject in [k3s upgrades basic](https://rancher.com/docs/k3s/latest/en/upgrades/basic/). -_Last but not least, if you wish to turn off automatic upgrade on a specific node, you need to ssh into it and issue the following command:_ +_If you wish to turn off automatic MicroOS upgrades on a specific node, you need to ssh into it and issue the following command:_ ```sh systemctl --now disable transactional-update.timer ``` +_To turn off k3s upgrades, you can either remove the `k3s_upgrade=true` label from the node you want, or set it to `false`. 
To just remove it, apply:_ + +```sh +kubectl -n system-upgrade label node <node-name> k3s_upgrade- +``` + ## Takedown If you want to takedown the cluster, you can proceed as follows: From b12bc56b095625aa74a8a8531969843462a2b0bd Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Thu, 17 Feb 2022 09:44:18 +0100 Subject: [PATCH 13/16] Make sure apt indexes are up-to-date in recovery environment --- agents.tf | 1 - locals.tf | 1 + terraform.tfvars.example | 2 -- variables.tf | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) diff --git a/agents.tf b/agents.tf index 46a57c0..276d5e3 100644 --- a/agents.tf +++ b/agents.tf @@ -97,7 +97,6 @@ resource "hcloud_server" "agents" { ] } - network { network_id = hcloud_network.k3s.id ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 513 + count.index) diff --git a/locals.tf b/locals.tf index 6afe266..acb0d21 100644 --- a/locals.tf +++ b/locals.tf @@ -20,6 +20,7 @@ locals { microOS_install_commands = [ "set -ex", + "apt-get update", "apt-get install -y aria2", "aria2c --follow-metalink=mem https://download.opensuse.org/tumbleweed/appliances/openSUSE-MicroOS.x86_64-kvm-and-xen.qcow2.meta4", "qemu-img convert -p -f qcow2 -O host_device $(ls -a | grep -ie '^opensuse.*microos.*qcow2$') /dev/sda", diff --git a/terraform.tfvars.example b/terraform.tfvars.example index d2876d1..323955c 100644 --- a/terraform.tfvars.example +++ b/terraform.tfvars.example @@ -36,5 +36,3 @@ agents_num = 2 # If you would like to specify the k3s upgrade channel from the get go, you can do so, the default is "stable". 
# For a list of available channels, see https://rancher.com/docs/k3s/latest/en/upgrades/basic/ and https://update.k3s.io/v1-release/channels # k3s_upgrade_channel = "latest" - - diff --git a/variables.tf b/variables.tf index b8ed1d1..e23c2bc 100644 --- a/variables.tf +++ b/variables.tf @@ -96,4 +96,3 @@ variable "automatically_upgrade_k3s" { default = true description = "Whether to automatically upgrade k3s based on the selected channel" } - From 1f0a03b33532c452b2b5a82beb0c120bbe05dbc6 Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Thu, 17 Feb 2022 13:19:21 +0100 Subject: [PATCH 14/16] moved k3s-selinux install to combustion --- agents.tf | 21 ++++++--------------- locals.tf | 16 ++++++++++++++-- main.tf | 2 ++ master.tf | 23 +++++++---------------- output.tf | 2 ++ servers.tf | 21 +++++++-------------- 6 files changed, 38 insertions(+), 47 deletions(-) diff --git a/agents.tf b/agents.tf index 46a57c0..f83962f 100644 --- a/agents.tf +++ b/agents.tf @@ -31,6 +31,12 @@ resource "hcloud_server" "agents" { destination = "/root/config.ign" } + # Combustion script file to install k3s-selinux + provisioner "file" { + content = local.combustion_script + destination = "/root/script" + } + # Install MicroOS provisioner "remote-exec" { inline = local.microOS_install_commands @@ -69,20 +75,6 @@ resource "hcloud_server" "agents" { inline = local.install_k3s_agent } - # Issue a reboot command and wait for the node to reboot - provisioner "local-exec" { - command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" - } - provisioner "local-exec" { - command = <<-EOT - until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null - do - echo "Waiting for MicroOS to reboot and become available..." 
- sleep 2 - done - EOT - } - # Upon reboot verify that k3s agent starts correctly provisioner "remote-exec" { inline = [ @@ -97,7 +89,6 @@ resource "hcloud_server" "agents" { ] } - network { network_id = hcloud_network.k3s.id ip = cidrhost(hcloud_network_subnet.k3s.ip_range, 513 + count.index) diff --git a/locals.tf b/locals.tf index 6afe266..6c5a399 100644 --- a/locals.tf +++ b/locals.tf @@ -20,6 +20,7 @@ locals { microOS_install_commands = [ "set -ex", + "apt-get update", "apt-get install -y aria2", "aria2c --follow-metalink=mem https://download.opensuse.org/tumbleweed/appliances/openSUSE-MicroOS.x86_64-kvm-and-xen.qcow2.meta4", "qemu-img convert -p -f qcow2 -O host_device $(ls -a | grep -ie '^opensuse.*microos.*qcow2$') /dev/sda", @@ -32,9 +33,20 @@ locals { "mount /dev/sda5 /mnt", "mkdir /mnt/ignition", "cp /root/config.ign /mnt/ignition/config.ign", + "mkdir /mnt/combustion", + "cp /root/script /mnt/combustion/script", "umount /mnt" ] + combustion_script = < /dev/null - do - echo "Waiting for MicroOS to reboot and become available..." - sleep 2 - done - EOT - } - # Upon reboot verify that the k3s server is starts, and wait for k3s to be ready to receive commands provisioner "remote-exec" { inline = [ + "systemctl start k3s", # prepare the post_install directory "mkdir -p /tmp/post_install", # wait for k3s to become ready diff --git a/output.tf b/output.tf index 310a0dd..330d587 100644 --- a/output.tf +++ b/output.tf @@ -8,10 +8,12 @@ output "agents_public_ip" { description = "The public IP addresses of the agent server." 
} +/* output "load_balancer_public_ip" { description = "The public IPv4 address of the Hetzner load balancer" value = data.hcloud_load_balancer.traefik.ipv4 } +*/ output "kubeconfig_file" { value = local.kubeconfig_external diff --git a/servers.tf b/servers.tf index 0db1ea5..7e918d9 100644 --- a/servers.tf +++ b/servers.tf @@ -30,6 +30,12 @@ resource "hcloud_server" "control_planes" { destination = "/root/config.ign" } + # Combustion script file to install k3s-selinux + provisioner "file" { + content = local.combustion_script + destination = "/root/script" + } + # Install MicroOS provisioner "remote-exec" { inline = local.microOS_install_commands @@ -74,23 +80,10 @@ resource "hcloud_server" "control_planes" { inline = local.install_k3s_server } - # Issue a reboot command and wait for the node to reboot - provisioner "local-exec" { - command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" - } - provisioner "local-exec" { - command = <<-EOT - until ssh ${local.ssh_args} -o ConnectTimeout=2 root@${self.ipv4_address} true 2> /dev/null - do - echo "Waiting for MicroOS to reboot and become available..." 
- sleep 2 - done - EOT - } - # Upon reboot verify that the k3s server starts correctly provisioner "remote-exec" { inline = [ + "systemctl start k3s", <<-EOT timeout 120 bash < /dev/null; do From 74829deaa71a8b7a41c6e80fabc595078e643a77 Mon Sep 17 00:00:00 2001 From: Marco Nenciarini Date: Thu, 17 Feb 2022 15:18:01 +0100 Subject: [PATCH 15/16] Fix system upgrade controller certificates --- master.tf | 5 +++-- patches/system-upgrade-controller.yaml | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 patches/system-upgrade-controller.yaml diff --git a/master.tf b/master.tf index d2303f4..003d5dd 100644 --- a/master.tf +++ b/master.tf @@ -113,11 +113,12 @@ resource "hcloud_server" "first_control_plane" { "https://raw.githubusercontent.com/hetznercloud/csi-driver/${local.csi_version}/deploy/kubernetes/hcloud-csi.yml", "https://github.com/weaveworks/kured/releases/download/${local.kured_version}/kured-${local.kured_version}-dockerhub.yaml", "https://raw.githubusercontent.com/rancher/system-upgrade-controller/master/manifests/system-upgrade-controller.yaml", - "./traefik.yaml", + "traefik.yaml", ] patchesStrategicMerge = [ file("${path.module}/patches/kured.yaml"), - file("${path.module}/patches/ccm.yaml") + file("${path.module}/patches/ccm.yaml"), + file("${path.module}/patches/system-upgrade-controller.yaml") ] }) destination = "/tmp/post_install/kustomization.yaml" diff --git a/patches/system-upgrade-controller.yaml b/patches/system-upgrade-controller.yaml new file mode 100644 index 0000000..fc904de --- /dev/null +++ b/patches/system-upgrade-controller.yaml @@ -0,0 +1,18 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: system-upgrade-controller + namespace: system-upgrade +spec: + template: + spec: + containers: + - name: system-upgrade-controller + volumeMounts: + - name: ca-certificates + mountPath: /var/lib/ca-certificates + volumes: + - name: ca-certificates + hostPath: + path: /var/lib/ca-certificates + type: 
Directory From cb5aa2410d41c5ede2511282b30418124f0fc87b Mon Sep 17 00:00:00 2001 From: Karim Naufal Date: Thu, 17 Feb 2022 21:49:03 +0100 Subject: [PATCH 16/16] adjust ssh check loop time from sleep 3 to 5, to reduce the number of outputs --- agents.tf | 2 +- master.tf | 2 +- servers.tf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/agents.tf b/agents.tf index f83962f..9faed3b 100644 --- a/agents.tf +++ b/agents.tf @@ -44,7 +44,7 @@ resource "hcloud_server" "agents" { # Issue a reboot command and wait for the node to reboot provisioner "local-exec" { - command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" + command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 5" } provisioner "local-exec" { command = <<-EOT diff --git a/master.tf b/master.tf index 003d5dd..a0f695b 100644 --- a/master.tf +++ b/master.tf @@ -42,7 +42,7 @@ resource "hcloud_server" "first_control_plane" { # Issue a reboot command and wait for the node to reboot provisioner "local-exec" { - command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" + command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 5" } provisioner "local-exec" { command = <<-EOT diff --git a/servers.tf b/servers.tf index 7e918d9..99a076d 100644 --- a/servers.tf +++ b/servers.tf @@ -43,7 +43,7 @@ resource "hcloud_server" "control_planes" { # Issue a reboot command and wait for the node to reboot provisioner "local-exec" { - command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 3" + command = "ssh ${local.ssh_args} root@${self.ipv4_address} '(sleep 2; reboot)&'; sleep 5" } provisioner "local-exec" { command = <<-EOT