Merge pull request #308090 from mogeko/service-rke2

nixos/rke2: add rke2 service
This commit is contained in:
Jonas Chevalier 2024-05-28 09:36:03 +02:00 committed by GitHub
commit 5d6d058646
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 619 additions and 2 deletions

@ -205,6 +205,8 @@ The pre-existing [services.ankisyncd](#opt-services.ankisyncd.enable) has been m
- [your_spotify](https://github.com/Yooooomi/your_spotify), a self hosted Spotify tracking dashboard. Available as [services.your_spotify](#opt-services.your_spotify.enable)
- [RKE2](https://github.com/rancher/rke2), also known as RKE Government, is Rancher's next-generation Kubernetes distribution. Available as [services.rke2](#opt-services.rke2.enable).
## Backward Incompatibilities {#sec-release-24.05-incompatibilities}
<!-- To avoid merge conflicts, consider adding your item at an arbitrary place in the list instead. -->

@ -418,6 +418,7 @@
./services/cluster/kubernetes/scheduler.nix
./services/cluster/pacemaker/default.nix
./services/cluster/patroni/default.nix
./services/cluster/rke2/default.nix
./services/cluster/spark/default.nix
./services/computing/boinc/client.nix
./services/computing/foldingathome/client.nix

@ -0,0 +1,311 @@
{ config, lib, pkgs, ... }:

with lib;

let
  cfg = config.services.rke2;
in
{
  imports = [ ];

  options.services.rke2 = {
    enable = mkEnableOption "rke2";

    package = mkPackageOption pkgs "rke2" { };

    role = mkOption {
      type = types.enum [ "server" "agent" ];
      description = ''
        Whether rke2 should run as a server or agent.

        If it's a server:

        - By default it also runs workloads as an agent.
        - Any optional settings are allowed.

        If it's an agent:

        - `serverAddr` is required.
        - `token` or `tokenFile` is required.
        - `agentToken` or `agentTokenFile` or `disable` or `cni` are not allowed.
      '';
      default = "server";
    };

    configPath = mkOption {
      type = types.path;
      description = "Load configuration from FILE.";
      default = "/etc/rancher/rke2/config.yaml";
    };

    debug = mkOption {
      type = types.bool;
      description = "Turn on debug logs.";
      default = false;
    };

    dataDir = mkOption {
      type = types.path;
      description = "The folder to hold state in.";
      default = "/var/lib/rancher/rke2";
    };

    token = mkOption {
      type = types.str;
      description = ''
        Shared secret used to join a server or agent to a cluster.

        > **WARNING**: This option will store your token unencrypted, world-readable in the Nix store.
        If this is undesired use the `tokenFile` option instead.
      '';
      default = "";
    };

    tokenFile = mkOption {
      type = types.nullOr types.path;
      description = "File path containing rke2 token to use when connecting to the server.";
      default = null;
    };

    disable = mkOption {
      type = types.listOf types.str;
      description = "Do not deploy packaged components and delete any deployed components.";
      default = [ ];
    };

    nodeName = mkOption {
      type = types.nullOr types.str;
      description = "Node name.";
      default = null;
    };

    nodeLabel = mkOption {
      type = types.listOf types.str;
      description = "Registering and starting kubelet with set of labels.";
      default = [ ];
    };

    nodeTaint = mkOption {
      type = types.listOf types.str;
      description = "Registering kubelet with set of taints.";
      default = [ ];
    };

    nodeIP = mkOption {
      type = types.nullOr types.str;
      description = "IPv4/IPv6 addresses to advertise for node.";
      default = null;
    };

    agentToken = mkOption {
      type = types.str;
      description = ''
        Shared secret used to join agents to the cluster, but not servers.

        > **WARNING**: This option will store your token unencrypted, world-readable in the Nix store.
        If this is undesired use the `agentTokenFile` option instead.
      '';
      default = "";
    };

    agentTokenFile = mkOption {
      type = types.nullOr types.path;
      description = "File path containing rke2 agent token to use when connecting to the server.";
      default = null;
    };

    serverAddr = mkOption {
      type = types.str;
      description = "The rke2 server to connect to, used to join a cluster.";
      example = "https://10.0.0.10:6443";
      default = "";
    };

    selinux = mkOption {
      type = types.bool;
      description = "Enable SELinux in containerd.";
      default = false;
    };

    cni = mkOption {
      type = types.enum [ "none" "canal" "cilium" "calico" "flannel" ];
      description = ''
        CNI Plugins to deploy, one of `none`, `calico`, `canal`, `cilium` or `flannel`.

        All CNI plugins get installed via a helm chart after the main components are up and running
        and can be [customized by modifying the helm chart options](https://docs.rke2.io/helm).

        [Learn more about RKE2 and CNI plugins](https://docs.rke2.io/networking/basic_network_options)

        > **WARNING**: Flannel support in RKE2 is currently experimental.
      '';
      default = "canal";
    };

    cisHardening = mkOption {
      type = types.bool;
      description = ''
        Enable CIS Hardening for RKE2.

        It will set the configurations and controls required to address Kubernetes benchmark controls
        from the Center for Internet Security (CIS).

        Learn more about [CIS Hardening for RKE2](https://docs.rke2.io/security/hardening_guide).

        > **NOTICE**:
        >
        > You may need to restart `systemd-sysctl` manually by:
        >
        > ```shell
        > sudo systemctl restart systemd-sysctl
        > ```
      '';
      default = false;
    };

    extraFlags = mkOption {
      type = types.listOf types.str;
      description = ''
        Extra flags to pass to the rke2 service/agent.

        Here you can find all the available flags:

        - [Server Configuration Reference](https://docs.rke2.io/reference/server_config)
        - [Agent Configuration Reference](https://docs.rke2.io/reference/linux_agent_config)
      '';
      example = [ "--disable-kube-proxy" "--cluster-cidr=10.24.0.0/16" ];
      default = [ ];
    };

    environmentVars = mkOption {
      type = types.attrsOf types.str;
      description = ''
        Environment variables for configuring the rke2 service/agent.

        Here you can find all the available environment variables:

        - [Server Configuration Reference](https://docs.rke2.io/reference/server_config)
        - [Agent Configuration Reference](https://docs.rke2.io/reference/linux_agent_config)

        Besides the options above, you can also set environment variables by editing/creating these files:

        - `/etc/default/rke2`
        - `/etc/sysconfig/rke2`
        - `/usr/local/lib/systemd/system/rke2.env`
      '';
      # See: https://github.com/rancher/rke2/blob/master/bundle/lib/systemd/system/rke2-server.env#L1
      default = {
        HOME = "/root";
      };
    };
  };

  config = mkIf cfg.enable {
    # Enforce the role-specific constraints documented on the `role` option.
    assertions = [
      {
        assertion = cfg.role == "agent" -> (builtins.pathExists cfg.configPath || cfg.serverAddr != "");
        message = "serverAddr or configPath (with 'server' key) should be set if role is 'agent'";
      }
      {
        assertion = cfg.role == "agent" -> (builtins.pathExists cfg.configPath || cfg.tokenFile != null || cfg.token != "");
        message = "token or tokenFile or configPath (with 'token' or 'token-file' keys) should be set if role is 'agent'";
      }
      {
        # Agent tokens only make sense on servers; the message must describe
        # the failing condition (these options must NOT be set on agents).
        assertion = cfg.role == "agent" -> ! (cfg.agentTokenFile != null || cfg.agentToken != "");
        message = "agentToken or agentTokenFile should not be set if role is 'agent'";
      }
      {
        assertion = cfg.role == "agent" -> ! (cfg.disable != [ ]);
        message = "disable should not be set if role is 'agent'";
      }
      {
        assertion = cfg.role == "agent" -> ! (cfg.cni != "canal");
        message = "cni should not be set if role is 'agent'";
      }
    ];

    environment.systemPackages = [ cfg.package ];

    # To configure NetworkManager to ignore calico/flannel related network interfaces.
    # See: https://docs.rke2.io/known_issues#networkmanager
    environment.etc."NetworkManager/conf.d/rke2-canal.conf" = {
      enable = config.networking.networkmanager.enable;
      text = ''
        [keyfile]
        unmanaged-devices=interface-name:cali*;interface-name:flannel*
      '';
    };

    # See: https://docs.rke2.io/security/hardening_guide#set-kernel-parameters
    boot.kernel.sysctl = mkIf cfg.cisHardening {
      "vm.panic_on_oom" = 0;
      "vm.overcommit_memory" = 1;
      "kernel.panic" = 10;
      "kernel.panic_on_oops" = 1;
    };

    systemd.services.rke2 = {
      description = "Rancher Kubernetes Engine v2";
      documentation = [ "https://github.com/rancher/rke2#readme" ];
      after = [ "network-online.target" ];
      wants = [ "network-online.target" ];
      wantedBy = [ "multi-user.target" ];
      serviceConfig = {
        Type = if cfg.role == "agent" then "exec" else "notify";
        # Optional ("-" prefixed) env files matching the upstream unit; %N is
        # the unit name without the suffix, i.e. "rke2".
        EnvironmentFile = [
          "-/etc/default/%N"
          "-/etc/sysconfig/%N"
          "-/usr/local/lib/systemd/system/%N.env"
        ];
        Environment = mapAttrsToList (k: v: "${k}=${v}") cfg.environmentVars;
        KillMode = "process";
        Delegate = "yes";
        LimitNOFILE = 1048576;
        LimitNPROC = "infinity";
        LimitCORE = "infinity";
        TasksMax = "infinity";
        TimeoutStartSec = 0;
        Restart = "always";
        RestartSec = "5s";
        ExecStartPre = [
          # There is a conflict between RKE2 and `nm-cloud-setup.service`. That service adds a routing
          # table that interferes with the CNI plugin's configuration. This script checks if the service
          # is enabled and, if so, fails the RKE2 start.
          # See: https://github.com/rancher/rke2/issues/1053
          (pkgs.writeScript "check-nm-cloud-setup.sh" ''
            #! ${pkgs.runtimeShell}
            set -x
            ! /run/current-system/systemd/bin/systemctl is-enabled --quiet nm-cloud-setup.service
          '')
          "-${pkgs.kmod}/bin/modprobe br_netfilter"
          "-${pkgs.kmod}/bin/modprobe overlay"
        ];
        # Only pass a flag when its option differs from rke2's own default;
        # server-only flags are additionally guarded by the role.
        ExecStart = "${cfg.package}/bin/rke2 '${cfg.role}' ${escapeShellArgs (
          (optional (cfg.configPath != "/etc/rancher/rke2/config.yaml") "--config=${cfg.configPath}")
          ++ (optional cfg.debug "--debug")
          ++ (optional (cfg.dataDir != "/var/lib/rancher/rke2") "--data-dir=${cfg.dataDir}")
          ++ (optional (cfg.token != "") "--token=${cfg.token}")
          ++ (optional (cfg.tokenFile != null) "--token-file=${cfg.tokenFile}")
          ++ (optionals (cfg.role == "server" && cfg.disable != [ ]) (map (d: "--disable=${d}") cfg.disable))
          ++ (optional (cfg.nodeName != null) "--node-name=${cfg.nodeName}")
          ++ (optionals (cfg.nodeLabel != [ ]) (map (l: "--node-label=${l}") cfg.nodeLabel))
          ++ (optionals (cfg.nodeTaint != [ ]) (map (t: "--node-taint=${t}") cfg.nodeTaint))
          ++ (optional (cfg.nodeIP != null) "--node-ip=${cfg.nodeIP}")
          ++ (optional (cfg.role == "server" && cfg.agentToken != "") "--agent-token=${cfg.agentToken}")
          ++ (optional (cfg.role == "server" && cfg.agentTokenFile != null) "--agent-token-file=${cfg.agentTokenFile}")
          ++ (optional (cfg.serverAddr != "") "--server=${cfg.serverAddr}")
          ++ (optional cfg.selinux "--selinux")
          ++ (optional (cfg.role == "server" && cfg.cni != "canal") "--cni=${cfg.cni}")
          # versionAtLeast compares version components; a plain string `>=`
          # would mis-order versions like "1.100" vs "1.25".
          ++ (optional cfg.cisHardening "--profile=${if versionAtLeast cfg.package.version "1.25" then "cis-1.23" else "cis-1.6"}")
          ++ cfg.extraFlags
        )}";
        ExecStopPost = let
          # Kill leftover containerd/kubelet processes in the unit's cgroup
          # (KillMode=process only kills the main process) so a restart
          # starts from a clean slate. "-" prefix: failure is non-fatal.
          killProcess = pkgs.writeScript "kill-process.sh" ''
            #! ${pkgs.runtimeShell}
            /run/current-system/systemd/bin/systemd-cgls /system.slice/$1 | \
              ${pkgs.gnugrep}/bin/grep -Eo '[0-9]+ (containerd|kubelet)' | \
              ${pkgs.gawk}/bin/awk '{print $1}' | \
              ${pkgs.findutils}/bin/xargs -r ${pkgs.util-linux}/bin/kill
          '';
        in "-${killProcess} %n";
      };
    };
  };
}

@ -800,6 +800,7 @@ in {
restic-rest-server = handleTest ./restic-rest-server.nix {};
restic = handleTest ./restic.nix {};
retroarch = handleTest ./retroarch.nix {};
rke2 = handleTestOn ["aarch64-linux" "x86_64-linux"] ./rke2 {};
rkvm = handleTest ./rkvm {};
robustirc-bridge = handleTest ./robustirc-bridge.nix {};
roundcube = handleTest ./roundcube.nix {};

@ -0,0 +1,13 @@
{ system ? builtins.currentSystem
, pkgs ? import ../../.. { inherit system; }
, lib ? pkgs.lib
}:
let
  # Every rke2 package variant in nixpkgs (attribute names starting with "rke2").
  rke2Packages = lib.filterAttrs (name: _: lib.strings.hasPrefix "rke2" name) pkgs;

  # Instantiate the given test file once per rke2 package variant.
  forAllVariants = testFile:
    lib.mapAttrs (_: rke2: import testFile { inherit system pkgs rke2; }) rke2Packages;
in
{
  # Run a single node rke2 cluster and verify a pod can run
  singleNode = forAllVariants ./single-node.nix;
  # Run a multi-node rke2 cluster and verify pod networking works across nodes
  multiNode = forAllVariants ./multi-node.nix;
}

@ -0,0 +1,176 @@
import ../make-test-python.nix ({ pkgs, lib, rke2, ... }:
let
  # Minimal local container image (tini as PID 1, shell, coreutils, socat),
  # imported straight into containerd on each node so no registry is needed.
  pauseImage = pkgs.dockerTools.streamLayeredImage {
    name = "test.local/pause";
    tag = "local";
    contents = pkgs.buildEnv {
      name = "rke2-pause-image-env";
      paths = with pkgs; [ tini bashInteractive coreutils socat ];
    };
    config.Entrypoint = [ "/bin/tini" "--" "/bin/sleep" "inf" ];
  };
  # A daemonset that responds 'server' on port 8000
  networkTestDaemonset = pkgs.writeText "test.yml" ''
    apiVersion: apps/v1
    kind: DaemonSet
    metadata:
      name: test
      labels:
        name: test
    spec:
      selector:
        matchLabels:
          name: test
      template:
        metadata:
          labels:
            name: test
        spec:
          containers:
          - name: test
            image: test.local/pause:local
            imagePullPolicy: Never
            resources:
              limits:
                memory: 20Mi
            command: ["socat", "TCP4-LISTEN:8000,fork", "EXEC:echo server"]
  '';
  # Cluster join secrets, shared by all nodes via the module's *File options
  # (kept out of the generated CLI flags).
  tokenFile = pkgs.writeText "token" "p@s$w0rd";
  agentTokenFile = pkgs.writeText "agent-token" "p@s$w0rd";
in
{
  name = "${rke2.name}-multi-node";
  meta.maintainers = rke2.meta.maintainers;

  nodes = {
    # First server: bootstraps the cluster (--cluster-reset).
    server1 = { pkgs, ... }: {
      networking.firewall.enable = false;
      networking.useDHCP = false;
      networking.defaultGateway = "192.168.1.1";
      networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
        { address = "192.168.1.1"; prefixLength = 24; }
      ];
      virtualisation.memorySize = 1536;
      virtualisation.diskSize = 4096;

      services.rke2 = {
        enable = true;
        role = "server";
        inherit tokenFile;
        inherit agentTokenFile;
        nodeName = "${rke2.name}-server1";
        package = rke2;
        nodeIP = "192.168.1.1";
        # Skip optional components to keep the VM test lean.
        disable = [
          "rke2-coredns"
          "rke2-metrics-server"
          "rke2-ingress-nginx"
        ];
        extraFlags = [
          "--cluster-reset"
        ];
      };
    };
    # Second server: joins the cluster through server1.
    server2 = { pkgs, ... }: {
      networking.firewall.enable = false;
      networking.useDHCP = false;
      networking.defaultGateway = "192.168.1.2";
      networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
        { address = "192.168.1.2"; prefixLength = 24; }
      ];
      virtualisation.memorySize = 1536;
      virtualisation.diskSize = 4096;

      services.rke2 = {
        enable = true;
        role = "server";
        serverAddr = "https://192.168.1.1:6443";
        inherit tokenFile;
        inherit agentTokenFile;
        nodeName = "${rke2.name}-server2";
        package = rke2;
        nodeIP = "192.168.1.2";
        disable = [
          "rke2-coredns"
          "rke2-metrics-server"
          "rke2-ingress-nginx"
        ];
      };
    };
    # Agent: joins through server2; agents authenticate with the agent token,
    # supplied here as the agent's own tokenFile.
    agent1 = { pkgs, ... }: {
      networking.firewall.enable = false;
      networking.useDHCP = false;
      networking.defaultGateway = "192.168.1.3";
      networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
        { address = "192.168.1.3"; prefixLength = 24; }
      ];
      virtualisation.memorySize = 1536;
      virtualisation.diskSize = 4096;

      services.rke2 = {
        enable = true;
        role = "agent";
        tokenFile = agentTokenFile;
        serverAddr = "https://192.168.1.2:6443";
        nodeName = "${rke2.name}-agent1";
        package = rke2;
        nodeIP = "192.168.1.3";
      };
    };
  };

  # Bring the cluster up, import the test image on every node, then deploy a
  # daemonset and verify pod-to-pod networking across nodes.
  testScript = let
    kubectl = "${pkgs.kubectl}/bin/kubectl --kubeconfig=/etc/rancher/rke2/rke2.yaml";
    ctr = "${pkgs.containerd}/bin/ctr -a /run/k3s/containerd/containerd.sock";
    jq = "${pkgs.jq}/bin/jq";
    ping = "${pkgs.iputils}/bin/ping";
  in ''
    machines = [server1, server2, agent1]
    for machine in machines:
        machine.start()
        machine.wait_for_unit("rke2")

    # wait for the agent to show up
    server1.succeed("${kubectl} get node ${rke2.name}-agent1")

    for machine in machines:
        machine.succeed("${pauseImage} | ${ctr} image import -")

    server1.succeed("${kubectl} cluster-info")
    server1.wait_until_succeeds("${kubectl} get serviceaccount default")

    # Now create a pod on each node via a daemonset and verify they can talk to each other.
    server1.succeed("${kubectl} apply -f ${networkTestDaemonset}")
    server1.wait_until_succeeds(
        f'[ "$(${kubectl} get ds test -o json | ${jq} .status.numberReady)" -eq {len(machines)} ]'
    )

    # Get pod IPs
    pods = server1.succeed("${kubectl} get po -o json | ${jq} '.items[].metadata.name' -r").splitlines()
    pod_ips = [
        server1.succeed(f"${kubectl} get po {n} -o json | ${jq} '.status.podIP' -cr").strip() for n in pods
    ]

    # Verify each server can ping each pod ip
    for pod_ip in pod_ips:
        server1.succeed(f"${ping} -c 1 {pod_ip}")
        agent1.succeed(f"${ping} -c 1 {pod_ip}")

    # Verify the pods can talk to each other
    resp = server1.wait_until_succeeds(f"${kubectl} exec {pods[0]} -- socat TCP:{pod_ips[1]}:8000 -")
    assert resp.strip() == "server"
    resp = server1.wait_until_succeeds(f"${kubectl} exec {pods[1]} -- socat TCP:{pod_ips[0]}:8000 -")
    assert resp.strip() == "server"

    # Cleanup
    server1.succeed("${kubectl} delete -f ${networkTestDaemonset}")
    for machine in machines:
        machine.shutdown()
  '';
})

@ -0,0 +1,75 @@
import ../make-test-python.nix ({ pkgs, lib, rke2, ... }:
let
  # Minimal local container image (tini as PID 1 plus coreutils/busybox),
  # imported straight into containerd so the test needs no registry access.
  pauseImage = pkgs.dockerTools.streamLayeredImage {
    name = "test.local/pause";
    tag = "local";
    contents = pkgs.buildEnv {
      name = "rke2-pause-image-env";
      paths = with pkgs; [ tini (hiPrio coreutils) busybox ];
    };
    config.Entrypoint = [ "/bin/tini" "--" "/bin/sleep" "inf" ];
  };
  # A trivial pod that just sleeps forever, using the local image above.
  testPodYaml = pkgs.writeText "test.yaml" ''
    apiVersion: v1
    kind: Pod
    metadata:
      name: test
    spec:
      containers:
      - name: test
        image: test.local/pause:local
        imagePullPolicy: Never
        command: ["sh", "-c", "sleep inf"]
  '';
in
{
  name = "${rke2.name}-single-node";
  meta.maintainers = rke2.meta.maintainers;

  nodes.machine = { pkgs, ... }: {
    networking.firewall.enable = false;
    networking.useDHCP = false;
    networking.defaultGateway = "192.168.1.1";
    networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
      { address = "192.168.1.1"; prefixLength = 24; }
    ];
    virtualisation.memorySize = 1536;
    virtualisation.diskSize = 4096;

    services.rke2 = {
      enable = true;
      role = "server";
      package = rke2;
      nodeIP = "192.168.1.1";
      # Skip optional components to keep the VM test lean.
      disable = [
        "rke2-coredns"
        "rke2-metrics-server"
        "rke2-ingress-nginx"
      ];
      extraFlags = [
        "--cluster-reset"
      ];
    };
  };

  # Wait for the server, import the local image, then run and delete a pod.
  testScript = let
    kubectl = "${pkgs.kubectl}/bin/kubectl --kubeconfig=/etc/rancher/rke2/rke2.yaml";
    ctr = "${pkgs.containerd}/bin/ctr -a /run/k3s/containerd/containerd.sock";
  in ''
    start_all()

    machine.wait_for_unit("rke2")
    machine.succeed("${kubectl} cluster-info")
    machine.wait_until_succeeds(
        "${pauseImage} | ${ctr} -n k8s.io image import -"
    )
    machine.wait_until_succeeds("${kubectl} get serviceaccount default")
    machine.succeed("${kubectl} apply -f ${testPodYaml}")
    machine.succeed("${kubectl} wait --for 'condition=Ready' pod/test")
    machine.succeed("${kubectl} delete -f ${testPodYaml}")

    machine.shutdown()
  '';
})

@ -1,4 +1,11 @@
{ lib, stdenv, buildGoModule, fetchFromGitHub }:
{ lib, stdenv, buildGoModule, fetchFromGitHub, makeWrapper, nix-update-script
# Runtime dependencies
, procps, coreutils, util-linux, ethtool, socat, iptables, bridge-utils, iproute2, kmod, lvm2
# Testing dependencies
, nixosTests, testers, rke2
}:
buildGoModule rec {
pname = "rke2";
@ -21,14 +28,45 @@ buildGoModule rec {
sed -e 's/STATIC_FLAGS=.*/STATIC_FLAGS=/g' -i scripts/build-binary
'';
nativeBuildInputs = [ makeWrapper ];
# Important utilities used by the kubelet.
# See: https://github.com/kubernetes/kubernetes/issues/26093#issuecomment-237202494
# Notice the list in that issue is stale, but as a redundancy reservation.
buildInputs = [
procps # pidof pkill
coreutils # uname touch env nice du
util-linux # lsblk fsck mkfs nsenter mount umount
ethtool # ethtool
socat # socat
iptables # iptables iptables-restore iptables-save
bridge-utils # brctl
iproute2 # ip tc
kmod # modprobe
lvm2 # dmsetup
];
buildPhase = ''
DRONE_TAG="v${version}" ./scripts/build-binary
'';
installPhase = ''
install -D ./bin/rke2 $out/bin/rke2
wrapProgram $out/bin/rke2 \
--prefix PATH : ${lib.makeBinPath buildInputs}
'';
passthru.updateScript = nix-update-script { };
passthru.tests = {
version = testers.testVersion {
package = rke2;
version = "v${version}";
};
} // lib.optionalAttrs stdenv.isLinux {
inherit (nixosTests) rke2;
};
meta = with lib; {
homepage = "https://github.com/rancher/rke2";
description = "RKE2, also known as RKE Government, is Rancher's next-generation Kubernetes distribution.";
@ -36,6 +74,6 @@ buildGoModule rec {
license = licenses.asl20;
maintainers = with maintainers; [ zimbatm zygot ];
mainProgram = "rke2";
broken = stdenv.isDarwin;
platforms = platforms.linux;
};
}