# k3d
rancher/k3d

## 常用命令
# List existing k3d clusters and kubectl contexts.
k3d cluster list
kubectl config get-contexts
# Shorthand for switching kubectl contexts.
alias kctx='kubectl config use-context'
# Install k3d
# NOTE(review): piping a remote script straight into bash is unauthenticated
# and unpinned -- consider downloading, inspecting, and pinning a release tag.
wget -q -O - https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | bash
# Pick a k3s node image tag from: https://hub.docker.com/r/rancher/k3s/tags
# NOTE: the original YAML had its indentation stripped (flat keys are not a
# valid k3d v1alpha5 Simple config); nesting restored per the k3d schema.
cat << 'EOF' > k3d-config-cluster-1.yaml
apiVersion: k3d.io/v1alpha5
kind: Simple
metadata:
  name: cluster-1
image: rancher/k3s:v1.35.1-k3s1
servers: 1
agents: 2
registries:
  # Mirror docker.io pulls through a private registry proxy.
  config: |
    mirrors:
      "docker.io":
        endpoint:
          - https://hub.kingye.me
EOF
k3d cluster create --config k3d-config-cluster-1.yaml --verbose
## 安装配置

### GPU

#### k3s-gpu
# Configure an HTTP(S) proxy for the Docker daemon via a systemd drop-in
# (persists across daemon restarts and reboots).
sudo mkdir -p /etc/systemd/system/docker.service.d
sudo tee /etc/systemd/system/docker.service.d/http-proxy.conf > /dev/null <<'EOF'
[Service]
Environment="HTTP_PROXY=http://218.16.121.13:1080"
Environment="HTTPS_PROXY=http://218.16.121.13:1080"
Environment="NO_PROXY=localhost,127.0.0.1"
EOF
# Reload unit files and restart dockerd so the drop-in takes effect.
sudo systemctl daemon-reload
sudo systemctl restart docker
# Pick a CUDA base tag from: https://hub.docker.com/r/nvidia/cuda/tags
cat << 'DOCKERFILE_EOF' > Dockerfile
# syntax=docker/dockerfile:1
# k3s node image with the NVIDIA CUDA userland + container toolkit,
# intended for GPU-enabled k3d clusters.
FROM nvidia/cuda:12.8.1-base-ubuntu24.04

ARG K3S_VERSION="v1.35.1+k3s1"
ARG KUBECTL_VERSION="v1.35.1"
# Build-time only (ARG, not ENV): keeps apt non-interactive without
# polluting the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Fail pipelines early (curl | gpg and curl | sed below).
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# 1) Base tools + CNI plugins + NVIDIA Container Toolkit
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ca-certificates curl gnupg \
        iptables iproute2 \
        containernetworking-plugins \
        iputils-ping conntrack ethtool; \
    mkdir -p /usr/share/keyrings; \
    curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
        | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg; \
    curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
        | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
        > /etc/apt/sources.list.d/nvidia-container-toolkit.list; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        nvidia-container-toolkit \
        nvidia-container-runtime \
        libnvidia-container-tools; \
# Key step: expose the distro's CNI plugin directory at the path k3s expects.
    mkdir -p /opt/cni; \
    if [ -d /usr/lib/cni ]; then ln -sf /usr/lib/cni /opt/cni/bin; fi; \
    if [ -d /usr/libexec/cni ]; then ln -sf /usr/libexec/cni /opt/cni/bin; fi; \
# Sanity check the runtime is on PATH; version may fail without a GPU, tolerate it.
    command -v nvidia-container-runtime; \
    nvidia-container-runtime --version || true; \
    rm -rf /var/lib/apt/lists/*

# 2) Install the k3s static binary (fetched through a proxy mirror).
RUN set -eux; \
    curl -fsSL --retry 5 --retry-delay 1 \
        -o /bin/k3s \
        "https://proxy.kingye.me/proxy/https://github.com/k3s-io/k3s/releases/download/${K3S_VERSION}/k3s" && \
    chmod +x /bin/k3s && \
    /bin/k3s --version

# 3) Standalone kubectl client for convenience inside the node container.
RUN set -eux; \
    curl -fL --retry 5 --retry-delay 1 \
        -o /bin/kubectl \
        "https://proxy.kingye.me/proxy/https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl" && \
    chmod +x /bin/kubectl && \
    /bin/kubectl version --client=true --output=yaml

# fuse-overlayfs: fallback snapshotter backend for nested container runtimes.
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends fuse-overlayfs; \
    rm -rf /var/lib/apt/lists/*

# 4) entrypoint (heredoc body stays at column 0 so the shebang is byte 0
#    of the written file; requires the dockerfile:1 syntax directive above).
RUN set -eux; \
    cat > /usr/local/bin/entrypoint.sh <<'ENTRYPOINT_EOF'
#!/usr/bin/env bash
set -euo pipefail

# No arguments: run a default k3s server.
if [[ $# -eq 0 ]]; then
  exec /bin/k3s server --disable=traefik --disable=servicelb
fi

case "$1" in
  server|agent|--help|-h|kubectl|ctr|crictl)
    # k3s subcommands (kubectl / ctr / crictl are built into the k3s binary).
    exec /bin/k3s "$@"
    ;;
  *)
    # Anything else runs verbatim for debugging: bash / sh / nvidia-smi / cat ...
    exec "$@"
    ;;
esac
ENTRYPOINT_EOF
RUN chmod +x /usr/local/bin/entrypoint.sh

ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
# Default args mirror the entrypoint's no-arg branch; override at `docker run`.
CMD ["server", "--disable=traefik", "--disable=servicelb"]
DOCKERFILE_EOF

# Build the image (the daemon proxy configured above is used for pulls).
docker build -t ikingye/k3s-gpu:v1.35.1-k3s1-cuda12.8.1 .
docker push ikingye/k3s-gpu:v1.35.1-k3s1-cuda12.8.1
# Host-side dependencies
sudo apt-get update
sudo apt-get install -y curl ca-certificates gnupg
# Add the NVIDIA repository signing key
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
| sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
# Add the repo list (rewritten to pin the signing keyring)
curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list > /dev/null
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
# Register the nvidia runtime with dockerd and restart it.
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
# Verify GPU containers
docker run --rm --gpus all nvidia/cuda:12.8.1-base-ubuntu24.04 nvidia-smi
# NOTE(review): the cuda13.1.0 tag below does not match the image built above
# (cuda12.8.1) -- confirm it exists, otherwise this run fails with "not found".
docker run --rm --gpus all ikingye/k3s-gpu:v1.35.1-k3s1-cuda13.1.0 nvidia-smi
# NOTE(review): rancher/k3s does not appear to ship nvidia-smi; presumably this
# line demonstrates the failure case -- confirm intent.
docker run --rm --gpus all rancher/k3s:v1.35.1-k3s1 nvidia-smi
docker run --rm --gpus all ikingye/k3s-gpu:v1.35.1-k3s1-cuda12.8.1 nvidia-smi
# GPU cluster config. NOTE: the original YAML had its indentation stripped
# (flat keys are not a valid k3d v1alpha5 Simple config); nesting restored
# per the k3d schema: options.runtime / options.k3s.extraArgs / options.kubeconfig.
cat << 'EOF' > k3d-config-cluster-1.yaml
apiVersion: k3d.io/v1alpha5
kind: Simple
metadata:
  name: cluster-1
image: ikingye/k3s-gpu:v1.35.1-k3s1-cuda12.8.1
servers: 1
agents: 2
options:
  runtime:
    # Pass all host GPUs through to the node containers.
    gpuRequest: all
  k3s:
    extraArgs:
      - arg: "--disable=traefik"
        nodeFilters:
          - server:*
      - arg: "--disable=servicelb"
        nodeFilters:
          - server:*
      - arg: "--snapshotter=native"
        nodeFilters:
          - server:*
          - agent:*
  kubeconfig:
    updateDefaultKubeconfig: true
    switchCurrentContext: true
registries:
  # Mirror docker.io pulls through a private registry proxy.
  config: |
    mirrors:
      "docker.io":
        endpoint:
          - https://hub.kingye.me
EOF
# 1) Delete any old cluster
k3d cluster delete cluster-1 || true
# 2) Remove the stale docker network (avoids dirty state)
docker network rm k3d-cluster-1 2>/dev/null || true
# 3) Create (do NOT also pass --gpus all on the CLI; the YAML already sets gpuRequest: all)
k3d cluster create --config k3d-config-cluster-1.yaml --verbose
# k3d kubeconfig merge cluster-1 --switch-context
叶王 © 2013-2026 版权所有。如果本文档对你有所帮助,可以请作者喝饮料。