Installing a k8s cluster with kubeadm
Preparation before installing kubeadm
Install the tools
# Configure the package mirror
cat << EOF | tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
# Install the tools
yum install -y kubeadm kubelet kubectl --enablerepo=kubernetes
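On CentOS 7 a few host-level changes are usually needed before kubeadm's preflight checks pass. A hedged sketch (paths and policies may differ on your distro):

```shell
# Set SELinux to permissive and disable swap (kubeadm preflight expects both)
setenforce 0
sed -i 's/^SELINUX=enforcing$/SELINUX=permissive/' /etc/selinux/config
swapoff -a
sed -i '/ swap / s/^/#/' /etc/fstab   # comment out swap entries so it stays off after reboot
# Start kubelet on boot; it will crash-loop until kubeadm init configures it, which is expected
systemctl enable --now kubelet
```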
Install containerd
wget -O /etc/yum.repos.d/docker-ce.repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum install -y containerd.io
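kubeadm configures the kubelet for the systemd cgroup driver by default, so containerd should match. A sketch of the usual configuration, assuming the default config path:

```shell
# Generate the default containerd config, then switch runc to the systemd cgroup driver
mkdir -p /etc/containerd
containerd config default | tee /etc/containerd/config.toml
sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
systemctl restart containerd
```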
# Write the CNI config
cat << EOF | tee /etc/cni/net.d/10-containerd-net.conflist
{
  "cniVersion": "1.0.0",
  "name": "containerd-net",
  "plugins": [
    {
      "type": "bridge",
      "bridge": "cni0",
      "isGateway": true,
      "ipMasq": true,
      "promiscMode": true,
      "ipam": {
        "type": "host-local",
        "ranges": [
          [{
            "subnet": "10.88.0.0/16"
          }],
          [{
            "subnet": "2001:db8:4860::/64"
          }]
        ],
        "routes": [
          { "dst": "0.0.0.0/0" },
          { "dst": "::/0" }
        ]
      }
    },
    {
      "type": "portmap",
      "capabilities": {"portMappings": true},
      "externalSetMarkChain": "KUBE-MARK-MASQ"
    }
  ]
}
EOF
# Enable bridge netfilter and IP forwarding
echo 1 >/proc/sys/net/bridge/bridge-nf-call-iptables
echo 1 >/proc/sys/net/ipv4/ip_forward
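The two echo commands above do not survive a reboot. A sketch of the usual way to persist them (file names under /etc/modules-load.d and /etc/sysctl.d are conventional, not mandatory):

```shell
# Load br_netfilter now and on every boot, and persist the sysctls
modprobe br_netfilter
echo br_netfilter | tee /etc/modules-load.d/k8s.conf
cat << EOF | tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
sysctl --system
```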
Initialize the cluster
kubeadm init --v=5 --image-repository=registry.cn-hangzhou.aliyuncs.com/google_containers
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
I0318 17:13:06.333130 30977 kubeletfinalize.go:135] [kubelet-finalize] Restarting the kubelet to enable client certificate rotation
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 10.100.60.28:6443 --token 8upfhx.vl9d3m5myie3t89m \
--discovery-token-ca-cert-hash sha256:8a34de03c83faeead828e6e5349cb66f597bb48f463384cdc21f296b253c88c3
Using kubectl proxy
kubectl proxy --port=8080 &
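kubectl proxy forwards local requests to the API server using the credentials from your kubeconfig, so you can query the REST API without attaching tokens yourself. For example, assuming the proxy above is running:

```shell
# Query the API server through the local proxy (no auth headers needed)
curl http://127.0.0.1:8080/version
curl http://127.0.0.1:8080/api/v1/namespaces/kube-system/pods
```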
If you didn't save the output of a successful kubeadm init, that's not a problem; kubeadm can list the existing tokens or generate new ones.
# List the existing tokens
$ kubeadm token list
TOKEN TTL EXPIRES USAGES DESCRIPTION EXTRA GROUPS
abcdef.0123456789abcdef 23h 2022-05-08T06:27:34Z authentication,signing system:bootstrappers:kubeadm:default-node-token
# If the token has expired, create a new one
$ kubeadm token create
pyab3u.j1a9ld7vk03znbk8
$ kubeadm token list
TOKEN TTL EXPIRES USAGES DESCRIPTION EXTRA GROUPS
abcdef.0123456789abcdef 23h 2022-05-08T06:27:34Z authentication,signing system:bootstrappers:kubeadm:default-node-token
pyab3u.j1a9ld7vk03znbk8 23h 2022-05-08T06:34:28Z authentication,signing system:bootstrappers:kubeadm:default-node-token
# If you can't find the --discovery-token-ca-cert-hash value, regenerate it on the master node with openssl
$ openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
d6cdc5a3bc40cbb0ae85776eb4fcdc1854942e2dd394470ae0f2f97714dd9fb9
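On recent kubeadm versions, `kubeadm token create --print-join-command` prints a complete join command, hash included. The openssl pipeline itself can be demonstrated on a throwaway self-signed certificate (the real cluster uses /etc/kubernetes/pki/ca.crt; the /tmp paths here are only for the demo):

```shell
# Generate a throwaway CA cert to run the pipeline against
openssl req -x509 -newkey rsa:2048 -nodes -subj "/CN=demo" -days 1 \
    -keyout /tmp/demo-ca.key -out /tmp/demo-ca.crt 2>/dev/null
# Same pipeline as above: public key -> DER -> sha256 -> strip the "(stdin)= " prefix
openssl x509 -pubkey -in /tmp/demo-ca.crt \
    | openssl rsa -pubin -outform der 2>/dev/null \
    | openssl dgst -sha256 -hex | sed 's/^.* //'
```

The result is a 64-character hex string, usable as `sha256:<hash>` in the join command.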
Deleting a node
kubectl delete node <node-name>
# Then run on the node itself
systemctl stop kubelet
systemctl stop containerd
rm -rf /var/lib/cni/
rm -rf /var/lib/kubelet/*
rm -rf /etc/cni/
ifconfig flannel.1 down
ip link delete flannel.1
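Most of the manual cleanup above is what `kubeadm reset` does for you. A hedged sketch of the usual removal sequence (`<node-name>` is a placeholder; `--delete-emptydir-data` requires a reasonably recent kubectl):

```shell
# On the control plane: evacuate the node, then remove it from the cluster
kubectl drain <node-name> --ignore-daemonsets --delete-emptydir-data
kubectl delete node <node-name>
# On the node itself: undo kubeadm's changes and clear leftover iptables rules
kubeadm reset
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
```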
Issues
1. CRI error during preflight
kubeadm init
[init] Using Kubernetes version: v1.29.3
[preflight] Running pre-flight checks
error execution phase preflight: [preflight] Some fatal errors occurred:
[ERROR CRI]: container runtime is not running: output: time="2024-03-18T16:26:13+08:00" level=fatal msg="validate service connection: validate CRI v1 runtime API for endpoint \"unix:///var/run/containerd/containerd.sock\": rpc error: code = Unimplemented desc = unknown service runtime.v1.RuntimeService"
, error: exit status 1
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
To see the stack trace of this error execute with --v=5 or higher
# Check the containerd version
containerd -v
containerd github.com/containerd/containerd 1.2.4
# Update containerd to a current release
``` shell
wget https://github.com/containerd/containerd/releases/download/v1.7.14/containerd-1.7.14-linux-amd64.tar.gz
tar -zxvf containerd-1.7.14-linux-amd64.tar.gz
mv bin/* /usr/local/bin/
```
# Create the systemd unit file
vi /etc/systemd/system/containerd.service
# Copyright The containerd Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=network.target local-fs.target
[Service]
ExecStartPre=-/sbin/modprobe overlay
ExecStart=/usr/local/bin/containerd
Type=notify
Delegate=yes
KillMode=process
Restart=always
RestartSec=5
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
# Comment TasksMax if your systemd version does not supports it.
# Only systemd 226 and above support this version.
TasksMax=infinity
OOMScoreAdjust=-999
[Install]
WantedBy=multi-user.target
systemctl daemon-reload
systemctl restart containerd
systemctl status containerd
# Verify the containerd version
containerd -v
containerd github.com/containerd/containerd v1.7.14 dcf2847247e18caba8dce86522029642f60fe96b
2. CoreDNS fails to start
kubectl get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system coredns-5f98f8d567-2c4g9 0/1 Terminating 0 61m
kube-system coredns-5f98f8d567-6c6sg 0/1 ContainerCreating 0 38m
kube-system coredns-5f98f8d567-h572m 0/1 ContainerCreating 0 61m
kubectl describe pod coredns-5f98f8d567-6c6sg -n kube-system
...
Events:
Type Reason Age From Message
---- ------ ---- ---- -------
Warning FailedScheduling 33m default-scheduler 0/1 nodes are available: 1 node(s) had untolerated taint {node.kubernetes.io/not-ready: }. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling.
Warning FailedScheduling 27m default-scheduler 0/2 nodes are available: 2 node(s) had untolerated taint {node.kubernetes.io/not-ready: }. preemption: 0/2 nodes are available: 2 Preemption is not helpful for scheduling.
Normal Scheduled 25m default-scheduler Successfully assigned kube-system/coredns-5f98f8d567-6c6sg to tx7-ops-beian-temp81.bj
Warning FailedCreatePodSandBox 25m kubelet Failed to create pod sandbox: rpc error: code = Unknown desc = failed to setup network for sandbox "377bcf7984f12632bf9c55704f85744283ce8b75f6396f815e17a1548181d236": plugin type="flannel" failed (add): failed to find plugin "flannel" in path [/opt/cni/bin]
Normal SandboxChanged 17s (x117 over 25m) kubelet Pod sandbox changed, it will be killed and re-created.
The configured CNI plugin cannot be found. Re-create the CNI config with the bridge plugin, as shown in the official containerd documentation:
cat << EOF | tee /etc/cni/net.d/10-containerd-net.conflist
{
  "cniVersion": "1.0.0",
  "name": "containerd-net",
  "plugins": [
    {
      "type": "bridge",
      "bridge": "cni0",
      "isGateway": true,
      "ipMasq": true,
      "promiscMode": true,
      "ipam": {
        "type": "host-local",
        "ranges": [
          [{
            "subnet": "10.88.0.0/16"
          }],
          [{
            "subnet": "2001:db8:4860::/64"
          }]
        ],
        "routes": [
          { "dst": "0.0.0.0/0" },
          { "dst": "::/0" }
        ]
      }
    },
    {
      "type": "portmap",
      "capabilities": {"portMappings": true},
      "externalSetMarkChain": "KUBE-MARK-MASQ"
    }
  ]
}
EOF
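Note that this conflist only works if the bridge and portmap plugin binaries actually exist under /opt/cni/bin. If they are missing as well, the reference plugins can be installed from the containernetworking/plugins releases (v1.4.1 below is an example version; pick a current one):

```shell
wget https://github.com/containernetworking/plugins/releases/download/v1.4.1/cni-plugins-linux-amd64-v1.4.1.tgz
mkdir -p /opt/cni/bin
tar -zxvf cni-plugins-linux-amd64-v1.4.1.tgz -C /opt/cni/bin
```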
Then restart containerd:
systemctl restart containerd
3. kubelet timeout during kubeadm init
...
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
I0318 17:07:14.119153 18564 waitcontrolplane.go:83] [wait-control-plane] Waiting for the API server to be healthy
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[kubelet-check] Initial timeout of 40s passed.
Unfortunately, an error has occurred:
timed out waiting for the condition
This error is likely caused by:
- The kubelet is not running
- The kubelet is unhealthy due to a misconfiguration of the node in some way (required cgroups disabled)
If you are on a systemd-powered system, you can try to troubleshoot the error with the following commands:
- 'systemctl status kubelet'
- 'journalctl -xeu kubelet'
Additionally, a control plane component may have crashed or exited when started by the container runtime.
# The kubelet failed to come up within the timeout
# First check hostname resolution
cat /etc/hosts
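A common cause is that the node's hostname does not resolve, so the kubelet cannot register. Make sure /etc/hosts maps the hostname to the node's IP; a sketch, where the address is an example taken from the join command above:

```shell
# Example IP -- substitute the node's real address
echo "10.100.60.28 $(hostname)" >> /etc/hosts
getent hosts "$(hostname)"   # should now print the mapping
```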
4. Node CNI error
kubectl describe node tx7-ops-beian-temp82.bj
Conditions:
Type Status LastHeartbeatTime LastTransitionTime Reason Message
---- ------ ----------------- ------------------ ------ -------
MemoryPressure False Mon, 18 Mar 2024 18:36:14 +0800 Mon, 18 Mar 2024 18:31:38 +0800 KubeletHasSufficientMemory kubelet has sufficient memory available
DiskPressure False Mon, 18 Mar 2024 18:36:14 +0800 Mon, 18 Mar 2024 18:31:38 +0800 KubeletHasNoDiskPressure kubelet has no disk pressure
PIDPressure False Mon, 18 Mar 2024 18:36:14 +0800 Mon, 18 Mar 2024 18:31:38 +0800 KubeletHasSufficientPID kubelet has sufficient PID available
Ready False Mon, 18 Mar 2024 18:36:14 +0800 Mon, 18 Mar 2024 18:31:38 +0800 KubeletNotReady container runtime network not ready: NetworkReady=false reason:NetworkPluginNotReady message:Network plugin returns error: cni plugin not initialized
Addresses:
...
Check the kubelet logs on the node
journalctl -u kubelet
kuberuntime_manager.go:258] "Container runtime initialized" containerRuntime="containerd" version="v1.7.14" apiVersion="v1"
The fix is the same as for issue 2 above.
5. Bridge netfilter / IP forwarding check fails
[ERROR FileContent--proc-sys-net-bridge-bridge-nf-call-iptables]: /proc/sys/net/bridge/bridge-nf-call-iptables does not exist
Fix:
modprobe br_netfilter
echo 1 > /proc/sys/net/bridge/bridge-nf-call-iptables
kubeadm join 10.100.60.67:6443 --token nuluvn.go8gi8wcbd38pt8j \
    --discovery-token-ca-cert-hash sha256:e6567fd2a447002313dcf7de669d6811045e191d61e005da7fdc256bebc8295d