目录

1. kubectl定制显示栏位

新建一个文件:custom-columns-depolyment-requests.txt

NAMESPACE                            NAME                                        Request_CPU 
metadata.namespace                   metadata.name                               spec.template.spec.containers[0].resources.requests.cpu

然后使用参数-o custom-columns-file=custom-columns-depolyment-requests.txt来查看:

[root@sh-saas-k8scs2-master-online-01 ~]# kubectl get deployments --all-namespaces -o custom-columns-file=custom-columns-depolyment-requests.txt  | grep -v -E "00m" | grep -v "none"
NAMESPACE                          NAME                                                        Request_CPU
public-oa-node-online              public-oa-weekly-service-node-online                        250m
saas-caiwu-tomcat-online           saas-caiwu-wmpay-adapter-tomcat-online                      1
saas-caiwu-tomcat-online           saas-caiwu-wmpay-service-tomcat-online                      1
saas-ec-tomcat-online              saas-ec-coupon-management-tomcat-online                     1
saas-ec-tomcat-online              saas-ec-discount-service-tomcat-online                      1
saas-ec-tomcat-online              saas-ec-guide-management-tomcat-online                      1
saas-ec-tomcat-online              saas-ec-merchant-management-tomcat-online                   1
saas-ec-tomcat-online              saas-ec-openapi-tomcat-online                               4
saas-ec-tomcat-online              saas-ec-platform-web-tomcat-online                          2
saas-ec-tomcat-online              saas-ec-promotion-service-tomcat-online                     1
saas-inter-tomcat-ol               saas-interactive-address-service-tomcat-online              1
saas-inter-tomcat-ol               saas-interactive-address-web-tomcat-online                  1
saas-inter-tomcat-ol               saas-interactive-logistics-service-tomcat-online            1
saas-inter-tomcat-ol               saas-interactive-logistics-web-tomcat-online                1
saas-jcpt-tomcat-online            saas-jcpt-uc-base-core-tomcat-online                        1
saas-jcpt-tomcat-online            saas-mc-base-core-service-tomcat-online                     1
saas-kf-tomcat-online              saas-kf-aal-tomcat-online                                   1
saas-kf-tomcat-online              saas-kf-base-tomcat-online                                  1

2. 批量禁止调度节点

# Cordon every node whose name contains the literal text 10.10.84.10.
# NOTE: this is a substring match, so 10.10.84.100-109 are selected as well.
# -F makes grep treat the dots literally; -I {} replaces the deprecated -i
# (and -L1 is dropped because xargs treats -L and -I as mutually exclusive).
# Step 1 - dry run: only print the commands that would be executed.
kubectl get nodes | awk '{print $1}' | grep -F '10.10.84.10' | xargs -I {} echo kubectl cordon {}
# Step 2 - really cordon the selected nodes.
kubectl get nodes | awk '{print $1}' | grep -F '10.10.84.10' | xargs -I {} kubectl cordon {}

3. 批量下架节点

先确保待下架的节点为SchedulingDisabled状态。

再执行以下批量删除节点脚本delete_node.sh:

#!/bin/sh
#
# delete_node.sh - remove every cordoned node from the cluster.
#
# For each node that `kubectl get nodes` reports as SchedulingDisabled:
#   1. drain it,
#   2. delete the node object,
#   3. wait $wait_time seconds,
#   4. delete the node's line from the Ansible inventory ($host_file).
# A timestamped copy of the inventory is saved before anything is changed.
# A node whose drain or delete fails is skipped and stays in the inventory.

host_file="/etc/ansible/hosts"
backup_stamp=$(date +"%Y%m%d%H%M")
wait_time=90

# Refuse to touch the cluster if the inventory cannot be backed up first.
if ! /bin/cp -rf -- "$host_file" "$host_file.$backup_stamp"; then
    echo "cannot back up $host_file, aborting" >&2
    exit 1
fi

# Select on the STATUS column (field 2) only, so that text elsewhere in the
# line cannot select a node by accident.
# Kubernetes node names contain no whitespace, so word-splitting the list is safe.
for node in $(kubectl get nodes --no-headers | awk '$2 ~ /SchedulingDisabled/ {print $1}')
do
    echo "kubectl drain --ignore-daemonsets --delete-local-data $node"
    # NOTE: kubectl 1.20+ deprecates --delete-local-data in favour of
    # --delete-emptydir-data; switch the flag when running a newer client.
    if ! kubectl drain --ignore-daemonsets --delete-local-data "$node"; then
        echo "drain of $node failed; node NOT deleted and kept in $host_file" >&2
        continue
    fi

    echo "kubectl delete nodes $node"
    if ! kubectl delete nodes "$node"; then
        echo "delete of $node failed; node kept in $host_file" >&2
        continue
    fi

    sleep "$wait_time"

    # Node names may contain dots (e.g. IP addresses). Escape them so that sed
    # removes only the exact line and not any line that happens to match '.'.
    node_re=$(printf '%s\n' "$node" | sed 's/\./\\./g')
    sed -i "/^${node_re}\$/d" "$host_file"
    # Dry run: drop -i from the sed command above to preview the result.
done

4. 批量升级集群脚本

#!/bin/sh
#
# Rolling cluster upgrade driven by the Ansible playbook 22.upgrade.yml
# (tag upgrade_k8s), run from /etc/ansible.
#
# Masters are upgraded first, one at a time. Worker nodes are then drained,
# upgraded and uncordoned one at a time.
# The script stops at the first failing step so that a broken upgrade is not
# rolled out to the remaining nodes; a worker whose drain or upgrade failed is
# left cordoned for inspection.

# Print a message on stderr and abort the script.
die() {
    echo "$*" >&2
    exit 1
}

# Print a command line, then execute it; returns the command's exit status.
run() {
    echo "$*"
    "$@"
}

cd /etc/ansible || die "cannot cd to /etc/ansible"

#update master
# Selects every line of `kubectl get nodes` containing "master"
# (in the node name or in any other column).
for node in $(kubectl get nodes | grep master | awk '{print $1}')
do
    run ansible-playbook -t upgrade_k8s 22.upgrade.yml --limit "$node" \
        || die "upgrade of master $node failed, stopping"

    sleep 30
done

#update node
# Selects every line of `kubectl get nodes` containing "node".
for node in $(kubectl get nodes | grep node | awk '{print $1}')
do
    # NOTE: kubectl 1.20+ deprecates --delete-local-data in favour of
    # --delete-emptydir-data; switch the flag when running a newer client.
    run kubectl drain --ignore-daemonsets --delete-local-data "$node" \
        || die "drain of $node failed, stopping (node may be left cordoned)"

    run ansible-playbook -t upgrade_k8s 22.upgrade.yml --limit "$node" \
        || die "upgrade of $node failed, stopping (node is left cordoned)"

    run kubectl uncordon "$node" \
        || die "uncordon of $node failed, stopping"

    sleep 10
done

5. 将应用固定到某些节点

# Dedicate the node: the NoSchedule taint keeps pods without a matching
# toleration off it, and the label lets a nodeSelector target it.
kubectl taint node 10.11.96.33 role=task:NoSchedule
kubectl label nodes 10.11.96.33 role=task

# Move the application onto the dedicated node
# (nodeSelector role=task plus a toleration for the taint).
kubectl patch deployments.apps \
  -n public-arch-tomcat-qa public-arch-ipservice-impl-tomcat-qa \
  -p '{"spec":{"template":{"spec": {"nodeSelector": {"role": "task"},"tolerations": [ {"effect": "NoSchedule","key": "role","value": "task"}]}}}}'

# Move the application back off the node
# (null removes both the nodeSelector and the tolerations).
kubectl patch deployments.apps \
  -n public-arch-tomcat-qa public-arch-ipservice-impl-tomcat-qa \
  -p '{"spec":{"template":{"spec": {"nodeSelector": null,"tolerations": null}}}}'

6. ubuntu18 POD内安装deb包

# Point apt at the Tencent Cloud mirror, then install the benchmark tools.
# The list is downloaded to a temporary file first and -f makes curl fail on
# HTTP errors, so a failed download can no longer leave /etc/apt/sources.list
# empty or filled with an error page. Each step runs only if the previous one
# succeeded.
curl -fsSL http://mirrors.cloud.tencent.com/repo/ubuntu18_sources.list -o /etc/apt/sources.list.new \
  && mv /etc/apt/sources.list.new /etc/apt/sources.list \
  && apt-get update \
  && apt-get install sysbench mbw

7. 内存性能测试

root@saas-jcpt-uc-base-core-tomcat-online-5786884b9f-jln2k:/etc/apt# apt-get install mbw
Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following NEW packages will be installed:
  mbw
0 upgraded, 1 newly installed, 0 to remove and 12 not upgraded.
Need to get 6776 B of archives.
After this operation, 23.6 kB of additional disk space will be used.
Get:1 http://mirrors.cloud.tencent.com/ubuntu bionic/universe amd64 mbw amd64 1.2.2-1build1 [6776 B]
Fetched 6776 B in 0s (51.0 kB/s)
debconf: delaying package configuration, since apt-utils is not installed
Selecting previously unselected package mbw.
(Reading database ... 7403 files and directories currently installed.)
Preparing to unpack .../mbw_1.2.2-1build1_amd64.deb ...
Unpacking mbw (1.2.2-1build1) ...
Setting up mbw (1.2.2-1build1) ...

root@saas-jcpt-uc-base-core-tomcat-online-5786884b9f-jln2k:/etc/apt# mbw -n 10 256
Long uses 8 bytes. Allocating 2*33554432 elements = 536870912 bytes of memory.
Using 262144 bytes as blocks for memcpy block copy test.
Getting down to business... Doing 10 runs per test.
0       Method: MEMCPY  Elapsed: 0.05202        MiB: 256.00000  Copy: 4921.468 MiB/s
1       Method: MEMCPY  Elapsed: 0.04936        MiB: 256.00000  Copy: 5186.596 MiB/s
2       Method: MEMCPY  Elapsed: 0.04870        MiB: 256.00000  Copy: 5256.134 MiB/s
3       Method: MEMCPY  Elapsed: 0.05055        MiB: 256.00000  Copy: 5063.992 MiB/s
4       Method: MEMCPY  Elapsed: 0.04870        MiB: 256.00000  Copy: 5256.781 MiB/s
5       Method: MEMCPY  Elapsed: 0.05087        MiB: 256.00000  Copy: 5032.238 MiB/s
6       Method: MEMCPY  Elapsed: 0.05141        MiB: 256.00000  Copy: 4979.576 MiB/s
7       Method: MEMCPY  Elapsed: 0.05064        MiB: 256.00000  Copy: 5055.192 MiB/s


sh-4.4# sysbench --memory-total-size=512M --memory-block-size=8K  memory run
sysbench 1.0.11 (using system LuaJIT 2.1.0-beta3)

Running the test with following options:
Number of threads: 1
Initializing random number generator from current time


Running memory speed test with the following options:
  block size: 8KiB
  total size: 512MiB
  operation: write
  scope: global

Initializing worker threads...

Threads started!

Total operations: 65536 (1387580.79 per second)

512.00 MiB transferred (10840.47 MiB/sec)


General statistics:
    total time:                          0.0455s
    total number of events:              65536

Latency (ms):
         min:                                  0.00
         avg:                                  0.00
         max:                                  0.03
         95th percentile:                      0.00
         sum:                                 35.89

Threads fairness:
    events (avg/stddev):           65536.0000/0.00
    execution time (avg/stddev):   0.0359/0.00

8. kubectl获取监控信息

通过metrics读取:

# Read node / pod usage from the metrics API (metrics.k8s.io).
# Set NODE_NAME, NAMESPACE and POD_NAME first; ${VAR:?} reports an error
# instead of sending a malformed request when one of them is unset.
# "jq ." pretty-prints the JSON; the explicit filter is required by older jq.
kubectl get --raw "/apis/metrics.k8s.io/v1beta1/nodes/${NODE_NAME:?}" | jq .

kubectl get --raw "/apis/metrics.k8s.io/v1beta1/namespaces/${NAMESPACE:?}/pods/${POD_NAME:?}" | jq .

直接读kubelet的metrics:

# Fetch the kubelet's own metrics and its cAdvisor metrics for one node
# through the API server's node proxy.
kubelet_node=10.12.97.25
for metrics_path in metrics metrics/cadvisor; do
  kubectl get --raw "/api/v1/nodes/${kubelet_node}/proxy/${metrics_path}"
done

9. 批量将多个副本的应用POD数调成一个

# Print "<namespace> <name>" for every deployment that asks for more than one
# replica.
# stdin:  output of `kubectl get deployments.apps -A`
#         (columns NAMESPACE NAME READY ...; READY is "<ready>/<desired>").
# Lines matching the exclusion list anywhere in the line (this also drops the
# header line) are skipped.
# The desired count is compared numerically: substring filters such as
# grep -v '0/1' would also throw away READY values like 10/10 or 1/10.
select_multi_replica() {
  awk -v skip='weimob-ops|thanos|public-arch-java-dev|kube-system|k8s-csi-tencentcloud|istio-system|NAMESPACE' '
    $0 ~ skip { next }
    { split($3, ready, "/"); if (ready[2] + 0 > 1) print $1, $2 }
  '
}

# xargs appends "<namespace> <name>" after the trailing -n; -r skips the
# kubectl call entirely when nothing was selected.
kubectl get deployments.apps -A | select_multi_replica | xargs -r -L1 kubectl patch deployments.apps -p '{"spec":{"replicas":1}}' -n

或者:

# Print "<namespace> <name>" for every deployment that asks for more than one
# replica.
# stdin:  output of `kubectl get deployments.apps -A`
#         (columns NAMESPACE NAME READY ...; READY is "<ready>/<desired>").
# Lines matching the exclusion list anywhere in the line (this also drops the
# header line) are skipped.
# The desired count is compared numerically: substring filters such as
# grep -v '0/1' would also throw away READY values like 10/10 or 1/10.
select_multi_replica() {
  awk -v skip='weimob-ops|thanos|public-arch-java-dev|kube-system|k8s-csi-tencentcloud|istio-system|NAMESPACE' '
    $0 ~ skip { next }
    { split($3, ready, "/"); if (ready[2] + 0 > 1) print $1, $2 }
  '
}

# xargs appends "<namespace> <name>" after the trailing -n; -r skips the
# kubectl call entirely when nothing was selected.
kubectl get deployments.apps -A | select_multi_replica | xargs -r -L1 kubectl scale --replicas=1 deployment -n

注意:xargs 使用 -i(即 -I {})时,会把整行"命名空间    名称"当作一个参数替换到 {} 处,而不会拆成两个参数,kubectl 因此会报错。所以这里改用 -L1,并把 -n 放在命令的最后,让 xargs 把每行的两个字段依次追加到命令末尾。

10. 批量关闭经常重启的应用

#先新建一个custom-pod-list.txt文件:
[root@sh-saas-k8s1-master-qa-01 ~]# cat custom-pod-list.txt
NAMESPACE              APP                              NAME                       restartCount 
metadata.namespace     metadata.labels.app              metadata.name              status.containerStatuses[0].restartCount 

[root@sh-saas-k8s1-master-qa-01 ~]# kubectl  get pod  -o custom-columns-file=custom-pod-list.txt -A | awk '$4 > 40 {print $0}' | grep -v -E '<none>|NAMESPACE'
default                        predator                                                 predator-6cb546b45c-bmldm                                         43
public-alg-tomcat-qa           public-alg-inforecommend-web-tomcat-qa                   public-alg-inforecommend-web-tomcat-qa-68df6fff5-bcxf7            65
public-arch-java-qa            public-arch-dubbo-consuming-java-qa                      public-arch-dubbo-consuming-java-qa-594dccb49f-jwrpx              3271
public-arch-java-qa            public-arch-dubbo-consuming-java-qa                      public-arch-dubbo-consuming-java-qa-64ddd7dfc5-8j89s              3267
public-fe-node-qa              public-fe-o2o-node-qa                                    public-fe-o2o-node-qa-848fb54c64-h6bkr                            89
public-fe-node-qa              public-fe-zhan-node-qa                                   public-fe-zhan-node-qa-5cf848cfc9-wbrgk                           1206
public-tjpm-java-qa            public-tjpm-cloud-mercury-es-java-qa                     public-tjpm-cloud-mercury-es-java-qa-65549b7b74-8xjm9             1441
public-tjpm-java-qa            public-tjpm-develop01-thor-monitor-java-qa               public-tjpm-develop01-thor-monitor-java-qa-0                      1426
public-tjpm-tomcat-qa          public-tjpm-aries-tomcat-qa                              public-tjpm-aries-tomcat-qa-56fc4f9c6d-td8mw                      1344
qa-default                     requirement-management-service                           requirement-management-service-java-qa-6cb4df9f95-2ctfs           2964
qa-default                     requirement-management-service                           requirement-management-service-qa-default-7874f6bb7b-mtns6        3336
qa-java-qa                     requirement-management-service                           requirement-management-service-java-qa-7448b7dbc5-48gz6           2971
saas-caiwu-java-qa             saas-caiwu-fin-elec-invoice-task-java-qa                 saas-caiwu-fin-elec-invoice-task-java-qa-6fcbdfbd88-9kks6         47
saas-cdp-java-qa               saas-jcpt-cdp-label-submit-service-java-qa               saas-jcpt-cdp-label-submit-service-java-qa-5c5b85d84c-jqchh       1423
saas-ec-tomcat-qa              saas-ec-draw-marketing-management-tomcat-qa              saas-ec-draw-marketing-management-tomcat-qa-685ffdcb44-c7dsl      1534
saas-ec-tomcat-qa              saas-ec-erp-bala-service-tomcat-qa                       saas-ec-erp-bala-service-tomcat-qa-74cb46fd5c-t26s9               1537
saas-ec-tomcat-qa              saas-ec-navigation-ext-web-tomcat-qa-183                 saas-ec-navigation-ext-web-tomcat-qa-183-598d689cd7-dlxvv         5090
saas-ec-tomcat-qa              saas-ec-navigation-ext-web-tomcat-qa-183                 saas-ec-navigation-ext-web-tomcat-qa-183-7d9ddfd95-wxjtx          1442
saas-ec-tomcat-qa              saas-ec-recommend-alg-service-tomcat-qa                  saas-ec-recommend-alg-service-tomcat-qa-8676d4cc8f-t2jk9          1802
saas-ec-tomcat-qa              saas-ec-scrm-activity-core-service-tomcat-qa             saas-ec-scrm-activity-core-service-tomcat-qa-7688f85bd5-hptbb     244
saas-inter-java-qa             saas-interactive-lcode-prize-service-java-qa             saas-interactive-lcode-prize-service-java-qa-6589db96fc-j2vmq     1419
saas-inter-java-qa             saas-interactive-lcode-prize-service-java-qa             saas-interactive-lcode-prize-service-java-qa-c757d758f-2rv6l      1563
saas-inter-tomcat-qa           saas-interactive-activity-management-server-tomcat-qa    saas-interactive-activity-management-server-tomcat-qa-7bfc5w69k   3274
saas-inter-tomcat-qa           saas-interactive-activity-management-server-tomcat-qa    saas-interactive-activity-management-server-tomcat-qa-84d4hwfxq   5042
saas-inter-tomcat-qa           saas-interactive-activitymanagement-service-tomcat-qa    saas-interactive-activitymanagement-service-tomcat-qa-58c8nc7pr   3284
saas-jcpt-tomcat-qa            saas-jcpt-scrm-senew-search-service-tomcat-qa            saas-jcpt-scrm-senew-search-service-tomcat-qa-7fd65d5b9b-cr6dh    3234
saas-live-java-qa              saas-live-finance-service-java-qa                        saas-live-finance-service-java-qa-7678fddcbc-2k9fr                3234
saas-mall-tomcat-qa            saas-mall-order-management-tomcat-qa                     saas-mall-order-management-tomcat-qa-69fc9c94ff-wl9v6             71
saas-mall-tomcat-qa            saas-mall-order-management-tomcat-qa                     saas-mall-order-management-tomcat-qa-c7cc976d5-9dsnc              76
saas-mcloud-java-qa            saas-mcloud-cdp-appendable-sync-service-java-qa          saas-mcloud-cdp-appendable-sync-service-java-qa-669cfbb6fcmthch   1432
saas-mcloud-java-qa            saas-mcloud-cdp-appendable-sync-service-java-qa          saas-mcloud-cdp-appendable-sync-service-java-qa-d56f98c9c-rnhb9   1426
saas-mcloud-java-qa            saas-mcloud-cdp-http-history-sync-service-java-qa        saas-mcloud-cdp-http-history-sync-service-java-qa-7957d49f5hqc2   3258
saas-mcloud-java-qa            saas-mcloud-uc-cdp-tag-core-task-new-java-qa             saas-mcloud-uc-cdp-tag-core-task-new-java-qa-84dcfcfb6d-vvdbx     2505
saas-mp-java-qa                saas-mp-rule-engine-java-qa                              saas-mp-rule-engine-java-qa-6ff8cffd58-m7h5q                      1434
saas-mp-tomcat-qa              saas-mp-channel-biz-service-tomcat-qa                    saas-mp-channel-biz-service-tomcat-qa-6f64c577db-q5lqx            1296
saas-o2o-group-tomcat-qa       saas-o2o-public-cip-caas4api-tomcat-qa                   saas-o2o-public-cip-caas4api-tomcat-qa-7b499fcf58-8phpm           1423
saas-o2o-tomcat-qa             saas-o2o-public-goods-services-job-tomcat-qa             saas-o2o-public-goods-services-job-tomcat-qa-56785fdcb7-6f4pq     3616
saas-o2o-yzhy-tomcat-qa        saas-o2o-yzhy-yazuo-trade-report-center-api-tomcat-qa    saas-o2o-yzhy-yazuo-trade-report-center-api-tomcat-qa-7f4b8q7l2   1846
saas-yhcd-tomcat-qa            saas-yhcd-mengdian-push-service-tomcat-qa                saas-yhcd-mengdian-push-service-tomcat-qa-56b7b67bd-sj5l4         1530

[root@sh-saas-k8s1-master-qa-01 ~]# kubectl  get pod  -o custom-columns-file=custom-pod-list.txt -A | awk '$4 > 40 {print $0}' | grep -v -E '<none>|NAMESPACE' | awk '{print $1"    "$2}' | xargs -L1 kubectl scale --replicas=0 deployment -n 

11. 资源删除删不掉时的处理

例如下面这个PVC就删不掉:它后端的腾讯云CFS已经被删除,此时在集群里删除这个PVC会一直卡住。

[root@sh5-saas-k8scs1-node-online-194 data]# kubectl  get pvc -n public-devops-golang-online   public-devops-share-storage-test-other-online-pvc -o yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  annotations:
    pv.kubernetes.io/bind-completed: "yes"
    volume.beta.kubernetes.io/storage-provisioner: com.tencent.cloud.csi.cfs
  creationTimestamp: "2021-08-09T05:53:52Z"
  deletionGracePeriodSeconds: 0
  deletionTimestamp: "2021-12-22T07:38:55Z"
  finalizers:
  - kubernetes.io/pvc-protection
  labels:
    app: public-devops-share-storage-test-other-online
  name: public-devops-share-storage-test-other-online-pvc
  namespace: public-devops-golang-online
spec:
  accessModes:
  - ReadWriteMany
  resources:
    requests:
      storage: 30Gi
  storageClassName: cfs-sd
  volumeMode: Filesystem
  volumeName: pvc-f8c1db81-e325-4d35-8b4c-627e10c16c89
status:
  accessModes:
  - ReadWriteMany
  capacity:
    storage: 30Gi
  phase: Bound

[root@sh5-saas-k8scs1-node-online-194 data]# kubectl delete pvc -n public-devops-golang-online public-devops-share-storage-test-other-online-pvc
persistentvolumeclaim "public-devops-share-storage-test-other-online-pvc" deleted
^C
[root@sh5-saas-k8scs1-node-online-194 data]# kubectl delete pvc -n public-devops-golang-online public-devops-share-storage-test-other-online-pvc --force
warning: Immediate deletion does not wait for confirmation that the running resource has been terminated. The resource may continue to run on the cluster indefinitely.
persistentvolumeclaim "public-devops-share-storage-test-other-online-pvc" force deleted
^C

可以发现强制删除也不行,主要原因是该PVC上加了finalizers,需要先把finalizers去掉:

[root@sh5-saas-k8scs1-node-online-194 data]# kubectl patch pvc -n public-devops-golang-online public-devops-share-storage-test-other-online-pvc -p '{"metadata":{"finalizers":null}}'
persistentvolumeclaim/public-devops-share-storage-test-other-online-pvc patched
[root@sh5-saas-k8scs1-node-online-194 data]# kubectl delete pvc -n public-devops-golang-online public-devops-share-storage-test-other-online-pvc
Error from server (NotFound): persistentvolumeclaims "public-devops-share-storage-test-other-online-pvc" not found

这样就可以删掉了。

12. 使用ServiceAccount Token配置kubeconfig

# Register the cluster endpoint.
# NOTE: --insecure-skip-tls-verify=true turns off verification of the API
# server's certificate.
kubectl config set-cluster k8s-dev \
  --insecure-skip-tls-verify=true \
  --server=https://k8s-apiserver.internal.weimobdev.com:8443

# Register a user that authenticates with the ServiceAccount token
# (replace {token} with the value printed by the last command below).
kubectl config set-credentials k8s-dev-readonly \
  --token={token}

# Create a context that ties the cluster and the user together.
kubectl config set-context k8s-dev-readonly \
  --user=k8s-dev-readonly \
  --cluster=k8s-dev

# Make that context the active one.
kubectl config use-context k8s-dev-readonly

# Look up the token: read it from the ServiceAccount secret and base64-decode it.
kubectl get secret oms-readonly-token-wzp6b -n kube-system \
  -o jsonpath='{.data.token}' \
  | base64 -d