From cca4f01d0d5dc494436f2f1405544a0e86ec171e Mon Sep 17 00:00:00 2001
From: "Mr.Luy" <9895493+mrluy@user.noreply.gitee.com>
Date: Mon, 5 Dec 2022 11:24:37 +0800
Subject: [PATCH] add_node01

---
 README.md                                          | 87 +++++++++++++++++++
 inventory_file                                     |  4 +
 playbooks/97.worker_join.yaml                      | 64 ++++++++++++++
 playbooks/98.node_banish.yaml                      | 14 +++
 .../roles/mindx.k8s.autolabel/tasks/main.yml       | 20 ++++-
 .../roles/mindx.k8s.node_delete/meta/main.yml      |  9 ++
 .../mindx.k8s.node_delete/tasks/main.yml           | 12 +++
 .../roles/mindx.k8s.uninstall/meta/main.yml        |  9 ++
 .../roles/mindx.k8s.uninstall/tasks/main.yml       | 21 +++++
 .../roles/mindx.k8s.worker/tasks/main.yml          |  5 +-
 .../roles/mindx.resource/tasks/main.yaml           |  1 +
 11 files changed, 243 insertions(+), 3 deletions(-)
 create mode 100644 playbooks/97.worker_join.yaml
 create mode 100644 playbooks/98.node_banish.yaml
 create mode 100644 playbooks/roles/mindx.k8s.node_delete/meta/main.yml
 create mode 100644 playbooks/roles/mindx.k8s.node_delete/tasks/main.yml
 create mode 100644 playbooks/roles/mindx.k8s.uninstall/meta/main.yml
 create mode 100644 playbooks/roles/mindx.k8s.uninstall/tasks/main.yml

diff --git a/README.md b/README.md
index db05b3c..2d352e2 100644
--- a/README.md
+++ b/README.md
@@ -602,6 +602,93 @@ playbooks/
 ```bash
 root@master:~/ascend-hccl-controller# ansible-playbook -i inventory_file playbooks/01.resource.yaml playbooks/99.chrony.yaml
 ```
+## Node scale-out and scale-in
+Goal: after the platform has been deployed with these scripts, add nodes to or remove nodes from the existing cluster.
+
+The 97.worker_join.yaml playbook joins worker nodes to the existing cluster; the 98.node_banish.yaml playbook evicts the specified nodes from it.
+
+### Joining nodes
+
+1. Fill in the existing cluster's [harbor] and [master] sections in inventory_file (if the cluster was installed with these scripts, keep the original configuration unchanged).
+
+[harbor] is the IP of the target cluster's harbor; [master] holds the target cluster's master information.
+
+2. Add the nodes to join under [worker_join] in inventory_file:
+```ini
+[harbor]
+localhost ansible_connection=local
+
+[nfs_server]
+localhost ansible_connection=local
+
+[master]
+localhost ansible_connection=local set_hostname="master" kube_interface="enp125s0f1" apiserver_advertise_address="195.0.3.99"
+
+[master_backup]
+192.0.3.100 set_hostname="master-backup-1" kube_interface="enp125s0f1" apiserver_advertise_address="195.0.3.100"
+
+[worker]
+192.0.2.50 set_hostname="worker-1"
+
+[worker_join]
+192.0.2.52 set_hostname="worker-3"
+```
+3. Fill in the all.yaml file in the group_vars directory, referring to step 3 above.
+
+If the cluster was installed with these scripts, keep the original configuration unchanged.
+
+If the configuration has been lost, fill in the harbor-related fields according to the actual harbor information, and keep MYSQL_PASSWORD, REDIS_PASSWORD and APIGW_LOADBALANCER_IP non-empty (any value will do).
+
+4. Run the following command to join the nodes to the cluster:
+```bash
+root@master:~/ascend-hccl-controller# ansible-playbook -i inventory_file playbooks/97.worker_join.yaml
+```
+If inventory_file already contains [worker] nodes, the apigw-business scheduling label is added to the first node in [worker].
+
+If there are no worker nodes, the label is added to the first node in [worker_join] instead.
+
+
+
+### Banishing nodes
+
+1. Fill in the master node information under [master] in inventory_file, and list the nodes to remove under [node_banish]:
+```ini
+[harbor]
+localhost ansible_connection=local
+
+[nfs_server]
+localhost ansible_connection=local
+
+[master]
+localhost ansible_connection=local set_hostname="master" kube_interface="enp125s0f1" apiserver_advertise_address="195.0.3.99"
+
+[master_backup]
+192.0.3.100 set_hostname="master-backup-1" kube_interface="enp125s0f1" apiserver_advertise_address="195.0.3.100"
+
+[worker]
+192.0.2.50 set_hostname="worker-1"
+192.0.2.51 set_hostname="worker-2"
+
+[worker_join]
+192.0.2.52 set_hostname="worker-3"
+
+[node_banish]
+192.0.2.51 set_hostname="worker-2"
+```
+
+2. Run the following command to evict the nodes from the cluster:
+```bash
+root@master:~/ascend-hccl-controller# ansible-playbook -i inventory_file playbooks/98.node_banish.yaml
+```
+**!!! Warning !!!**
+
+Banishing nodes is a high-risk operation: use this feature only after considering the cluster state and your actual needs.
+
+1. If the cluster's primary master node is banished, the platform and the whole cluster will break down.
+
+2. Banishing some of the worker or master nodes may leave parts of the platform's services unavailable.
+
 # FAQ
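A quick way to sanity-check either operation from the master node (a sketch, not part of the patch; the node names worker-3 and worker-2 simply follow the inventory examples above):

```bash
# After 97.worker_join.yaml: the joined node (e.g. worker-3) should appear and reach Ready
kubectl get nodes -o wide

# Exactly one worker should carry the apigw-business scheduling label set by mindx.k8s.autolabel
kubectl get nodes -l apigw-selector=apigw-business-worker-node

# After 98.node_banish.yaml: the banished node (e.g. worker-2) should no longer be listed
kubectl get nodes
```
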
diff --git a/inventory_file b/inventory_file
index face0d0..1c46c0c 100644
--- a/inventory_file
+++ b/inventory_file
@@ -10,3 +10,7 @@ localhost ansible_connection=local
 [master_backup]
 
 [worker]
+
+[worker_join]
+
+[node_banish]
diff --git a/playbooks/97.worker_join.yaml b/playbooks/97.worker_join.yaml
new file mode 100644
index 0000000..dae096a
--- /dev/null
+++ b/playbooks/97.worker_join.yaml
@@ -0,0 +1,64 @@
+---
+# node join k8s cluster
+
+# distribute resources
+- hosts:
+  - worker_join
+  roles:
+    - role: mindx.resource
+      vars:
+        resource_list:
+          - linux_aarch64
+          - linux_x86_64
+          - ubuntu_18.04_aarch64
+          - ubuntu_18.04_x86_64
+          - ubuntu_20.04_x86_64
+          - centos_7_aarch64
+          - openEuler_20.03_x86_64
+          - kylin_V10_x86_64
+
+# install docker
+- hosts:
+  - worker_join
+  roles:
+    - role: mindx.docker
+
+# docker login harbor
+- hosts:
+  - master
+  - worker_join
+  roles:
+    - role: mindx.harbor.login
+
+# set basic config for mindxdl
+- hosts:
+  - worker_join
+  tasks:
+    - include_tasks: roles/mindx.basic/tasks/common.yml
+
+- hosts: worker_join
+  gather_facts: False
+  tasks:
+    - include_tasks: roles/mindx.basic/tasks/worker.yml
+
+- hosts:
+  - harbor
+  - worker_join
+  tasks:
+    - name: set HARBOR_IP
+      include_tasks: task_set_harbor_ip.yaml
+
+# install k8s
+- hosts:
+  - worker_join
+  gather_facts: False
+  roles:
+    - role: mindx.k8s.install
+
+# worker join k8s
+- hosts: worker_join
+  gather_facts: False
+  roles:
+    - role: mindx.k8s.worker
+    - role: mindx.k8s.autolabel
+
diff --git a/playbooks/98.node_banish.yaml b/playbooks/98.node_banish.yaml
new file mode 100644
index 0000000..5c76090
--- /dev/null
+++ b/playbooks/98.node_banish.yaml
@@ -0,0 +1,14 @@
+---
+# k8s cluster banish nodes
+
+# nodes uninstall k8s
+- hosts:
+  - node_banish
+  roles:
+    - role: mindx.k8s.uninstall
+
+# k8s cluster delete node
+- hosts:
+  - node_banish
+  roles:
+    - role: mindx.k8s.node_delete
\ No newline at end of file
diff --git a/playbooks/roles/mindx.k8s.autolabel/tasks/main.yml b/playbooks/roles/mindx.k8s.autolabel/tasks/main.yml
index b4d4a48..5d1186c 100644
--- a/playbooks/roles/mindx.k8s.autolabel/tasks/main.yml
+++ b/playbooks/roles/mindx.k8s.autolabel/tasks/main.yml
@@ -73,10 +73,12 @@
     https_proxy: ""
     HTTP_PROXY: ""
     HTTPS_PROXY: ""
-  when: "'worker' not in groups or groups['worker'] | length == 0"
+  when:
+    - "'worker' not in groups or groups['worker'] | length == 0"
+    - "'worker_join' not in groups or groups['worker_join'] | length == 0"
 
 - name: label apigw-selector worker
-  shell: kubectl label --overwrite node {{ hostvars[groups['worker'][0]]['ansible_hostname'] }} apigw-selector=apigw-business-worker-node
+  shell: kubectl label --overwrite node {{ hostvars[groups['worker'][0]]['set_hostname'] }} apigw-selector=apigw-business-worker-node
   delegate_to: "{{ groups['master'][0] }}"
   delegate_facts: true
   run_once: true
@@ -86,3 +88,17 @@
     HTTP_PROXY: ""
     HTTPS_PROXY: ""
   when: "'worker' in groups and groups['worker'] | length != 0"
+
+- name: label apigw-selector worker_join
+  shell: kubectl label --overwrite node {{ hostvars[groups['worker_join'][0]]['set_hostname'] }} apigw-selector=apigw-business-worker-node
+  delegate_to: "{{ groups['master'][0] }}"
+  delegate_facts: true
+  run_once: true
+  environment:
+    http_proxy: ""
+    https_proxy: ""
+    HTTP_PROXY: ""
+    HTTPS_PROXY: ""
+  when:
+    - "'worker' not in groups or groups['worker'] | length == 0"
+    - "'worker_join' in groups and groups['worker_join'] | length != 0"
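For reference, a minimal manual equivalent of the labelling that the autolabel tasks above perform on the first master (a sketch, not part of the patch; the node names follow the README inventory examples):

```bash
# [worker] non-empty: its first node gets the apigw-business scheduling label
kubectl label --overwrite node worker-1 apigw-selector=apigw-business-worker-node

# [worker] empty but [worker_join] non-empty: the first joining node gets the label instead
kubectl label --overwrite node worker-3 apigw-selector=apigw-business-worker-node
```
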
diff --git a/playbooks/roles/mindx.k8s.node_delete/meta/main.yml b/playbooks/roles/mindx.k8s.node_delete/meta/main.yml
new file mode 100644
index 0000000..fe98e54
--- /dev/null
+++ b/playbooks/roles/mindx.k8s.node_delete/meta/main.yml
@@ -0,0 +1,9 @@
+galaxy_info:
+  role_name: mindx.k8s.node_delete
+  author: mindx
+  description: developer
+  license: Apache-2.0
+  min_ansible_version: 2.1
+  galaxy_tags:
+    - 'ascend'
+    - 'mindx'
diff --git a/playbooks/roles/mindx.k8s.node_delete/tasks/main.yml b/playbooks/roles/mindx.k8s.node_delete/tasks/main.yml
new file mode 100644
index 0000000..e7d63f3
--- /dev/null
+++ b/playbooks/roles/mindx.k8s.node_delete/tasks/main.yml
@@ -0,0 +1,12 @@
+- name: message
+  debug:
+    msg: "*************************start delete node***************************"
+
+- name: kubectl delete node
+  shell: kubectl delete node {{ set_hostname }}
+  delegate_to: "{{ groups['master'][0] }}"
+  delegate_facts: true
+  failed_when: false
+  when:
+    - inventory_hostname not in groups['master']
+
diff --git a/playbooks/roles/mindx.k8s.uninstall/meta/main.yml b/playbooks/roles/mindx.k8s.uninstall/meta/main.yml
new file mode 100644
index 0000000..602e3a1
--- /dev/null
+++ b/playbooks/roles/mindx.k8s.uninstall/meta/main.yml
@@ -0,0 +1,9 @@
+galaxy_info:
+  role_name: mindx.k8s.uninstall
+  author: ascend
+  description: developer
+  company: none
+  license: Apache-2.0
+  min_ansible_version: 2.1
+  galaxy_tags:
+    - 'ascend'
diff --git a/playbooks/roles/mindx.k8s.uninstall/tasks/main.yml b/playbooks/roles/mindx.k8s.uninstall/tasks/main.yml
new file mode 100644
index 0000000..2328b03
--- /dev/null
+++ b/playbooks/roles/mindx.k8s.uninstall/tasks/main.yml
@@ -0,0 +1,21 @@
+- name: message
+  debug:
+    msg: "*************************start node uninstall k8s***************************"
+
+- name: check kubelet service
+  shell: systemctl is-active kubelet | grep '^active$' | wc -l
+  register: kubelet_status
+
+- name: message
+  debug:
+    msg: "kubelet is inactive and may already be uninstalled"
+  when: kubelet_status.stdout == "0"
+
+- name: uninstall k8s for node
+  shell: kubeadm reset -f; iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X; systemctl restart docker
+  failed_when: false
+  when:
+    - ansible_connection != "local"
+    - inventory_hostname not in groups['master']
+
+
diff --git a/playbooks/roles/mindx.k8s.worker/tasks/main.yml b/playbooks/roles/mindx.k8s.worker/tasks/main.yml
index 2601ea5..7a56509 100644
--- a/playbooks/roles/mindx.k8s.worker/tasks/main.yml
+++ b/playbooks/roles/mindx.k8s.worker/tasks/main.yml
@@ -4,7 +4,10 @@
 
 - name: worker join k8s
   include_tasks: worker_join.yml
-  when: ansible_connection != "local" and inventory_hostname not in groups['master_backup']
+  when:
+    - ansible_connection != "local"
+    - (inventory_hostname in groups['worker'] or inventory_hostname in groups['worker_join'])
+    - inventory_hostname not in groups['master_backup']
 
 - name: label worker
   shell: |
diff --git a/playbooks/roles/mindx.resource/tasks/main.yaml b/playbooks/roles/mindx.resource/tasks/main.yaml
index ad452bf..5a44297 100644
--- a/playbooks/roles/mindx.resource/tasks/main.yaml
+++ b/playbooks/roles/mindx.resource/tasks/main.yaml
@@ -27,6 +27,7 @@
   run_once: true
 
 - name: unarchive on remote
+  ignore_errors: yes
   ansible.builtin.unarchive:
     src: "{{resource_dir}}/{{item}}.tar.gz"
     dest: "{{resource_dir}}"
--
Gitee
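For reference, a rough manual equivalent of the banish flow for worker-2 (a sketch, not part of the patch; the node name follows the inventory example, and the drain step is an optional extra that 98.node_banish.yaml itself does not perform, so pods on the node are otherwise stopped abruptly):

```bash
# Optional, on the master: evict pods gracefully first (not done by 98.node_banish.yaml)
kubectl drain worker-2 --ignore-daemonsets --delete-emptydir-data

# On worker-2: tear down local kubeadm/kubelet state and reset iptables, as mindx.k8s.uninstall does
kubeadm reset -f
iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
systemctl restart docker

# On the master: remove the node object from the cluster, as mindx.k8s.node_delete does
kubectl delete node worker-2
```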